Esempio n. 1
0
def search(request):
    """Run the express search described by the request's GET parameters.

    The parameters are validated with SearchForm; when validation fails the
    form errors are returned as JSON in an HttpResponseBadRequest.  Otherwise
    every 'tables/searchtable' entry of the express search config is searched
    (or only the entry whose tableName matches the 'name' parameter, compared
    case-insensitively, when that parameter is non-empty) and the per-table
    result mappings are merged into one JSON object.
    """
    form = SearchForm(request.GET)
    if not form.is_valid():
        return HttpResponseBadRequest(toJson(form.errors),
                                      content_type='application/json')
    params = form.cleaned_data
    logger.debug("parameters: %s", params)

    collection = request.specify_collection
    config = get_express_search_config(collection, request.specify_user)
    terms = parse_search_str(collection, params['q'])
    wanted_table = params['name'].lower()
    limit = params['limit']
    offset = params['offset']

    def is_selected(searchtable):
        "An empty 'name' parameter selects every configured table."
        if wanted_table == "":
            return True
        return searchtable.find('tableName').text.lower() == wanted_table

    merged = {}
    with models.session_context() as session:
        for searchtable in config.findall('tables/searchtable'):
            if not is_selected(searchtable):
                continue
            table_result = run_primary_search(session, searchtable, terms,
                                              collection, limit, offset)
            # Later tables overwrite earlier ones on duplicate keys.
            merged.update(table_result.items())

        return HttpResponse(toJson(merged), content_type='application/json')
Esempio n. 2
0
def related_search(request):
    """Execute the related search named by the request's 'name' parameter.

    The GET parameters are validated with RelatedSearchForm; invalid input
    yields an HttpResponseBadRequest carrying the form errors as JSON.  The
    search object is looked up by name in the related_searches module and its
    execute() result is returned as a JSON response.
    """
    from . import related_searches

    form = RelatedSearchForm(request.GET)
    if not form.is_valid():
        return HttpResponseBadRequest(toJson(form.errors),
                                      content_type='application/json')
    params = form.cleaned_data
    logger.debug("parameters: %s", params)

    # NOTE(review): the attribute name comes from the request; presumably
    # RelatedSearchForm restricts it to known searches -- verify.
    searcher = getattr(related_searches, params['name'])

    collection = request.specify_collection
    user = request.specify_user
    config = get_express_search_config(collection, user)
    terms = parse_search_str(collection, params['q'])

    with models.session_context() as session:
        result = searcher.execute(session,
                                  config,
                                  terms,
                                  collection=collection,
                                  user=user,
                                  offset=params['offset'],
                                  limit=params['limit'])

        return HttpResponse(toJson(result), content_type='application/json')
Esempio n. 3
0
def tree_view(request, treedef, tree, parentid, sortfield):
    """Return the children of one tree node as a JSON list of rows.

    Parameters:
        request: the incoming request (not otherwise read here).
        treedef: id of the tree definition; converted with int().
        tree: name of the tree table, resolved through datamodel.get_table.
        parentid: id of the parent node, or the string 'null' to select
            nodes whose ParentID is NULL.
        sortfield: column of the tree table to order the rows by.  Must be a
            bare identifier (ASCII letters, digits, underscore).

    Each row holds: node id, name, fullName, nodeNumber,
    highestChildNodeNumber, rankId, AcceptedID, the fullName of the accepted
    node, and the number of children of the node.

    Returns an HttpResponseBadRequest when sortfield is not a bare
    identifier.  int() raises ValueError for a non-numeric treedef or
    parentid, exactly as before.
    """
    import re

    # sortfield is concatenated into a textual ORDER BY clause below, so it
    # must never carry anything except a plain column name.  \Z (rather than
    # $) also rejects a trailing newline.
    if not re.match(r'[A-Za-z0-9_]+\Z', sortfield):
        return HttpResponseBadRequest(
            toJson({'sortfield': ['not a valid column name']}),
            content_type='application/json')

    tree_table = datamodel.get_table(tree)
    parentid = None if parentid == 'null' else int(parentid)

    node = getattr(models, tree_table.name)
    child = aliased(node)
    accepted = aliased(node)
    id_col = getattr(node, node._id)
    child_id = getattr(child, node._id)
    treedef_col = getattr(node, tree_table.name + "TreeDefID")
    # Safe to build as text only because sortfield was validated above.
    orderby = tree_table.name.lower() + '.' + sortfield

    with models.session_context() as session:
        query = session.query(id_col,
                              node.name,
                              node.fullName,
                              node.nodeNumber,
                              node.highestChildNodeNumber,
                              node.rankId,
                              node.AcceptedID,
                              accepted.fullName,
                              sql.functions.count(child_id)) \
                        .outerjoin(child, child.ParentID == id_col) \
                        .outerjoin(accepted, node.AcceptedID == getattr(accepted, node._id)) \
                        .group_by(id_col) \
                        .filter(treedef_col == int(treedef)) \
                        .filter(node.ParentID == parentid) \
                        .order_by(orderby)
        results = list(query)

    return HttpResponse(toJson(results), content_type='application/json')
Esempio n. 4
0
def tree_stats(request, treedef, tree, parentid):
    """Return per-child usage counts for the children of a tree node as JSON.

    For every node whose ParentID equals parentid ('null' selects a NULL
    parent) a row of (node id, direct count, total count) is produced.
    Descendants are the rows of the same tree definition whose nodeNumber
    lies between the node's nodeNumber and highestChildNodeNumber.  The
    counted target table and the joins reaching it are supplied by the
    StatsQuerySpecialization attribute named after the tree.
    """
    tree_table = datamodel.get_table(tree)
    if parentid == 'null':
        parent_pk = None
    else:
        parent_pk = int(parentid)

    node = getattr(models, tree_table.name)
    descendant = aliased(node)
    pk_name = node._id
    node_pk = getattr(node, pk_name)
    descendant_pk = getattr(descendant, pk_name)
    treedef_attr = tree_table.name + "TreeDefID"

    in_same_tree = getattr(descendant, treedef_attr) == int(treedef)
    within_node_range = sql.between(descendant.nodeNumber,
                                    node.nodeNumber,
                                    node.highestChildNodeNumber)
    is_descendant = sql.and_(within_node_range, in_same_tree)

    target, make_joins = getattr(StatsQuerySpecialization, tree)()
    target_pk = getattr(target, target._id)

    # A row counts as "direct" when it has a target and the joined descendant
    # is the node itself.  `!= None` is deliberate: it is a SQL expression,
    # not a Python identity test.
    is_direct = sql.and_(target_pk != None, descendant_pk == node_pk)
    direct_count = sql.cast(
        sql.func.sum(sql.case([(is_direct, 1)], else_=0)),
        types.Integer)

    total_count = sql.func.count(target_pk)

    with models.session_context() as session:
        query = session.query(node_pk, direct_count, total_count)
        query = query.join(descendant, is_descendant)
        query = query.filter(node.ParentID == parent_pk)
        query = query.group_by(node_pk)

        query = make_joins(request.specify_collection, query, descendant_pk)
        results = list(query)

    return HttpResponse(toJson(results), content_type='application/json')
Esempio n. 5
0
def tree_view(request, treedef, tree, parentid):
    """Return the children of one tree node as a JSON list of rows.

    Selects the nodes of tree definition treedef whose ParentID equals
    parentid (the string 'null' selects a NULL parent), ordered by name.
    Each row holds: node id, name, fullName, nodeNumber,
    highestChildNodeNumber, rankId and the number of children of the node.
    """
    tree_table = datamodel.get_table(tree)
    if parentid == 'null':
        parent_pk = None
    else:
        parent_pk = int(parentid)

    node = getattr(models, tree_table.name)
    child = aliased(node)
    pk_name = node._id
    node_pk = getattr(node, pk_name)
    child_pk = getattr(child, pk_name)
    treedef_col = getattr(node, tree_table.name + "TreeDefID")

    columns = (
        node_pk,
        node.name,
        node.fullName,
        node.nodeNumber,
        node.highestChildNodeNumber,
        node.rankId,
        # The outer join keeps childless nodes, whose count is then 0.
        sql.functions.count(child_pk),
    )

    with models.session_context() as session:
        query = session.query(*columns)
        query = query.outerjoin(child, child.ParentID == node_pk)
        query = query.group_by(node_pk)
        query = query.filter(treedef_col == int(treedef))
        query = query.filter(node.ParentID == parent_pk)
        query = query.order_by(node.name)
        results = list(query)

    return HttpResponse(toJson(results), content_type='application/json')
Esempio n. 6
0
def tree_stats(request, treedef, tree, parentid):
    """Return per-child usage counts for the children of a tree node as JSON.

    For every child of parentid ('null' selects a NULL parent) in tree
    definition treedef, a row of (child id, count at the child itself, count
    summed over the child and all joined descendant levels) is produced.  The
    counted target table is chosen by the StatsQuerySpecialization method
    named after the tree.
    """
    tree_table = datamodel.get_table(tree)
    if parentid == 'null':
        parent_pk = None
    else:
        parent_pk = int(parentid)
    treedef_attr = tree_table.name + "TreeDefID"

    tree_node = getattr(models, tree_table.name)
    child = aliased(tree_node)

    def pk_of(entity):
        "Return the ID column of entity, looked up through its own _id."
        return getattr(entity, entity._id)

    def count_distinct(table):
        "Build a COUNT(DISTINCT id) clause over the ID column of table."
        return sql.func.count(distinct(pk_of(table)))

    def make_joins(depth, query):
        "Join depth further tree levels, then the target table at each level."
        levels = [child]
        for _ in range(depth):
            deeper = aliased(tree_node)
            on_parent = deeper.ParentID == getattr(levels[-1], tree_node._id)
            query = query.outerjoin(deeper, on_parent)
            levels.append(deeper)

        # The target table is the one whose distinct IDs get counted,
        # e.g. collection object.
        specialization = StatsQuerySpecialization(request.specify_collection)
        make_target_joins = getattr(specialization, tree)
        targets = []
        for level in levels:
            query, target = make_target_joins(query, pk_of(level))
            targets.append(target)

        # Built as a separate expression from the ones summed below, so the
        # two added columns are never the same object.
        at_child = count_distinct(targets[0])
        across_levels = reduce(lambda l, r: l + r,
                               [count_distinct(t) for t in targets])
        return query.add_columns(at_child, across_levels)

    with models.session_context() as session:
        # The join depth only has to reach the bottom of the tree: the number
        # of distinct rankId values not less than the smallest rankId among
        # the children of parentid.
        top_child_rank = session.query(sql.func.min(tree_node.rankId))
        top_child_rank = top_child_rank.filter(
            tree_node.ParentID == parent_pk).as_scalar()

        depth_query = session.query(
            sql.func.count(distinct(tree_node.rankId)))
        depth_query = depth_query.filter(tree_node.rankId >= top_child_rank)
        (depth,) = depth_query[0]

        child_pk = pk_of(child)
        query = session.query(child_pk)
        query = query.filter(child.ParentID == parent_pk)
        query = query.filter(getattr(child, treedef_attr) == int(treedef))
        query = query.group_by(child_pk)

        results = list(make_joins(depth, query))

    return HttpResponse(toJson(results), content_type='application/json')
Esempio n. 7
0
def tree_stats(request, treedef, tree, parentid):
    """Compute usage counts for each child of a tree node and return JSON.

    Rows are (child id, distinct targets at the child, distinct targets
    summed over the child and every joined descendant level).  Children are
    the nodes of tree definition treedef whose ParentID equals parentid; the
    string 'null' stands for a NULL parent.  The target joins come from the
    StatsQuerySpecialization method named after the tree.
    """
    tree_table = datamodel.get_table(tree)
    if parentid == 'null':
        parent_key = None
    else:
        parent_key = int(parentid)
    treedef_field = tree_table.name + "TreeDefID"

    tree_node = getattr(models, tree_table.name)
    child = aliased(tree_node)

    def count_distinct(table):
        "Return a count-distinct clause on the ID field of table."
        id_column = getattr(table, table._id)
        return sql.func.count(distinct(id_column))

    def join_levels(depth, query):
        "Outer-join depth aliases of the tree table, each under the previous."
        chain = [child]
        upper = child
        for _ in range(depth):
            lower = aliased(tree_node)
            query = query.outerjoin(
                lower, lower.ParentID == getattr(upper, tree_node._id))
            chain.append(lower)
            upper = lower
        return query, chain

    def make_joins(depth, query):
        "Depth is the number of tree level joins to be made."
        query, descendants = join_levels(depth, query)

        # The target table is the one distinct IDs are counted on,
        # e.g. collection object.
        make_target_joins = getattr(
            StatsQuerySpecialization(request.specify_collection), tree)
        targets = []
        for level in descendants:
            level_id = getattr(level, level._id)
            query, target = make_target_joins(query, level_id)
            targets.append(target)

        # First column: distinct target ids at the immediate level.
        immediate = count_distinct(targets[0])
        # Second column: the per-level counts added together.
        per_level = [count_distinct(target) for target in targets]
        total = reduce(lambda l, r: l + r, per_level)

        return query.add_columns(immediate, total)

    with models.session_context() as session:
        # Enough joins to reach the bottom of the tree: one per distinct
        # rankId value that is not less than the lowest rankId found among
        # the children of parentid.
        highest_rank = session.query(sql.func.min(tree_node.rankId)) \
                              .filter(tree_node.ParentID == parent_key) \
                              .as_scalar()
        rank_count = session.query(sql.func.count(distinct(tree_node.rankId))) \
                            .filter(tree_node.rankId >= highest_rank)
        (depth,) = rank_count[0]

        child_id = getattr(child, child._id)
        base = session.query(child_id) \
                      .filter(child.ParentID == parent_key) \
                      .filter(getattr(child, treedef_field) == int(treedef)) \
                      .group_by(child_id)

        results = list(make_joins(depth, base))

    return HttpResponse(toJson(results), content_type='application/json')
Esempio n. 8
0
def make_dwca(collection, user, definition, output_file, eml=None):
    """Build a zip archive from an XML definition of core/extension stanzas.

    The definition is parsed, a meta.xml descriptor (plus eml.xml when eml is
    given) is written to a temporary directory, every query of the core and
    extension stanzas is exported there with query_to_csv, and the directory
    is zipped to output_file.  The temporary directory is always removed.
    """
    output_dir = mkdtemp()
    try:
        definition_root = ET.fromstring(definition)

        core_stanza = Stanza.from_xml(definition_root.find('core'))
        extension_stanzas = [
            Stanza.from_xml(node)
            for node in definition_root.findall('extension')
        ]

        archive = ET.Element('archive')
        namespace_attrs = (
            ('xmlns', "http://rs.tdwg.org/dwc/text/"),
            ('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance"),
            ('xmlns:xs', "http://www.w3.org/2001/XMLSchema"),
            ('xsi:schemaLocation', "http://rs.tdwg.org/dwc/text/ http://rs.tdwg.org/dwc/text/tdwg_dwc_text.xsd"),
        )
        for attr_name, attr_value in namespace_attrs:
            archive.set(attr_name, attr_value)

        if eml is not None:
            archive.set('metadata', 'eml.xml')
            write_eml(eml, os.path.join(output_dir, 'eml.xml'))

        for stanza in [core_stanza] + extension_stanzas:
            archive.append(stanza.to_xml())

        with open(os.path.join(output_dir, 'meta.xml'), 'w') as meta_xml:
            meta_xml.write(prettify(archive))

        core_ids = set()

        def collect_ids(row):
            "Record the core id of row; never rejects a row."
            # NOTE(review): the +1 presumably skips a leading id column that
            # strip_id removes from the output -- confirm in query_to_csv.
            core_ids.add(row[core_stanza.id_field_idx + 1])
            return True

        def only_core_rows(stanza):
            "Return a row filter accepting rows whose id was seen in the core."
            def filter_ids(row):
                return row[stanza.id_field_idx + 1] in core_ids
            return filter_ids

        def export(session, stanza, row_filter):
            "Write every query of stanza to its own file in output_dir."
            for query in stanza.queries:
                path = os.path.join(output_dir, query.file_name)
                query_to_csv(session, collection, user, query.tableid,
                             query.get_field_specs(), path,
                             strip_id=True, row_filter=row_filter)

        with session_context() as session:
            # The core goes first so core_ids is complete before any
            # extension row is tested against it.
            export(session, core_stanza, collect_ids)
            for stanza in extension_stanzas:
                export(session, stanza, only_core_rows(stanza))

        # make_archive appends '.zip' itself, so strip it from the name.
        basename = re.sub(r'\.zip$', '', output_file)
        shutil.make_archive(basename, 'zip', output_dir, logger=logger)
    finally:
        shutil.rmtree(output_dir)
Esempio n. 9
0
def querycbx_search(request, modelname):
    """Search one table by per-field search strings and return matches as JSON.

    Every GET parameter other than 'limit', 'offset' and 'forcecollection'
    is taken as a field name of modelname with a search string as its value.
    Filters produced for the terms of one field are OR-ed together and the
    per-field filters are AND-ed.  At most 10 matching ids are fetched and
    the corresponding objects are serialised with obj_to_data.

    Parameters:
        request: the incoming request; 'forcecollection', when present,
            names the collection (by pk) to search instead of
            request.specify_collection.
        modelname: name of the table to search.

    Returns an HttpResponse holding a JSON list, which is empty when no
    filter could be built.
    """
    table = datamodel.get_table(modelname)
    model = getattr(models, table.name)

    # Pair each field with the value of the GET key that produced it.
    # Looking the value up again through field.name.lower() fails with a
    # key error whenever the request key is not exactly the lowercased
    # field name.
    searches = [
        (table.get_field(fieldname, strict=True), request.GET[fieldname])
        for fieldname in request.GET
        if fieldname not in ('limit', 'offset', 'forcecollection')
    ]

    if 'forcecollection' in request.GET:
        collection = Collection.objects.get(pk=request.GET['forcecollection'])
    else:
        collection = request.specify_collection

    filters = []
    for field, search_str in searches:
        terms = parse_search_str(collection, search_str)
        logger.debug("found terms: %s for %s", terms, field)

        filters_for_field = []
        for term in terms:
            # create_filter returns None when a term yields no filter for
            # this field.
            filter_for_term = term.create_filter(table, field)
            if filter_for_term is not None:
                filters_for_field.append(filter_for_term)

        logger.debug("filtering %s with %s", field, filters_for_field)
        if filters_for_field:
            filters.append(reduce(or_, filters_for_field))

    if filters:
        with models.session_context() as session:
            combined = reduce(and_, filters)
            query = session.query(getattr(model,
                                          table.idFieldName)).filter(combined)
            query = filter_by_collection(model, query, collection).limit(10)
            ids = [row_id for (row_id, ) in query]
    else:
        ids = []

    # Imported here rather than at module level, as in the original.
    from specifyweb.specify.api import get_model_or_404, obj_to_data
    specify_model = get_model_or_404(modelname)
    qs = specify_model.objects.filter(id__in=ids)

    results = [obj_to_data(obj) for obj in qs]
    return HttpResponse(toJson(results), content_type='application/json')
Esempio n. 10
0
def make_dwca(collection, user, definition, output_file, eml=None):
    """Export the stanzas of an XML definition into a zipped archive.

    A temporary directory receives meta.xml (the archive descriptor built
    from the core and extension stanzas), eml.xml when eml is not None, and
    one file per stanza query written by query_to_csv.  The directory is then
    zipped to output_file and removed, even when an error occurs.
    """
    output_dir = mkdtemp()

    def in_output(file_name):
        "Return the path of file_name inside the temporary directory."
        return os.path.join(output_dir, file_name)

    try:
        element_tree = ET.fromstring(definition)

        core_stanza = Stanza.from_xml(element_tree.find('core'))
        extension_stanzas = []
        for node in element_tree.findall('extension'):
            extension_stanzas.append(Stanza.from_xml(node))

        output_node = ET.Element('archive')
        tdwg_text_ns = "http://rs.tdwg.org/dwc/text/"
        output_node.set('xmlns', tdwg_text_ns)
        output_node.set('xmlns:xsi',
                        "http://www.w3.org/2001/XMLSchema-instance")
        output_node.set('xmlns:xs', "http://www.w3.org/2001/XMLSchema")
        output_node.set(
            'xsi:schemaLocation',
            tdwg_text_ns + " http://rs.tdwg.org/dwc/text/tdwg_dwc_text.xsd")

        if eml is not None:
            output_node.set('metadata', 'eml.xml')
            write_eml(eml, in_output('eml.xml'))

        output_node.append(core_stanza.to_xml())
        for extension in extension_stanzas:
            output_node.append(extension.to_xml())

        with open(in_output('meta.xml'), 'w') as meta_xml:
            meta_xml.write(prettify(output_node))

        core_ids = set()

        def collect_ids(row):
            "Remember the core id found in row and accept the row."
            # NOTE(review): the +1 presumably accounts for a leading id
            # column dropped by strip_id -- confirm against query_to_csv.
            core_ids.add(row[core_stanza.id_field_idx + 1])
            return True

        def write_queries(session, queries, row_filter):
            "Run each query through query_to_csv into its own output file."
            for query in queries:
                query_to_csv(session,
                             collection,
                             user,
                             query.tableid,
                             query.get_field_specs(),
                             in_output(query.file_name),
                             strip_id=True,
                             row_filter=row_filter)

        with session_context() as session:
            # Core queries run first; they populate core_ids, which the
            # extension filters below consult.
            write_queries(session, core_stanza.queries, collect_ids)

            for stanza in extension_stanzas:

                def filter_ids(row, stanza=stanza):
                    return row[stanza.id_field_idx + 1] in core_ids

                write_queries(session, stanza.queries, filter_ids)

        # shutil.make_archive adds the '.zip' suffix on its own.
        basename = re.sub(r'\.zip$', '', output_file)
        shutil.make_archive(basename, 'zip', output_dir, logger=logger)
    finally:
        shutil.rmtree(output_dir)