def search(request):
    form = SearchForm(request.GET)
    if not form.is_valid():
        return HttpResponseBadRequest(toJson(form.errors),
                                      content_type='application/json')

    logger.debug("parameters: %s", form.cleaned_data)

    collection = request.specify_collection
    express_search_config = get_express_search_config(collection, request.specify_user)
    terms = parse_search_str(collection, form.cleaned_data['q'])
    specific_table = form.cleaned_data['name'].lower()
    limit = form.cleaned_data['limit']
    offset = form.cleaned_data['offset']

    with models.session_context() as session:
        results = [
            run_primary_search(session, searchtable, terms, collection, limit, offset)
            for searchtable in express_search_config.findall('tables/searchtable')
            if specific_table == "" or searchtable.find('tableName').text.lower() == specific_table
        ]

        result = {k: v for r in results for (k, v) in r.items()}
        return HttpResponse(toJson(result), content_type='application/json')
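
# A minimal sketch of exercising the express-search view above outside of normal URL routing,
# assuming Django's RequestFactory and that the attributes normally provided by middleware
# (specify_collection, specify_user) can be attached by hand. The path string and the demo_*
# names below are illustrative assumptions, not the project's actual routing.
from django.test import RequestFactory

def demo_express_search(collection, user):
    request = RequestFactory().get(
        '/express_search/',  # hypothetical path; the real routing is defined elsewhere
        {'q': 'sparrow', 'name': '', 'limit': 20, 'offset': 0})  # the SearchForm fields used above
    request.specify_collection = collection  # normally set by middleware (assumed here)
    request.specify_user = user
    response = search(request)  # the view defined above
    return response.content  # JSON mapping each searched table to its primary-search hits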
def related_search(request):
    from . import related_searches

    form = RelatedSearchForm(request.GET)
    if not form.is_valid():
        return HttpResponseBadRequest(toJson(form.errors),
                                      content_type='application/json')

    logger.debug("parameters: %s", form.cleaned_data)

    related_search = getattr(related_searches, form.cleaned_data['name'])

    config = get_express_search_config(request.specify_collection, request.specify_user)
    terms = parse_search_str(request.specify_collection, form.cleaned_data['q'])

    with models.session_context() as session:
        result = related_search.execute(session, config, terms,
                                        collection=request.specify_collection,
                                        user=request.specify_user,
                                        offset=form.cleaned_data['offset'],
                                        limit=form.cleaned_data['limit'])
        return HttpResponse(toJson(result), content_type='application/json')
def tree_view(request, treedef, tree, parentid, sortfield):
    tree_table = datamodel.get_table(tree)
    parentid = None if parentid == 'null' else int(parentid)

    node = getattr(models, tree_table.name)
    child = aliased(node)
    accepted = aliased(node)
    id_col = getattr(node, node._id)
    child_id = getattr(child, node._id)
    treedef_col = getattr(node, tree_table.name + "TreeDefID")
    orderby = tree_table.name.lower() + '.' + sortfield

    with models.session_context() as session:
        query = session.query(id_col,
                              node.name,
                              node.fullName,
                              node.nodeNumber,
                              node.highestChildNodeNumber,
                              node.rankId,
                              node.AcceptedID,
                              accepted.fullName,
                              sql.functions.count(child_id)) \
                       .outerjoin(child, child.ParentID == id_col) \
                       .outerjoin(accepted, node.AcceptedID == getattr(accepted, node._id)) \
                       .group_by(id_col) \
                       .filter(treedef_col == int(treedef)) \
                       .filter(node.ParentID == parentid) \
                       .order_by(orderby)
        results = list(query)
        return HttpResponse(toJson(results), content_type='application/json')
def tree_stats(request, treedef, tree, parentid):
    tree_table = datamodel.get_table(tree)
    parentid = None if parentid == 'null' else int(parentid)

    node = getattr(models, tree_table.name)
    descendant = aliased(node)
    node_id = getattr(node, node._id)
    descendant_id = getattr(descendant, node._id)
    treedef_col = tree_table.name + "TreeDefID"

    same_tree_p = getattr(descendant, treedef_col) == int(treedef)
    # A node's descendants (itself included) are exactly the rows whose nodeNumber falls
    # within its closed interval [nodeNumber, highestChildNodeNumber] in the same tree.
    is_descendant_p = sql.and_(
        sql.between(descendant.nodeNumber,
                    node.nodeNumber,
                    node.highestChildNodeNumber),
        same_tree_p)

    target, make_joins = getattr(StatsQuerySpecialization, tree)()
    target_id = getattr(target, target._id)

    # direct_count: targets attached to the node itself;
    # all_count: targets attached to the node or any of its descendants.
    direct_count = sql.cast(
        sql.func.sum(sql.case([(sql.and_(target_id != None, descendant_id == node_id), 1)],
                              else_=0)),
        types.Integer)
    all_count = sql.func.count(target_id)

    with models.session_context() as session:
        query = session.query(node_id, direct_count, all_count) \
                       .join(descendant, is_descendant_p) \
                       .filter(node.ParentID == parentid) \
                       .group_by(node_id)

        query = make_joins(request.specify_collection, query, descendant_id)
        results = list(query)
        return HttpResponse(toJson(results), content_type='application/json')
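
# Self-contained sketch (assuming SQLAlchemy 1.4+) of the nodeNumber interval trick used by
# is_descendant_p above: a node's descendants, itself included, are exactly the rows whose
# nodeNumber lies between its nodeNumber and highestChildNodeNumber. The Taxon table, names,
# and numbers below are toy data for illustration, not the Specify schema.
from sqlalchemy import Column, Integer, String, between, create_engine, func
from sqlalchemy.orm import aliased, declarative_base, sessionmaker

Base = declarative_base()

class Taxon(Base):
    __tablename__ = 'taxon'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    nodeNumber = Column(Integer)
    highestChildNodeNumber = Column(Integer)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add_all([
    Taxon(name='Aves', nodeNumber=1, highestChildNodeNumber=4),
    Taxon(name='Passer', nodeNumber=2, highestChildNodeNumber=4),
    Taxon(name='Passer domesticus', nodeNumber=3, highestChildNodeNumber=3),
    Taxon(name='Passer montanus', nodeNumber=4, highestChildNodeNumber=4),
])
session.commit()

descendant = aliased(Taxon)
# Join each node to its descendants (including itself) via the node-number interval,
# mirroring the is_descendant_p predicate, and count them per node.
counts = session.query(Taxon.name, func.count(descendant.id)) \
                .join(descendant, between(descendant.nodeNumber,
                                          Taxon.nodeNumber,
                                          Taxon.highestChildNodeNumber)) \
                .group_by(Taxon.id) \
                .all()
print(counts)  # e.g. [('Aves', 4), ('Passer', 3), ('Passer domesticus', 1), ('Passer montanus', 1)]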
def tree_view(request, treedef, tree, parentid):
    tree_table = datamodel.get_table(tree)
    parentid = None if parentid == 'null' else int(parentid)

    node = getattr(models, tree_table.name)
    child = aliased(node)
    id_col = getattr(node, node._id)
    child_id = getattr(child, node._id)
    treedef_col = getattr(node, tree_table.name + "TreeDefID")

    with models.session_context() as session:
        query = session.query(id_col,
                              node.name,
                              node.fullName,
                              node.nodeNumber,
                              node.highestChildNodeNumber,
                              node.rankId,
                              sql.functions.count(child_id)) \
                       .outerjoin(child, child.ParentID == id_col) \
                       .group_by(id_col) \
                       .filter(treedef_col == int(treedef)) \
                       .filter(node.ParentID == parentid) \
                       .order_by(node.name)
        results = list(query)
        return HttpResponse(toJson(results), content_type='application/json')
def tree_stats(request, treedef, tree, parentid):
    tree_table = datamodel.get_table(tree)
    parentid = None if parentid == 'null' else int(parentid)
    treedef_col = tree_table.name + "TreeDefID"

    tree_node = getattr(models, tree_table.name)
    child = aliased(tree_node)

    def count_distinct(table):
        "Concision helper. Returns count distinct clause on ID field of table."
        return sql.func.count(distinct(getattr(table, table._id)))

    def make_joins(depth, query):
        "Depth is the number of tree level joins to be made."
        descendants = [child]
        for i in range(depth):
            descendant = aliased(tree_node)
            query = query.outerjoin(
                descendant,
                descendant.ParentID == getattr(descendants[-1], tree_node._id))
            descendants.append(descendant)

        # The target table is the one we will be counting distinct IDs on. E.g. Collection object.
        make_target_joins = getattr(StatsQuerySpecialization(request.specify_collection), tree)

        targets = []
        for d in descendants:
            query, target = make_target_joins(query, getattr(d, d._id))
            targets.append(target)

        query = query.add_columns(
            count_distinct(targets[0]),  # Count distinct target ids at the immediate level
            reduce(lambda l, r: l + r, [count_distinct(t) for t in targets])  # Sum all levels
        )
        return query

    with models.session_context() as session:
        # The join depth only needs to be enough to reach the bottom of the tree.
        # That will be the number of distinct rankID values not less than
        # the rankIDs of the children of parentid.
        highest_rank = session.query(sql.func.min(tree_node.rankId)) \
                              .filter(tree_node.ParentID == parentid).as_scalar()
        depth, = session.query(sql.func.count(distinct(tree_node.rankId))) \
                        .filter(tree_node.rankId >= highest_rank)[0]

        query = session.query(getattr(child, child._id)) \
                       .filter(child.ParentID == parentid) \
                       .filter(getattr(child, treedef_col) == int(treedef)) \
                       .group_by(getattr(child, child._id))

        query = make_joins(depth, query)
        results = list(query)
        return HttpResponse(toJson(results), content_type='application/json')
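
# A small worked example of the join-depth computation explained in the comment above,
# using plain Python over hypothetical (id, parentId, rankId) rows instead of a session.
# A smaller rankId means a higher level in the tree.
example_rows = [
    (1, None, 0),    # root
    (2, 1, 140),     # family
    (3, 2, 180),     # genus
    (4, 3, 220),     # species
    (5, 3, 220),     # species
]
example_parentid = 2  # stats are being requested for the children of node 2

# Rank of the highest-ranked child of parentid...
example_highest_rank = min(rank for (_id, parent, rank) in example_rows
                           if parent == example_parentid)
# ...and the number of distinct ranks at or below that level, i.e. the joins needed
# to reach the bottom of the tree.
example_depth = len({rank for (_id, _parent, rank) in example_rows
                     if rank >= example_highest_rank})
print(example_highest_rank, example_depth)  # 180 2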
def make_dwca(collection, user, definition, output_file, eml=None):
    "Build a Darwin Core Archive zip at output_file from the given export definition."
    output_dir = mkdtemp()
    try:
        element_tree = ET.fromstring(definition)

        core_stanza = Stanza.from_xml(element_tree.find('core'))
        extension_stanzas = [Stanza.from_xml(node) for node in element_tree.findall('extension')]

        output_node = ET.Element('archive')
        output_node.set('xmlns', "http://rs.tdwg.org/dwc/text/")
        output_node.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance")
        output_node.set('xmlns:xs', "http://www.w3.org/2001/XMLSchema")
        output_node.set('xsi:schemaLocation',
                        "http://rs.tdwg.org/dwc/text/ http://rs.tdwg.org/dwc/text/tdwg_dwc_text.xsd")

        if eml is not None:
            output_node.set('metadata', 'eml.xml')
            write_eml(eml, os.path.join(output_dir, 'eml.xml'))

        output_node.append(core_stanza.to_xml())
        for stanza in extension_stanzas:
            output_node.append(stanza.to_xml())

        with open(os.path.join(output_dir, 'meta.xml'), 'w') as meta_xml:
            meta_xml.write(prettify(output_node))

        core_ids = set()

        def collect_ids(row):
            core_ids.add(row[core_stanza.id_field_idx + 1])
            return True

        with session_context() as session:
            for query in core_stanza.queries:
                path = os.path.join(output_dir, query.file_name)
                query_to_csv(session, collection, user, query.tableid,
                             query.get_field_specs(), path,
                             strip_id=True, row_filter=collect_ids)

            for stanza in extension_stanzas:
                # Only keep extension rows whose ID was seen among the core rows.
                def filter_ids(row):
                    return row[stanza.id_field_idx + 1] in core_ids

                for query in stanza.queries:
                    path = os.path.join(output_dir, query.file_name)
                    query_to_csv(session, collection, user, query.tableid,
                                 query.get_field_specs(), path,
                                 strip_id=True, row_filter=filter_ids)

        basename = re.sub(r'\.zip$', '', output_file)
        shutil.make_archive(basename, 'zip', output_dir, logger=logger)
    finally:
        shutil.rmtree(output_dir)
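
# Hypothetical usage sketch for make_dwca, assuming `collection` and `user` are the usual
# Specify collection and user objects, that dwca_definition.xml holds the export definition
# (a <core> element plus optional <extension> elements, as parsed above), and that `eml` is
# supplied in whatever form write_eml expects; all file names here are placeholders.
def export_archive(collection, user):
    with open('dwca_definition.xml') as f:
        definition = f.read()
    with open('eml.xml') as f:
        eml = f.read()
    make_dwca(collection, user, definition, 'export.zip', eml=eml)
    # export.zip now contains meta.xml, eml.xml (if eml was given), and one CSV per stanza
    # query, with extension rows restricted to IDs that occur in the core rows.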
def querycbx_search(request, modelname):
    table = datamodel.get_table(modelname)
    model = getattr(models, table.name)

    # Every GET parameter other than limit/offset/forcecollection names a field to search on.
    fields = [
        table.get_field(fieldname, strict=True)
        for fieldname in request.GET
        if fieldname not in ('limit', 'offset', 'forcecollection')
    ]

    if 'forcecollection' in request.GET:
        collection = Collection.objects.get(pk=request.GET['forcecollection'])
    else:
        collection = request.specify_collection

    filters = []
    for field in fields:
        filters_for_field = []
        terms = parse_search_str(collection, request.GET[field.name.lower()])
        logger.debug("found terms: %s for %s", terms, field)
        for term in terms:
            filter_for_term = term.create_filter(table, field)
            if filter_for_term is not None:
                filters_for_field.append(filter_for_term)

        logger.debug("filtering %s with %s", field, filters_for_field)
        if len(filters_for_field) > 0:
            filters.append(reduce(or_, filters_for_field))

    if len(filters) > 0:
        with models.session_context() as session:
            combined = reduce(and_, filters)
            query = session.query(getattr(model, table.idFieldName)).filter(combined)
            query = filter_by_collection(model, query, collection).limit(10)
            ids = [id for (id, ) in query]
    else:
        ids = []

    from specifyweb.specify.api import get_model_or_404, obj_to_data
    specify_model = get_model_or_404(modelname)
    qs = specify_model.objects.filter(id__in=ids)
    results = [obj_to_data(obj) for obj in qs]
    return HttpResponse(toJson(results), content_type='application/json')
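
# Self-contained sketch (assuming SQLAlchemy 1.4+) of how the filters combine above: the
# filters produced by the search terms for a single field are OR'ed together, and the
# resulting per-field filters are AND'ed. The DemoAgent table and the LIKE patterns are toy
# stand-ins for the filters built by term.create_filter, not the Specify schema.
from functools import reduce
from sqlalchemy import Column, Integer, String, and_, or_, select
from sqlalchemy.orm import declarative_base

DemoBase = declarative_base()

class DemoAgent(DemoBase):
    __tablename__ = 'demo_agent'
    id = Column(Integer, primary_key=True)
    lastName = Column(String)
    firstName = Column(String)

demo_filters = []
for column, terms in [(DemoAgent.lastName, ['smith', 'smyth']),
                      (DemoAgent.firstName, ['jo'])]:
    filters_for_field = [column.like(term + '%') for term in terms]
    demo_filters.append(reduce(or_, filters_for_field))  # any term may match within a field

combined = reduce(and_, demo_filters)                    # every searched field must match
print(select(DemoAgent.id).where(combined))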