def __init__(self, session: Session, taxon_ids: ClassifIDListT):
    """
    Fetch the taxa having the given IDs, with their object counts and the
    names of their ancestors, then call get_children() on the loaded taxa.

    :param session: DB session, provides the bind and executes the query.
    :param taxon_ids: IDs of the taxa to load into self.taxa.
    """
    tf = Taxonomy.__table__.alias('tf')
    # Replace with None for portable SQL, no 'ilike'
    bind = session.get_bind()
    # Columns of the taxon itself come first...
    columns = [
        tf.c.id,
        tf.c.display_name,
        tf.c.name,
        tf.c.nbrobj,
        tf.c.nbrobjcum,
    ]
    # ...then one name column per t<level> alias; these aliases are expected
    # to be provided by the recursive query injected below.
    for level in range(1, TaxonomyBO.MAX_TAXONOMY_LEVELS):
        columns.append(text("t%d.name" % level))  # type:ignore
    qry = select(columns, bind=bind)
    # Inject the recursive query, for getting parents
    _unused, qry = TaxonomyBO._add_recursive_query(qry, tf, do_concat=False)
    qry = qry.where(tf.c.id == any_(taxon_ids))
    logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
    res: ResultProxy = session.execute(qry)
    self.taxa: List[TaxonBO] = []
    for row in res.fetchall():
        # 5 fixed columns, everything after is an ancestor name (or NULL)
        an_id, display_name, db_name, nbobj1, nbobj2, *ancestors = row
        lineage = [db_name]
        lineage.extend(name for name in ancestors if name)
        self.taxa.append(
            TaxonBO(an_id, display_name, db_name,
                    nbobj1, nbobj2, lineage))  # type:ignore
    self.get_children(session, self.taxa)
def __init__(self, session: Session, taxon_ids: ClassifIDListT):
    """
    Fetch the WoRMS taxa having the given aphia IDs, each with its lineage
    of (aphia_id, scientificname) cut just after 'Biota', then call
    get_children() on the loaded taxa.

    :param session: DB session, provides the bind and executes the query.
    :param taxon_ids: aphia IDs of the taxa to load into self.taxa.
    :raises ValueError: if a fetched row has no 'Biota' among its names.
    """
    worms_table = WoRMS.__table__
    tf = worms_table.alias('tf')
    # Replace with None for portable SQL, no 'ilike'
    bind = session.get_bind()
    # The taxon itself, then an (id, name) pair per ancestor level
    columns = [tf.c.aphia_id, tf.c.scientificname]
    for level in range(1, TaxonBOSetFromWoRMS.MAX_TAXONOMY_LEVELS):
        columns.append(
            text("t%d.aphia_id, t%d.scientificname" % (level, level)))  # type:ignore
    qry = select(columns, bind=bind)

    def hook_parent(joins, child, level):
        """ Outer-join alias t<level> as the parent of 'child'. """
        parent = worms_table.alias('t%d' % level)
        joins = joins.join(
            parent,
            parent.c.aphia_id == child.c.parent_name_usage_id,
            isouter=True)
        return joins, parent

    # Produced SQL looks like:
    #   left join worms t1 on tf.parent_name_usage_id=t1.aphia_id
    #   left join worms t2 on t1.parent_name_usage_id=t2.aphia_id
    #   ...
    #   left join worms t14 on t13.parent_name_usage_id=t14.aphia_id
    # The first join hooks onto the main alias, each next one onto the previous.
    chained_joins, last_alias = hook_parent(tf, tf, 1)
    for level in range(2, self.MAX_TAXONOMY_LEVELS):
        chained_joins, last_alias = hook_parent(chained_joins, last_alias, level)
    qry = qry.select_from(chained_joins)
    qry = qry.where(tf.c.aphia_id == any_(taxon_ids))
    logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
    res: Result = session.execute(qry)
    self.taxa = []
    for row in res.fetchall():
        cells = list(row)
        # Even positions hold IDs, odd ones hold names; drop empty values
        ids = list(filter(None, cells[0::2]))
        names = list(filter(None, cells[1::2]))
        # Keep the lineage up to and including the first 'Biota'
        keep = names.index('Biota') + 1
        self.taxa.append(
            TaxonBO('P', names[0], 0, 0,
                    names[:keep], ids[:keep]))  # type:ignore
    self.get_children(session, self.taxa)
def __init__(self, session: Session, taxon_ids: ClassifIDListT):
    """
    Fetch the WoRMS taxa having the given aphia IDs, each with the names of
    its ancestors, and store them in self.taxa.

    The lineage holds ancestor names only: the taxon's own name is passed
    separately to TaxonBO as both display name and name.

    :param session: DB session, provides the bind and executes the query.
    :param taxon_ids: aphia IDs of the taxa to load.
    """
    tf = WoRMS.__table__.alias('tf')
    # bind = None  # For portable SQL, no 'ilike'
    bind = session.get_bind()
    select_list = [tf.c.aphia_id, tf.c.scientificname]
    select_list.extend([
        text("t%d.scientificname" % level)  # type:ignore
        for level in range(1, TaxonomyBO.MAX_TAXONOMY_LEVELS)
    ])
    qry = select(select_list, bind=bind)
    # Inject a query on names and hierarchy
    # Produced SQL looks like:
    #   left join worms t1 on tf.parent_name_usage_id=t1.aphia_id
    #   left join worms t2 on t1.parent_name_usage_id=t2.aphia_id
    #   ...
    #   left join worms t14 on t13.parent_name_usage_id=t14.aphia_id
    lev_alias = WoRMS.__table__.alias('t1')
    # Chain outer joins on the table, for parents
    # hook the first OJ to main select
    chained_joins = tf.join(
        lev_alias,
        lev_alias.c.aphia_id == tf.c.parent_name_usage_id,
        isouter=True)
    prev_alias = lev_alias
    for level in range(2, self.MAX_TAXONOMY_LEVELS):
        lev_alias = WoRMS.__table__.alias('t%d' % level)
        # hook each following OJ to previous one
        chained_joins = chained_joins.join(
            lev_alias,
            lev_alias.c.aphia_id == prev_alias.c.parent_name_usage_id,
            isouter=True)
        prev_alias = lev_alias
    qry = qry.select_from(chained_joins)
    qry = qry.where(tf.c.aphia_id == any_(taxon_ids))
    logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
    res: ResultProxy = session.execute(qry)
    self.taxa = []
    for a_rec in res.fetchall():
        an_id, display_name, *parent_names = a_rec
        lineage = [name for name in parent_names if name]
        # In WoRMS, the root is signaled by having itself as parent, so the
        # joins repeat the root name up to the last level: collapse the
        # trailing duplicates. At least 2 entries are needed for comparing,
        # otherwise lineage[-2] raises IndexError once a single name is left
        # (e.g. for a direct child of the root).
        while len(lineage) >= 2 and lineage[-1] == lineage[-2]:
            lineage.pop()
        self.taxa.append(
            TaxonBO(an_id, display_name, display_name,
                    0, 0, lineage))  # type:ignore
def __init__(self, session: Session, taxon_ids: ClassifIDListT):
    """
    Fetch the taxa having the given IDs, with their type, object counts and
    lineage as both IDs and names, then complete them using get_children()
    and get_cardinalities().

    :param session: DB session, provides the bind and executes the queries.
    :param taxon_ids: IDs of the taxa to load into self.taxa.
    """
    tf = Taxonomy.__table__.alias('tf')
    # Replace with None for portable SQL, no 'ilike'
    bind = session.get_bind()
    # 4 descriptive columns, then (id, name) for the taxon itself...
    columns = [
        tf.c.taxotype,
        tf.c.nbrobj,
        tf.c.nbrobjcum,
        tf.c.display_name,
        tf.c.id,
        tf.c.name,
    ]
    # ...and (id, name) again for each t<level> alias; these aliases are
    # expected to be provided by the recursive query injected below.
    for level in range(1, TaxonomyBO.MAX_TAXONOMY_LEVELS):
        columns.append(text("t%d.id, t%d.name" % (level, level)))  # type:ignore
    qry = select(columns, bind=bind)
    # Inject the recursive query, for getting parents
    _unused, qry = TaxonomyBO._add_recursive_query(qry, tf, do_concat=False)
    qry = qry.where(tf.c.id == any_(taxon_ids))
    logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
    res: Result = session.execute(qry)
    self.taxa: List[TaxonBO] = []
    for row in res.fetchall():
        cat_type, nbobj1, nbobj2, display_name, *id_name_pairs = row
        # What remains alternates IDs and names; drop empty values
        lineage_id = list(filter(None, id_name_pairs[0::2]))
        lineage = list(filter(None, id_name_pairs[1::2]))
        a_taxon = TaxonBO(cat_type, display_name, nbobj1, nbobj2,  # type:ignore
                          lineage, lineage_id)  # type:ignore
        self.taxa.append(a_taxon)
    self.get_children(session)
    self.get_cardinalities(session)
def query(cls, session: Session, restrict_to: ClassifIDListT,
          priority_set: ClassifIDListT, display_name_filter: str,
          name_filters: List[str]):
    """
    Search taxa by display name, optionally by lineage as well.

    :param session: DB session, provides the bind and executes the query.
    :param restrict_to: If not None, only these IDs can be returned.
    :param priority_set: Matching taxa with these IDs are sorted first, so
        they are the last ones to be cut off by the MAX_MATCHES limit.
    :param display_name_filter: LIKE pattern, applied to the lower-cased
        display name.
    :param name_filters: If not empty, its items are concatenated into a
        case-insensitive LIKE pattern applied to the whole lineage.
    :return: All fetched rows: (taxotype, id, display_name, prio).
    """
    tf = Taxonomy.__table__.alias('tf')
    # Replace with None for portable SQL, no 'ilike'
    bind = session.get_bind()
    # 0 for the priority IDs, 1 for all others
    # noinspection PyTypeChecker
    in_priority_set = tf.c.id == any_(priority_set)
    priority = case([(in_priority_set, text('0'))],
                    else_=text('1')).label('prio')
    qry = select([tf.c.taxotype, tf.c.id, tf.c.display_name, priority],
                 bind=bind)
    if len(name_filters) > 0:
        # Add to the query enough to get the full hierarchy for filtering
        concat_all, qry = cls._add_recursive_query(qry, tf, do_concat=True)
        # Below is quite expensive
        taxo_lineage = func.concat(*concat_all)
        # i.e. anywhere consecutively in the lineage
        name_filter = "%<" + "".join(name_filters)
        qry = qry.where(taxo_lineage.ilike(name_filter))
    if restrict_to is not None:
        qry = qry.where(tf.c.id == any_(restrict_to))
    # We have index IS_TaxonomyDispNameLow so this lower() is for free
    lowered_name = func.lower(tf.c.display_name)
    qry = qry.where(lowered_name.like(display_name_filter))
    qry = qry.order_by(priority, func.lower(tf.c.display_name))
    qry = qry.limit(cls.MAX_MATCHES)
    logger.info("Taxo query: %s with params %s and %s ", qry,
                display_name_filter, name_filters)
    res: Result = session.execute(qry)
    return res.fetchall()