Example #1
0
 def __init__(self, a_table: ModelT, *args):
     """
         Build a "select <table>.*" query on the table behind the given model, optionally
         restricted by equality conditions, and store a reference result for it.

         :param a_table: Mapped class, its `__table__` gives the table name.
         :param args: Flat sequence alternating column, value, column, value...
                      A trailing column without a value is ignored.
     """
     super().__init__()
     sqla_table: Table = a_table.__table__
     self.table_name = sqla_table.name
     # Consuming the same iterator twice per step yields (column, value) pairs
     flat_args = iter(args)
     conditions = [a_col == a_val for a_col, a_val in zip(flat_args, flat_args)]
     filtered = select([text("%s.*" % sqla_table.name)])  # type:ignore
     for a_condition in conditions:
         filtered = filtered.where(a_condition)
     self.qry = filtered
     # Computed last, as _get_result presumably reads self.qry -- TODO confirm
     self.ref_val = self._get_result(self.session)
Example #2
0
 def strict_match_subquery(session, used_taxo_ids,
                           phylo_or_morpho: Optional[str]):
     """
         Build the subquery of unambiguous name matches b/w Taxonomy and WoRMS.

         Taxonomy is joined to WoRMS using match_with_extension(), restricted to the given
         taxonomy ids and to WoRMS lines with status 'accepted'. Lines are grouped by
         (Taxonomy.name, WoRMS.aphia_id) and only groups of exactly one line are kept.

         :param session: The session used for building the query.
         :param used_taxo_ids: Taxonomy ids to consider.
         :param phylo_or_morpho: When not None, only Taxonomy lines with this taxotype.
         :return: A subquery aliased "ids", exposing columns name, id and aphia_id.
     """
     subqry = session.query(Taxonomy.name,
                            func.max(Taxonomy.id).label("id"),
                            WoRMS.aphia_id)
     subqry = subqry.join(WoRMS,
                          TaxonomyChangeService.match_with_extension())
     subqry = subqry.filter(Taxonomy.id == any_(used_taxo_ids))
     if phylo_or_morpho is not None:
         # Compared as a bound parameter: the value is never spliced into the SQL text,
         # so a quote inside it can neither break nor alter the statement.
         subqry = subqry.filter(Taxonomy.taxotype == phylo_or_morpho)
     subqry = subqry.filter(WoRMS.status == text("'accepted'"))
     # Group to exclude multiple matches
     subqry = subqry.group_by(Taxonomy.name, WoRMS.aphia_id)
     subqry = subqry.having(
         and_(
             func.count(Taxonomy.name) == 1,
             func.count(WoRMS.aphia_id) == 1))
     subqry = subqry.subquery().alias("ids")
     return subqry
Example #3
0
 def match_with_extension():
     """
         Build the condition matching a Taxonomy name with a WoRMS scientific name.

         Names match when equal ignoring case, or when the Taxonomy name is the WoRMS
         scientific name followed by one of the tolerated extensions.

         :return: A SQL OR-expression, usable as a join or filter criterion.
     """
     # We also match if these are trailing on EcoTaxa side
     ok_ext = (" X", " sp.", " X sp.")

     def with_trailing(ext: str):
         # LIKE is done on the extension as written, the equality on lower-cased names
         return and_(
             Taxonomy.name.like(text("'%" + ext + "'")),
             func.lower(Taxonomy.name) == func.concat(
                 func.lower(WoRMS.scientificname),
                 text("'" + ext.lower() + "'")))

     same_name = func.lower(Taxonomy.name) == func.lower(WoRMS.scientificname)
     return or_(same_name, *[with_trailing(an_ext) for an_ext in ok_ext])
Example #4
0
    def matching(
            self, _current_user_id: int,
            params: Dict[str, Any]) -> List[Tuple[Taxonomy, Optional[WoRMS]]]:
        """
            Return the list of matching entries b/w Taxonomy and WoRMS.

            A single case is served per call: the first of "case1", "case2", "case3",
            "case31", "case4", "case5", "case6" found as a key in `params`. Only the
            presence of the key matters, its value is not read.

            :param _current_user_id: Not used, there is no security check here.
            :param params: Holds the key of the requested case.
            :return: (Taxonomy, WoRMS) pairs. The WoRMS member is None for "case5" and
                     "case6", which list taxa without match. Empty list if no case key.
        """
        # Candidates are the distinct ProjectTaxoStat ids having nbr > 0
        taxo_ids_qry: Query = self.session.query(ProjectTaxoStat.id).distinct()
        taxo_ids_qry = taxo_ids_qry.filter(ProjectTaxoStat.nbr > 0)
        used_taxo_ids = [an_id for (an_id, ) in taxo_ids_qry.all()]

        # No security check. TODO?
        if "case1" in params:
            # Taxa with same name on both sides, Phylo in EcoTaxa and accepted in WoRMS
            res = self.strict_match(self.session, used_taxo_ids)
            return [(taxo, worms) for taxo, worms in res]

        if "case2" in params:
            # Taxa with same name on both sides, Morpho in EcoTaxa and accepted in WoRMS
            subqry = TaxonomyChangeService.strict_match_subquery(
                self.session, used_taxo_ids, phylo_or_morpho="M")
            qry = self._matched_pairs_query(subqry)
            return [(taxo, worms) for taxo, worms in self._log_and_fetch(qry)]

        if "case3" in params or "case31" in params:
            # Match but the match/all matches are not accepted
            subqry = self.full_match_aggregated(used_taxo_ids)
            qry = self._matched_pairs_query(subqry)
            qry = qry.filter(
                not_(
                    subqry.c.acc.op('@>')(text("ARRAY['accepted'::varchar]"))))
            if "case3" in params:
                # "case3" has precedence over "case31": WoRMS line has a valid_name
                qry = qry.filter(WoRMS.valid_name.isnot(None))
            else:
                # "case31": WoRMS line has no valid_name
                qry = qry.filter(WoRMS.valid_name.is_(None))
            return [(taxo, worms) for taxo, worms in self._log_and_fetch(qry)]

        if "case4" in params:
            # Aggregated matches for which the subquery counted more than one line
            subqry = self.full_match_aggregated(used_taxo_ids)
            qry = self._matched_pairs_query(subqry)
            qry = qry.filter(subqry.c.cnt > 1)
            return [(taxo, worms) for taxo, worms in self._log_and_fetch(qry)]

        if "case5" in params:
            # No match, phylo: neither in the strict matches nor in the aggregated ones
            strict_subqry = TaxonomyChangeService.strict_match_subquery(
                self.session, used_taxo_ids, phylo_or_morpho=None)
            aggregated_subqry = self.full_match_aggregated(used_taxo_ids)
            qry = self._unmatched_taxa_query(used_taxo_ids, 'P',
                                             strict_subqry, aggregated_subqry)
            return [(taxo, None) for taxo in self._log_and_fetch(qry)]

        if "case6" in params:
            # No match, morpho: not in the strict matches
            strict_subqry = TaxonomyChangeService.strict_match_subquery(
                self.session, used_taxo_ids, phylo_or_morpho=None)
            qry = self._unmatched_taxa_query(used_taxo_ids, 'M', strict_subqry)
            return [(taxo, None) for taxo in self._log_and_fetch(qry)]

        return []

    def _matched_pairs_query(self, subqry) -> Query:
        """ Query the (Taxonomy, WoRMS) pairs designated by columns id and aphia_id of `subqry`. """
        qry: Query = self.session.query(Taxonomy, WoRMS)
        qry = qry.join(subqry, subqry.c.id == Taxonomy.id)
        qry = qry.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
        return qry

    def _unmatched_taxa_query(self, used_taxo_ids, taxotype: str,
                              *matched_subqueries) -> Query:
        """ Query Taxonomy lines of given taxotype, among given ids, absent from all subqueries. """
        qry: Query = self.session.query(Taxonomy)
        qry = qry.filter(Taxonomy.id == any_(used_taxo_ids))
        qry = qry.filter(Taxonomy.taxotype == taxotype)
        for a_subqry in matched_subqueries:
            qry = qry.filter(
                not_(Taxonomy.id.in_(self.session.query(a_subqry.c.id))))
        return qry

    @staticmethod
    def _log_and_fetch(qry: Query) -> list:
        """ Log the SQL of `qry`, then return all its rows. """
        # The query itself is the log argument, so its SQL is rendered only if INFO is enabled
        logger.info("matching qry:%s", qry)
        return qry.all()