Example #1
    def _delete_chunk(session: Session,
                      a_chunk: ObjectIDListT) -> Tuple[int, int, List[str]]:
        """
            Delete a chunk from self's object list.
            Technical Note: We use SQLA Core as we don't want to fetch the rows
        """
        # Start with images, which are not deleted via a CASCADE on DB side.
        # This is probably due to the relationship cycle b/w ObjectHeader and Image. @See comment in Image class.
        img_del_qry: Delete = Image.__table__.delete()
        img_del_qry = img_del_qry.where(Image.objid == any_(a_chunk))
        img_del_qry = img_del_qry.returning(Image.file_name,
                                            Image.thumb_file_name)
        with CodeTimer("DELETE for %d images: " % len(a_chunk), logger):
            files_res = session.execute(img_del_qry)
            img_files = []
            nb_img_rows = 0
            for a_file_tuple in files_res:
                # We have main file and optionally the thumbnail one
                for a_file in a_file_tuple:
                    if a_file:
                        img_files.append(a_file)
                nb_img_rows += 1
            logger.info("Removed: %d rows, to remove: %d files", nb_img_rows,
                        len(img_files))

        obj_del_qry: Delete = ObjectHeader.__table__.delete()
        obj_del_qry = obj_del_qry.where(ObjectHeader.objid == any_(a_chunk))
        with CodeTimer("DELETE for %d objs: " % len(a_chunk), logger):
            nb_objs = session.execute(obj_del_qry).rowcount

        session.commit()
        # TODO: Cache delete
        return nb_objs, nb_img_rows, img_files
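
A side note on the recurring predicate: `col == any_(ids)` renders on PostgreSQL as `col = ANY(...)` with the whole ID list passed as one bound parameter, instead of a long `IN` list. A minimal sketch of the same Core DELETE .. RETURNING idiom, against a hypothetical `images` table:

    from typing import List
    from sqlalchemy import MetaData, Table, Column, Integer, String, any_

    metadata = MetaData()
    # Hypothetical table, for illustration only
    images = Table("images", metadata,
                   Column("objid", Integer, primary_key=True),
                   Column("file_name", String),
                   Column("thumb_file_name", String))

    def delete_images(session, a_chunk: List[int]) -> List[str]:
        qry = (images.delete()
               .where(images.c.objid == any_(a_chunk))
               .returning(images.c.file_name, images.c.thumb_file_name))
        files = []
        for main_file, thumb_file in session.execute(qry):
            # Keep only non-NULL file names, as in the example above
            files.extend(f for f in (main_file, thumb_file) if f)
        return files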
Example #2
 def historize_classification(self, only_qual=None, manual=True):
     """
        Copy current classification information into history table, for all rows in self.
        :param only_qual: If set, only historize current rows with these classification qualifications.
        :param manual: If set, historize manual entries; otherwise, pick automatic ones.
     """
      # Shorten the SQLA expressions a bit
     oh = ObjectHeader
     och = ObjectsClassifHisto
     if manual:
         # What we want to historize, as a subquery
         sel_subqry = select([
             oh.objid, oh.classif_when,
             text("'M'"), oh.classif_id, oh.classif_qual, oh.classif_who
         ])
         if only_qual is not None:
             qual_cond = oh.classif_qual.in_(only_qual)
         else:
             qual_cond = true()
         sel_subqry = sel_subqry.where(
             and_(oh.objid == any_(self.object_ids),
                  oh.classif_when.isnot(None), qual_cond))
         ins_columns = [
             och.objid, och.classif_date, och.classif_type, och.classif_id,
             och.classif_qual, och.classif_who
         ]
     else:
         # What we want to historize, as a subquery
         sel_subqry = select([
             oh.objid, oh.classif_auto_when,
             text("'A'"), oh.classif_auto_id, oh.classif_qual,
             oh.classif_auto_score
         ])
         sel_subqry = sel_subqry.where(
             and_(oh.objid == any_(self.object_ids),
                  oh.classif_auto_id.isnot(None),
                  oh.classif_auto_when.isnot(None)))
         ins_columns = [
             och.objid, och.classif_date, och.classif_type, och.classif_id,
             och.classif_qual, och.classif_score
         ]
     # Insert into the log table
     ins_qry: Insert = pg_insert(och.__table__)
     ins_qry = ins_qry.from_select(ins_columns, sel_subqry)
     ins_qry = ins_qry.on_conflict_do_nothing(
         constraint='objectsclassifhisto_pkey')
     # TODO: mypy crashes due to pg_dialect below
     # logger.info("Histo query: %s", ins_qry.compile(dialect=pg_dialect()))
     nb_objs = self.session.execute(ins_qry).rowcount
     logger.info(" %d out of %d rows copied to log", nb_objs,
                 len(self.object_ids))
     return oh
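
The whole historization above is one server-side statement: INSERT .. FROM SELECT, made idempotent by ON CONFLICT DO NOTHING on the history PK. A sketch of the idiom, assuming hypothetical `live` and `histo` tables and a hypothetical constraint name:

    from sqlalchemy import select, any_
    from sqlalchemy.dialects.postgresql import insert as pg_insert

    def historize(session, live, histo, ids):
        # Rows to copy are selected server-side, nothing is fetched into Python
        sel = select([live.c.id, live.c.state]).where(live.c.id == any_(ids))
        ins = pg_insert(histo).from_select([histo.c.id, histo.c.state], sel)
        # Rows already historized are silently skipped ('histo_pkey' is hypothetical)
        ins = ins.on_conflict_do_nothing(constraint="histo_pkey")
        return session.execute(ins).rowcount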
Example #3
    def _get_last_classif_history(self, from_user_id: Optional[int], but_not_from_user_id: Optional[int]) \
            -> List[HistoricalLastClassif]:
        """
            Query the last classification history for all objects of self, mixed with the present state,
            in order to produce restorable lines.
        """
        # Get the histo entries
        subqry: Query = self.session.query(
            ObjectsClassifHisto,
            func.rank().over(
                partition_by=ObjectsClassifHisto.objid,
                order_by=ObjectsClassifHisto.classif_date.desc()).label("rnk"))
        if from_user_id:
            subqry = subqry.filter(
                ObjectsClassifHisto.classif_who == from_user_id)
        if but_not_from_user_id:
            subqry = subqry.filter(
                ObjectsClassifHisto.classif_who != but_not_from_user_id)
        subqry = subqry.filter(ObjectsClassifHisto.classif_type == "M")
        subq_alias: Alias = subqry.filter(
            ObjectsClassifHisto.objid == any_(self.object_ids)).subquery()

        # Also get some fields from ObjectHeader for referencing, info, and fallback
        qry = self.session.query(
            ObjectHeader.objid, ObjectHeader.classif_id,
            func.coalesce(subq_alias.c.classif_date,
                          ObjectHeader.classif_auto_when),
            subq_alias.c.classif_type,
            func.coalesce(subq_alias.c.classif_id,
                          ObjectHeader.classif_auto_id).label("h_classif_id"),
            func.coalesce(
                subq_alias.c.classif_qual,
                case([(ObjectHeader.classif_auto_id.isnot(None), 'P')])),
            subq_alias.c.classif_who)
        qry = qry.join(subq_alias,
                       ObjectHeader.objid == subq_alias.c.objid,
                       isouter=(from_user_id is None))
        if from_user_id is not None:
            # If taking history from a user, don't apply to the objects he/she already classified last.
            qry = qry.filter(ObjectHeader.classif_who != from_user_id)
            qry = qry.filter(subq_alias.c.rnk == 1)
        else:
            # Taking any history, including nothing, so emit blank history (see isouter above)
            qry = qry.filter(ObjectHeader.objid == any_(self.object_ids))
            qry = qry.filter(
                or_(subq_alias.c.rnk == 1, subq_alias.c.rnk.is_(None)))
        logger.info("_get_last_classif_history qry:%s", str(qry))
        with CodeTimer("HISTORY for %d objs: " % len(self.object_ids), logger):
            ret = [HistoricalLastClassif(rec) for rec in qry.all()]
        logger.info("_get_last_classif_history qry: %d rows", len(ret))
        return ret
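
The core trick above is the `rank()` window function: partitioning by object and ordering by date descending makes `rnk == 1` pick the most recent history row per object. In isolation, with a hypothetical `Histo` mapped class:

    from sqlalchemy import func, any_

    def last_history_rows(session, Histo, object_ids):
        sub = session.query(Histo.objid, Histo.classif_id,
                            func.rank().over(
                                partition_by=Histo.objid,
                                order_by=Histo.classif_date.desc()).label("rnk"))
        sub = sub.filter(Histo.objid == any_(object_ids)).subquery()
        # rnk == 1 keeps the latest row per objid (rank() keeps ties, if any)
        return session.query(sub.c.objid, sub.c.classif_id).filter(sub.c.rnk == 1).all()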
Example #4
 def __init__(self, session: Session, taxon_ids: ClassifIDListT):
     tf = Taxonomy.__table__.alias('tf')
     # bind = None  # For portable SQL, no 'ilike'
     bind = session.get_bind()
     select_list = [
         tf.c.id, tf.c.display_name, tf.c.name, tf.c.nbrobj, tf.c.nbrobjcum
     ]
     select_list.extend([
         text("t%d.name" % level)  # type:ignore
         for level in range(1, TaxonomyBO.MAX_TAXONOMY_LEVELS)
     ])
     qry = select(select_list, bind=bind)
     # Inject the recursive query, for getting parents
     _dumm, qry = TaxonomyBO._add_recursive_query(qry, tf, do_concat=False)
     qry = qry.where(tf.c.id == any_(taxon_ids))
      # Children are fetched via a separate query, see get_children() below
     logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
     res: ResultProxy = session.execute(qry)
     self.taxa: List[TaxonBO] = []
     for a_rec in res.fetchall():
         lst_rec = list(a_rec)
          an_id, display_name, db_name, nbobj1, nbobj2 = lst_rec[:5]
          lineage = [db_name] + [name for name in lst_rec[5:] if name]
         self.taxa.append(
             TaxonBO(an_id, display_name, db_name, nbobj1, nbobj2,
                     lineage))  # type:ignore
     self.get_children(session, self.taxa)
Example #5
    def _db_fetch(self, objids: List[int]) -> List[DBObjectTupleT]:
        """
            Do a DB read of given objects, with auxiliary objects.
            Thanks to 'contains_eager' calls, the objects are loaded into the SQLAlchemy session.
            :param objids: The object IDs to read.
            :return: One tuple per image, i.e. as many rows as there are images.
        """
        ret: Query = self.session.query(Project, Sample, Acquisition, Process, ObjectHeader, ObjectFields, Image)
        ret = ret.join(Sample, Project.all_samples).options(contains_eager(Project.all_samples))
        ret = ret.join(Acquisition, Sample.all_acquisitions)
        ret = ret.join(Process, Acquisition.process)
        ret = ret.join(ObjectHeader, Acquisition.all_objects)
        # Natural joins
        ret = ret.join(ObjectFields)
        ret = ret.join(Image, ObjectHeader.all_images).options(contains_eager(ObjectHeader.all_images))
        ret = ret.filter(ObjectHeader.objid == any_(objids))
        ret = ret.order_by(ObjectHeader.objid)
        ret = ret.order_by(Image.imgrank)

        if self.first_query:
            logger.info("Query: %s", str(ret))
            self.first_query = False

        with CodeTimer("Get Objects:", logger):
            objs = ret.all()

        # We get as many lines as images
        logger.info("NB ROWS JOIN=%d", len(objs))

        return objs
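
As the docstring says, `contains_eager` tells the ORM that the explicit JOIN already carries the related rows, so relationship collections are populated from this single result set instead of being lazy-loaded later, one SELECT per parent. A stripped-down sketch with a hypothetical Parent/Child mapping:

    from sqlalchemy.orm import contains_eager

    def fetch_parents_with_children(session, Parent, Child):
        qry = session.query(Parent)
        # The JOIN both filters and feeds Parent.children, no extra SELECTs later
        qry = qry.join(Child, Parent.children).options(contains_eager(Parent.children))
        return qry.all()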
Example #6
 def __init__(self, session: Session, prj_ids: ProjectIDListT, public: bool = False):
     # Query the project and load neighbours as well
     qry: Query = session.query(Project, ProjectPrivilege)
     qry = qry.outerjoin(ProjectPrivilege, Project.privs_for_members).options(
         contains_eager(Project.privs_for_members))
     qry = qry.outerjoin(User, ProjectPrivilege.user).options(
         contains_eager(ProjectPrivilege.user))
     qry = qry.filter(Project.projid == any_(prj_ids))
     self.projects = []
     done = set()
     with CodeTimer("%s BO projects query & init:" % len(prj_ids), logger):
         for a_proj, a_pp in qry.all():
             # The query yields duplicates so we need to filter
             if a_proj.projid not in done:
                 if public:
                     self.projects.append(ProjectBO(a_proj))
                 else:
                     self.projects.append(ProjectBO(a_proj).enrich())
                 done.add(a_proj.projid)
     # Add instruments
     with CodeTimer("%s set instruments:" % len(prj_ids), logger):
         instruments = DescribedInstrumentSet(session, prj_ids)
         for a_project in self.projects:
             instrums = instruments.by_project.get(a_project.projid)
             if instrums is not None:
                 a_project.instrument = ",".join(instrums)
Example #7
 def get_children(self, session: Session, taxa_list: List[TaxonBO]):
     # Enrich TaxonBOs with children
     bos_per_id = {a_bo.id: a_bo for a_bo in taxa_list}
     tch = Taxonomy.__table__.alias('tch')
     qry: Query = session.query(Taxonomy.id, tch.c.id)
     qry = qry.join(tch, tch.c.parent_id == Taxonomy.id)
     qry = qry.filter(Taxonomy.id == any_(list(bos_per_id.keys())))
     for an_id, a_child_id in qry.all():
         bos_per_id[an_id].children.append(a_child_id)
Example #8
 def get_cardinalities(self, session: Session):
     # Enrich TaxonBOs with number of objects. Due to ecotaxa/ecotaxa_dev#648, pick data from projects stats.
     bos_per_id = {a_bo.id: a_bo for a_bo in self.taxa}
     qry: Query = session.query(ProjectTaxoStat.id,
                                func.sum(ProjectTaxoStat.nbr_v))
     qry = qry.filter(ProjectTaxoStat.id == any_(list(bos_per_id.keys())))
     qry = qry.group_by(ProjectTaxoStat.id)
     for an_id, a_sum in qry.all():
         bos_per_id[an_id].nb_objects = a_sum
Example #9
 def get_projects_ids(self) -> ProjectIDListT:
     """
         Return the project IDs for the held sample IDs.
     """
     qry: Query = self.session.query(Project.projid).distinct(Project.projid)
     qry = qry.join(Sample, Project.all_samples)
     qry = qry.filter(Sample.sampleid == any_(self.ids))
     with CodeTimer("Prjs for %d samples: " % len(self.ids), logger):
         return [an_id[0] for an_id in qry.all()]
Example #10
 def update(self, params: Dict) -> int:
     """
         Update self's objects using given parameters, dict of column names and values.
     """
     # Update objects table
     obj_upd_qry: Update = ObjectHeader.__table__.update()
     obj_upd_qry = obj_upd_qry.where(
         ObjectHeader.objid == any_(self.object_ids))
     obj_upd_qry = obj_upd_qry.values(params)
     updated_objs = self.session.execute(obj_upd_qry).rowcount
     return updated_objs
Example #11
 def get_projects_ids(self) -> ProjectIDListT:
     """
          Return the project IDs for the owned object IDs.
     """
     qry: Query = self.session.query(Project.projid).distinct(
         Project.projid)
     qry = qry.join(Sample)
     qry = qry.join(Acquisition)
     qry = qry.join(ObjectHeader)
     qry = qry.filter(ObjectHeader.objid == any_(self.object_ids))
     with CodeTimer("Prjs for %d objs: " % len(self.object_ids), logger):
         return [an_id for an_id, in qry.all()]
Example #12
 def query(cls, session: Session, restrict_to: ClassifIDListT,
           priority_set: ClassifIDListT, display_name_filter: str,
           name_filters: List[str]):
     """
     :param session:
     :param restrict_to: If not None, limit the query to given IDs.
     :param priority_set: Regardless of MAX_MATCHES, these IDs must appear in the result if they match.
      :param display_name_filter: A LIKE pattern applied to lower(display_name).
      :param name_filters: Name fragments which must appear consecutively in the lineage.
      :return: The matching rows, at most MAX_MATCHES of them.
     """
     tf = Taxonomy.__table__.alias('tf')
     # bind = None  # For portable SQL, no 'ilike'
     bind = session.get_bind()
     # noinspection PyTypeChecker
     priority = case([(tf.c.id == any_(priority_set), text('0'))],
                     else_=text('1')).label('prio')
     qry = select([tf.c.taxotype, tf.c.id, tf.c.display_name, priority],
                  bind=bind)
     if len(name_filters) > 0:
         # Add to the query enough to get the full hierarchy for filtering
         concat_all, qry = cls._add_recursive_query(qry, tf, do_concat=True)
         # Below is quite expensive
         taxo_lineage = func.concat(*concat_all)
         name_filter = "%<" + "".join(
             name_filters)  # i.e. anywhere consecutively in the lineage
         qry = qry.where(taxo_lineage.ilike(name_filter))
     if restrict_to is not None:
         qry = qry.where(tf.c.id == any_(restrict_to))
     # We have index IS_TaxonomyDispNameLow so this lower() is for free
     qry = qry.where(
         func.lower(tf.c.display_name).like(display_name_filter))
     qry = qry.order_by(priority, func.lower(tf.c.display_name))
     qry = qry.limit(cls.MAX_MATCHES)
     logger.info("Taxo query: %s with params %s and %s ", qry,
                 display_name_filter, name_filters)
     res: Result = session.execute(qry)
     return res.fetchall()
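
The `case` expression above implements 'priority rows first': prioritized IDs map to 0, everything else to 1, and the labeled column is reused in ORDER BY, so the priority set cannot be pushed out by the LIMIT. Reduced to its core, on a hypothetical table:

    from sqlalchemy import case, select, text, any_

    def prioritized_select(tbl, priority_ids, pattern, max_matches):
        prio = case([(tbl.c.id == any_(priority_ids), text('0'))],
                    else_=text('1')).label('prio')
        qry = select([tbl.c.id, tbl.c.display_name, prio])
        qry = qry.where(tbl.c.display_name.like(pattern))
        # Prioritized rows sort first, so LIMIT cannot drop them
        qry = qry.order_by(prio, tbl.c.display_name).limit(max_matches)
        return qry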
Example #13
 def __init__(self, session: Session, taxon_ids: ClassifIDListT):
     tf = WoRMS.__table__.alias('tf')
     # bind = None  # Uncomment for portable SQL, no 'ilike'
     bind = session.get_bind()
     select_list = [tf.c.aphia_id, tf.c.scientificname]
     select_list.extend([
         text("t%d.aphia_id, t%d.scientificname" %
              (level, level))  # type:ignore
         for level in range(1, TaxonBOSetFromWoRMS.MAX_TAXONOMY_LEVELS)
     ])
     qry = select(select_list, bind=bind)
     # Inject a query on names and hierarchy
     # Produced SQL looks like:
     #       left join worms t1 on tf.parent_name_usage_id=t1.aphia_id
     #       left join worms t2 on t1.parent_name_usage_id=t2.aphia_id
     # ...
     #       left join worms t14 on t13.parent_name_usage_id=t14.aphia_id
     lev_alias = WoRMS.__table__.alias('t1')
     # Chain outer joins on Taxonomy, for parents
     # hook the first OJ to main select
     chained_joins = tf.join(
         lev_alias,
         lev_alias.c.aphia_id == tf.c.parent_name_usage_id,
         isouter=True)
     prev_alias = lev_alias
     for level in range(2, self.MAX_TAXONOMY_LEVELS):
         lev_alias = WoRMS.__table__.alias('t%d' % level)
         # hook each following OJ to previous one
         chained_joins = chained_joins.join(
             lev_alias,
             lev_alias.c.aphia_id == prev_alias.c.parent_name_usage_id,
             isouter=True)
         # Collect expressions
         prev_alias = lev_alias
     qry = qry.select_from(chained_joins)
     qry = qry.where(tf.c.aphia_id == any_(taxon_ids))
     logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
     res: Result = session.execute(qry)
     self.taxa = []
     for a_rec in res.fetchall():
         lst_rec = list(a_rec)
         lineage_id = [an_id for an_id in lst_rec[0::2] if an_id]
         lineage = [name for name in lst_rec[1::2] if name]
         biota_pos = lineage.index('Biota') + 1
         lineage = lineage[:biota_pos]
         lineage_id = lineage_id[:biota_pos]
         self.taxa.append(
             TaxonBO('P', lineage[0], 0, 0, lineage,
                     lineage_id))  # type:ignore
     self.get_children(session, self.taxa)
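
The loop above unrolls the parent hierarchy into a fixed chain of self LEFT JOINs (t1..t14), fetching a bounded-depth lineage in a single round trip, without a recursive CTE. The skeleton, on a hypothetical `nodes(id, parent_id, name)` table:

    from sqlalchemy import MetaData, Table, Column, Integer, String, select, text

    metadata = MetaData()
    nodes = Table("nodes", metadata,
                  Column("id", Integer, primary_key=True),
                  Column("parent_id", Integer),
                  Column("name", String))

    def lineage_select(max_levels: int):
        tf = nodes.alias("tf")
        cols = [tf.c.id, tf.c.name]
        cols.extend(text("t%d.name" % lvl) for lvl in range(1, max_levels))
        joined, prev = tf, tf
        for lvl in range(1, max_levels):
            lev = nodes.alias("t%d" % lvl)
            # LEFT JOIN so shallow lineages still yield a row, with NULL parents
            joined = joined.join(lev, lev.c.id == prev.c.parent_id, isouter=True)
            prev = lev
        return select(cols).select_from(joined)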
Example #14
 def update_all(self, params: Dict) -> int:
     """
         Update all self's objects using given parameters, dict of column names and values.
     """
     # Update objects table
     obj_upd_qry: Update = ObjectHeader.__table__.update()
     obj_upd_qry = obj_upd_qry.where(
         ObjectHeader.objid == any_(self.object_ids))
     obj_upd_qry = obj_upd_qry.values(params)
     updated_objs = self.session.execute(obj_upd_qry).rowcount
     # TODO: Cache upd
     # prj_id = self.get_projects_ids()[0]
     # ObjectCacheUpdater(prj_id).update_objects(self.object_ids, params)
     return updated_objs
Example #15
 def __init__(self, session: Session, taxon_ids: ClassifIDListT):
     tf = WoRMS.__table__.alias('tf')
     # bind = None  # For portable SQL, no 'ilike'
     bind = session.get_bind()
     select_list = [tf.c.aphia_id, tf.c.scientificname]
     select_list.extend([
         text("t%d.scientificname" % level)  # type:ignore
         for level in range(1, TaxonomyBO.MAX_TAXONOMY_LEVELS)
     ])
     qry = select(select_list, bind=bind)
     # Inject a query on names and hierarchy
     # Produced SQL looks like:
     #       left join worms t1 on tf.parent_name_usage_id=t1.aphia_id
     #       left join worms t2 on t1.parent_name_usage_id=t2.aphia_id
     # ...
     #       left join worms t14 on t13.parent_name_usage_id=t14.aphia_id
     lev_alias = WoRMS.__table__.alias('t1')
     # Chain outer joins on Taxonomy, for parents
     # hook the first OJ to main select
     chained_joins = tf.join(
         lev_alias,
         lev_alias.c.aphia_id == tf.c.parent_name_usage_id,
         isouter=True)
     prev_alias = lev_alias
     for level in range(2, self.MAX_TAXONOMY_LEVELS):
         lev_alias = WoRMS.__table__.alias('t%d' % level)
         # hook each following OJ to previous one
         chained_joins = chained_joins.join(
             lev_alias,
             lev_alias.c.aphia_id == prev_alias.c.parent_name_usage_id,
             isouter=True)
         # Collect expressions
         prev_alias = lev_alias
     qry = qry.select_from(chained_joins)
     qry = qry.where(tf.c.aphia_id == any_(taxon_ids))
     logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
     res: ResultProxy = session.execute(qry)
     self.taxa = []
     for a_rec in res.fetchall():
         lst_rec = list(a_rec)
         an_id, display_name = lst_rec.pop(0), lst_rec.pop(0)
         lineage = [name for name in lst_rec if name]
         # In WoRMS, the root is signaled by having itself as parent
          while len(lineage) > 1 and lineage[-1] == lineage[-2]:
             lineage.pop(-1)
         self.taxa.append(
             TaxonBO(an_id, display_name, display_name, 0, 0,
                     lineage))  # type:ignore
Example #16
 def _fetch_classifs_and_lock(self) -> Dict[int, Dict]:
     """
          Fetch, and DB-lock, self's objects.
      :return: The current classification fields of the rows, keyed by object id.
     """
     qry = select([
         ObjectHeader.objid, ObjectHeader.classif_auto_id,
         ObjectHeader.classif_auto_when, ObjectHeader.classif_auto_score,
         ObjectHeader.classif_id, ObjectHeader.classif_qual,
         ObjectHeader.classif_who, ObjectHeader.classif_when
     ]).with_for_update(key_share=True)
     qry = qry.where(ObjectHeader.objid == any_(self.object_ids))
     logger.info("Fetch with lock: %s", qry)
     res: Result = self.session.execute(qry)
     prev = {rec['objid']: rec for rec in res.fetchall()}
     return prev
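
`with_for_update(key_share=True)` locks the selected rows until the transaction ends; on PostgreSQL this combination should render as FOR NO KEY UPDATE, i.e. concurrent writers of the same rows are blocked while inserts of rows referencing them stay possible. The fetch-and-lock pattern in isolation, on a hypothetical `obj` table:

    from sqlalchemy import select, any_

    def fetch_and_lock(session, obj, ids):
        qry = select([obj.c.objid, obj.c.classif_id, obj.c.classif_qual])
        # Row locks are held until commit/rollback of the enclosing transaction
        qry = qry.with_for_update(key_share=True)
        qry = qry.where(obj.c.objid == any_(ids))
        return {rec['objid']: rec for rec in session.execute(qry).fetchall()}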
Example #17
 def full_match_aggregated(self, used_taxo_ids):
     subqry = self.session.query(
         Taxonomy.id,
         WoRMS.aphia_id,
         func.array_agg(
             WoRMS.status).over(partition_by=(Taxonomy.id,
                                              WoRMS.aphia_id)).label("acc"),
         func.count(Taxonomy.name).over(
             partition_by=(WoRMS.aphia_id)).label("cnt"),
     )
     subqry = subqry.join(WoRMS,
                          TaxonomyChangeService.match_with_extension())
     subqry = subqry.filter(Taxonomy.id == any_(used_taxo_ids))
     subqry = subqry.filter(Taxonomy.taxotype == 'P')
     subqry = subqry.subquery().alias("ids")
     return subqry
Example #18
    def reset_to_predicted(self):
        """
            Reset to Predicted state, keeping a log (i.e. history) of the previous change.
        """
        oh = ObjectHeader
        self.historize_classification(['V', 'D'])

        # Update objects table
        obj_upd_qry: Update = oh.__table__.update()
        obj_upd_qry = obj_upd_qry.where(
            and_(oh.objid == any_(self.object_ids),
                 (oh.classif_qual.in_(['V', 'D']))))
        obj_upd_qry = obj_upd_qry.values(classif_qual='P')
        nb_objs = self.session.execute(obj_upd_qry).rowcount
        logger.info(" %d out of %d rows reset to predicted", nb_objs,
                    len(self.object_ids))

        self.session.commit()
Example #19
 def __init__(self, session: Session, taxon_ids: ClassifIDListT):
     tf = Taxonomy.__table__.alias('tf')
     # bind = None  # For portable SQL, no 'ilike'
     bind = session.get_bind()
     select_list = [
         tf.c.taxotype,
         tf.c.nbrobj,
         tf.c.nbrobjcum,
         tf.c.display_name,
         tf.c.id,
         tf.c.name,
     ]
     select_list.extend([
         text("t%d.id, t%d.name" % (level, level))  # type:ignore
         for level in range(1, TaxonomyBO.MAX_TAXONOMY_LEVELS)
     ])
     qry = select(select_list, bind=bind)
     # Inject the recursive query, for getting parents
     _dumm, qry = TaxonomyBO._add_recursive_query(qry, tf, do_concat=False)
     qry = qry.where(tf.c.id == any_(taxon_ids))
      # Children are fetched via a separate query, see get_children() below
     logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
     res: Result = session.execute(qry)
     self.taxa: List[TaxonBO] = []
     for a_rec in res.fetchall():
         lst_rec = list(a_rec)
          cat_type, nbobj1, nbobj2, display_name = lst_rec[:4]
          del lst_rec[:4]
         lineage_id = [an_id for an_id in lst_rec[0::2] if an_id]
         lineage = [name for name in lst_rec[1::2] if name]
         # assert lineage_id[-1] in (1, 84960, 84959), "Unexpected root %s" % str(lineage_id[-1])
         self.taxa.append(
             TaxonBO(
                 cat_type,
                 display_name,
                 nbobj1,
                 nbobj2,  # type:ignore
                 lineage,
                 lineage_id  # type:ignore
             ))
     self.get_children(session)
     self.get_cardinalities(session)
Example #20
 def strict_match_subquery(session, used_taxo_ids,
                           phylo_or_morpho: Optional[str]):
     subqry = session.query(Taxonomy.name,
                            func.max(Taxonomy.id).label("id"),
                            WoRMS.aphia_id)
     subqry = subqry.join(WoRMS,
                          TaxonomyChangeService.match_with_extension())
     subqry = subqry.filter(Taxonomy.id == any_(used_taxo_ids))
     if phylo_or_morpho is not None:
         subqry = subqry.filter(Taxonomy.taxotype == text("'%s'" %
                                                          phylo_or_morpho))
     subqry = subqry.filter(WoRMS.status == text("'accepted'"))
     # Group to exclude multiple matches
     subqry = subqry.group_by(Taxonomy.name, WoRMS.aphia_id)
     subqry = subqry.having(
         and_(
             func.count(Taxonomy.name) == 1,
             func.count(WoRMS.aphia_id) == 1))
     subqry = subqry.subquery().alias("ids")
     return subqry
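
The GROUP BY/HAVING pair above is what makes the match 'strict': after grouping by (name, aphia_id), a pair backed by more than one row on either side fails the HAVING and is dropped. Schematically, with hypothetical tables and a hypothetical join condition:

    from sqlalchemy import func, and_

    def unambiguous_pairs(session, Taxo, Ext, join_cond):
        qry = session.query(Taxo.name, Ext.ext_id)
        qry = qry.join(Ext, join_cond)
        qry = qry.group_by(Taxo.name, Ext.ext_id)
        # Keep groups where both sides appear exactly once
        qry = qry.having(and_(func.count(Taxo.name) == 1,
                              func.count(Ext.ext_id) == 1))
        return qry.subquery().alias("ids")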
Example #21
    def _db_fetch(self, object_ids: ObjectIDListT) -> List[DBObjectTupleT]:
        """
            Do a DB read of given objects, with auxiliary objects.
            :param object_ids: The list of object IDs.
            :return: One tuple per object/image pair (Image may be None, thanks to the outer join).
        """
        # TODO: Depending on filter, the joins could be plain (not outer)
        # E.g. if asked for a set of samples
        ret: Query = self.session.query(ObjectHeader)
        ret = ret.join(Acquisition).join(Process).join(Sample)
        ret = ret.outerjoin(Image, ObjectHeader.all_images).outerjoin(ObjectCNNFeature).join(ObjectFields)
        ret = ret.filter(ObjectHeader.objid == any_(object_ids))
        ret = ret.order_by(ObjectHeader.objid, Image.imgid)
        ret = ret.with_entities(ObjectHeader, ObjectFields, ObjectCNNFeature, Image, Sample, Acquisition,
                                Process)

        if self.first_query:
            logger.info("Query: %s", str(ret))
            self.first_query = False

        return ret.all()
Example #22
 def read_user_stats(session: Session, prj_ids: ProjectIDListT) -> List[ProjectUserStats]:
     """
         Read the users (annotators) involved in each project.
          Also compute a summary of their activity. This can only be an estimate since, e.g.,
          imported data looks exactly the same as data produced by live actions.
     """
     # Activity count: Count 1 for present classification for a user per object.
     #  Of course, the classification date is the latest for the user.
     pqry: Query = session.query(Project.projid, User.id, User.name,
                                 func.count(ObjectHeader.objid),
                                 func.max(ObjectHeader.classif_when))
     pqry = pqry.join(Sample).join(Acquisition).join(ObjectHeader)
     pqry = pqry.join(User, User.id == ObjectHeader.classif_who)
     pqry = pqry.filter(Project.projid == any_(prj_ids))
     pqry = pqry.filter(ObjectHeader.classif_who == User.id)
     pqry = pqry.group_by(Project.projid, User.id)
     pqry = pqry.order_by(Project.projid, User.name)
     ret = []
     user_activities: Dict[UserIDT, UserActivity] = {}
     user_activities_per_project = {}
     stats_per_project = {}
     with CodeTimer("user present stats for %d projects, qry: %s:" % (len(prj_ids), str(pqry)), logger):
         last_prj = None
         for projid, user_id, user_name, cnt, last_date in pqry.all():
             last_date_str = last_date.replace(microsecond=0).isoformat()
             if projid != last_prj:
                 last_prj = projid
                 prj_stat = ProjectUserStats((projid, [], []))
                 ret.append(prj_stat)
                 user_activities = {}
                 # Store for second pass with history
                 stats_per_project[projid] = prj_stat
                 user_activities_per_project[projid] = user_activities
             prj_stat.annotators.append(MinimalUserBO((user_id, user_name)))
             user_activity = UserActivity((user_id, cnt, last_date_str))
             prj_stat.activities.append(user_activity)
             # Store for second pass
             user_activities[user_id] = user_activity
     # Activity count update: Add 1 for each entry in history for each user.
     # The dates in history are ignored, except for users which do not appear in first resultset.
     hqry: Query = session.query(Project.projid, User.id, User.name,
                                 func.count(ObjectsClassifHisto.objid),
                                 func.max(ObjectsClassifHisto.classif_date))
     hqry = hqry.join(Sample).join(Acquisition).join(ObjectHeader).join(ObjectsClassifHisto)
     hqry = hqry.join(User, User.id == ObjectsClassifHisto.classif_who)
     hqry = hqry.filter(Project.projid == any_(prj_ids))
     hqry = hqry.group_by(Project.projid, User.id)
     hqry = hqry.order_by(Project.projid, User.name)
     with CodeTimer("user history stats for %d projects, qry: %s:" % (len(prj_ids), str(hqry)), logger):
         last_prj = None
         for projid, user_id, user_name, cnt, last_date in hqry.all():
             last_date_str = last_date.replace(microsecond=0).isoformat()
             if projid != last_prj:
                 last_prj = projid
                 # Just in case
                 if projid not in user_activities_per_project:
                     continue
                 # Get stored data for the project
                 user_activities = user_activities_per_project[projid]
                 prj_stat = stats_per_project[projid]
             already_there = user_activities.get(user_id)
             if already_there is not None:
                 # A user in both history and present classification
                 already_there.nb_actions += cnt
             else:
                 # A user _only_ in history
                 prj_stat.annotators.append(MinimalUserBO((user_id, user_name)))
                 user_activity = UserActivity((user_id, cnt, last_date_str))
                 prj_stat.activities.append(user_activity)
                 user_activities[user_id] = user_activity
     return ret
Example #23
    def _do_merge(self, dest_prj: Project):
        """
            Real merge operation.
        """
        # Loop over involved tables and remap free columns
        for a_mapped_tbl in MAPPED_TABLES:
            remaps = self.remap_operations.get(a_mapped_tbl)
            # Do the remappings if any
            if remaps is not None:
                logger.info("Doing re-mapping in %s: %s",
                            a_mapped_tbl.__tablename__, remaps)
                ProjectBO.remap(self.session, self.src_prj_id, a_mapped_tbl,
                                remaps)

        # Collect orig_id
        dest_parents = InBundle.fetch_existing_parents(self.ro_session,
                                                       prj_id=self.prj_id)
        src_parents = InBundle.fetch_existing_parents(self.ro_session,
                                                      prj_id=self.src_prj_id)

        # Compute needed projections in order to keep orig_id uniqueness
        common_samples = self.get_ids_for_common_orig_id(
            Sample, dest_parents, src_parents)
        common_acquisitions = self.get_ids_for_common_orig_id(
            Acquisition, dest_parents, src_parents)

        # Align foreign keys, to Project, Sample and Acquisition
        for a_fk_to_proj_tbl in [
                Sample, Acquisition, ObjectHeader, ParticleProject
        ]:
            upd: Query = self.session.query(a_fk_to_proj_tbl)
            if a_fk_to_proj_tbl == Sample:
                # Move (i.e. change project) samples which are 'new' from merged project,
                #    so take all of them from src project...
                upd = upd.filter(
                    a_fk_to_proj_tbl.projid == self.src_prj_id)  # type: ignore
                # ...but not the ones with same orig_id, which are presumably equal.
                upd = upd.filter(
                    Sample.sampleid != all_(list(common_samples.keys())))
                # And update the column
                upd_values = {'projid': self.prj_id}
            elif a_fk_to_proj_tbl == Acquisition:
                # Acquisitions which were created, in source, under new samples, will 'follow'
                #    them during above move, thanks to the FK on acq_sample_id.
                # BUT some acquisitions were potentially created in source project, inside
                #    forked samples. They need to be attached to the dest (self) corresponding sample.
                if len(common_samples) > 0:
                    # Build a CTE with values for the update
                    smp_cte = values_cte("upd_smp", ("src_id", "dst_id"),
                                         [(k, v)
                                          for k, v in common_samples.items()])
                    smp_subqry = self.session.query(smp_cte.c.column2).filter(
                        smp_cte.c.column1 == Acquisition.acq_sample_id)
                    upd_values = {
                        'acq_sample_id':
                        func.coalesce(
                            smp_subqry.scalar_subquery(),  # type: ignore
                            Acquisition.acq_sample_id)
                    }
                    upd = upd.filter(Acquisition.acq_sample_id == any_(
                        list(common_samples.keys())))
                    # upd = upd.filter(Acquisition.acquisid != all_(list(common_acquisitions.keys())))
                if len(common_samples) == 0:
                    # Nothing to do. There were only new samples, all of them moved to self.
                    continue
            elif a_fk_to_proj_tbl == ObjectHeader:
                # Generated SQL looks like:
                # with upd_acq (src_id, dst_id) as (values (5,6), (7,8))
                # update obj_head
                #    set acquisid = coalesce((select dst_id from upd_acq where acquisid=src_id), acquisid)
                #  where acquisid in (select src_id from upd_acq)
                if len(common_acquisitions) > 0:
                    # Object must follow its acquisition
                    acq_cte = values_cte(
                        "upd_acq", ("src_id", "dst_id"),
                        [(k, v) for k, v in common_acquisitions.items()])
                    acq_subqry = self.session.query(acq_cte.c.column2).filter(
                        acq_cte.c.column1 == ObjectHeader.acquisid)
                    upd_values = {
                        'acquisid':
                        func.coalesce(
                            acq_subqry.scalar_subquery(),  # type:ignore
                            ObjectHeader.acquisid)
                    }
                    upd = upd.filter(ObjectHeader.acquisid == any_(
                        list(common_acquisitions.keys())))
                if len(common_acquisitions) == 0:
                    # Nothing to do. There were only new acquisitions, all of them moved to self.
                    continue
            else:
                # For Particle project
                upd = upd.filter(
                    ParticleProject.projid == self.src_prj_id)  # type: ignore
                upd_values = {'projid': self.prj_id}
            rowcount = upd.update(values=upd_values, synchronize_session=False)
            table_name = a_fk_to_proj_tbl.__tablename__  # type: ignore
            logger.info("Update in %s: %s rows", table_name, rowcount)

        # Acquisition & twin Process have followed their enclosing Sample

        # Remove the parents which are duplicate from orig_id point of view
        for a_fk_to_proj_tbl in [Acquisition, Sample]:
            to_del: Query = self.session.query(a_fk_to_proj_tbl)
            if a_fk_to_proj_tbl == Acquisition:
                # Remove conflicting acquisitions, they should be empty?
                to_del = to_del.filter(Acquisition.acquisid == any_(
                    list(common_acquisitions.keys())))  # type: ignore
            elif a_fk_to_proj_tbl == Sample:
                # Remove conflicting samples
                to_del = to_del.filter(Sample.sampleid == any_(
                    list(common_samples.keys())))  # type: ignore
            rowcount = to_del.delete(synchronize_session=False)
            table_name = a_fk_to_proj_tbl.__tablename__  # type: ignore
            logger.info("Delete in %s: %s rows", table_name, rowcount)

        self.dest_augmented_mappings.write_to_project(dest_prj)

        ProjectPrivilegeBO.generous_merge_into(self.session, self.prj_id,
                                               self.src_prj_id)

        # Completely erase the source project
        ProjectBO.delete(self.session, self.src_prj_id)
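
The commented SQL in the ObjectHeader branch is the heart of the remapping: a VALUES CTE acts as an inline src_id -> dst_id mapping table, and COALESCE keeps the existing FK whenever no mapping row applies. A raw-SQL rendition of the same statement (hypothetical helper, integer IDs interpolated client-side for illustration only):

    from sqlalchemy import text

    def remap_acquisitions(session, mapping):
        # mapping: {src_acquis_id: dst_acquis_id}, integer keys and values assumed
        pairs = ",".join("(%d,%d)" % kv for kv in mapping.items())
        sql = ("with upd_acq (src_id, dst_id) as (values %s) "
               "update obj_head "
               "   set acquisid = coalesce((select dst_id from upd_acq "
               "                             where acquisid = src_id), acquisid) "
               " where acquisid in (select src_id from upd_acq)" % pairs)
        return session.execute(text(sql)).rowcount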
Example #24
 def add_filter(self, upd):
     return upd.filter(Acquisition.acquisid == any_(self.ids))
Example #25
    def matching(
            self, _current_user_id: int,
            params: Dict[str, Any]) -> List[Tuple[Taxonomy, Optional[WoRMS]]]:
        """
            Return the list of matching entries b/w Taxonomy and WoRMS.
        """
        ret: List[Tuple[Taxonomy, Optional[WoRMS]]] = []
        taxo_ids_qry: Query = self.session.query(ProjectTaxoStat.id).distinct()
        taxo_ids_qry = taxo_ids_qry.filter(ProjectTaxoStat.nbr > 0)
        used_taxo_ids = [an_id for (an_id, ) in taxo_ids_qry.all()]

        # No security check. TODO?
        case1 = "case1" in params
        """ Taxa with same name on both sides, Phylo in EcoTaxa and accepted in WoRMS """
        case2 = "case2" in params
        """ Taxa with same name on both sides, Morpho in EcoTaxa and accepted in WoRMS """
        case3 = "case3" in params
        """ Taxa with same name on both sides, Phylo in EcoTaxa and NOT accepted in WoRMS,
         and there is no equivalent accepted match """
        case31 = "case31" in params
        """ Same as case3, but the WoRMS match has no valid_name """
        case4 = "case4" in params
        """ Several matches in WoRMS """
        case5 = "case5" in params
        """ No match, phylo """
        case6 = "case6" in params
        """ No match, morpho """

        if case1:
            res = self.strict_match(self.session, used_taxo_ids)
            # Format result
            for taxo, worms in res:
                ret.append((taxo, worms))
        elif case2:
            subqry = TaxonomyChangeService.strict_match_subquery(
                self.session, used_taxo_ids, phylo_or_morpho="M")
            qry: Query = self.session.query(Taxonomy, WoRMS)
            qry = qry.join(subqry, subqry.c.id == Taxonomy.id)
            qry = qry.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
            logger.info("matching qry:%s", str(qry))
            res = qry.all()
            # Format result
            for taxo, worms in res:
                ret.append((taxo, worms))
        elif case3:
            # statuses = ["temporary name", "nomen nudum", "interim unpublished",
            #             "nomen dubium", "unaccepted", "taxon inquirendum",
            #             "accepted", "uncertain", "alternate representation"]
            # Match but the match/all matches are not accepted
            subqry = self.full_match_aggregated(used_taxo_ids)

            qry3: Query = self.session.query(Taxonomy, WoRMS)
            qry3 = qry3.join(subqry, subqry.c.id == Taxonomy.id)
            qry3 = qry3.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
            qry3 = qry3.filter(
                not_(
                    subqry.c.acc.op('@>')(text("ARRAY['accepted'::varchar]"))))
            qry3 = qry3.filter(WoRMS.valid_name.isnot(None))
            # Status filter for clarity
            # flt = statuses[4]
            # status_filt = text("ARRAY['%s'::varchar]" % flt)
            # qry3 = qry3.filter(subqry.c.acc.op('@>')(status_filt))
            logger.info("matching qry:%s", str(qry3))
            res3 = qry3.all()
            # Format result
            for taxo, worms in res3:
                ret.append((taxo, worms))
        elif case31:
            # Match but the match/all matches are not accepted
            subqry = self.full_match_aggregated(used_taxo_ids)

            qry31: Query = self.session.query(Taxonomy, WoRMS)
            qry31 = qry31.join(subqry, subqry.c.id == Taxonomy.id)
            qry31 = qry31.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
            qry31 = qry31.filter(
                not_(
                    subqry.c.acc.op('@>')(text("ARRAY['accepted'::varchar]"))))
            qry31 = qry31.filter(WoRMS.valid_name.is_(None))
            logger.info("matching qry:%s", str(qry31))
            res31 = qry31.all()
            # Format result
            for taxo, worms in res31:
                ret.append((taxo, worms))
        elif case4:
            subqry = self.full_match_aggregated(used_taxo_ids)

            qry4: Query = self.session.query(Taxonomy, WoRMS)
            qry4 = qry4.join(subqry, subqry.c.id == Taxonomy.id)
            qry4 = qry4.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
            qry4 = qry4.filter(subqry.c.cnt > 1)
            # Several accepted matches
            # subqry = self.session.query(Taxonomy.name, func.max(Taxonomy.id).label("id"), WoRMS.aphia_id)
            # subqry = subqry.join(WoRMS, TaxonomyChangeService.match_with_extension())
            # subqry = subqry.filter(Taxonomy.id == any_(used_taxo_ids))
            # subqry = subqry.filter(Taxonomy.taxotype == 'P')
            # subqry = subqry.filter(WoRMS.status == 'accepted')
            # # Group to compute multiple matches
            # subqry = subqry.group_by(Taxonomy.name, WoRMS.aphia_id)
            # subqry = subqry.having(or_(func.count(Taxonomy.name) > 1,
            #                            func.count(WoRMS.aphia_id) > 1))
            # subqry = subqry.subquery().alias("ids")
            #
            # qry4: Query = self.session.query(Taxonomy, WoRMS)
            # qry4 = qry4.join(subqry, subqry.c.id == Taxonomy.id)
            # qry4 = qry4.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
            logger.info("matching qry:%s", str(qry4))
            res = qry4.all()
            # Format result
            for taxo, worms in res:
                ret.append((taxo, worms))
        elif case5:
            # No match, phylo
            subqry = TaxonomyChangeService.strict_match_subquery(
                self.session, used_taxo_ids, phylo_or_morpho=None)
            subqry2 = self.full_match_aggregated(used_taxo_ids)

            qry5: Query = self.session.query(Taxonomy)
            qry5 = qry5.filter(Taxonomy.id == any_(used_taxo_ids))
            qry5 = qry5.filter(Taxonomy.taxotype == 'P')
            qry5 = qry5.filter(
                not_(Taxonomy.id.in_(self.session.query(subqry.c.id))))
            qry5 = qry5.filter(
                not_(Taxonomy.id.in_(self.session.query(subqry2.c.id))))
            logger.info("matching qry:%s", str(qry5))
            res5 = qry5.all()
            # Format result
            for taxo in res5:
                ret.append((taxo, None))
        elif case6:
            # No match, morpho
            subqry = TaxonomyChangeService.strict_match_subquery(
                self.session, used_taxo_ids, phylo_or_morpho=None)
            qry6: Query = self.session.query(Taxonomy)
            qry6 = qry6.filter(Taxonomy.id == any_(used_taxo_ids))
            qry6 = qry6.filter(Taxonomy.taxotype == 'M')
            qry6 = qry6.filter(
                not_(Taxonomy.id.in_(self.session.query(subqry.c.id))))
            logger.info("matching qry:%s", str(qry6))
            res6 = qry6.all()
            # Format result
            for taxo in res6:
                ret.append((taxo, None))

        return ret
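
Cases 3 and 31 hinge on a PostgreSQL array trick: `array_agg(status)` gathers every match status of a group into one array, and the `@>` (array contains) operator, reached through `.op('@>')`, tests whether 'accepted' is among them. Reduced to the essential, with a hypothetical mapped class `T` (plain GROUP BY here, where the code above uses a window function):

    from sqlalchemy import func, not_, text

    def groups_without_accepted(session, T):
        sub = session.query(T.group_id,
                            func.array_agg(T.status).label("acc"))
        sub = sub.group_by(T.group_id).subquery()
        # '@>' is PG array containment: does acc contain ARRAY['accepted']?
        return session.query(sub.c.group_id).filter(
            not_(sub.c.acc.op('@>')(text("ARRAY['accepted'::varchar]"))))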
Example #26
 def add_filter(self, upd):
     if "obj_head." in str(upd):
         ret = upd.filter(ObjectHeader.objid == any_(self.object_ids))
     else:
         ret = upd.filter(ObjectFields.objfid == any_(self.object_ids))
     return ret
Example #27
    def classify_validate(self, user_id: UserIDT, classif_ids: ClassifIDListT, wanted_qualif: str) \
            -> Tuple[int, Dict[Tuple, ObjectIDListT]]:
        """
            Set current classifications in self and/or validate current classification.
            :param user_id: The User who did these changes.
            :param classif_ids: One category id for each of the object ids in self. -1 means "keep current".
            :param wanted_qualif: Validate or Dubious
            :return: The number of updated rows, and a summary of changes for MRU and logging.
        """
        # Gather state of classification, for impacted objects, before the change. Keep a lock on rows.
        qry = select([
            ObjectHeader.objid, ObjectHeader.classif_auto_id,
            ObjectHeader.classif_auto_when, ObjectHeader.classif_auto_score,
            ObjectHeader.classif_id, ObjectHeader.classif_qual,
            ObjectHeader.classif_who, ObjectHeader.classif_when
        ]).with_for_update(key_share=True)
        qry = qry.where(ObjectHeader.objid == any_(self.object_ids))
        logger.info("Fetch with lock: %s", qry)
        res: ResultProxy = self.session.execute(qry)
        prev = {rec['objid']: rec for rec in res.fetchall()}

        # Compute a diff b/w present and wanted values, both for the obj_head update and for preparing the _stat ones
        # Group the updates as lots of them are identical
        updates: Dict[Tuple, EnumeratedObjectSet] = {}
        all_changes: OrderedDict[Tuple, List[int]] = OrderedDict()
        # A bit of obsessive optimization
        classif_id_col = ObjectHeader.classif_id.name
        classif_qual_col = ObjectHeader.classif_qual.name
        classif_who_col = ObjectHeader.classif_who.name
        classif_when_col = ObjectHeader.classif_when.name
        for obj_id, v in zip(self.object_ids, classif_ids):
            prev_obj = prev[obj_id]
            prev_classif_id: Optional[int] = prev_obj['classif_id']
            new_classif_id: Optional[int]
            if v == -1:  # special value from validate all
                # Arrange that no change can happen for this field
                # Note: prev_classif_id can be None
                new_classif_id = prev_classif_id
            else:
                new_classif_id = v
            prev_classif_qual = prev_obj['classif_qual']
            if (prev_classif_id == new_classif_id
                    and prev_classif_qual == wanted_qualif
                    and prev_obj['classif_who'] == user_id):
                continue
            # There was at least 1 field change for this object
            an_update = updates.setdefault(
                (new_classif_id, wanted_qualif),
                EnumeratedObjectSet(self.session, []))
            an_update.add(obj_id)
            # Compact changes, grouped by operation
            change_key = (prev_classif_id, prev_classif_qual, new_classif_id,
                          wanted_qualif)
            for_this_change = all_changes.setdefault(change_key, [])
            for_this_change.append(obj_id)
            # Keep the recently used in first
            all_changes.move_to_end(change_key, last=False)

        if len(updates) == 0:
            # Nothing to do
            return 0, all_changes

        # Update of obj_head, grouped by similar operations.
        nb_updated = 0
        sql_now = text("now()")
        for (new_classif_id, wanted_qualif), an_obj_set in updates.items():
            # Historize the updated rows (can be a lot!)
            an_obj_set.historize_classification()
            row_upd = {
                classif_id_col: new_classif_id,
                classif_qual_col: wanted_qualif,
                classif_who_col: user_id,
                classif_when_col: sql_now
            }
            # Do the update itself
            nb_updated += an_obj_set.update(row_upd)

        logger.info("%d rows updated in %d queries", nb_updated, len(updates))

        # Return statuses
        return nb_updated, all_changes
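
Two bookkeeping structures drive the method above: `updates` pools the objects whose (new id, qualification) change is identical, so one UPDATE serves the whole pool, while `all_changes` is an OrderedDict kept in most-recently-used order via `move_to_end(..., last=False)`. The grouping skeleton, stripped of the DB parts and of the previous-state half of the change key:

    from collections import OrderedDict
    from typing import Dict, List, Tuple

    def group_changes(object_ids, classif_ids, wanted_qualif):
        updates: Dict[Tuple, List[int]] = {}
        all_changes: 'OrderedDict[Tuple, List[int]]' = OrderedDict()
        for obj_id, new_id in zip(object_ids, classif_ids):
            change_key = (new_id, wanted_qualif)
            # One UPDATE per distinct (target id, qualification) pair
            updates.setdefault(change_key, []).append(obj_id)
            all_changes.setdefault(change_key, []).append(obj_id)
            # Most recently touched change comes first, MRU-style
            all_changes.move_to_end(change_key, last=False)
        return updates, all_changes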
Example #28
 def add_filter(self, upd):
     return upd.filter(Sample.sampleid == any_(self.ids))
Example #29
 def add_filter(self, upd):
     return upd.filter(Process.processid == any_(self.ids))