Example #1
    def generous_merge_into(cls, session: Session, dest_prj_id: int,
                            src_prj_id: int):
        """
            Merge privileges from source project into destination project.
        """
        # Each user present in both projects gets the highest of the two privileges.
        # TODO: Arguable
        sql = text("""
               UPDATE projectspriv ppdst
                  SET privilege = CASE WHEN 'Manage' IN (ppsrc.privilege, ppdst.privilege) 
                                           THEN 'Manage'
                                       WHEN 'Annotate' IN (ppsrc.privilege, ppdst.privilege) 
                                           THEN 'Annotate'
                                       ELSE 'View' 
                                  END
                 FROM projectspriv ppsrc
                WHERE ppsrc.projid = :src_prj 
                  AND ppdst.projid = :dst_prj 
                  AND ppsrc.member = ppdst.member""")
        session.execute(sql, {"dst_prj": dest_prj_id, "src_prj": src_prj_id})
        # Users who were only in the source project get their privileges transferred to the destination
        # TODO: Arguable
        sql = text("""
                UPDATE projectspriv
                   SET projid = :dst_prj 
                 WHERE projid = :src_prj 
                   AND member NOT IN (SELECT member 
                                        FROM projectspriv 
                                       WHERE projid = :dst_prj)""")

        session.execute(sql, {"dst_prj": dest_prj_id, "src_prj": src_prj_id})
Example #2
 def incremental_update_taxo_stats(cls, session: Session, prj_id: int, collated_changes: Dict):
     """
         Avoid recomputing the full stats for a project (which can take a while).
         Instead, apply deltas, which are known in this context.
         TODO: All SQL to SQLAlchemy form
     """
     needed_ids = list(collated_changes.keys())
     # Lock the taxonomy rows to prevent concurrent re-entry; during validation only a handful of them are involved.
     pts_sql = """SELECT id
                        FROM taxonomy
                       WHERE id = ANY(:ids)
                      FOR NO KEY UPDATE
         """
     session.execute(text(pts_sql), {"ids": needed_ids})
     # Lock the rows we are going to update, including -1 for unclassified
     pts_sql = """SELECT id, nbr
                        FROM projects_taxo_stat 
                       WHERE projid = :prj
                         AND id = ANY(:ids)
                      FOR NO KEY UPDATE"""
     res = session.execute(text(pts_sql), {"prj": prj_id, "ids": needed_ids})
     ids_in_db = {classif_id: nbr for (classif_id, nbr) in res.fetchall()}
     ids_not_in_db = set(needed_ids).difference(ids_in_db.keys())
     if len(ids_not_in_db) > 0:
         # Insert rows for missing IDs
         pts_ins = """INSERT INTO projects_taxo_stat(projid, id, nbr, nbr_v, nbr_d, nbr_p) 
                          SELECT :prj, COALESCE(obh.classif_id, -1), COUNT(*) nbr, 
                                 COUNT(CASE WHEN obh.classif_qual = 'V' THEN 1 END) nbr_v,
                                 COUNT(CASE WHEN obh.classif_qual = 'D' THEN 1 END) nbr_d,
                                 COUNT(CASE WHEN obh.classif_qual = 'P' THEN 1 END) nbr_p
                            FROM obj_head obh
                            JOIN acquisitions acq ON acq.acquisid = obh.acquisid 
                            JOIN samples sam ON sam.sampleid = acq.acq_sample_id AND sam.projid = :prj 
                           WHERE COALESCE(obh.classif_id, -1) = ANY(:ids)
                        GROUP BY obh.classif_id"""
         session.execute(text(pts_ins), {'prj': prj_id, 'ids': list(ids_not_in_db)})
     # Apply delta
     for classif_id, chg in collated_changes.items():
         if classif_id in ids_not_in_db:
             # The line was created just above, with OK values
             continue
         if ids_in_db[classif_id] + chg['n'] == 0:
             # The delta means 0 for this taxon in this project, delete the line
             sqlparam = {'prj': prj_id, 'cid': classif_id}
             ts_sql = """DELETE FROM projects_taxo_stat 
                              WHERE projid = :prj AND id = :cid"""
         else:
             # General case
             sqlparam = {'prj': prj_id, 'cid': classif_id,
                         'nul': chg['n'], 'val': chg['V'], 'dub': chg['D'], 'prd': chg['P']}
             ts_sql = """UPDATE projects_taxo_stat 
                                SET nbr=nbr+:nul, nbr_v=nbr_v+:val, nbr_d=nbr_d+:dub, nbr_p=nbr_p+:prd 
                              WHERE projid = :prj AND id = :cid"""
         session.execute(text(ts_sql), sqlparam)
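The collated_changes argument is assumed to map each classification id (-1 for unclassified) to signed deltas under the 'n', 'V', 'D' and 'P' keys read above; a hypothetical call (values and the owning class name are assumptions):

# Three objects move from unclassified to taxon 1234 (2 validated, 1 predicted).
changes = {1234: {'n': 3, 'V': 2, 'D': 0, 'P': 1},
           -1: {'n': -3, 'V': 0, 'D': 0, 'P': 0}}
ProjectBO.incremental_update_taxo_stats(session, prj_id=42, collated_changes=changes)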
Example #3
 def historize_classification(self, only_qual=None, manual=True):
     """
        Copy current classification information into history table, for all rows in self.
        :param only_qual: If set, only historize for current rows with this classification.
        :param manual: If set, historize manual entries; otherwise, pick automatic ones.
     """
     # Short aliases to lighten the SQLA expressions
     oh = ObjectHeader
     och = ObjectsClassifHisto
     # What we want to historize, as a subquery
     if manual:
         # What we want to historize, as a subquery
         sel_subqry = select([
             oh.objid, oh.classif_when,
             text("'M'"), oh.classif_id, oh.classif_qual, oh.classif_who
         ])
         if only_qual is not None:
             qual_cond = oh.classif_qual.in_(only_qual)
         else:
             qual_cond = true()
         sel_subqry = sel_subqry.where(
             and_(oh.objid == any_(self.object_ids),
                  oh.classif_when.isnot(None), qual_cond))
         ins_columns = [
             och.objid, och.classif_date, och.classif_type, och.classif_id,
             och.classif_qual, och.classif_who
         ]
     else:
         # What we want to historize, as a subquery
         sel_subqry = select([
             oh.objid, oh.classif_auto_when,
             text("'A'"), oh.classif_auto_id, oh.classif_qual,
             oh.classif_auto_score
         ])
         sel_subqry = sel_subqry.where(
             and_(oh.objid == any_(self.object_ids),
                  oh.classif_auto_id.isnot(None),
                  oh.classif_auto_when.isnot(None)))
         ins_columns = [
             och.objid, och.classif_date, och.classif_type, och.classif_id,
             och.classif_qual, och.classif_score
         ]
     # Insert into the log table
     ins_qry: Insert = pg_insert(och.__table__)
     ins_qry = ins_qry.from_select(ins_columns, sel_subqry)
     ins_qry = ins_qry.on_conflict_do_nothing(
         constraint='objectsclassifhisto_pkey')
     # TODO: mypy crashes due to pg_dialect below
     # logger.info("Histo query: %s", ins_qry.compile(dialect=pg_dialect()))
     nb_objs = self.session.execute(ins_qry).rowcount
     logger.info(" %d out of %d rows copied to log", nb_objs,
                 len(self.object_ids))
     return oh
Example #4
    def remap(session: Session, prj_id: int, table: MappedTableTypeT, remaps: List[RemapOp]):
        """
            Apply remapping operations to the given table for the given project.
        """
        # Do the remapping, including blanking of unused columns
        values = {a_remap.to: text(a_remap.frm) if a_remap.frm is not None else a_remap.frm
                  for a_remap in remaps}
        qry: Query = session.query(table)
        samples_4_prj: Query
        acqs_4_samples: Query
        if table == Sample:
            qry = qry.filter(Sample.projid == prj_id)  # type: ignore
        elif table == Acquisition:
            samples_4_prj = Query(Sample.sampleid).filter(Sample.projid == prj_id)
            qry = qry.filter(Acquisition.acq_sample_id.in_(samples_4_prj))  # type: ignore
        elif table == Process:
            samples_4_prj = Query(Sample.sampleid).filter(Sample.projid == prj_id)
            acqs_4_samples = Query(Acquisition.acquisid).filter(Acquisition.acq_sample_id.in_(samples_4_prj))
            qry = qry.filter(Process.processid.in_(acqs_4_samples))  # type: ignore
        elif table == ObjectFields:
            samples_4_prj = Query(Sample.sampleid).filter(Sample.projid == prj_id)
            acqs_4_samples = Query(Acquisition.acquisid).filter(Acquisition.acq_sample_id.in_(samples_4_prj))
            objs_for_acqs: Query = Query(ObjectHeader.objid).filter(ObjectHeader.acquisid.in_(acqs_4_samples))
            qry = qry.filter(ObjectFields.objfid.in_(objs_for_acqs))  # type: ignore
        qry = qry.update(values=values, synchronize_session=False)

        logger.info("Remap query for %s: %s", table.__tablename__, qry)
Example #5
 def read_taxo_stats(session: Session,
                     prj_ids: ProjectIDListT,
                     taxa_ids: Union[str, ClassifIDListT]) -> List[ProjectTaxoStats]:
     sql = """
     SELECT pts.projid, ARRAY_AGG(pts.id) as ids, 
            SUM(CASE WHEN pts.id = -1 THEN pts.nbr ELSE 0 END) as nb_u, 
            SUM(pts.nbr_v) as nb_v, SUM(pts.nbr_d) as nb_d, SUM(pts.nbr_p) as nb_p
       FROM projects_taxo_stat pts
      WHERE pts.projid = ANY(:ids)"""
     params: Dict[str, Any] = {'ids': prj_ids}
     if len(taxa_ids) > 0:
         if taxa_ids == 'all':
             pass
         else:
             sql += " AND pts.id = ANY(:tids)"
             params["tids"] = taxa_ids
     sql += """
     GROUP BY pts.projid"""
     if len(taxa_ids) > 0:
         sql += ", pts.id"
     res: Result = session.execute(text(sql), params)
     with CodeTimer("stats for %d projects:" % len(prj_ids), logger):
         ret = [ProjectTaxoStats(rec) for rec in res.fetchall()]
     for a_stat in ret:
         a_stat.used_taxa.sort()
     return ret
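taxa_ids accepts either the literal 'all' or a list of category ids; both call styles are sketched below (id values are illustrative, the function is called bare for brevity):

some_stats = read_taxo_stats(session, prj_ids=[42, 43], taxa_ids=[8456, 1234])
all_stats = read_taxo_stats(session, prj_ids=[42, 43], taxa_ids='all')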
Example #6
 def get_bounding_geo(cls, session: Session, project_ids: ProjectIDListT) -> Iterable[float]:
     # TODO: Why use the view?
     sql = ("SELECT min(o.latitude), max(o.latitude), min(o.longitude), max(o.longitude)"
            "  FROM objects o "
            " WHERE o.projid = ANY(:prj)")
     res: Result = session.execute(text(sql), {"prj": project_ids})
     vals = res.first()
     assert vals
     return [a_val for a_val in vals]
Example #7
 def get_date_range(cls, session: Session, project_ids: ProjectIDListT) -> Iterable[datetime]:
     # TODO: Why use the view?
     sql = ("SELECT min(o.objdate), max(o.objdate)"
            "  FROM objects o "
            " WHERE o.projid = ANY(:prj)")
     res: Result = session.execute(text(sql), {"prj": project_ids})
     vals = res.first()
     assert vals
     return [a_val for a_val in vals]
Example #8
 def create_or_link_slaves(how: ImportHow, session: Session,
                           object_head_to_write, object_fields_to_write,
                           image_to_write) -> int:
     """
         Create, link or update slave entities, i.e. head, fields, image.
         Also update them... TODO: Split/fork the def
         :return: the number of new records
     """
     if object_head_to_write.orig_id in how.existing_objects:
         # Set the objid which will be copied for storing the image; the object itself
         # will not be stored, due to the returned value.
         objid = how.existing_objects[object_head_to_write.orig_id]
         object_head_to_write.objid = objid
         if how.can_update_only:
             # noinspection DuplicatedCode
             for a_cls, its_pk, an_upd in zip(
                 [ObjectHeader, ObjectFields], ['objid', 'objfid'],
                 [object_head_to_write, object_fields_to_write]):
                 filter_for_id = text("%s=%d" % (its_pk, objid))
                 # Fetch the record to update
                 obj = session.query(a_cls).filter(filter_for_id).first()
                 if a_cls == ObjectHeader:
                     # Possibly refresh the sun position
                     if an_upd.nb_fields_from(USED_FIELDS_FOR_SUNPOS) > 0:
                         # Give the bean enough data for computation
                         for a_field in USED_FIELDS_FOR_SUNPOS.difference(
                                 an_upd.keys()):
                             an_upd[a_field] = getattr(obj, a_field)
                         TSVFile.do_sun_position_field(an_upd)
                 updates = TSVFile.update_orm_object(obj,
                                                     an_upd)  # type: ignore
                 if len(updates) > 0:
                     logger.info("Updating '%s' using %s", filter_for_id,
                                 updates)
                     session.flush()
             ret = 0  # nothing to write
         else:
             # 'Simply' a line with an additional image
             logger.info("One more image for %s:%s ",
                         object_head_to_write.orig_id, image_to_write)
             ret = 1  # just a new image
     else:
         if how.can_update_only:
             # No objects creation while updating
             logger.info("Object %s not found while updating ",
                         object_head_to_write.orig_id)
             ret = 0
         else:
             # or create it
             # object_head_to_write.projid = how.prj_id
             object_head_to_write.random_value = random.randint(1, 99999999)
             # Below left NULL @see self.update_counts_and_img0
             # object_head_to_write.img0id = XXXXX
             ret = 3  # new image + new object_head + new object_fields
     return ret
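The returned value is the number of records to write for this line (0, 1 or 3); a hedged sketch of how a caller might accumulate it (the loop variable names are illustrative):

total_new_records = 0
for head, fields, image in parsed_tsv_lines:
    total_new_records += create_or_link_slaves(how, session, head, fields, image)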
Example #9
    def create_summary(self, src_project: Project):
        req = self.req
        proj_id = src_project.projid
        self.update_progress(1, "Start Summary export")

        now_txt = DateTime.now_time().strftime("%Y%m%d_%H%M")
        self.out_file_name = "export_summary_{0:d}_{1:s}.tsv".format(
            src_project.projid, now_txt)
        out_file = self.temp_for_jobs.base_dir_for(
            self.job_id) / self.out_file_name

        # Prepare a where clause and parameters from filter
        object_set: DescribedObjectSet = DescribedObjectSet(
            self.ro_session, proj_id, self.filters)

        # By default, select (and group by) the unambiguous category name
        sels = ["txo.display_name"]
        if self.req.sum_subtotal == "A":
            sels[:0] = ["acq.orig_id"]
        elif self.req.sum_subtotal == "S":
            sels[:0] = [
                "sam.orig_id", "sam.latitude", "sam.longitude",
                "MAX(obh.objdate) AS date"
            ]
        sels.append("COUNT(*) AS nbr")

        select_clause = "SELECT " + ", ".join(sels)
        not_aggregated = [a_sel for a_sel in sels if " " not in a_sel]
        group_clause = " GROUP BY " + ", ".join(not_aggregated)
        order_clause = OrderClause()
        for a_sel in not_aggregated:
            alias, col = a_sel.split(".")
            order_clause.add_expression(alias, col)

        # Base SQL comes from filters
        from_, where, params = object_set.get_sql(self._get_owner_id(),
                                                  order_clause, select_clause)
        sql = select_clause + " FROM " + from_.get_sql() + where.get_sql(
        ) + group_clause + order_clause.get_sql()

        logger.info("Execute SQL : %s", sql)
        logger.info("Params : %s", params)
        res = self.ro_session.execute(text(sql), params)

        msg = "Creating file %s" % out_file
        logger.info(msg)
        self.update_progress(50, msg)
        nb_lines = self.write_result_to_csv(res, out_file)
        msg = "Extracted %d rows" % nb_lines
        logger.info(msg)
        self.update_progress(90, msg)
        return nb_lines
Example #10
 def get_sums_by_taxon(cls, session: Session, acquis_id: AcquisitionIDT) \
         -> Dict[ClassifIDT, int]:
     sql = text("SELECT o.classif_id, count(1)"
                "  FROM obj_head o "
                " WHERE o.acquisid = :acq "
                "   AND o.classif_id IS NOT NULL "
                "   AND o.classif_qual = 'V'"
                " GROUP BY o.classif_id")
     res: Result = session.execute(sql, {"acq": acquis_id})
     return {
         int(classif_id): int(cnt)
         for (classif_id, cnt) in res.fetchall()
     }
Example #11
 def fetch_existing_images(session: Session, prj_id):
     """
         Get all object/image pairs from the project
     """
     # Must be reloaded from DB, as phase 1 added all objects for duplicates checking
     # TODO: Why use the view?
     sql = text("SELECT concat(o.orig_id,'*',i.orig_file_name) "
                "  FROM images i "
                "  JOIN objects o ON i.objid = o.objid "
                " WHERE o.projid = :prj")
     res: Result = session.execute(sql, {"prj": prj_id})
     ret = {img_id for img_id, in res}
     return ret
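Each returned key concatenates the object orig_id and the image's original file name with a '*'; a hedged duplicate check (sample values are illustrative):

existing = fetch_existing_images(session, prj_id=42)
if "m106_mn01_n1_sml*img_0001.jpg" in existing:
    pass  # image already imported, skip it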
Example #12
 def update_taxo_stats(session: Session, projid: int):
     sql = text("""
     DELETE FROM projects_taxo_stat pts
      WHERE pts.projid = :prjid;
     INSERT INTO projects_taxo_stat(projid, id, nbr, nbr_v, nbr_d, nbr_p) 
     SELECT sam.projid, COALESCE(obh.classif_id, -1) id, COUNT(*) nbr, 
            COUNT(CASE WHEN obh.classif_qual = 'V' THEN 1 END) nbr_v,
            COUNT(CASE WHEN obh.classif_qual = 'D' THEN 1 END) nbr_d, 
            COUNT(CASE WHEN obh.classif_qual = 'P' THEN 1 END) nbr_p
       FROM obj_head obh
       JOIN acquisitions acq ON acq.acquisid = obh.acquisid 
       JOIN samples sam ON sam.sampleid = acq.acq_sample_id AND sam.projid = :prjid 
     GROUP BY sam.projid, obh.classif_id;""")
     session.execute(sql, {'prjid': projid})
Example #13
 def update_stats(session: Session, projid: int):
     sql = text("""
     UPDATE projects
        SET objcount=q.nbr_sum, 
            pctclassified=100.0*nbrclassified/q.nbr_sum, 
            pctvalidated=100.0*nbrvalidated/q.nbr_sum
       FROM projects p
       LEFT JOIN
          (SELECT projid, SUM(nbr) nbr_sum, SUM(CASE WHEN id>0 THEN nbr END) nbrclassified, SUM(nbr_v) nbrvalidated
             FROM projects_taxo_stat
            WHERE projid = :prjid
           GROUP BY projid) q ON p.projid = q.projid
     WHERE projects.projid = :prjid 
       AND p.projid = :prjid""")
     session.execute(sql, {'prjid': projid})
Example #14
 def read_taxo_stats(self) -> List[SampleTaxoStats]:
     sql = text("""
     SELECT sam.sampleid,
            ARRAY_AGG(DISTINCT COALESCE(obh.classif_id, -1)) as ids,
            SUM(CASE WHEN obh.classif_id <> -1 THEN 0 ELSE 1 END) as nb_u,
            COUNT(CASE WHEN obh.classif_qual = 'V' THEN 1 END) nbr_v,
            COUNT(CASE WHEN obh.classif_qual = 'D' THEN 1 END) nbr_d, 
            COUNT(CASE WHEN obh.classif_qual = 'P' THEN 1 END) nbr_p
       FROM obj_head obh
       JOIN acquisitions acq ON acq.acquisid = obh.acquisid 
       JOIN samples sam ON sam.sampleid = acq.acq_sample_id
      WHERE sam.sampleid = ANY(:ids)
      GROUP BY sam.sampleid;""")
     with CodeTimer("Stats for %d samples: " % len(self.ids), logger):
         res = self.session.execute(sql, {'ids': self.ids})
         ret = [SampleTaxoStats(rec) for rec in res]
     return ret
Example #15
 def __init__(self, session: Session, project_ids: ProjectIDListT):
     qry: Query = session.query(Acquisition.instrument)
     qry = qry.join(Sample).join(Project)
     # TODO: WTF WTF just for adding a column to the select
     qry = qry.add_columns(
         text(Project.__table__.name + "." +
              Project.__table__.c.projid.name))
     # Below SQLAlchemy complains
     # qry = qry.add_columns(Project.projid)
     if len(project_ids) > 0:
         qry = qry.filter(Project.projid.in_(project_ids))
     qry = qry.distinct()
     instruments_by_proj: Dict[ProjectIDT, Set[InstrumentIDT]] = {}
     instrument_names = set()
     for ins_name, projid in qry.all():
         if ins_name:
             instruments_by_proj.setdefault(projid, set()).add(ins_name)
             instrument_names.add(ins_name)
         else:
             pass  # Filter NULL & empty strings
     self.by_project = instruments_by_proj
     self.instrument_names = sorted(list(instrument_names))
Example #16
    def classify_validate(self, user_id: UserIDT, classif_ids: ClassifIDListT, wanted_qualif: str) \
            -> Tuple[int, Dict[Tuple, ObjectIDListT]]:
        """
            Set current classifications in self and/or validate current classification.
            :param user_id: The User who did these changes.
            :param classif_ids: One category id for each of the object ids in self. -1 means "keep current".
            :param wanted_qualif: Validate or Dubious
            :return: the number of updated rows and a summary of changes, for MRU and logging.
        """
        # Gather state of classification, for impacted objects, before the change. Keep a lock on rows.
        present = self._fetch_classifs_and_lock()

        # Cook a diff b/w present and wanted values, both for the update of obj_head and preparing the ones on _stat
        # Group the updates as lots of them are identical
        updates: Dict[Tuple, EnumeratedObjectSet] = {}
        all_changes: OrderedDict[Tuple, List[int]] = OrderedDict()
        # A bit of obsessive optimization
        classif_id_col = ObjectHeader.classif_id.name
        classif_qual_col = ObjectHeader.classif_qual.name
        classif_who_col = ObjectHeader.classif_who.name
        classif_when_col = ObjectHeader.classif_when.name
        for obj_id, v in zip(self.object_ids, classif_ids):
            prev_obj = present[obj_id]
            prev_classif_id: Optional[int] = prev_obj['classif_id']
            new_classif_id: Optional[int]
            if v == -1:  # special value from validate all
                # Arrange that no change can happen for this field
                # Note: prev_classif_id can be None
                new_classif_id = prev_classif_id
            else:
                new_classif_id = v
            prev_classif_qual = prev_obj['classif_qual']
            if (prev_classif_id == new_classif_id
                    and prev_classif_qual == wanted_qualif
                    and prev_obj['classif_who'] == user_id):
                continue
            # There was at least 1 field change for this object
            an_update = updates.setdefault(
                (new_classif_id, wanted_qualif),
                EnumeratedObjectSet(self.session, []))
            an_update.add_object(obj_id)
            # Compact changes, grouped by operation
            change_key = (prev_classif_id, prev_classif_qual, new_classif_id,
                          wanted_qualif)
            for_this_change = all_changes.setdefault(change_key, [])
            for_this_change.append(obj_id)
            # Keep the most recently used first
            all_changes.move_to_end(change_key, last=False)

        if len(updates) == 0:
            # Nothing to do
            return 0, all_changes

        # Update of obj_head, grouped by similar operations.
        nb_updated = 0
        sql_now = text("now()")
        for (new_classif_id, wanted_qualif), an_obj_set in updates.items():
            # Historize the updated rows (can be a lot!)
            an_obj_set.historize_classification()
            row_upd = {
                classif_id_col: new_classif_id,
                classif_qual_col: wanted_qualif,
                classif_who_col: user_id,
                classif_when_col: sql_now
            }
            # Do the update itself
            nb_updated += an_obj_set.update_all(row_upd)

        logger.info("%d rows updated in %d queries", nb_updated, len(updates))

        # Return statuses
        return nb_updated, all_changes
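The returned summary is keyed by (previous id, previous state, new id, new state) tuples; a hedged sketch of folding it into the per-taxon deltas expected by incremental_update_taxo_stats above (the 'n'/'V'/'D'/'P' keys come from that code, the helper itself is hypothetical):

from collections import defaultdict

def collate_changes(all_changes):
    # Fold {(prev_id, prev_qual, new_id, new_qual): [objids]} into per-taxon deltas.
    deltas = defaultdict(lambda: {'n': 0, 'V': 0, 'D': 0, 'P': 0})
    for (prev_id, prev_qual, new_id, new_qual), obj_ids in all_changes.items():
        nb = len(obj_ids)
        prev_key = prev_id if prev_id is not None else -1
        new_key = new_id if new_id is not None else -1
        deltas[prev_key]['n'] -= nb
        if prev_qual in ('V', 'D', 'P'):
            deltas[prev_key][prev_qual] -= nb
        deltas[new_key]['n'] += nb
        deltas[new_key][new_qual] += nb
    return dict(deltas)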
Example #17
    def classify_auto(self, classif_ids: ClassifIDListT, scores: List[float], keep_logs: bool) \
            -> Tuple[int, Dict[Tuple, ObjectIDListT]]:
        """
            Set automatic classifications in self.
            :param classif_ids: One category id for each of the object ids in self.
            :param scores: One confidence score for each object from automatic classification algorithm.
            :param keep_logs: If set, historize the current classification before it is overwritten.
            :return: the number of updated rows and a summary of changes, for stats.
        """
        # Gather state of classification, for impacted objects, before the change. Keep a lock on rows.
        prev = self._fetch_classifs_and_lock()

        # Cook a diff b/w present and wanted values, both for the update of obj_head and preparing the ones on _stat
        # updates: Dict[Tuple, EnumeratedObjectSet] = {}
        all_changes: OrderedDict[Tuple, List[int]] = OrderedDict()
        # A bit of obsessive optimization
        classif_auto_id_col = ObjectHeader.classif_auto_id.name
        classif_auto_score_col = ObjectHeader.classif_auto_score.name
        classif_id_col = ObjectHeader.classif_id.name
        classif_qual_col = ObjectHeader.classif_qual.name
        overriden_by_prediction = {None, PREDICTED_CLASSIF_QUAL}
        full_updates = []
        partial_updates = []
        objid_param = "_objid"
        for obj_id, classif, score in zip(self.object_ids, classif_ids,
                                          scores):
            prev_obj = prev[obj_id]
            prev_classif_id: Optional[int] = prev_obj['classif_id']
            prev_classif_qual = prev_obj['classif_qual']
            # Whatever, set the auto_* fields
            an_update: Dict[str, Any] = {
                objid_param: obj_id,
                classif_auto_id_col: classif,
                classif_auto_score_col: score
            }
            if prev_classif_qual in overriden_by_prediction:
                # If not manually modified, go to Predicted state and set prediction as classification
                an_update[classif_id_col] = classif
                an_update[classif_qual_col] = PREDICTED_CLASSIF_QUAL
                full_updates.append(an_update)
                change_key = (prev_classif_id, prev_classif_qual, classif,
                              PREDICTED_CLASSIF_QUAL)
                # Compact changes, grouped by operation
                for_this_change = all_changes.setdefault(change_key, [])
                for_this_change.append(obj_id)
            else:
                # Just store prediction, no change on user-visible data
                partial_updates.append(an_update)

        # Historize (auto)
        if keep_logs:
            self.historize_classification(None, True)

        # Bulk (or sort of) update of obj_head
        sql_now = text("now()")
        obj_upd_qry: Update = ObjectHeader.__table__.update()
        obj_upd_qry = obj_upd_qry.where(
            ObjectHeader.objid == bindparam(objid_param))
        nb_updated = 0
        if len(full_updates) > 0:
            full_upd_qry = obj_upd_qry.values(
                classif_id=bindparam(classif_id_col),
                classif_qual=bindparam(classif_qual_col),
                classif_auto_id=bindparam(classif_auto_id_col),
                classif_auto_score=bindparam(classif_auto_score_col),
                classif_auto_when=sql_now)
            nb_updated += self.session.execute(full_upd_qry,
                                               full_updates).rowcount
        # Partial updates
        if len(partial_updates) > 0:
            part_upd_qry = obj_upd_qry.values(
                classif_auto_id=bindparam(classif_auto_id_col),
                classif_auto_score=bindparam(classif_auto_score_col),
                classif_auto_when=sql_now)
            nb_updated += self.session.execute(part_upd_qry,
                                               partial_updates).rowcount
        # TODO: Cache upd
        logger.info("_auto: %d and %d gives %d rows updated ",
                    len(full_updates), len(partial_updates), nb_updated)

        # Return statuses
        return nb_updated, all_changes
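The grouped update relies on SQLAlchemy's executemany style: one UPDATE parameterized with bindparam, executed with a list of parameter dicts. A minimal, self-contained sketch of the same pattern on an illustrative table (not the real schema):

from sqlalchemy import Column, Integer, MetaData, String, Table, bindparam, create_engine

metadata = MetaData()
demo_obj = Table('demo_obj', metadata,
                 Column('objid', Integer, primary_key=True),
                 Column('classif_qual', String))
engine = create_engine("sqlite://")
metadata.create_all(engine)
with engine.begin() as conn:
    conn.execute(demo_obj.insert(), [{'objid': 1}, {'objid': 2}])
    upd = (demo_obj.update()
           .where(demo_obj.c.objid == bindparam('_objid'))
           .values(classif_qual=bindparam('qual')))
    # One prepared UPDATE statement, many parameter sets.
    conn.execute(upd, [{'_objid': 1, 'qual': 'P'},
                       {'_objid': 2, 'qual': 'P'}])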
Example #18
    def create_tsv(self, src_project: Project,
                   end_progress: int) -> Tuple[int, int]:
        """
            Create the TSV file.
        """
        req = self.req
        proj_id = src_project.projid
        self.update_progress(1, "Start TSV export")
        progress_range = end_progress - 1

        # Get a fast count of the maximum of what to do
        count_sql = "SELECT SUM(nbr) AS cnt FROM projects_taxo_stat WHERE projid = :prj"
        res = self.ro_session.execute(text(count_sql), {"prj": proj_id})
        obj_count = res.first()[0]

        # Prepare a where clause and parameters from filter
        object_set: DescribedObjectSet = DescribedObjectSet(
            self.ro_session, proj_id, self.filters)

        # Backup or not, the column names are taken from the common mapping
        # @See Mapping.py
        # TSV column order
        # field_order = ["object_id", "object_lat", "object_lon", "object_date", "object_time", "object_depth_max",
        #                "object_annotation_status", "object_annotation_person_name", "object_annotation_person_email",
        #                "object_annotation_date", "object_annotation_time", "object_annotation_category"]
        # formats = {"object_date": "TO_CHAR({0},'YYYYMMDD')",
        #            "object_time": "TO_CHAR({0},'HH24MISS')",
        #            "object_annotation_date": "TO_CHAR({0},'YYYYMMDD')",
        #            "object_annotation_time": "TO_CHAR({0},'HH24MISS')",
        #            "object_annotation_status": """
        #                  CASE {0}
        #                     WHEN 'V' then 'validated'
        #                     WHEN 'P' then 'predicted'
        #                     WHEN 'D' then 'dubious'
        #                     ELSE {0}
        #                  END
        #            """
        #            }
        # prefices = {ObjectHeader.__tablename__: "obh",
        #             }
        # for a_fld in field_order:
        #     mpg = GlobalMapping.PREDEFINED_FIELDS[a_fld]
        #     mpg[""]
        #     assert a_fld in GlobalMapping.PREDEFINED_FIELDS, "%s is not a mapped column" % a_fld
        date_fmt, time_fmt = "YYYYMMDD", "HH24MISS"
        if req.format_dates_times:
            date_fmt, time_fmt = "YYYY-MM-DD", "HH24:MI:SS"

        select_clause = "select "

        if req.with_images or (req.exp_type == ExportTypeEnum.backup):
            select_clause += "img.orig_file_name AS img_file_name, img.imgrank AS img_rank"
            if req.with_images:
                select_clause += ", img.file_name AS img_src_path"
            select_clause += ",\n"

        select_clause += """obh.orig_id AS object_id, obh.latitude AS object_lat, obh.longitude AS object_lon,
                         TO_CHAR(obh.objdate,'{0}') AS object_date,
                         TO_CHAR(obh.objtime,'{1}') AS object_time,
                         obh.object_link, obh.depth_min AS object_depth_min, obh.depth_max AS object_depth_max,
                         CASE obh.classif_qual 
                            WHEN 'V' then 'validated' 
                            WHEN 'P' then 'predicted' 
                            WHEN 'D' then 'dubious' 
                            ELSE obh.classif_qual 
                         END AS object_annotation_status,                
                         usr.name AS object_annotation_person_name, usr.email AS object_annotation_person_email,
                         TO_CHAR(obh.classif_when,'{0}') AS object_annotation_date,
                         TO_CHAR(obh.classif_when,'{1}') AS object_annotation_time,                
                         txo.display_name AS object_annotation_category 
                    """.format(date_fmt, time_fmt)
        if req.exp_type == ExportTypeEnum.backup:
            select_clause += ", txo.id AS object_annotation_category_id"
        else:
            select_clause += "," + TaxonomyBO.parents_sql(
                "obh.classif_id") + " AS object_annotation_hierarchy"

        if 'C' in req.tsv_entities:
            select_clause += "\n, obh.complement_info"

        # Deal with mappings, the goal is to emit SQL which will reconstitute the TSV structure
        src_mappings = ProjectMapping().load_from_project(src_project)
        if 'O' in req.tsv_entities:
            select_clause += "\n " + src_mappings.object_mappings.as_select_list(
                "obf")

        if 'S' in req.tsv_entities:
            select_clause += "\n, sam.orig_id AS sample_id, sam.dataportal_descriptor AS sample_dataportal_descriptor "
            select_clause += src_mappings.sample_mappings.as_select_list("sam")

        if 'P' in req.tsv_entities:
            select_clause += "\n, prc.orig_id AS process_id "
            select_clause += src_mappings.process_mappings.as_select_list(
                "prc")

        if 'A' in req.tsv_entities:
            select_clause += "\n, acq.orig_id AS acq_id, acq.instrument AS acq_instrument "
            select_clause += src_mappings.acquisition_mappings.as_select_list(
                "acq")

        if req.exp_type == ExportTypeEnum.dig_obj_ident:
            select_clause += "\n, obh.objid"

        if req.with_internal_ids:
            select_clause += """\n, obh.objid, 
                    obh.acquisid AS processid_internal, obh.acquisid AS acq_id_internal, 
                    sam.sampleid AS sample_id_internal, 
                    obh.classif_id, obh.classif_who, obh.classif_auto_id, txp.name classif_auto_name, 
                    obh.classif_auto_score, obh.classif_auto_when,
                    obh.random_value object_random_value, obh.sunpos object_sunpos """
            if 'S' in req.tsv_entities:
                select_clause += "\n, sam.latitude sample_lat, sam.longitude sample_long "

        # TODO: The condition on o.projid=1 in historical code below prevents any data production
        # if 'H' in req.tsv_entities:
        #     sql1 += " , oh.classif_date AS histoclassif_date, classif_type AS histoclassif_type, " \
        #             "to3.name histoclassif_name, oh.classif_qual histoclassif_qual,uo3.name histoclassif_who, " \
        #             "classif_score histoclassif_score"
        #     sql2 += """ LEFT JOIN (select o.objid, classif_date, classif_type, och.classif_id,
        #                                   och.classif_qual, och.classif_who, classif_score
        #                              from objectsclassifhisto och
        #                              join objects o on o.objid=och.objid and o.projid=1 {0}
        #                            union all
        #                            select o.objid, o.classif_when classif_date, 'C' classif_type, classif_id,
        #                                   classif_qual, classif_who, NULL
        #                              from objects o {0} where o.projid=1
        #                           ) oh on o.objid=oh.objid
        #                 LEFT JOIN taxonomy to3 on oh.classif_id=to3.id
        #                 LEFT JOIN users uo3 on oh.classif_who=uo3.id
        #             """.format(samplefilter)

        order_clause = OrderClause()
        if req.split_by == "sample":
            order_clause.add_expression("sam", "orig_id")
            split_field = "sample_id"  # AKA sam.orig_id, but renamed in select list
        elif req.split_by == "taxo":
            select_clause += "\n, txo.display_name AS taxo_parent_child "
            order_clause.add_expression(None, "taxo_parent_child")
            split_field = "taxo_parent_child"
        else:
            order_clause.add_expression("sam", "orig_id")
            split_field = "object_id"  # cette valeur permet d'éviter des erreurs plus loin dans r[split_field]
        order_clause.add_expression("obh", "objid")

        if req.with_images or (req.exp_type == ExportTypeEnum.backup):
            order_clause.add_expression(None, "img_rank")

        # Base SQL comes from filters
        from_, where, params = object_set.get_sql(
            self._get_owner_id(),
            order_clause,
            select_clause,
            all_images=not req.only_first_image)
        sql = select_clause + " FROM " + from_.get_sql() + where.get_sql(
        ) + order_clause.get_sql()
        logger.info("Execute SQL : %s" % sql)
        logger.info("Params : %s" % params)

        res = self.ro_session.execute(text(sql), params)

        now_txt = DateTime.now_time().strftime("%Y%m%d_%H%M")
        self.out_file_name = "export_{0:d}_{1:s}.{2}".format(
            proj_id, now_txt, "zip")

        produced_path = self.out_path / self.out_file_name
        zfile = zipfile.ZipFile(produced_path,
                                'w',
                                allowZip64=True,
                                compression=zipfile.ZIP_DEFLATED)

        splitcsv = (req.split_by != "")
        csv_filename = 'data.tsv'  # Just a temp name as there is a rename while filling up the Zip
        if splitcsv:
            # Produce into the same temp file all the time, at zipping time the name in archive will vary
            prev_value = "NotAssigned"  # To trigger a sequence change immediately
        else:
            # The zip will contain a single TSV with same base name as the zip
            prev_value = self.out_file_name.replace('.zip', '')

        csv_path: Path = self.out_path / csv_filename  # Constant path to a (sometimes) changing file
        csv_fd: Optional[IO] = None
        csv_wtr = None

        # Store the images to save in a separate CSV. Useless if not exporting images but who cares.
        temp_img_file = self.out_path / "images.csv"
        img_file_fd = open(temp_img_file, 'w')
        img_wtr = csv.DictWriter(img_file_fd, ["src_path", "dst_path"],
                                 delimiter='\t',
                                 quotechar='"',
                                 lineterminator='\n')
        img_wtr.writeheader()

        # Prepare TSV structure
        col_descs = [
            a_desc for a_desc in res.cursor.description
            if a_desc.name != "img_src_path"
        ]
        # Read the latitude column to get the DB float type code
        for a_desc in col_descs:
            if a_desc.name == "object_lat":
                db_float_type = a_desc.type_code
                break
        else:
            raise ValueError("Column object_lat not found in result set")
        float_cols = set()
        # Prepare float separator conversion; if not required, the set will just be empty
        if req.coma_as_separator:
            for a_desc in col_descs:
                if a_desc.type_code == db_float_type:
                    float_cols.add(a_desc.name)

        tsv_cols = [a_desc.name for a_desc in col_descs]
        tsv_types_line = {
            name: ('[f]' if a_desc.type_code == db_float_type else '[t]')
            for name, a_desc in zip(tsv_cols, col_descs)
        }
        nb_rows = 0
        nb_images = 0
        used_dst_pathes = set()
        for r in res:
            # Rows from SQLAlchemy are not mutable, so we need a clone for arranging values
            a_row = dict(r)
            if ((splitcsv and (prev_value != a_row[split_field])
                 )  # Each time the split-column value changes
                    or (nb_rows == 0)):  # And anyway for the first row
                # Start of a sequence, possibly also the end of the previous one
                if csv_fd:
                    csv_fd.close()  # Close previous file
                    self.store_csv_into_zip(zfile, prev_value, csv_path)
                if splitcsv:
                    prev_value = a_row[split_field]
                logger.info("Writing into file %s", csv_path)
                csv_fd = open(csv_path, 'w', encoding='latin_1')
                csv_wtr = csv.DictWriter(csv_fd,
                                         tsv_cols,
                                         delimiter='\t',
                                         quotechar='"',
                                         lineterminator='\n',
                                         quoting=csv.QUOTE_NONNUMERIC)
                csv_wtr.writeheader()
                if req.exp_type == ExportTypeEnum.backup:
                    # Write types line for backup type
                    csv_wtr.writerow(tsv_types_line)
            if req.with_images:
                copy_op = {"src_path": a_row.pop("img_src_path")}
                if req.exp_type == ExportTypeEnum.dig_obj_ident:
                    # Images will be stored in a per-category directory, but there is a single TSV at the Zip root
                    categ = a_row['object_annotation_category']
                    # Not all names can directly become directories
                    a_row['img_file_name'] = self.get_DOI_imgfile_name(
                        a_row['objid'], a_row['img_rank'], categ,
                        a_row['img_file_name'])
                    copy_op["dst_path"] = a_row['img_file_name']
                else:  # It's a backup
                    # Images are stored in the Zip subdirectory per sample/taxo, i.e. at the same place as
                    # their referring TSV
                    dst_path = "{0}/{1}".format(prev_value,
                                                a_row['img_file_name'])
                    if dst_path in used_dst_pathes:
                        # Avoid duplicates in zip as only the last entry will be present during unzip
                        # root cause: for UVP6 bundles, the vignette and original image are both stored
                        # with the same name.
                        img_with_rank = "{0}/{1}".format(
                            a_row['img_rank'], a_row['img_file_name'])
                        a_row[
                            'img_file_name'] = img_with_rank  # write into TSV the corrected path
                        dst_path = prev_value + "/" + img_with_rank
                    used_dst_pathes.add(dst_path)
                    copy_op["dst_path"] = dst_path
                img_wtr.writerow(copy_op)
                nb_images += 1
            # Remove CR from comments
            if 'C' in req.tsv_entities and a_row['complement_info']:
                a_row['complement_info'] = ' '.join(
                    a_row['complement_info'].splitlines())
            # Replace decimal separator
            for cname in float_cols:
                if a_row[cname] is not None:
                    a_row[cname] = str(a_row[cname]).replace('.', ',')
            assert csv_wtr is not None
            # Produce the row in the TSV
            csv_wtr.writerow(a_row)
            nb_rows += 1
            if nb_rows % self.ROWS_REPORT_EVERY == 0:
                msg = "Row %d of max %d" % (nb_rows, obj_count)
                logger.info(msg)
                self.update_progress(1 + progress_range / obj_count * nb_rows,
                                     msg)
        if csv_fd:
            csv_fd.close()  # Close last file
            self.store_csv_into_zip(zfile, prev_value, csv_path)
        logger.info("Extracted %d rows", nb_rows)
        img_file_fd.close()
        if zfile:
            zfile.close()
        return nb_rows, nb_images
Example #19
    def projects_for_user(session: Session, user: User,
                          for_managing: bool = False,
                          not_granted: bool = False,
                          title_filter: str = '',
                          instrument_filter: str = '',
                          filter_subset: bool = False) -> List[ProjectIDT]:
        """
        :param session:
        :param user: The user for which the list is needed.
        :param for_managing: If set, list the projects that the user can manage.
        :param not_granted: If set, list (only) the projects on which the given user has no right, so the user
                                can request access to them.
        :param title_filter: If set, filter out the projects whose title does not match the required string,
                                or, if set to a number, the projects whose ID does not match.
        :param instrument_filter: If set, filter out the projects which do not have the given instrument in at
                                     least one sample.
        :param filter_subset: If set, filter out any project whose title contains 'subset'.
        :return: The project IDs
        """
        sql_params: Dict[str, Any] = {"user_id": user.id}

        # Default query: all projects, possibly with first-manager information
        # noinspection SqlResolve
        sql = """SELECT p.projid
                       FROM projects p
                       LEFT JOIN ( """ + ProjectPrivilegeBO.first_manager_by_project() + """ ) fpm 
                         ON fpm.projid = p.projid """
        if not_granted:
            # Add the projects for which no entry is found in ProjectPrivilege
            sql += """
                       LEFT JOIN projectspriv pp ON p.projid = pp.projid AND pp.member = :user_id
                      WHERE pp.member is null """
            if for_managing:
                sql += " AND False "
        else:
            if not user.has_role(Role.APP_ADMINISTRATOR):
                # Not an admin, so restrict to projects which current user can work on, or view
                sql += """
                            JOIN projectspriv pp 
                              ON p.projid = pp.projid 
                             AND pp.member = :user_id """
                if for_managing:
                    sql += """
                             AND pp.privilege = '%s' """ % ProjectPrivilegeBO.MANAGE
            sql += " WHERE 1 = 1 "

        if title_filter != '':
            sql += """ 
                        AND ( title ILIKE '%%'|| :title ||'%%'
                              OR TO_CHAR(p.projid,'999999') LIKE '%%'|| :title ) """
            sql_params["title"] = title_filter

        if instrument_filter != '':
            sql += """
                         AND p.projid IN (SELECT DISTINCT sam.projid FROM samples sam, acquisitions acq
                                           WHERE acq.acq_sample_id = sam.sampleid
                                             AND acq.instrument ILIKE '%%'|| :instrum ||'%%' ) """
            sql_params["instrum"] = instrument_filter

        if filter_subset:
            sql += """
                         AND NOT title ILIKE '%%subset%%'  """

        with CodeTimer("Projects query:", logger):
            res: Result = session.execute(text(sql), sql_params)
            # single-element tuple :( DBAPI
            ret = [an_id for an_id, in res.fetchall()]
        return ret  # type:ignore
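A hedged usage sketch (the function is presumably exposed on a project BO class; called bare here for brevity): list the projects the user can manage whose title, or numeric id, contains 'tara'.

manageable = projects_for_user(session, current_user,
                               for_managing=True, title_filter='tara')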