def _delete_chunk(session: Session, a_chunk: ObjectIDListT) -> Tuple[int, int, List[str]]:
    """
    Delete a chunk from self's object list.
    Technical Note: We use SQLA Core as we don't want to fetch the rows
    :return: (number of deleted objects, number of deleted image rows, image files to remove).
    """
    # Images go first, as they are not deleted via a CASCADE on DB side
    # This is maybe due to relationship cycle b/w ObjectHeader and Images @See comment in Image class
    img_del_qry: Delete = (Image.__table__.delete()
                           .where(Image.objid == any_(a_chunk))
                           .returning(Image.file_name, Image.thumb_file_name))
    with CodeTimer("DELETE for %d images: " % len(a_chunk), logger):
        deleted_rows = session.execute(img_del_qry)
    nb_img_rows = 0
    img_files: List[str] = []
    for a_row in deleted_rows:
        nb_img_rows += 1
        # Each row holds the main file and optionally the thumbnail one
        img_files.extend(a_file for a_file in a_row if a_file)
    logger.info("Removed: %d rows, to remove: %d files", nb_img_rows, len(img_files))
    # Then the objects themselves
    obj_del_qry: Delete = (ObjectHeader.__table__.delete()
                           .where(ObjectHeader.objid == any_(a_chunk)))
    with CodeTimer("DELETE for %d objs: " % len(a_chunk), logger):
        nb_objs = session.execute(obj_del_qry).rowcount
    session.commit()
    # TODO: Cache delete
    return nb_objs, nb_img_rows, img_files
def update_taxo_stats(session: Session, projid: int):
    """
    Refresh the per-taxon statistics lines for the given project:
    wipe the existing ones, then recount objects grouped by classification.
    """
    session.execute(text("""
    DELETE FROM projects_taxo_stat pts
     WHERE pts.projid = :prjid;
    INSERT INTO projects_taxo_stat(projid, id, nbr, nbr_v, nbr_d, nbr_p)
    SELECT sam.projid, COALESCE(obh.classif_id, -1) id, COUNT(*) nbr,
           COUNT(CASE WHEN obh.classif_qual = 'V' THEN 1 END) nbr_v,
           COUNT(CASE WHEN obh.classif_qual = 'D' THEN 1 END) nbr_d,
           COUNT(CASE WHEN obh.classif_qual = 'P' THEN 1 END) nbr_p
      FROM obj_head obh
      JOIN acquisitions acq ON acq.acquisid = obh.acquisid
      JOIN samples sam ON sam.sampleid = acq.acq_sample_id AND sam.projid = :prjid
     GROUP BY sam.projid, obh.classif_id;"""),
                    {'prjid': projid})
def update_stats(session: Session, projid: int):
    """
    Refresh the global counters of the projects table (objcount, pctclassified,
    pctvalidated) from the projects_taxo_stat summary lines.
    """
    stats_upd = text("""
    UPDATE projects
       SET objcount=q.nbr_sum, pctclassified=100.0*nbrclassified/q.nbr_sum,
           pctvalidated=100.0*nbrvalidated/q.nbr_sum
      FROM projects p
      LEFT JOIN (SELECT projid, SUM(nbr) nbr_sum, SUM(CASE WHEN id>0 THEN nbr END) nbrclassified, SUM(nbr_v) nbrvalidated
                   FROM projects_taxo_stat
                  WHERE projid = :prjid
                 GROUP BY projid) q ON p.projid = q.projid
     WHERE projects.projid = :prjid
       AND p.projid = :prjid""")
    session.execute(stats_upd, {'prjid': projid})
def read_taxo_stats(session: Session, prj_ids: ProjectIDListT, taxa_ids: Union[str, ClassifIDListT]) -> List[ProjectTaxoStats]:
    """
    Read the taxonomy statistics for given projects.
    :param prj_ids: The projects of interest.
    :param taxa_ids: Either the literal 'all' (stats per taxon, no taxa filter),
                     a list of taxa IDs to filter on (stats per taxon),
                     or an empty list (a single summary line per project).
    :return: One ProjectTaxoStats per result line, with used taxa sorted.
    """
    sql = """
    SELECT pts.projid, ARRAY_AGG(pts.id) as ids,
           SUM(CASE WHEN pts.id = -1 THEN pts.nbr ELSE 0 END) as nb_u,
           SUM(pts.nbr_v) as nb_v, SUM(pts.nbr_d) as nb_d, SUM(pts.nbr_p) as nb_p
      FROM projects_taxo_stat pts
     WHERE pts.projid = ANY(:ids)"""
    params: Dict[str, Any] = {'ids': prj_ids}
    if len(taxa_ids) > 0 and taxa_ids != 'all':
        # Restrict to the given taxa; 'all' means every taxon, i.e. no filter
        sql += " AND pts.id = ANY(:tids)"
        params["tids"] = taxa_ids
    sql += """
    GROUP BY pts.projid"""
    if len(taxa_ids) > 0:
        # When taxa are explicitly involved ('all' or a list), return one line per taxon
        sql += ", pts.id"
    # Bug fix: the query execution and fetch are now both inside the timed section;
    # previously the timer was started only after session.execute() had run.
    with CodeTimer("stats for %d projects:" % len(prj_ids), logger):
        res: Result = session.execute(text(sql), params)
        ret = [ProjectTaxoStats(rec) for rec in res.fetchall()]
    for a_stat in ret:
        a_stat.used_taxa.sort()
    return ret
def get_date_range(cls, session: Session, project_ids: ProjectIDListT) -> Iterable[datetime]:
    """ Return [min, max] of object dates over the given projects. """
    # TODO: Why using the view?
    qry = ("SELECT min(o.objdate), max(o.objdate)"
           " FROM objects o "
           " WHERE o.projid = ANY(:prj)")
    bounds = session.execute(text(qry), {"prj": project_ids}).first()
    assert bounds
    return list(bounds)
def get_bounding_geo(cls, session: Session, project_ids: ProjectIDListT) -> Iterable[float]:
    """ Return [min lat, max lat, min lon, max lon] over the given projects' objects. """
    # TODO: Why using the view?
    qry = ("SELECT min(o.latitude), max(o.latitude), min(o.longitude), max(o.longitude)"
           " FROM objects o "
           " WHERE o.projid = ANY(:prj)")
    bounds = session.execute(text(qry), {"prj": project_ids}).first()
    assert bounds
    return list(bounds)
def fetch_existing_images(session: Session, prj_id):
    """
    Get all object/image pairs from the project, as a set of 'orig_id*orig_file_name' strings.
    """
    # Must be reloaded from DB, as phase 1 added all objects for duplicates checking
    # TODO: Why using the view?
    qry = text("SELECT concat(o.orig_id,'*',i.orig_file_name) "
               " FROM images i "
               " JOIN objects o ON i.objid = o.objid "
               " WHERE o.projid = :prj")
    return {a_pair for a_pair, in session.execute(qry, {"prj": prj_id})}
def incremental_update_taxo_stats(cls, session: Session, prj_id: int, collated_changes: Dict):
    """
    Do not recompute the full stats for a project (which can be long).
    Instead, apply deltas because in this context we know them.
    :param collated_changes: per classif_id dict of deltas, keys 'n' (total), 'V', 'D', 'P'
                             (per-state counts) -- assumed shape, confirm against callers.
    TODO: All SQL to SQLAlchemy form
    """
    needed_ids = list(collated_changes.keys())
    # Lock taxo lines to prevent re-entering, during validation it's often a handful of them.
    pts_sql = """SELECT id
                   FROM taxonomy
                  WHERE id = ANY(:ids)
                    FOR NO KEY UPDATE """
    session.execute(text(pts_sql), {"ids": needed_ids})
    # Lock the rows we are going to update, including -1 for unclassified
    pts_sql = """SELECT id, nbr
                   FROM projects_taxo_stat
                  WHERE projid = :prj
                    AND id = ANY(:ids)
                    FOR NO KEY UPDATE"""
    res = session.execute(text(pts_sql), {"prj": prj_id, "ids": needed_ids})
    # Current total per taxon, for the lines which exist in the stats table
    ids_in_db = {classif_id: nbr for (classif_id, nbr) in res.fetchall()}
    ids_not_in_db = set(needed_ids).difference(ids_in_db.keys())
    if len(ids_not_in_db) > 0:
        # Insert rows for missing IDs, counting from scratch the objects for these taxa only
        pts_ins = """INSERT INTO projects_taxo_stat(projid, id, nbr, nbr_v, nbr_d, nbr_p)
                     SELECT :prj, COALESCE(obh.classif_id, -1), COUNT(*) nbr,
                            COUNT(CASE WHEN obh.classif_qual = 'V' THEN 1 END) nbr_v,
                            COUNT(CASE WHEN obh.classif_qual = 'D' THEN 1 END) nbr_d,
                            COUNT(CASE WHEN obh.classif_qual = 'P' THEN 1 END) nbr_p
                       FROM obj_head obh
                       JOIN acquisitions acq ON acq.acquisid = obh.acquisid
                       JOIN samples sam ON sam.sampleid = acq.acq_sample_id AND sam.projid = :prj
                      WHERE COALESCE(obh.classif_id, -1) = ANY(:ids)
                      GROUP BY obh.classif_id"""
        session.execute(text(pts_ins), {'prj': prj_id, 'ids': list(ids_not_in_db)})
    # Apply delta
    for classif_id, chg in collated_changes.items():
        if classif_id in ids_not_in_db:
            # The line was created just above, with OK values
            continue
        if ids_in_db[classif_id] + chg['n'] == 0:
            # The delta means 0 for this taxon in this project, delete the line
            sqlparam = {'prj': prj_id, 'cid': classif_id}
            ts_sql = """DELETE FROM projects_taxo_stat
                         WHERE projid = :prj AND id = :cid"""
        else:
            # General case: add the deltas to each counter
            sqlparam = {'prj': prj_id, 'cid': classif_id,
                        'nul': chg['n'], 'val': chg['V'], 'dub': chg['D'], 'prd': chg['P']}
            ts_sql = """UPDATE projects_taxo_stat
                           SET nbr=nbr+:nul, nbr_v=nbr_v+:val, nbr_d=nbr_d+:dub, nbr_p=nbr_p+:prd
                         WHERE projid = :prj AND id = :cid"""
        session.execute(text(ts_sql), sqlparam)
def delete(session: Session, prj_id: int):
    """
    Completely remove the project. It is assumed that contained objects have been removed.
    """
    # TODO: Remove from user preferences
    # Unlink Particle project if any
    unlink_qry = (ParticleProject.__table__.update()
                  .where(ParticleProject.projid == prj_id)
                  .values(projid=None))
    nb_unlinked = session.execute(unlink_qry).rowcount
    logger.info("%d EcoPart project unlinked", nb_unlinked)
    # Remove project
    session.query(Project).filter(Project.projid == prj_id).delete()
    # Remove privileges
    session.query(ProjectPrivilege).filter(ProjectPrivilege.projid == prj_id).delete()
def delete_object_parents(cls, session: Session, prj_id: int) -> List[int]: """ Remove object parents, also project children entities, in the project. """ # The EcoTaxa samples which are going to disappear. We have to cleanup Particle side. soon_deleted_samples: Query = Query(Sample.sampleid).filter(Sample.projid == prj_id) # The EcoPart samples to clean. soon_invalid_part_samples: Query = Query(ParticleSample.psampleid).filter( ParticleSample.sampleid.in_(soon_deleted_samples)) # Cleanup EcoPart corresponding tables del_qry = ParticleCategoryHistogramList.__table__. \ delete().where(ParticleCategoryHistogramList.psampleid.in_(soon_invalid_part_samples)) logger.info("Del part histo lst :%s", str(del_qry)) session.execute(del_qry) del_qry = ParticleCategoryHistogram.__table__. \ delete().where(ParticleCategoryHistogram.psampleid.in_(soon_invalid_part_samples)) logger.info("Del part histo :%s", str(del_qry)) session.execute(del_qry) upd_qry = ParticleSample.__table__. \ update().where(ParticleSample.psampleid.in_(soon_invalid_part_samples)).values(sampleid=None) logger.info("Upd part samples :%s", str(upd_qry)) row_count = session.execute(upd_qry).rowcount logger.info(" %d EcoPart samples unlinked and cleaned", row_count) ret = [] del_acquis_qry: Delete = Acquisition.__table__. \ delete().where(Acquisition.acq_sample_id.in_(soon_deleted_samples)) logger.info("Del acquisitions :%s", str(del_acquis_qry)) gone_acqs = session.execute(del_acquis_qry).rowcount ret.append(gone_acqs) logger.info("%d rows deleted", gone_acqs) del_sample_qry: Delete = Sample.__table__. \ delete().where(Sample.sampleid.in_(soon_deleted_samples)) logger.info("Del samples :%s", str(del_sample_qry)) gone_sams = session.execute(del_sample_qry).rowcount ret.append(gone_sams) logger.info("%d rows deleted", gone_sams) ret.append(gone_acqs) session.commit() return ret
def projects_for_user(session: Session, user: User,
                      for_managing: bool = False,
                      not_granted: bool = False,
                      title_filter: str = '',
                      instrument_filter: str = '',
                      filter_subset: bool = False) -> List[ProjectIDT]:
    """
    :param session:
    :param user: The user for which the list is needed.
    :param for_managing: If set, list the projects that the user can manage.
    :param not_granted: If set, list (only) the projects on which given user has no right, so user
                        can request access to them.
    :param title_filter: If set, filter out the projects with title not matching the required string,
                         or if set to a number, filter out the projects of which ID does not match.
    :param instrument_filter: If set, filter out the projects which do not have given instrument in
                              at least one sample.
    :param filter_subset: If set, filter out any project of which title contains 'subset'.
    :return: The project IDs
    """
    sql_params: Dict[str, Any] = {"user_id": user.id}
    # Default query: all projects, eventually with first manager information
    # noinspection SqlResolve
    sql = """SELECT p.projid
               FROM projects p
               LEFT JOIN ( """ + ProjectPrivilegeBO.first_manager_by_project() + """ ) fpm
                 ON fpm.projid = p.projid """
    if not_granted:
        # Add the projects for which no entry is found in ProjectPrivilege
        sql += """ LEFT JOIN projectspriv pp ON p.projid = pp.projid AND pp.member = :user_id
                   WHERE pp.member is null """
        if for_managing:
            # One cannot manage a project with no right on it -> empty result
            sql += " AND False "
    else:
        if not user.has_role(Role.APP_ADMINISTRATOR):
            # Not an admin, so restrict to projects which current user can work on, or view
            sql += """ JOIN projectspriv pp
                         ON p.projid = pp.projid
                        AND pp.member = :user_id """
            if for_managing:
                # Note: privilege value comes from a module constant, not user input
                sql += """ AND pp.privilege = '%s' """ % ProjectPrivilegeBO.MANAGE
        sql += " WHERE 1 = 1 "
    if title_filter != '':
        # Match on title substring, or on the project ID rendered as text
        sql += """ AND ( title ILIKE '%%'|| :title ||'%%'
                         OR TO_CHAR(p.projid,'999999') LIKE '%%'|| :title ) """
        sql_params["title"] = title_filter
    if instrument_filter != '':
        # Keep projects having at least one acquisition with matching instrument
        sql += """ AND p.projid IN (SELECT DISTINCT sam.projid
                                      FROM samples sam, acquisitions acq
                                     WHERE acq.acq_sample_id = sam.sampleid
                                       AND acq.instrument ILIKE '%%'|| :instrum ||'%%' ) """
        sql_params["instrum"] = instrument_filter
    if filter_subset:
        sql += """ AND NOT title ILIKE '%%subset%%' """
    with CodeTimer("Projects query:", logger):
        res: Result = session.execute(text(sql), sql_params)
        # single-element tuple :( DBAPI
        ret = [an_id for an_id, in res.fetchall()]
    return ret  # type:ignore
def query_pg_and_cache(self, table_name: str, pg_sess: Session, cache_sql: str) -> int:
    """ Run the given SQL on PG side, mirror the result set into the named
        sqlite table, and return the number of inserted rows. """
    logger.info("For cache fetch: %s", cache_sql)
    pg_res: Result = pg_sess.execute(cache_sql)
    cols = self.create_sqlite_table(table_name, pg_res)
    return self.pg_to_sqlite(table_name, cols, pg_res)