Example #1
 def classify_set(self, current_user_id: UserIDT, target_ids: ObjectIDListT, classif_ids: ClassifIDListT,
                  wanted_qualif: str) -> Tuple[int, int, Dict]:
     """
         Classify a set of objects, or mark them as validated/dubious.
     """
     # Get the objects and project, checking rights at the same time.
     object_set, project = self._the_project_for(current_user_id, target_ids, Action.ANNOTATE)
     # Do the raw classification with history.
     nb_upd, all_changes = object_set.classify_validate(current_user_id, classif_ids, wanted_qualif)
     # Propagate changes to update projects_taxo_stat
     if nb_upd > 0:
         # Log a bit
         for a_chg, impacted in all_changes.items():
             logger.info("change %s for %s", a_chg, impacted)
         # Collate changes
         collated_changes: Dict[int, Dict] = {}
         for (prev_classif_id, prev_classif_qual, new_classif_id, new_classif_qual), objects in all_changes.items():
             # Decrement for what was before
             self.count_in_and_out(collated_changes, prev_classif_id, prev_classif_qual, -len(objects))
             # Increment for what arrives
             self.count_in_and_out(collated_changes, new_classif_id, new_classif_qual, len(objects))
         # Update the table
         ProjectBO.incremental_update_taxo_stats(self.session, project.projid, collated_changes)
         self.session.commit()
     else:
         self.session.rollback()
     # Return status
     return nb_upd, project.projid, all_changes
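
The collated_changes dict handed to ProjectBO.incremental_update_taxo_stats cumulates, per taxon, the +/- deltas computed in the loop above. As a minimal sketch (not the actual EcoTaxa helper), count_in_and_out could maintain per-taxon counters along these lines, assuming an "n" total plus one bucket per single-letter qualification (keys are illustrative):

    from typing import Dict, Optional

    def count_in_and_out(collated: Dict[int, Dict], classif_id: Optional[int],
                         qualif: Optional[str], delta: int) -> None:
        # Cumulate +/- deltas per taxon; using -1 for "unclassified" is an assumption
        key = classif_id if classif_id is not None else -1
        counts = collated.setdefault(key, {"n": 0, "V": 0, "P": 0, "D": 0})
        counts["n"] += delta
        if qualif in counts:
            counts[qualif] += delta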
Example #2
    def do_run(self, current_user_id: int) -> SubsetRsp:
        # Security checks
        RightsBO.user_wants(self.session, current_user_id, Action.READ, self.prj_id)
        RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, self.dest_prj.projid)
        # OK
        logger.info("Starting subset of '%s'", self.prj.title)
        ret = SubsetRsp()

        self.update_progress(5, "Determining objects to clone")
        self._find_what_to_clone()

        logger.info("Matched %s objects", len(self.to_clone))
        if len(self.to_clone) == 0:
            self.task.taskstate = "Error"
            self.update_progress(10, "No object to include in the subset project")
            ret.errors.append("No object found to clone into subset.")
            return ret

        self._do_clone()
        self.session.commit()

        # Recompute stats and so on
        ProjectBO.do_after_load(self.session, self.dest_prj.projid)
        self.session.commit()
        return ret
Example #3
    def do_run(self, current_user_id: int) -> MergeRsp:
        """
            Run the service, merge the projects.
        :return:
        """
        # Security check
        RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE,
                            self.prj_id)
        RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE,
                            self.src_prj_id)
        # OK
        prj = self.session.query(Project).get(self.prj_id)
        assert prj is not None
        src_prj = self.session.query(Project).get(self.src_prj_id)
        assert src_prj is not None

        logger.info("Validating Merge of '%s'", prj.title)
        ret = MergeRsp()
        errs = self._verify_possible(prj, src_prj)
        ret.errors = errs
        # Exit if errors or dry run
        if self.dry_run or len(errs) > 0:
            return ret

        logger.info("Remaps: %s", self.remap_operations)
        # Go for real if not dry run AND len(errs) == 0
        logger.info("Starting Merge of '%s'", prj.title)
        self._do_merge(prj)
        self.session.commit()

        # Recompute stats and so on
        ProjectBO.do_after_load(self.session, prj_id=self.prj_id)
        self.session.commit()
        return ret
Example #4
    def delete(self, current_user_id: UserIDT, object_ids: ObjectIDListT) -> Tuple[int, int, int, int]:
        """
            Remove from DB all the objects with ID in given list.
        """
        # Security check
        obj_set = EnumeratedObjectSet(self.session, object_ids)
        # Get project IDs for the objects and verify rights
        prj_ids = obj_set.get_projects_ids()
        for a_prj_id in prj_ids:
            RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, a_prj_id)

        # Prepare & start a remover thread that will run in parallel with DB queries
        remover = VaultRemover(self.link_src, logger).do_start()
        # Do the deletion itself.
        nb_objs, nb_img_rows, img_files = obj_set.delete(self.CHUNK_SIZE, remover.add_files)

        # Update stats on impacted project(s)
        for prj_id in prj_ids:
            ProjectBO.update_taxo_stats(self.session, prj_id)
            # Stats depend on taxo stats
            ProjectBO.update_stats(self.session, prj_id)

        self.session.commit()
        # Wait for the files handled
        remover.wait_for_done()
        return nb_objs, 0, nb_img_rows, len(img_files)
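
VaultRemover is not shown here; the point is the pattern of a worker thread unlinking image files while the DB deletion proceeds, then a final join after commit. A self-contained sketch of that pattern, with hypothetical names, could look like:

    import threading
    from pathlib import Path
    from queue import Queue
    from typing import List

    class FileRemover:
        """ Illustrative sketch only: delete files in a background thread while DB work goes on. """
        _DONE = object()  # Sentinel meaning "no more files"

        def __init__(self, base_dir: str):
            self.base = Path(base_dir)
            self.queue: Queue = Queue()
            self.thread = threading.Thread(target=self._run, daemon=True)

        def do_start(self) -> "FileRemover":
            self.thread.start()
            return self

        def add_files(self, files: List[str]) -> None:
            for a_file in files:
                self.queue.put(a_file)

        def wait_for_done(self) -> None:
            self.queue.put(self._DONE)
            self.thread.join()

        def _run(self) -> None:
            while True:
                an_item = self.queue.get()
                if an_item is self._DONE:
                    break
                (self.base / an_item).unlink(missing_ok=True)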
Example #5
 def do_run(self, current_user_id: int) -> List[str]:
     # Security check
     _user, project = RightsBO.user_wants(self.session, current_user_id,
                                          Action.READ, self.prj_id)
     # OK
     proj_bo = ProjectBO(project).enrich()
     ret = []
     # TODO: Permissions
     ret.append(proj_bo.title)
     ret.append(str(proj_bo.obj_free_cols))
     free_cols_vals = proj_bo.get_all_num_columns_values(self.session)
     acquis_stats: AcquisitionStats = AcquisitionStats("", 0)
     for a_row in free_cols_vals:
         acquis_id, acquis_orig_id, objid, *free_vals = a_row
         free_vals = [
             a_val if a_val is not None else Decimal('nan')
             for a_val in free_vals
         ]
         if acquis_id != acquis_stats.acquis_id:
             # Not the same acquisition: close the previous one
             self.output_acq(acquis_stats, ret)
             # And start a new one
             acquis_stats = AcquisitionStats(acquis_orig_id, acquis_id)
         acquis_stats.add_values(free_vals)
     self.output_acq(acquis_stats, ret)
     return ret
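
The loop above is a control break: rows arrive grouped by acquisition, the sentinel AcquisitionStats("", 0) makes the first row open a group, and each change of acquis_id flushes the previous group before starting a new one. The same pattern on plain tuples, as an illustrative sketch:

    from typing import List, Optional, Tuple

    def group_sums(rows: List[Tuple[int, float]]) -> List[Tuple[int, float]]:
        # rows are (group_id, value) pairs, sorted by group_id; emit one sum per group
        out: List[Tuple[int, float]] = []
        current_id: Optional[int] = None
        total = 0.0
        for group_id, value in rows:
            if group_id != current_id:
                if current_id is not None:
                    out.append((current_id, total))  # Close the previous group
                current_id, total = group_id, 0.0    # Open a new one
            total += value
        if current_id is not None:
            out.append((current_id, total))          # Flush the last group
        return out

    # e.g. group_sums([(1, 2.0), (1, 3.0), (2, 1.5)]) == [(1, 5.0), (2, 1.5)]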
Example #6
    def do_run(self, current_user_id: int) -> ImportRealRsp:
        """
            Do the real job using injected parameters.
            :return:
        """
        # Security check
        RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE,
                            self.prj_id)
        # OK
        loaded_files = none_to_empty(self.prj.fileloaded).splitlines()
        logger.info("Previously loaded files: %s", loaded_files)

        # Save mappings straight away
        self.save_mapping(self.custom_mapping)

        source_bundle = InBundle(
            self.req.source_path,
            Path(self.temp_for_task.data_dir_for(self.task_id)))
        # Configure the import to come, destination
        db_writer = DBWriter(self.session)
        import_where = ImportWhere(
            db_writer, self.vault,
            self.temp_for_task.base_dir_for(self.task_id))
        # Configure the import to come, directives
        import_how = ImportHow(self.prj_id, self.req.update_mode,
                               self.custom_mapping,
                               self.req.skip_existing_objects, loaded_files)
        import_how.taxo_mapping = self.req.taxo_mappings
        import_how.taxo_found = self.req.found_taxa
        import_how.found_users = self.req.found_users
        if self.req.skip_loaded_files:
            import_how.compute_skipped(source_bundle, logger)
        if not self.req.skip_existing_objects:
            with CodeTimer("run: Existing images for %d: " % self.prj_id,
                           logger):
                import_how.objects_and_images_to_skip = Image.fetch_existing_images(
                    self.session, self.prj_id)
        import_how.do_thumbnail_above(int(self.config['THUMBSIZELIMIT']))

        # Do the bulk job of import
        row_count = source_bundle.do_import(import_where, import_how,
                                            self.req.rowcount,
                                            self.report_progress)

        # Update loaded files in DB, removing duplicates
        self.prj.fileloaded = "\n".join(set(import_how.loaded_files))
        self.session.commit()

        # Recompute stats
        ProjectBO.do_after_load(self.session, self.prj_id)
        self.session.commit()

        logger.info("Total of %d rows loaded" % row_count)

        # Prepare response
        ret = ImportRealRsp()
        return ret
Example #7
    def do_real(self) -> None:
        """
            Do the real job, i.e. write everywhere (DB/filesystem)
        """
        loaded_files = none_to_empty(self.prj.fileloaded).splitlines()
        logger.info("Previously loaded files: %s", loaded_files)

        (found_users, taxo_found, col_mapping_dict,
         nb_rows, source_path) = self._load_vars_from_state(self.STATE_KEYS)

        # Save mappings straight away
        col_mapping = ProjectMapping().load_from_dict(col_mapping_dict)
        col_mapping.write_to_project(self.prj)
        self.session.commit()

        # TODO: Duplicated code
        source_bundle = InBundle(
            source_path, Path(self.temp_for_jobs.data_dir_for(self.job_id)))
        # Configure the import to come, destination
        db_writer = DBWriter(self.session)
        import_where = ImportWhere(
            db_writer, self.vault,
            self.temp_for_jobs.base_dir_for(self.job_id))
        # Configure the import to come, directives
        import_how = ImportHow(self.prj_id, self.req.update_mode, col_mapping,
                               self.req.skip_existing_objects, loaded_files)
        import_how.taxo_mapping = self.req.taxo_mappings
        import_how.found_taxa = taxo_found
        import_how.found_users = found_users
        if self.req.skip_loaded_files:
            import_how.compute_skipped(source_bundle, logger)
        if self.req.skip_existing_objects:
            # If we must skip existing objects then do an inventory of what's in already
            with CodeTimer("run: Existing images for %d: " % self.prj_id,
                           logger):
                import_how.objects_and_images_to_skip = Image.fetch_existing_images(
                    self.session, self.prj_id)
        import_how.do_thumbnail_above(int(self.config['THUMBSIZELIMIT']))

        # Do the bulk job of import
        rowcount_from_validate = nb_rows
        row_count = source_bundle.do_import(import_where, import_how,
                                            rowcount_from_validate,
                                            self.report_progress)

        # Update loaded files in DB, removing duplicates
        self.prj.fileloaded = "\n".join(set(import_how.loaded_files))
        self.session.commit()

        # Recompute stats
        ProjectBO.do_after_load(self.session, self.prj_id)
        self.session.commit()

        msg = "Total of %d rows loaded" % row_count
        logger.info(msg)
        self.set_job_result(errors=[], infos={"rowcount": row_count})
Example #8
    def delete(self, current_user_id: int, prj_id: int,
               only_objects: bool) -> Tuple[int, int, int, int]:
        # Security barrier
        _current_user, _project = RightsBO.user_wants(self.session,
                                                      current_user_id,
                                                      Action.ADMINISTRATE,
                                                      prj_id)
        # Troll-ish way of erasing
        all_object_ids = ProjectBO.get_all_object_ids(self.session,
                                                      prj_id=prj_id)
        # Build a big set
        obj_set = EnumeratedObjectSet(self.session, all_object_ids)

        # Prepare a remover thread that will run in parallel with DB queries
        remover = VaultRemover(self.link_src, logger).do_start()
        # Do the deletion itself.
        nb_objs, nb_img_rows, img_files = obj_set.delete(
            self.DELETE_CHUNK_SIZE, remover.add_files)

        ProjectBO.delete_object_parents(self.session, prj_id)

        if only_objects:
            # Update stats, should all be 0...
            ProjectBO.update_taxo_stats(self.session, prj_id)
            # Stats depend on taxo stats
            ProjectBO.update_stats(self.session, prj_id)
        else:
            ProjectBO.delete(self.session, prj_id)

        self.session.commit()
        # Wait for the files handled
        remover.wait_for_done()
        return nb_objs, 0, nb_img_rows, len(img_files)
Example #9
    def do_import(self) -> SimpleImportRsp:
        """
            Do the real job, i.e. copy files while creating records.
        """
        errors = []
        self.manage_uploaded()
        self.unzip_if_needed()
        # Use a Bundle
        source_bundle = InBundle(
            self.source_dir_or_zip,
            Path(self.temp_for_task.data_dir_for(self.task_id)))
        # Clean it, in case the ZIP contains a TSV
        source_bundle.remove_all_tsvs()
        images = source_bundle.list_image_files()
        # Configure the import to come, destination
        db_writer = DBWriter(self.session)
        import_where = ImportWhere(
            db_writer, self.vault,
            self.temp_for_task.base_dir_for(self.task_id))
        # Configure the import to come, directives
        import_how = ImportHow(prj_id=self.prj_id,
                               update_mode="",
                               custom_mapping=ProjectMapping(),
                               skip_object_duplicates=False,
                               loaded_files=[])
        import_how.do_thumbnail_above(int(self.config['THUMBSIZELIMIT']))
        # Generate TSV
        req_values = self.req.values
        if req_values.get(SimpleImportFields.userlb, ""):
            import_how.found_users["user"] = {
                "id": req_values.get(SimpleImportFields.userlb)
            }
            req_values[SimpleImportFields.userlb] = "user"
        if req_values.get(SimpleImportFields.status, ""):
            req_values[SimpleImportFields.status] = classif_qual.get(
                req_values[SimpleImportFields.status], "")
        self.make_tsv(source_bundle, images)
        # Import
        nb_image_files = len(images)
        nb_images = source_bundle.do_import(import_where, import_how,
                                            nb_image_files,
                                            self.report_progress)
        self.session.commit()

        # Recompute stats and so on
        ProjectBO.do_after_load(self.session, self.prj_id)
        self.session.commit()

        ret = SimpleImportRsp(errors=errors, nb_images=nb_images)
        return ret
Example #10
 def update_db_stats(self):
     """
         Refresh the database for aggregates.
     """
     project_ids = [
         a_project.projid for a_project in self.collection.projects
     ]
     for a_project_id in project_ids:
         # Ensure the taxo stats are OK
         ProjectBO.update_taxo_stats(self.session, projid=a_project_id)
         # Ensure the geography is OK, i.e. propagated upwards from objects, for all projects inside the collection
         Sample.propagate_geo(self.session, prj_id=a_project_id)
     a_stat: ProjectTaxoStats
     for a_stat in ProjectBO.read_taxo_stats(self.session, project_ids, []):
         self.validated_count += a_stat.nb_validated
Example #11
 def read_stats(self, current_user_id: int,
                prj_ids: ProjectIDListT) -> List[ProjectTaxoStats]:
     """
         Read classification statistics for these projects.
     """
     # No security barrier because there is no private information inside
     return ProjectBO.read_taxo_stats(self.session, prj_ids)
Example #12
 def __init__(self, project: Project, mapping: TableMapping,
              where_clause: WhereClause, order_clause: Optional[OrderClause],
              params: Dict[str, Any], window_start: Optional[int], window_size: Optional[int]):
     self.sort_fields = ProjectBO.get_sort_db_columns(project, mapping=mapping)
     self.projid = project.projid
     # Store the PG query specifics
     self.pg_where = where_clause
     self.pg_order = order_clause
     self.pg_params = params
     self.pg_window_start = window_start
     self.pg_window_size = window_size
     # Now move on to the SQLite DB side
     file_name = self.file_name(project.projid)
     self.conn: Optional[SQLiteConnection] = None
     self.meta: Optional[DBMeta] = None
     try:
         self.conn = SQLite3.get_conn(file_name, "ro")
     except OperationalError as e:
         # No file or locked file
         logger.info("No conn %s", str(e))
         return
     try:
         self.meta = SQLite3.get_meta(self.conn)
     except OperationalError as e:
         # DB could be locked e.g. writing
         logger.info("No meta %s", str(e))
         return
     self.can = True, ""
     # The SQLite equivalent of the PG query, if any, rearranged
     self.cache_where = WhereClause()
     self.where_params: Dict[str, Any] = {}
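
SQLite3.get_conn and SQLite3.get_meta are project helpers; with only the standard library, the same "open read-only, degrade gracefully" behaviour could be sketched as below (the probe query is an assumption, just there to surface a locked file early):

    import sqlite3
    from typing import Optional

    def open_readonly(file_name: str) -> Optional[sqlite3.Connection]:
        # Open an existing SQLite file read-only; return None if absent or locked
        try:
            conn = sqlite3.connect("file:%s?mode=ro" % file_name, uri=True)
            conn.execute("SELECT 1 FROM sqlite_master LIMIT 1")  # Force a real read
            return conn
        except sqlite3.OperationalError:
            # e.g. "unable to open database file" or "database is locked"
            return None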
Example #13
    def reset_to_predicted(self, current_user_id: UserIDT, proj_id: ProjectIDT, filters: ProjectFilters) -> None:
        """
            Query the given project with given filters, reset the resulting objects to predicted.
        """
        # Security check
        RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, proj_id)

        impacted_objs = [r[0] for r in self.query(current_user_id, proj_id, filters)[0]]

        EnumeratedObjectSet(self.session, impacted_objs).reset_to_predicted()

        # Update stats
        ProjectBO.update_taxo_stats(self.session, proj_id)
        # Stats depend on taxo stats
        ProjectBO.update_stats(self.session, proj_id)
        self.session.commit()
Example #14
 def read_stats(self, current_user_id: Optional[UserIDT],
                prj_ids: ProjectIDListT,
                taxa_ids: Union[str, ClassifIDListT]) -> List[ProjectTaxoStats]:
     """
         Read classification statistics for these projects.
     """
     # No security barrier because there is no private information inside
     return ProjectBO.read_taxo_stats(self.session, prj_ids, taxa_ids)
Example #15
 def search(self, current_user_id: Optional[int],
            for_managing: bool = False,
            not_granted: bool = False,
            title_filter: str = '',
            instrument_filter: str = '',
            filter_subset: bool = False) -> List[ProjectBO]:
     current_user: Optional[User]
     if current_user_id is None:
         # For public
         matching_ids = ProjectBO.list_public_projects(self.ro_session, title_filter)
         projects = ProjectBOSet(self.ro_session, matching_ids, public=True)
     else:
         # No rights checking as basically everyone can see all projects
         current_user = self.ro_session.query(User).get(current_user_id)
         assert current_user is not None
         matching_ids = ProjectBO.projects_for_user(self.ro_session, current_user, for_managing, not_granted,
                                                    title_filter, instrument_filter, filter_subset)
         projects = ProjectBOSet(self.ro_session, matching_ids, public=False)
     return projects.as_list()
Example #16
 def read_user_stats(self, current_user_id: int,
                     prj_ids: ProjectIDListT) -> List[ProjectUserStats]:
     """
         Read user statistics for these projects.
     """
     # Security barrier
     for a_prj_id in prj_ids:
         RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, a_prj_id)
     ret = ProjectBO.read_user_stats(self.session, prj_ids)
     return ret
Example #17
    def do_run(self) -> None:
        # OK
        logger.info("Starting subset of '%s'", self.prj.title)

        self.update_progress(5, "Determining objects to clone")
        self._find_what_to_clone()

        logger.info("Matched %s objects", len(self.to_clone))
        if len(self.to_clone) == 0:
            errors = ["No object found to clone into subset."]
            self.set_job_result(errors=errors, infos={"infos": ""})
            return

        self._do_clone()
        self.session.commit()

        # Recompute stats and so on
        ProjectBO.do_after_load(self.session, self.dest_prj.projid)
        self.session.commit()

        self.set_job_result(errors=[], infos={"rowcount": len(self.to_clone)})
Example #18
    def revert_to_history(self, current_user_id: UserIDT, proj_id: ProjectIDT,
                          filters: ProjectFilters, dry_run: bool,
                          target: Optional[int]) -> Tuple[List[HistoricalLastClassif], ClassifSetInfoT]:
        """
            Revert the given object set to its classification history; if dry_run, only simulate.
        """
        # Security check
        RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, proj_id)

        # Get target objects
        impacted_objs = [r[0] for r in self.query(current_user_id, proj_id, filters)[0]]
        obj_set = EnumeratedObjectSet(self.session, impacted_objs)

        # We don't revert to a previous version in history made by the same annotator
        but_not_by: Optional[int] = None
        but_not_by_str = filters.get('filt_last_annot', None)
        if but_not_by_str is not None:
            try:
                but_not_by = int(but_not_by_str)
            except ValueError:
                pass
        if dry_run:
            # Return information on what to do
            impact = obj_set.evaluate_revert_to_history(target, but_not_by)
            # And names for display
            classifs = TaxonomyBO.names_with_parent_for(self.session, self.collect_classif(impact))
        else:
            # Do the real thing
            impact = obj_set.revert_to_history(target, but_not_by)
            classifs = {}
            # Update stats
            ProjectBO.update_taxo_stats(self.session, proj_id)
            # Stats depend on taxo stats
            ProjectBO.update_stats(self.session, proj_id)
            self.session.commit()
        # Give feedback
        return impact, classifs
Example #19
    def _do_merge(self, dest_prj: Project):
        """
            Real merge operation.
        """
        # Loop over involved tables and remap free columns
        for a_mapped_tbl in MAPPED_TABLES:
            remaps = self.remap_operations.get(a_mapped_tbl)
            # Do the remappings if any
            if remaps is not None:
                logger.info("Doing re-mapping in %s: %s",
                            a_mapped_tbl.__tablename__, remaps)
                ProjectBO.remap(self.session, self.src_prj_id, a_mapped_tbl,
                                remaps)

        # Collect orig_id
        dest_parents = InBundle.fetch_existing_parents(self.ro_session,
                                                       prj_id=self.prj_id)
        src_parents = InBundle.fetch_existing_parents(self.ro_session,
                                                      prj_id=self.src_prj_id)

        # Compute needed projections in order to preserve orig_id uniqueness
        common_samples = self.get_ids_for_common_orig_id(
            Sample, dest_parents, src_parents)
        common_acquisitions = self.get_ids_for_common_orig_id(
            Acquisition, dest_parents, src_parents)

        # Align foreign keys, to Project, Sample and Acquisition
        for a_fk_to_proj_tbl in [
                Sample, Acquisition, ObjectHeader, ParticleProject
        ]:
            upd: Query = self.session.query(a_fk_to_proj_tbl)
            if a_fk_to_proj_tbl == Sample:
                # Move (i.e. change project) samples which are 'new' from merged project,
                #    so take all of them from src project...
                upd = upd.filter(
                    a_fk_to_proj_tbl.projid == self.src_prj_id)  # type: ignore
                # ...but not the ones with same orig_id, which are presumably equal.
                upd = upd.filter(
                    Sample.sampleid != all_(list(common_samples.keys())))
                # And update the column
                upd_values = {'projid': self.prj_id}
            elif a_fk_to_proj_tbl == Acquisition:
                # Acquisitions which were created, in source, under new samples, will 'follow'
                #    them during above move, thanks to the FK on acq_sample_id.
                # BUT some acquisitions were potentially created in source project, inside
                #    forked samples. They need to be attached to the dest (self) corresponding sample.
                if len(common_samples) > 0:
                    # Build a CTE with values for the update
                    smp_cte = values_cte("upd_smp", ("src_id", "dst_id"),
                                         [(k, v)
                                          for k, v in common_samples.items()])
                    smp_subqry = self.session.query(smp_cte.c.column2).filter(
                        smp_cte.c.column1 == Acquisition.acq_sample_id)
                    upd_values = {
                        'acq_sample_id':
                        func.coalesce(
                            smp_subqry.scalar_subquery(),  # type: ignore
                            Acquisition.acq_sample_id)
                    }
                    upd = upd.filter(Acquisition.acq_sample_id == any_(
                        list(common_samples.keys())))
                    # upd = upd.filter(Acquisition.acquisid != all_(list(common_acquisitions.keys())))
                if len(common_samples) == 0:
                    # Nothing to do. There were only new samples, all of them moved to self.
                    continue
            elif a_fk_to_proj_tbl == ObjectHeader:
                # Generated SQL looks like:
                # with upd_acq (src_id, dst_id) as (values (5,6), (7,8))
                # update obj_head
                #    set acquisid = coalesce((select dst_id from upd_acq where acquisid=src_id), acquisid)
                #  where acquisid in (select src_id from upd_acq)
                if len(common_acquisitions) > 0:
                    # Object must follow its acquisition
                    acq_cte = values_cte(
                        "upd_acq", ("src_id", "dst_id"),
                        [(k, v) for k, v in common_acquisitions.items()])
                    acq_subqry = self.session.query(acq_cte.c.column2).filter(
                        acq_cte.c.column1 == ObjectHeader.acquisid)
                    upd_values = {
                        'acquisid':
                        func.coalesce(
                            acq_subqry.scalar_subquery(),  # type:ignore
                            ObjectHeader.acquisid)
                    }
                    upd = upd.filter(ObjectHeader.acquisid == any_(
                        list(common_acquisitions.keys())))
                if len(common_acquisitions) == 0:
                    # Nothing to do. There were only new acquisitions, all of them moved to self.
                    continue
            else:
                # For Particle project
                upd = upd.filter(
                    ParticleProject.projid == self.src_prj_id)  # type: ignore
                upd_values = {'projid': self.prj_id}
            rowcount = upd.update(values=upd_values, synchronize_session=False)
            table_name = a_fk_to_proj_tbl.__tablename__  # type: ignore
            logger.info("Update in %s: %s rows", table_name, rowcount)

        # Acquisition & twin Process have followed their enclosing Sample

        # Remove the parents which are duplicate from orig_id point of view
        for a_fk_to_proj_tbl in [Acquisition, Sample]:
            to_del: Query = self.session.query(a_fk_to_proj_tbl)
            if a_fk_to_proj_tbl == Acquisition:
                # Remove conflicting acquisitions, they should be empty?
                to_del = to_del.filter(Acquisition.acquisid == any_(
                    list(common_acquisitions.keys())))  # type: ignore
            elif a_fk_to_proj_tbl == Sample:
                # Remove conflicting samples
                to_del = to_del.filter(Sample.sampleid == any_(
                    list(common_samples.keys())))  # type: ignore
            rowcount = to_del.delete(synchronize_session=False)
            table_name = a_fk_to_proj_tbl.__tablename__  # type: ignore
            logger.info("Delete in %s: %s rows", table_name, rowcount)

        self.dest_augmented_mappings.write_to_project(dest_prj)

        ProjectPrivilegeBO.generous_merge_into(self.session, self.prj_id,
                                               self.src_prj_id)

        # Completely erase the source project
        ProjectBO.delete(self.session, self.src_prj_id)
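
The values_cte construct boils down to the raw SQL shown in the ObjectHeader comment above: a VALUES CTE driving a single UPDATE. As an illustrative equivalent (a sketch, not the helper's actual output), the acquisition remap could be issued directly with SQLAlchemy's text():

    from sqlalchemy import text

    # Hypothetical standalone version of the remap, moving objects from source
    # acquisitions (src) to their destination twins (dst) in one statement.
    remap_sql = text("""
        WITH upd_acq (src_id, dst_id) AS (VALUES (:src1, :dst1), (:src2, :dst2))
        UPDATE obj_head
           SET acquisid = COALESCE((SELECT dst_id FROM upd_acq WHERE src_id = acquisid), acquisid)
         WHERE acquisid IN (SELECT src_id FROM upd_acq)
    """)
    # session.execute(remap_sql, {"src1": 5, "dst1": 6, "src2": 7, "dst2": 8})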
Example #20
    def build_meta(self) -> Optional[EMLMeta]:
        """
            Various queries/copies on/from the projects for getting metadata.
        """
        ret = None
        the_collection: CollectionBO = CollectionBO(self.collection).enrich()

        identifier = EMLIdentifier(packageId=the_collection.external_id,
                                   system=the_collection.external_id_system)

        title = EMLTitle(title=the_collection.title)

        creators: List[EMLPerson] = []
        for a_user in the_collection.creator_users:
            person, errs = self.user_to_eml_person(
                a_user, "creator '%s'" % a_user.name)
            if errs:
                self.warnings.extend(errs)
            else:
                assert person is not None
                creators.append(person)
        for an_org in the_collection.creator_organisations:
            creators.append(self.organisation_to_eml_person(an_org))
        if len(creators) == 0:
            self.errors.append(
                "No valid data creator (user or organisation) found for EML metadata."
            )

        contact, errs = self.user_to_eml_person(the_collection.contact_user,
                                                "contact")
        if contact is None:
            self.errors.append("No valid contact user found for EML metadata.")

        provider, errs = self.user_to_eml_person(the_collection.provider_user,
                                                 "provider")
        if provider is None:
            self.errors.append(
                "No valid metadata provider user found for EML metadata.")

        associates: List[EMLAssociatedPerson] = []
        for a_user in the_collection.associate_users:
            person, errs = self.user_to_eml_person(
                a_user, "associated person %d" % a_user.id)
            if errs:
                self.warnings.extend(errs)
            else:
                assert person is not None
                associates.append(
                    self.eml_person_to_associated_person(person, "originator"))
        for an_org in the_collection.associate_organisations:
            # noinspection PyTypeChecker
            associates.append(self.organisation_to_eml_person(an_org))

        # TODO if needed
        # EMLAssociatedPerson = EMLPerson + specific role

        # TODO: a marine regions substitute
        (min_lat, max_lat, min_lon,
         max_lon) = ProjectBO.get_bounding_geo(self.session,
                                               the_collection.project_ids)
        geo_cov = EMLGeoCoverage(
            geographicDescription="See coordinates",
            westBoundingCoordinate=self.geo_to_txt(min_lon),
            eastBoundingCoordinate=self.geo_to_txt(max_lon),
            northBoundingCoordinate=self.geo_to_txt(min_lat),
            southBoundingCoordinate=self.geo_to_txt(max_lat))

        (min_date,
         max_date) = ProjectBO.get_date_range(self.session,
                                              the_collection.project_ids)
        time_cov = EMLTemporalCoverage(beginDate=timestamp_to_str(min_date),
                                       endDate=timestamp_to_str(max_date))

        publication_date = date.today().isoformat()

        abstract = the_collection.abstract
        if not abstract:
            self.errors.append("Collection 'abstract' field is empty")
        elif len(abstract) < self.MIN_ABSTRACT_CHARS:
            self.errors.append(
                "Collection 'abstract' field is too short (%d chars) to make a good EMLMeta abstract. Minimum is %d"
                % (len(abstract), self.MIN_ABSTRACT_CHARS))

        additional_info = None  # Just to see if it goes thru QC
        # additional_info = """  marine, harvested by iOBIS.
        # The OOV supported the financial effort of the survey.
        # We are grateful to the crew of the research boat at OOV that collected plankton during the temporal survey."""

        coll_license: LicenseEnum = cast(LicenseEnum, the_collection.license)
        if coll_license not in self.OK_LICENSES:
            self.errors.append(
                "Collection license should be one of %s to be accepted, not %s."
                % (self.OK_LICENSES, coll_license))
        else:
            lic_url = DataLicense.EXPORT_EXPLANATIONS[
                coll_license] + "legalcode"
            lic_txt = DataLicense.NAMES[coll_license]
            lic_txt = lic_txt.replace("International Public ", "")
            # ipt.gbif.org does not find the full license name, so adjust a bit
            version = "4.0"
            if version in lic_txt:
                lic_txt = lic_txt.replace(
                    version,
                    "(%s) " % DataLicense.SHORT_NAMES[coll_license] + version)
            licence = "This work is licensed under a <ulink url=\"%s\"><citetitle>%s</citetitle></ulink>." % (
                lic_url, lic_txt)

        # Preferably one of https://www.emodnet-biology.eu/contribute?page=list&subject=thestdas&SpColID=552&showall=1#P
        keywords = EMLKeywordSet(
            keywords=[
                "Plankton",
                "Imaging",
                "EcoTaxa"  # Not in list above
                # "Ligurian sea" TODO: Geo area?
                # TODO: ZooProcess (from projects)
            ],
            keywordThesaurus="GBIF Dataset Type Vocabulary: "
            "http://rs.gbif.org/vocabulary/gbif/dataset_type.xml")

        taxo_cov = self.get_taxo_coverage(the_collection.project_ids)

        now = datetime.now().replace(microsecond=0)
        meta_plus = EMLAdditionalMeta(dateStamp=now.isoformat())

        coll_title = the_collection.title
        info_url = "https://ecotaxa.obs-vlfr.fr/api/collections/by_title?q=%s" % quote_plus(
            coll_title)

        if len(self.errors) == 0:
            # The research project
            # noinspection PyUnboundLocalVariable
            # project = EMLProject(title=the_collection.title,
            #                      personnel=[])  # TODO: Unsure about duplicated information with metadata
            # noinspection PyUnboundLocalVariable
            ret = EMLMeta(
                identifier=identifier,
                titles=[title],
                creators=creators,
                contacts=[contact],
                metadataProviders=[provider],
                associatedParties=associates,
                pubDate=publication_date,
                abstract=[abstract],
                keywordSet=keywords,
                additionalInfo=additional_info,
                geographicCoverage=geo_cov,
                temporalCoverage=time_cov,
                taxonomicCoverage=taxo_cov,
                intellectualRights=licence,
                # project=project,
                maintenance="periodic review of origin data",
                maintenanceUpdateFrequency="unknown",  # From XSD
                additionalMetadata=meta_plus,
                informationUrl=info_url)
        return ret
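
For illustration, assuming DataLicense.NAMES holds the full Creative Commons wording and DataLicense.SHORT_NAMES the abbreviation (an assumption about their content), the two string adjustments above behave like this:

    # Worked example of the license text adjustment, with assumed inputs
    lic_txt = "Creative Commons Attribution 4.0 International Public License"
    short_name = "CC BY"
    lic_txt = lic_txt.replace("International Public ", "")
    # -> "Creative Commons Attribution 4.0 License"
    version = "4.0"
    if version in lic_txt:
        lic_txt = lic_txt.replace(version, "(%s) " % short_name + version)
    # -> "Creative Commons Attribution (CC BY) 4.0 License"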