def classify_set(self, current_user_id: UserIDT, target_ids: ObjectIDListT, classif_ids: ClassifIDListT,
                 wanted_qualif: str) -> Tuple[int, int, Dict]:
    """
        Classify or validate/set to dubious a set of objects.
    """
    # Get the objects and project, checking rights at the same time.
    object_set, project = self._the_project_for(current_user_id, target_ids, Action.ANNOTATE)
    # Do the raw classification with history.
    nb_upd, all_changes = object_set.classify_validate(current_user_id, classif_ids, wanted_qualif)
    # Propagate changes to update projects_taxo_stat
    if nb_upd > 0:
        # Log a bit
        for a_chg, impacted in all_changes.items():
            logger.info("change %s for %s", a_chg, impacted)
        # Collate changes
        collated_changes: Dict[int, Dict] = {}
        for (prev_classif_id, prev_classif_qual, new_classif_id, wanted_qualif), objects in all_changes.items():
            # Decrement for what was before
            self.count_in_and_out(collated_changes, prev_classif_id, prev_classif_qual, -len(objects))
            # Increment for what arrives
            self.count_in_and_out(collated_changes, new_classif_id, wanted_qualif, len(objects))
        # Update the table
        ProjectBO.incremental_update_taxo_stats(self.session, project.projid, collated_changes)
        self.session.commit()
    else:
        self.session.rollback()
    # Return status
    return nb_upd, project.projid, all_changes
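# A minimal, self-contained sketch of the collation step above, assuming that
# count_in_and_out() simply accumulates signed object counts per classification id
# and qualification. The names and dict layout below are illustrative only, not
# the actual EcoTaxa implementation.
from typing import Dict, List, Tuple


def count_in_and_out_sketch(collated: Dict[int, Dict[str, int]],
                            classif_id: int, qualif: str, delta: int) -> None:
    # Accumulate a signed delta for the (classification, qualification) pair
    per_classif = collated.setdefault(classif_id, {})
    per_classif[qualif] = per_classif.get(qualif, 0) + delta


# Example: 3 objects move from (taxon 10, 'P'redicted) to (taxon 12, 'V'alidated)
changes: Dict[Tuple[int, str, int, str], List[int]] = {(10, 'P', 12, 'V'): [101, 102, 103]}
collated: Dict[int, Dict[str, int]] = {}
for (prev_id, prev_qual, new_id, new_qual), objs in changes.items():
    count_in_and_out_sketch(collated, prev_id, prev_qual, -len(objs))
    count_in_and_out_sketch(collated, new_id, new_qual, len(objs))
# collated == {10: {'P': -3}, 12: {'V': 3}}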
def do_run(self, current_user_id: int) -> SubsetRsp:
    # Security checks
    RightsBO.user_wants(self.session, current_user_id, Action.READ, self.prj_id)
    RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, self.dest_prj.projid)
    # OK
    logger.info("Starting subset of '%s'", self.prj.title)
    ret = SubsetRsp()

    self.update_progress(5, "Determining objects to clone")
    self._find_what_to_clone()
    logger.info("Matched %s objects", len(self.to_clone))
    if len(self.to_clone) == 0:
        self.task.taskstate = "Error"
        self.update_progress(10, "No object to include in the subset project")
        ret.errors.append("No object found to clone into subset.")
        return ret

    self._do_clone()
    self.session.commit()

    # Recompute stats and so on
    ProjectBO.do_after_load(self.session, self.dest_prj.projid)
    self.session.commit()
    return ret
def do_run(self, current_user_id: int) -> MergeRsp:
    """
        Run the service, merge the projects.
        :return:
    """
    # Security check
    RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, self.prj_id)
    RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, self.src_prj_id)
    # OK
    prj = self.session.query(Project).get(self.prj_id)
    assert prj is not None
    src_prj = self.session.query(Project).get(self.src_prj_id)
    assert src_prj is not None

    logger.info("Validating Merge of '%s'", prj.title)
    ret = MergeRsp()
    errs = self._verify_possible(prj, src_prj)
    ret.errors = errs
    # Exit if errors or dry run
    if self.dry_run or len(errs) > 0:
        return ret

    logger.info("Remaps: %s", self.remap_operations)
    # Go for real, as it's not a dry run and there are no errors
    logger.info("Starting Merge of '%s'", prj.title)
    self._do_merge(prj)
    self.session.commit()

    # Recompute stats and so on
    ProjectBO.do_after_load(self.session, prj_id=self.prj_id)
    self.session.commit()
    return ret
def delete(self, current_user_id: UserIDT, object_ids: ObjectIDListT) -> Tuple[int, int, int, int]:
    """
        Remove from DB all the objects with ID in given list.
    """
    # Security check
    obj_set = EnumeratedObjectSet(self.session, object_ids)
    # Get project IDs for the objects and verify rights
    prj_ids = obj_set.get_projects_ids()
    for a_prj_id in prj_ids:
        RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, a_prj_id)

    # Prepare & start a remover thread that will run in // with DB queries
    remover = VaultRemover(self.link_src, logger).do_start()
    # Do the deletion itself.
    nb_objs, nb_img_rows, img_files = obj_set.delete(self.CHUNK_SIZE, remover.add_files)

    # Update stats on impacted project(s)
    for prj_id in prj_ids:
        ProjectBO.update_taxo_stats(self.session, prj_id)
        # Stats depend on taxo stats
        ProjectBO.update_stats(self.session, prj_id)
    self.session.commit()

    # Wait for the files handled
    remover.wait_for_done()
    return nb_objs, 0, nb_img_rows, len(img_files)
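# A minimal sketch of the "remover thread running in parallel with DB work" pattern used
# above, assuming VaultRemover is essentially a worker thread fed with file paths and
# joined at the end. This stand-in uses only the standard library and is not the real class.
import queue
import threading
from typing import List


class FileRemoverSketch:
    _END = object()

    def __init__(self) -> None:
        self._queue: "queue.Queue" = queue.Queue()
        self._thread = threading.Thread(target=self._run, daemon=True)

    def do_start(self) -> "FileRemoverSketch":
        self._thread.start()
        return self

    def add_files(self, files: List[str]) -> None:
        for a_file in files:
            self._queue.put(a_file)

    def wait_for_done(self) -> None:
        self._queue.put(self._END)
        self._thread.join()

    def _run(self) -> None:
        while True:
            item = self._queue.get()
            if item is self._END:
                break
            # Here the real implementation would unlink the file from the vault
            print("would remove", item)


# Usage mirrors the service code: start, feed paths during deletion, then wait.
remover = FileRemoverSketch().do_start()
remover.add_files(["0001/12.jpg", "0001/13.jpg"])
remover.wait_for_done()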
def do_run(self, current_user_id: int) -> List[str]:
    # Security check
    _user, project = RightsBO.user_wants(self.session, current_user_id, Action.READ, self.prj_id)
    # OK
    proj_bo = ProjectBO(project).enrich()
    ret = []
    # TODO: Permissions
    ret.append(proj_bo.title)
    ret.append(str(proj_bo.obj_free_cols))
    free_cols_vals = proj_bo.get_all_num_columns_values(self.session)
    acquis_stats: AcquisitionStats = AcquisitionStats("", 0)
    for a_row in free_cols_vals:
        acquis_id, acquis_orig_id, objid, *free_vals = a_row
        free_vals = [a_val if a_val is not None else Decimal('nan') for a_val in free_vals]
        if acquis_id == acquis_stats.acquis_id:
            # Same acquisition
            pass
        else:
            # New acquisition, close previous one
            self.output_acq(acquis_stats, ret)
            # And start new one
            acquis_stats = AcquisitionStats(acquis_orig_id, acquis_id)
        acquis_stats.add_values(free_vals)
    self.output_acq(acquis_stats, ret)
    return ret
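# A small standalone sketch of the control-break loop above: rows are assumed to arrive
# sorted by acquisition id, per-acquisition stats are flushed whenever the id changes,
# and once more after the loop. The accumulator here is a plain dict, not AcquisitionStats.
from decimal import Decimal
from typing import Any, Dict, List

rows = [(1, "acq_a", 101, Decimal("2.5")),
        (1, "acq_a", 102, None),
        (2, "acq_b", 201, Decimal("4.0"))]


def flush(stats: Dict[str, Any], out: List[str]) -> None:
    if stats["acquis_id"] is not None:
        out.append("%s: %d values" % (stats["orig_id"], len(stats["values"])))


output: List[str] = []
current: Dict[str, Any] = {"acquis_id": None, "orig_id": "", "values": []}
for acquis_id, acquis_orig_id, objid, *free_vals in rows:
    free_vals = [v if v is not None else Decimal("nan") for v in free_vals]
    if acquis_id != current["acquis_id"]:
        # New acquisition: close the previous one and start a fresh accumulator
        flush(current, output)
        current = {"acquis_id": acquis_id, "orig_id": acquis_orig_id, "values": []}
    current["values"].extend(free_vals)
flush(current, output)
# output == ['acq_a: 2 values', 'acq_b: 1 values']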
def do_run(self, current_user_id: int) -> ImportRealRsp:
    """
        Do the real job using injected parameters.
        :return:
    """
    # Security check
    RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, self.prj_id)
    # OK
    loaded_files = none_to_empty(self.prj.fileloaded).splitlines()
    logger.info("Previously loaded files: %s", loaded_files)

    # Save mappings straight away
    self.save_mapping(self.custom_mapping)

    source_bundle = InBundle(self.req.source_path, Path(self.temp_for_task.data_dir_for(self.task_id)))
    # Configure the import to come, destination
    db_writer = DBWriter(self.session)
    import_where = ImportWhere(db_writer, self.vault, self.temp_for_task.base_dir_for(self.task_id))
    # Configure the import to come, directives
    import_how = ImportHow(self.prj_id, self.req.update_mode, self.custom_mapping,
                           self.req.skip_existing_objects, loaded_files)
    import_how.taxo_mapping = self.req.taxo_mappings
    import_how.taxo_found = self.req.found_taxa
    import_how.found_users = self.req.found_users
    if self.req.skip_loaded_files:
        import_how.compute_skipped(source_bundle, logger)
    if not self.req.skip_existing_objects:
        with CodeTimer("run: Existing images for %d: " % self.prj_id, logger):
            import_how.objects_and_images_to_skip = Image.fetch_existing_images(self.session, self.prj_id)
    import_how.do_thumbnail_above(int(self.config['THUMBSIZELIMIT']))

    # Do the bulk job of import
    row_count = source_bundle.do_import(import_where, import_how, self.req.rowcount, self.report_progress)

    # Update loaded files in DB, removing duplicates
    self.prj.fileloaded = "\n".join(set(import_how.loaded_files))
    self.session.commit()

    # Recompute stats
    ProjectBO.do_after_load(self.session, self.prj_id)
    self.session.commit()

    logger.info("Total of %d rows loaded" % row_count)
    # Prepare response
    ret = ImportRealRsp()
    return ret
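# Side note on the duplicate removal above: "\n".join(set(...)) loses the original file
# order. If ordering ever matters, an order-preserving dedup is a one-liner; this is just
# an illustration, the service code itself is left untouched.
loaded = ["a.tsv", "b.tsv", "a.tsv", "c.tsv"]
deduped_unordered = "\n".join(set(loaded))          # order is arbitrary
deduped_ordered = "\n".join(dict.fromkeys(loaded))  # keeps first-seen order: a, b, c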
def do_real(self) -> None:
    """
        Do the real job, i.e. write everywhere (DB/filesystem)
    """
    loaded_files = none_to_empty(self.prj.fileloaded).splitlines()
    logger.info("Previously loaded files: %s", loaded_files)

    found_users, taxo_found, col_mapping_dict, \
        nb_rows, source_path = self._load_vars_from_state(self.STATE_KEYS)

    # Save mappings straight away
    col_mapping = ProjectMapping().load_from_dict(col_mapping_dict)
    col_mapping.write_to_project(self.prj)
    self.session.commit()

    # TODO: Duplicated code
    source_bundle = InBundle(source_path, Path(self.temp_for_jobs.data_dir_for(self.job_id)))
    # Configure the import to come, destination
    db_writer = DBWriter(self.session)
    import_where = ImportWhere(db_writer, self.vault, self.temp_for_jobs.base_dir_for(self.job_id))
    # Configure the import to come, directives
    import_how = ImportHow(self.prj_id, self.req.update_mode, col_mapping,
                           self.req.skip_existing_objects, loaded_files)
    import_how.taxo_mapping = self.req.taxo_mappings
    import_how.found_taxa = taxo_found
    import_how.found_users = found_users
    if self.req.skip_loaded_files:
        import_how.compute_skipped(source_bundle, logger)
    if self.req.skip_existing_objects:
        # If we must skip existing objects then do an inventory of what's in already
        with CodeTimer("run: Existing images for %d: " % self.prj_id, logger):
            import_how.objects_and_images_to_skip = Image.fetch_existing_images(self.session, self.prj_id)
    import_how.do_thumbnail_above(int(self.config['THUMBSIZELIMIT']))

    # Do the bulk job of import
    rowcount_from_validate = nb_rows
    row_count = source_bundle.do_import(import_where, import_how, rowcount_from_validate, self.report_progress)

    # Update loaded files in DB, removing duplicates
    self.prj.fileloaded = "\n".join(set(import_how.loaded_files))
    self.session.commit()

    # Recompute stats
    ProjectBO.do_after_load(self.session, self.prj_id)
    self.session.commit()

    msg = "Total of %d rows loaded" % row_count
    logger.info(msg)
    self.set_job_result(errors=[], infos={"rowcount": row_count})
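# The job above restores what the earlier validation step computed, via
# _load_vars_from_state(self.STATE_KEYS). Below is a minimal sketch of such a keyed state
# store, assuming a JSON file per job; the real persistence mechanism in the codebase may
# differ, so treat every name here as hypothetical.
import json
from pathlib import Path
from typing import Any, List, Tuple


class JobStateSketch:
    def __init__(self, state_file: Path) -> None:
        self.state_file = state_file

    def save_vars_to_state(self, keys: List[str], *values: Any) -> None:
        # Persist values positionally matched to keys
        self.state_file.write_text(json.dumps(dict(zip(keys, values))))

    def load_vars_from_state(self, keys: List[str]) -> Tuple[Any, ...]:
        # Return values in the same key order, ready for tuple unpacking
        state = json.loads(self.state_file.read_text())
        return tuple(state[a_key] for a_key in keys)


# Usage, mimicking the STATE_KEYS round-trip of the import job:
STATE_KEYS = ["found_users", "found_taxa", "mapping", "nb_rows", "source_path"]
store = JobStateSketch(Path("/tmp/job_42_state.json"))
store.save_vars_to_state(STATE_KEYS, {}, {}, {}, 1000, "/tmp/bundle")
found_users, taxo_found, col_mapping_dict, nb_rows, source_path = store.load_vars_from_state(STATE_KEYS)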
def delete(self, current_user_id: int, prj_id: int, only_objects: bool) -> Tuple[int, int, int, int]:
    # Security barrier
    _current_user, _project = RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, prj_id)
    # Troll-ish way of erasing
    all_object_ids = ProjectBO.get_all_object_ids(self.session, prj_id=prj_id)
    # Build a big set
    obj_set = EnumeratedObjectSet(self.session, all_object_ids)
    # Prepare a remover thread that will run in // with DB queries
    remover = VaultRemover(self.link_src, logger).do_start()
    # Do the deletion itself.
    nb_objs, nb_img_rows, img_files = obj_set.delete(self.DELETE_CHUNK_SIZE, remover.add_files)

    ProjectBO.delete_object_parents(self.session, prj_id)

    if only_objects:
        # Update stats, should all be 0...
        ProjectBO.update_taxo_stats(self.session, prj_id)
        # Stats depend on taxo stats
        ProjectBO.update_stats(self.session, prj_id)
    else:
        ProjectBO.delete(self.session, prj_id)

    self.session.commit()
    # Wait for the files handled
    remover.wait_for_done()
    return nb_objs, 0, nb_img_rows, len(img_files)
def do_import(self):
    """
        Do the real job, i.e. copy files while creating records.
    """
    errors = []
    self.manage_uploaded()
    self.unzip_if_needed()
    # Use a Bundle
    source_bundle = InBundle(self.source_dir_or_zip, Path(self.temp_for_task.data_dir_for(self.task_id)))
    # Clean it, in case the ZIP contains a CSV
    source_bundle.remove_all_tsvs()
    images = source_bundle.list_image_files()
    # Configure the import to come, destination
    db_writer = DBWriter(self.session)
    import_where = ImportWhere(db_writer, self.vault, self.temp_for_task.base_dir_for(self.task_id))
    # Configure the import to come, directives
    import_how = ImportHow(prj_id=self.prj_id, update_mode="", custom_mapping=ProjectMapping(),
                           skip_object_duplicates=False, loaded_files=[])
    import_how.do_thumbnail_above(int(self.config['THUMBSIZELIMIT']))
    # Generate TSV
    req_values = self.req.values
    if req_values.get(SimpleImportFields.userlb, ""):
        import_how.found_users["user"] = {"id": req_values.get(SimpleImportFields.userlb)}
        req_values[SimpleImportFields.userlb] = "user"
    if req_values.get(SimpleImportFields.status, ""):
        req_values[SimpleImportFields.status] = classif_qual.get(req_values[SimpleImportFields.status], "")
    self.make_tsv(source_bundle, images)
    # Import
    nb_image_files = len(images)
    nb_images = source_bundle.do_import(import_where, import_how, nb_image_files, self.report_progress)
    self.session.commit()

    # Recompute stats and so on
    ProjectBO.do_after_load(self.session, self.prj_id)
    self.session.commit()

    ret = SimpleImportRsp(errors=errors, nb_images=nb_images)
    return ret
def update_db_stats(self):
    """
        Refresh the database for aggregates.
    """
    project_ids = [a_project.projid for a_project in self.collection.projects]
    for a_project_id in project_ids:
        # Ensure the taxo stats are OK
        ProjectBO.update_taxo_stats(self.session, projid=a_project_id)
        # Ensure that the geography is OK, i.e. propagated upwards from objects,
        # for all projects inside the collection
        Sample.propagate_geo(self.session, prj_id=a_project_id)
    a_stat: ProjectTaxoStats
    for a_stat in ProjectBO.read_taxo_stats(self.session, project_ids, []):
        self.validated_count += a_stat.nb_validated
def read_stats(self, current_user_id: int, prj_ids: ProjectIDListT) -> List[ProjectTaxoStats]:
    """
        Read classification statistics for these projects.
    """
    # No security barrier because there is no private information inside
    return ProjectBO.read_taxo_stats(self.session, prj_ids)
def __init__(self, project: Project, mapping: TableMapping,
             where_clause: WhereClause, order_clause: Optional[OrderClause],
             params: Dict[str, Any],
             window_start: Optional[int], window_size: Optional[int]):
    self.sort_fields = ProjectBO.get_sort_db_columns(project, mapping=mapping)
    self.projid = project.projid
    # Store the PG query specifics
    self.pg_where = where_clause
    self.pg_order = order_clause
    self.pg_params = params
    self.pg_window_start = window_start
    self.pg_window_size = window_size
    # Move to DBs
    file_name = self.file_name(project.projid)
    self.conn: Optional[SQLiteConnection] = None
    self.meta: Optional[DBMeta] = None
    try:
        self.conn = SQLite3.get_conn(file_name, "ro")
    except OperationalError as e:
        # No file or locked file
        logger.info("No conn %s", str(e))
        return
    try:
        self.meta = SQLite3.get_meta(self.conn)
    except OperationalError as e:
        # DB could be locked, e.g. while writing
        logger.info("No meta %s", str(e))
        return
    self.can = True, ""
    # The eventual SQLite equivalent, arranged
    self.cache_where = WhereClause()
    self.where_params: Dict[str, Any] = {}
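# SQLite3.get_conn(file_name, "ro") above is a project helper. With the standard library,
# the same "open read-only, fall back gracefully if the file is missing or locked"
# behaviour looks roughly like this (sketch only, not the real helper).
import sqlite3


def open_cache_readonly(file_name: str):
    try:
        # mode=ro via a URI refuses to create the file and fails if it cannot be read
        conn = sqlite3.connect("file:%s?mode=ro" % file_name, uri=True)
        conn.execute("SELECT 1")  # Fails early if the file is not a usable DB or is locked
        return conn
    except sqlite3.OperationalError as e:
        # No file or locked file: signal "no cache" to the caller
        print("No conn", e)
        return None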
def reset_to_predicted(self, current_user_id: UserIDT, proj_id: ProjectIDT, filters: ProjectFilters) -> None:
    """
        Query the given project with given filters, reset the resulting objects to predicted.
    """
    # Security check
    RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, proj_id)

    impacted_objs = [r[0] for r in self.query(current_user_id, proj_id, filters)[0]]
    EnumeratedObjectSet(self.session, impacted_objs).reset_to_predicted()

    # Update stats
    ProjectBO.update_taxo_stats(self.session, proj_id)
    # Stats depend on taxo stats
    ProjectBO.update_stats(self.session, proj_id)
    self.session.commit()
def read_stats(self, current_user_id: Optional[UserIDT], prj_ids: ProjectIDListT,
               taxa_ids: Union[str, ClassifIDListT]) -> List[ProjectTaxoStats]:
    """
        Read classification statistics for these projects.
    """
    # No security barrier because there is no private information inside
    return ProjectBO.read_taxo_stats(self.session, prj_ids, taxa_ids)
def search(self, current_user_id: Optional[int],
           for_managing: bool = False,
           not_granted: bool = False,
           title_filter: str = '',
           instrument_filter: str = '',
           filter_subset: bool = False) -> List[ProjectBO]:
    current_user: Optional[User]
    if current_user_id is None:
        # For public
        matching_ids = ProjectBO.list_public_projects(self.ro_session, title_filter)
        projects = ProjectBOSet(self.session, matching_ids, public=True)
    else:
        # No rights checking as basically everyone can see all projects
        current_user = self.ro_session.query(User).get(current_user_id)
        assert current_user is not None
        matching_ids = ProjectBO.projects_for_user(self.ro_session, current_user,
                                                   for_managing, not_granted,
                                                   title_filter, instrument_filter, filter_subset)
        projects = ProjectBOSet(self.ro_session, matching_ids, public=False)
    return projects.as_list()
def read_user_stats(self, current_user_id: int, prj_ids: ProjectIDListT) -> List[ProjectUserStats]:
    """
        Read user statistics for these projects.
    """
    # Security barrier
    for prj_id in prj_ids:
        RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, prj_id)
    ret = ProjectBO.read_user_stats(self.session, prj_ids)
    return ret
def do_run(self) -> None:
    # OK
    logger.info("Starting subset of '%s'", self.prj.title)

    self.update_progress(5, "Determining objects to clone")
    self._find_what_to_clone()
    logger.info("Matched %s objects", len(self.to_clone))
    if len(self.to_clone) == 0:
        errors = ["No object found to clone into subset."]
        self.set_job_result(errors=errors, infos={"infos": ""})
        return

    self._do_clone()
    self.session.commit()

    # Recompute stats and so on
    ProjectBO.do_after_load(self.session, self.dest_prj.projid)
    self.session.commit()
    self.set_job_result(errors=[], infos={"rowcount": len(self.to_clone)})
def revert_to_history(self, current_user_id: UserIDT, proj_id: ProjectIDT,
                      filters: ProjectFilters, dry_run: bool,
                      target: Optional[int]) -> Tuple[List[HistoricalLastClassif], ClassifSetInfoT]:
    """
        Revert the given set to its classification history; if dry_run, then only simulate.
    """
    # Security check
    RightsBO.user_wants(self.session, current_user_id, Action.ADMINISTRATE, proj_id)

    # Get target objects
    impacted_objs = [r[0] for r in self.query(current_user_id, proj_id, filters)[0]]
    obj_set = EnumeratedObjectSet(self.session, impacted_objs)

    # We don't revert to a previous version in history from the same annotator
    but_not_by: Optional[int] = None
    but_not_by_str = filters.get('filt_last_annot', None)
    if but_not_by_str is not None:
        try:
            but_not_by = int(but_not_by_str)
        except ValueError:
            pass

    if dry_run:
        # Return information on what to do
        impact = obj_set.evaluate_revert_to_history(target, but_not_by)
        # And names for display
        classifs = TaxonomyBO.names_with_parent_for(self.session, self.collect_classif(impact))
    else:
        # Do the real thing
        impact = obj_set.revert_to_history(target, but_not_by)
        classifs = {}
        # Update stats
        ProjectBO.update_taxo_stats(self.session, proj_id)
        # Stats depend on taxo stats
        ProjectBO.update_stats(self.session, proj_id)
        self.session.commit()
    # Give feedback
    return impact, classifs
def _do_merge(self, dest_prj: Project):
    """
        Real merge operation.
    """
    # Loop over involved tables and remap free columns
    for a_mapped_tbl in MAPPED_TABLES:
        remaps = self.remap_operations.get(a_mapped_tbl)
        # Do the remappings if any
        if remaps is not None:
            logger.info("Doing re-mapping in %s: %s", a_mapped_tbl.__tablename__, remaps)
            ProjectBO.remap(self.session, self.src_prj_id, a_mapped_tbl, remaps)

    # Collect orig_id
    dest_parents = InBundle.fetch_existing_parents(self.ro_session, prj_id=self.prj_id)
    src_parents = InBundle.fetch_existing_parents(self.ro_session, prj_id=self.src_prj_id)

    # Compute needed projections in order to keep orig_id unicity
    common_samples = self.get_ids_for_common_orig_id(Sample, dest_parents, src_parents)
    common_acquisitions = self.get_ids_for_common_orig_id(Acquisition, dest_parents, src_parents)

    # Align foreign keys, to Project, Sample and Acquisition
    for a_fk_to_proj_tbl in [Sample, Acquisition, ObjectHeader, ParticleProject]:
        upd: Query = self.session.query(a_fk_to_proj_tbl)
        if a_fk_to_proj_tbl == Sample:
            # Move (i.e. change project) samples which are 'new' from merged project,
            # so take all of them from src project...
            upd = upd.filter(a_fk_to_proj_tbl.projid == self.src_prj_id)  # type: ignore
            # ...but not the ones with same orig_id, which are presumably equal.
            upd = upd.filter(Sample.sampleid != all_(list(common_samples.keys())))
            # And update the column
            upd_values = {'projid': self.prj_id}
        elif a_fk_to_proj_tbl == Acquisition:
            # Acquisitions which were created, in source, under new samples, will 'follow'
            # them during above move, thanks to the FK on acq_sample_id.
            # BUT some acquisitions were potentially created in source project, inside
            # forked samples. They need to be attached to the dest (self) corresponding sample.
            if len(common_samples) > 0:
                # Build a CTE with values for the update
                smp_cte = values_cte("upd_smp", ("src_id", "dst_id"),
                                     [(k, v) for k, v in common_samples.items()])
                smp_subqry = self.session.query(smp_cte.c.column2).filter(
                    smp_cte.c.column1 == Acquisition.acq_sample_id)
                upd_values = {'acq_sample_id': func.coalesce(smp_subqry.scalar_subquery(),  # type: ignore
                                                             Acquisition.acq_sample_id)}
                upd = upd.filter(Acquisition.acq_sample_id == any_(list(common_samples.keys())))
                # upd = upd.filter(Acquisition.acquisid != all_(list(common_acquisitions.keys())))
            if len(common_samples) == 0:
                # Nothing to do. There were only new samples, all of them moved to self.
                continue
        elif a_fk_to_proj_tbl == ObjectHeader:
            # Generated SQL looks like:
            # with upd_acq (src_id, dst_id) as (values (5,6), (7,8))
            # update obj_head
            #    set acquisid = coalesce((select dst_id from upd_acq where acquisid=src_id), acquisid)
            #  where acquisid in (select src_id from upd_acq)
            if len(common_acquisitions) > 0:
                # Object must follow its acquisition
                acq_cte = values_cte("upd_acq", ("src_id", "dst_id"),
                                     [(k, v) for k, v in common_acquisitions.items()])
                acq_subqry = self.session.query(acq_cte.c.column2).filter(
                    acq_cte.c.column1 == ObjectHeader.acquisid)
                upd_values = {'acquisid': func.coalesce(acq_subqry.scalar_subquery(),  # type:ignore
                                                        ObjectHeader.acquisid)}
                upd = upd.filter(ObjectHeader.acquisid == any_(list(common_acquisitions.keys())))
            if len(common_acquisitions) == 0:
                # Nothing to do. There were only new acquisitions, all of them moved to self.
                continue
        else:
            # For Particle project
            upd = upd.filter(ParticleProject.projid == self.src_prj_id)  # type: ignore
            upd_values = {'projid': self.prj_id}
        rowcount = upd.update(values=upd_values, synchronize_session=False)
        table_name = a_fk_to_proj_tbl.__tablename__  # type: ignore
        logger.info("Update in %s: %s rows", table_name, rowcount)

    # Acquisition & twin Process have followed their enclosing Sample

    # Remove the parents which are duplicate from orig_id point of view
    for a_fk_to_proj_tbl in [Acquisition, Sample]:
        to_del: Query = self.session.query(a_fk_to_proj_tbl)
        if a_fk_to_proj_tbl == Acquisition:
            # Remove conflicting acquisitions, they should be empty?
            to_del = to_del.filter(Acquisition.acquisid == any_(list(common_acquisitions.keys())))  # type: ignore
        elif a_fk_to_proj_tbl == Sample:
            # Remove conflicting samples
            to_del = to_del.filter(Sample.sampleid == any_(list(common_samples.keys())))  # type: ignore
        rowcount = to_del.delete(synchronize_session=False)
        table_name = a_fk_to_proj_tbl.__tablename__  # type: ignore
        logger.info("Delete in %s: %s rows", table_name, rowcount)

    self.dest_augmented_mappings.write_to_project(dest_prj)

    ProjectPrivilegeBO.generous_merge_into(self.session, self.prj_id, self.src_prj_id)

    # Completely erase the source project
    ProjectBO.delete(self.session, self.src_prj_id)
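# get_ids_for_common_orig_id() above produces, per table, a src-DB-id -> dest-DB-id map for
# parents which share the same orig_id in both projects. A minimal sketch, assuming the
# fetched parents are plain {orig_id: db_id} dicts (the real structures and helper
# signature may differ).
from typing import Dict


def ids_for_common_orig_id_sketch(src_parents: Dict[str, int],
                                  dest_parents: Dict[str, int]) -> Dict[int, int]:
    # For every orig_id present on both sides, remap the source DB id onto the destination one
    return {src_id: dest_parents[orig_id]
            for orig_id, src_id in src_parents.items()
            if orig_id in dest_parents}


# Example: sample "st1" exists in both projects, so source id 5 must be remapped to 6,
# which is exactly the kind of (src_id, dst_id) pair fed into the VALUES CTE above.
common = ids_for_common_orig_id_sketch({"st1": 5, "st2": 7}, {"st1": 6, "st3": 9})
# common == {5: 6}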
def build_meta(self) -> Optional[EMLMeta]:
    """
        Various queries/copies on/from the projects for getting metadata.
    """
    ret = None
    the_collection: CollectionBO = CollectionBO(self.collection).enrich()

    identifier = EMLIdentifier(packageId=the_collection.external_id,
                               system=the_collection.external_id_system)

    title = EMLTitle(title=the_collection.title)

    creators: List[EMLPerson] = []
    for a_user in the_collection.creator_users:
        person, errs = self.user_to_eml_person(a_user, "creator '%s'" % a_user.name)
        if errs:
            self.warnings.extend(errs)
        else:
            assert person is not None
            creators.append(person)
    for an_org in the_collection.creator_organisations:
        creators.append(self.organisation_to_eml_person(an_org))
    if len(creators) == 0:
        self.errors.append("No valid data creator (user or organisation) found for EML metadata.")

    contact, errs = self.user_to_eml_person(the_collection.contact_user, "contact")
    if contact is None:
        self.errors.append("No valid contact user found for EML metadata.")

    provider, errs = self.user_to_eml_person(the_collection.provider_user, "provider")
    if provider is None:
        self.errors.append("No valid metadata provider user found for EML metadata.")

    associates: List[EMLAssociatedPerson] = []
    for a_user in the_collection.associate_users:
        person, errs = self.user_to_eml_person(a_user, "associated person %d" % a_user.id)
        if errs:
            self.warnings.extend(errs)
        else:
            assert person is not None
            associates.append(self.eml_person_to_associated_person(person, "originator"))
    for an_org in the_collection.associate_organisations:
        # noinspection PyTypeChecker
        associates.append(self.organisation_to_eml_person(an_org))

    # TODO if needed
    # EMLAssociatedPerson = EMLPerson + specific role

    # TODO: a marine regions substitute
    (min_lat, max_lat, min_lon, max_lon) = ProjectBO.get_bounding_geo(self.session, the_collection.project_ids)
    geo_cov = EMLGeoCoverage(geographicDescription="See coordinates",
                             westBoundingCoordinate=self.geo_to_txt(min_lon),
                             eastBoundingCoordinate=self.geo_to_txt(max_lon),
                             northBoundingCoordinate=self.geo_to_txt(min_lat),
                             southBoundingCoordinate=self.geo_to_txt(max_lat))

    (min_date, max_date) = ProjectBO.get_date_range(self.session, the_collection.project_ids)
    time_cov = EMLTemporalCoverage(beginDate=timestamp_to_str(min_date),
                                   endDate=timestamp_to_str(max_date))

    publication_date = date.today().isoformat()

    abstract = the_collection.abstract
    if not abstract:
        self.errors.append("Collection 'abstract' field is empty")
    elif len(abstract) < self.MIN_ABSTRACT_CHARS:
        self.errors.append(
            "Collection 'abstract' field is too short (%d chars) to make a good EMLMeta abstract. Minimum is %d"
            % (len(abstract), self.MIN_ABSTRACT_CHARS))

    additional_info = None  # Just to see if it goes thru QC
    # additional_info = """ marine, harvested by iOBIS.
    #  The OOV supported the financial effort of the survey.
    #  We are grateful to the crew of the research boat at OOV that collected plankton during the temporal survey."""

    coll_license: LicenseEnum = cast(LicenseEnum, the_collection.license)
    if coll_license not in self.OK_LICENSES:
        self.errors.append(
            "Collection license should be one of %s to be accepted, not %s."
            % (self.OK_LICENSES, coll_license))
    else:
        lic_url = DataLicense.EXPORT_EXPLANATIONS[coll_license] + "legalcode"
        lic_txt = DataLicense.NAMES[coll_license]
        lic_txt = lic_txt.replace("International Public ", "")
        # ipt.gbif.org does not find the full license name, so adjust a bit
        version = "4.0"
        if version in lic_txt:
            lic_txt = lic_txt.replace(version,
                                      "(%s) " % DataLicense.SHORT_NAMES[coll_license] + version)
        licence = "This work is licensed under a <ulink url=\"%s\"><citetitle>%s</citetitle></ulink>." % (lic_url, lic_txt)

    # Preferably one of https://www.emodnet-biology.eu/contribute?page=list&subject=thestdas&SpColID=552&showall=1#P
    keywords = EMLKeywordSet(keywords=["Plankton",
                                       "Imaging", "EcoTaxa"  # Not in list above
                                       # "Ligurian sea" TODO: Geo area?
                                       # TODO: ZooProcess (from projects)
                                       ],
                             keywordThesaurus="GBIF Dataset Type Vocabulary: "
                                              "http://rs.gbif.org/vocabulary/gbif/dataset_type.xml")

    taxo_cov = self.get_taxo_coverage(the_collection.project_ids)

    now = datetime.now().replace(microsecond=0)
    meta_plus = EMLAdditionalMeta(dateStamp=now.isoformat())

    coll_title = the_collection.title
    info_url = "https://ecotaxa.obs-vlfr.fr/api/collections/by_title?q=%s" % quote_plus(coll_title)

    if len(self.errors) == 0:
        # The research project
        # noinspection PyUnboundLocalVariable
        # project = EMLProject(title=the_collection.title,
        #                      personnel=[])  # TODO: Unsure about duplicated information with metadata
        # noinspection PyUnboundLocalVariable
        ret = EMLMeta(identifier=identifier,
                      titles=[title],
                      creators=creators,
                      contacts=[contact],
                      metadataProviders=[provider],
                      associatedParties=associates,
                      pubDate=publication_date,
                      abstract=[abstract],
                      keywordSet=keywords,
                      additionalInfo=additional_info,
                      geographicCoverage=geo_cov,
                      temporalCoverage=time_cov,
                      taxonomicCoverage=taxo_cov,
                      intellectualRights=licence,
                      # project=project,
                      maintenance="periodic review of origin data",
                      maintenanceUpdateFrequency="unknown",  # From XSD
                      additionalMetadata=meta_plus,
                      informationUrl=info_url)
    return ret