def __call__(backend="git", root_metadata_store=None, path=None):
    """Aggregate metadata from sub-dataset stores into a root store.

    First collects the top-level metadata objects for every
    path/metadata-store association derived from ``path`` (stores without
    a mapped metadata model are reported as error results and skipped),
    then merges everything into the root store while holding the backend
    lock.

    :param backend: mapper backend name (defaults to ``"git"``)
    :param root_metadata_store: destination store; defaults to ``"."``
    :param path: separated path specification associating sub-dataset
        paths with their metadata stores
    :return: generator yielding datalad-style result records
    """
    root_metadata_store = root_metadata_store or "."
    path_realm_associations = process_separated_path_spec(path)

    # TODO: we should read-lock all ag_realms

    # Collect aggregate information
    aggregate_items = []
    for ag_path, ag_metadata_store in path_realm_associations:
        ag_tree_version_list, ag_uuid_set = get_top_level_metadata_objects(
            backend,
            ag_metadata_store)
        if ag_tree_version_list is None or ag_uuid_set is None:
            # NOTE: fixed missing space between "metadata" and "location"
            # in the original message.
            message = (f"No {backend}-mapped datalad metadata model "
                       f"found in: {ag_metadata_store}, ignoring metadata "
                       f"location {ag_metadata_store} (and sub-dataset "
                       f"{ag_path}).")
            lgr.warning(message)
            yield dict(
                backend=backend,
                realm=root_metadata_store,
                status='error',
                message=message)
            continue
        aggregate_items.append(
            AggregateItem(
                ag_tree_version_list,
                ag_uuid_set,
                ag_path))

    lock_backend(root_metadata_store)
    try:
        tree_version_list, uuid_set = get_top_level_metadata_objects(
            backend, root_metadata_store)
        if tree_version_list is None:
            lgr.warning(
                f"no tree version list found in {root_metadata_store}, "
                f"creating an empty tree version list")
            tree_version_list = TreeVersionList(backend, root_metadata_store)
        if uuid_set is None:
            lgr.warning(f"no uuid set found in {root_metadata_store}, "
                        f"creating an empty set")
            uuid_set = UUIDSet(backend, root_metadata_store)

        perform_aggregation(
            root_metadata_store,
            tree_version_list,
            uuid_set,
            aggregate_items)

        tree_version_list.save()
        uuid_set.save()
        flush_object_references(root_metadata_store)
    finally:
        # Release the backend lock even if aggregation or saving fails,
        # so a failed run cannot leave the store permanently locked.
        unlock_backend(root_metadata_store)

    yield dict(
        action="meta_aggregate",
        status='ok',
        backend=backend,
        metadata_store=root_metadata_store,
        message="aggregation performed")
    return
def add_dataset_metadata(metadata_store: Path, ap: AddParameter):
    """Add dataset-level metadata to a metadata store.

    Locks the backend, fetches (or lazily creates) the dataset-level
    ``Metadata`` object on the metadata root record, adds the content
    described by ``ap``, and persists the top-level nodes.

    :param metadata_store: path of the metadata store (realm)
    :param ap: parameters describing the metadata to add
    :return: generator yielding a single "ok" result record
    """
    realm = str(metadata_store)
    lock_backend(realm)
    try:
        tree_version_list, uuid_set, mrr = _get_top_nodes(realm, ap)

        dataset_level_metadata = mrr.get_dataset_level_metadata()
        if dataset_level_metadata is None:
            # No dataset-level metadata yet: create and attach it.
            dataset_level_metadata = Metadata(default_mapper_family, realm)
            mrr.set_dataset_level_metadata(dataset_level_metadata)

        add_metadata_content(dataset_level_metadata, ap)

        tree_version_list.save()
        uuid_set.save()
        flush_object_references(realm)
    finally:
        # Always release the lock, even if adding/saving raised.
        unlock_backend(realm)

    yield {
        "status": "ok",
        "action": "add",
        "type": "dataset",
        "message": "added dataset metadata"
    }
    return
def add_file_metadata(metadata_store: Path, ap: AddParameter):
    """Add file-level metadata to a metadata store.

    Locks the backend, fetches (or lazily creates) the file tree on the
    metadata root record and the ``Metadata`` object at ``ap.file_path``,
    adds the content described by ``ap``, and persists the top-level
    nodes.

    :param metadata_store: path of the metadata store (realm)
    :param ap: parameters describing the metadata to add, including
        ``file_path``
    :return: generator yielding a single "ok" result record
    """
    realm = str(metadata_store)
    lock_backend(realm)
    try:
        tree_version_list, uuid_set, mrr = _get_top_nodes(realm, ap)

        file_tree = mrr.get_file_tree()
        if file_tree is None:
            # No file tree yet: create and attach it.
            file_tree = FileTree(default_mapper_family, realm)
            mrr.set_file_tree(file_tree)

        if ap.file_path in file_tree:
            file_level_metadata = file_tree.get_metadata(ap.file_path)
        else:
            # First metadata for this path: create an empty entry.
            file_level_metadata = Metadata(default_mapper_family, realm)
            file_tree.add_metadata(ap.file_path, file_level_metadata)

        add_metadata_content(file_level_metadata, ap)

        tree_version_list.save()
        uuid_set.save()
        flush_object_references(realm)
    finally:
        # Always release the lock, even if adding/saving raised.
        unlock_backend(realm)

    yield {
        "status": "ok",
        "action": "add",
        "type": "file",
        "message": "added file metadata"
    }
    return
def add_file_metadata_source(ep: ExtractionParameter,
                             result: ExtractorResult,
                             metadata_source: dict):
    """Record an extractor run's metadata for a file in the store.

    Locks the backend, fetches (or lazily creates) the file tree and the
    ``Metadata`` object at ``ep.file_tree_path``, records the extractor
    run (timestamped with the current time) together with
    ``metadata_source``, and persists the top-level nodes.

    :param ep: extraction parameters (realm, extractor and agent info,
        file tree path)
    :param result: extractor result carrying version and extraction
        parameters
    :param metadata_source: the extracted metadata payload to attach
    """
    lock_backend(ep.realm.path)
    try:
        tree_version_list, uuid_set, mrr = get_top_nodes_and_mrr(ep)

        file_tree = mrr.get_file_tree()
        if file_tree is None:
            # No file tree yet: create and attach it.
            file_tree = FileTree(default_mapper_family, ep.realm.path)
            mrr.set_file_tree(file_tree)

        if ep.file_tree_path in file_tree:
            metadata = file_tree.get_metadata(ep.file_tree_path)
        else:
            # First metadata for this path: create an empty entry.
            metadata = Metadata(default_mapper_family, ep.realm.path)
            file_tree.add_metadata(ep.file_tree_path, metadata)

        metadata.add_extractor_run(
            time.time(),
            ep.extractor_name,
            ep.agent_name,
            ep.agent_email,
            ExtractorConfiguration(
                result.extractor_version,
                result.extraction_parameter),
            metadata_source)

        tree_version_list.save()
        uuid_set.save()
        flush_object_references(ep.realm.path)
    finally:
        # Always release the lock, even if recording/saving raised.
        unlock_backend(ep.realm.path)
def add_dataset_metadata_source(ep: ExtractionParameter,
                                result: ExtractorResult,
                                metadata_source: dict):
    """Record an extractor run's dataset-level metadata in the store.

    Locks the backend, fetches (or lazily creates) the dataset-level
    ``Metadata`` object, records the extractor run (timestamped with the
    current time) together with ``metadata_source``, and persists the
    top-level nodes.

    :param ep: extraction parameters (realm, extractor and agent info)
    :param result: extractor result carrying version and extraction
        parameters
    :param metadata_source: the extracted metadata payload to attach
    """
    lock_backend(ep.realm.path)
    try:
        tree_version_list, uuid_set, mrr = get_top_nodes_and_mrr(ep)

        dataset_level_metadata = mrr.get_dataset_level_metadata()
        if dataset_level_metadata is None:
            # No dataset-level metadata yet: create and attach it.
            dataset_level_metadata = Metadata(
                default_mapper_family, ep.realm.path)
            mrr.set_dataset_level_metadata(dataset_level_metadata)

        dataset_level_metadata.add_extractor_run(
            time.time(),
            ep.extractor_name,
            ep.agent_name,
            ep.agent_email,
            ExtractorConfiguration(
                result.extractor_version,
                result.extraction_parameter),
            metadata_source)

        tree_version_list.save()
        uuid_set.save()
        flush_object_references(ep.realm.path)
    finally:
        # Always release the lock, even if recording/saving raised.
        unlock_backend(ep.realm.path)