def drop_single_dataset_metadata(self, accession):
    """Drop all metadata entries associated with `accession` from `self.mongo_collections.metadata`.

    Runs the delete inside a client session/transaction and reports the
    outcome as a (status, message, payload) triple.
    """
    target = self.mongo_collections.metadata
    client = target.database.client
    # NOTE(review): the session is opened but not forwarded to
    # `run_mongo_action`; this mirrors the convention used elsewhere in
    # this file — confirm the helper picks it up if true transactional
    # scoping is required.
    with client.start_session() as session, session.start_transaction():
        run_mongo_action(
            "delete_many",
            target,
            query={"id.accession": accession},
        )
    return "dropped", "removed from database", None
def drop_status(collection, prefix="Status update", accession=None, status=None,
                info=None, warning=None, error=None, **kwargs):
    """Drop all references to accession from `collection`.

    Deletes every status document matching `accession`, then records the
    event via `log_status` using the same match document.
    """
    match = {"accession": accession}
    run_mongo_action(action="delete_many", collection=collection, query=match)
    log_status(prefix, status, info, warning, error, match)
def update_metadata_value_lookup(mongo_collections, cacher_id, keys=("investigation", "study", "assay")):
    """Collect existing keys and values for lookups.

    Rebuilds the lookup index from a template, fills it in-place with keys
    and values discovered in the metadata collection, then replaces each
    (isa_category, subkey) lookup document inside a single transaction.
    """
    aux = mongo_collections.metadata_aux
    GeneFabLogger.info(
        "{}:\n reindexing metadata lookup records ('{}')".format(cacher_id, aux.name)
    )
    # Start from a deep copy of the template restricted to the requested keys,
    # then let the INPLACE_* helpers populate it from the live metadata.
    index = deepcopy_keys(METADATA_AUX_TEMPLATE, *keys)
    INPLACE_update_metadata_value_lookup_keys(index, mongo_collections)
    INPLACE_update_metadata_value_lookup_values(index, mongo_collections)
    client = aux.database.client
    with client.start_session() as session, session.start_transaction():
        for isa_category, subkeys in index.items():
            for subkey, content in subkeys.items():
                run_mongo_action(
                    action="replace",
                    collection=aux,
                    query={"isa_category": isa_category, "subkey": subkey},
                    data={"content": content},
                )
    GeneFabLogger.info(
        "{}:\n finished reindexing metadata lookup records ('{}')".format(cacher_id, aux.name)
    )
def update_status(collection, prefix="Status update", report_type=None,
                  accession=None, assay_name=None, sample_name=None,
                  status=None, info=None, warning=None, error=None, **kwargs):
    """Update status of dataset (and, optionally, assay/sample) in `collection`, log with logger.

    Builds a status document, upserts it (replace keyed on the full document)
    with a fresh "report timestamp", and logs the same document.
    """
    # Preserve truthiness semantics of the original `report_type or (...)`:
    # an explicitly falsy report_type falls through to the inferred value.
    if report_type:
        resolved_report_type = report_type
    elif sample_name is None:
        resolved_report_type = "dataset status"
    else:
        resolved_report_type = "parser message"
    query = {
        "status": status,
        "report type": resolved_report_type,
        "accession": accession,
        "assay name": assay_name,
        "sample name": sample_name,
        "info": info,
        "warning": warning,
        # Store only the exception class name; None stays None.
        "error": type(error).__name__ if error is not None else None,
        "args": getattr(error, "args", []),
        "kwargs": kwargs,
    }
    run_mongo_action(
        action="replace",
        collection=collection,
        query=query,
        data={"report timestamp": int(datetime.now().timestamp())},
    )
    log_status(prefix, status, info, warning, error, query)
def __init__(self, identifier, collection, value):
    """Match existing documents by base64-encoded `value`, update if changed, report state in self.changed.

    Validates `identifier`, serializes `value` (JSON with `funcdump` for
    callables), zlib-compresses the base64-encoded bytes, and compares the
    result against documents already in `collection`. If anything differs,
    replaces the stored record inside a transaction.
    """
    # Guard clauses replace the original if/elif/else ladder.
    if not isinstance(identifier, dict):
        raise GeneFabConfigurationException(
            "ValueCheckedRecord(): `identifier` is not a dictionary",
            identifier=identifier,
        )
    if "base64value" in identifier:
        raise GeneFabConfigurationException(
            "ValueCheckedRecord(): `identifier` uses a reserved key",
            identifier=identifier, key="base64value",
        )
    self.identifier, self.value = identifier, value
    try:
        serialized = dumps(value, sort_keys=True, default=funcdump)
        self.base64value = compress(encodebytes(serialized.encode()))
    except TypeError as e:
        raise GeneFabConfigurationException(
            "ValueCheckedRecord(): TypeError",
            identifier=identifier, value=value, debug_info=repr(e),
        )
    # Scan existing documents: an exact match means nothing changed;
    # any mismatching document counts as stale and forces a rewrite.
    self.changed, stale_count = True, 0
    for document in collection.find(identifier):
        if document["base64value"] == self.base64value:
            self.changed = False
        else:
            stale_count += 1
    if stale_count or self.changed:
        GeneFabLogger.info(f"ValueCheckedRecord updated:\n {identifier}")
        client = collection.database.client
        with client.start_session() as session, session.start_transaction():
            run_mongo_action(
                "replace",
                collection,
                query=identifier,
                data={"base64value": self.base64value},
            )