def __selectObjectIds(self, databaseName, collectionName, selectionQueryD):
    """Return a list of document object identifiers ("_id") matching the input selection query.

    Args:
        databaseName (str): target database name
        collectionName (str): target collection name
        selectionQueryD (dict): Mongo-style selection query (empty/None selects all documents)

    Returns:
        list: documents containing only the "_id" attribute (empty list on error)
    """
    # Fix: initialize before the try block - previously dL was unbound (UnboundLocalError)
    # when the collection did not exist or an exception fired before the fetch.
    dL = []
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(databaseName, collectionName):
                logger.info("%s %s document count is %d", databaseName, collectionName, mg.count(databaseName, collectionName))
                qD = {}
                if selectionQueryD:
                    qD.update(selectionQueryD)
                selectL = ["_id"]
                dL = mg.fetch(databaseName, collectionName, selectL, queryD=qD)
                logger.info("Selection %r fetch result count %d", selectL, len(dL))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return dL
def getEntityIds(self, entryIdList):
    """Return {entryId: [polymer entity container identifier documents, ...]} for the input entry ids."""
    dbName = "pdbx_core"
    collectionName = "pdbx_core_polymer_entity"
    docD = {}
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(dbName, collectionName):
                logger.info("%s %s document count is %d", dbName, collectionName, mg.count(dbName, collectionName))
                # Only the container identifier category is projected back
                selectL = ["rcsb_polymer_entity_container_identifiers"]
                for entryId in entryIdList:
                    qD = {"rcsb_polymer_entity_container_identifiers.entry_id": entryId}
                    tL = mg.fetch(dbName, collectionName, selectL, queryD=qD)
                    # logger.debug("Selection %r fetch result count %d", selectL, len(tL))
                    docD[entryId] = [tD["rcsb_polymer_entity_container_identifiers"] for tD in tL]
        logger.debug("docD is %r", docD)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return docD
def __selectEntries(self, **kwargs):
    """Return a dictionary of PDB entries satisfying the input conditions (e.g. method, resolution limit).

    Args (kwargs):
        dbName (str): database name. Defaults to "pdbx_core".
        collectionName (str): collection name. Defaults to "pdbx_core_entry".
        entrySelectionQuery (dict): Mongo-style entry-level selection query. Defaults to {}.

    Returns:
        dict: {entry_id: {"polymer_entity_ids": [...]}, ...} (empty on error)
    """
    dbName = kwargs.get("dbName", "pdbx_core")
    collectionName = kwargs.get("collectionName", "pdbx_core_entry")
    selectionQueryD = kwargs.get("entrySelectionQuery", {})
    #
    entryD = {}
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(dbName, collectionName):
                logger.info("%s %s document count is %d", dbName, collectionName, mg.count(dbName, collectionName))
                qD = {}
                if selectionQueryD:
                    # Fix: was qD.update(qD) (a no-op) - the caller's selection query was ignored
                    qD.update(selectionQueryD)
                selectL = ["rcsb_entry_container_identifiers"]
                dL = mg.fetch(dbName, collectionName, selectL, queryD=qD)
                logger.info("Selection %r fetch result count %d", selectL, len(dL))
                #
                for dD in dL:
                    if (
                        ("rcsb_entry_container_identifiers" in dD)
                        and ("entry_id" in dD["rcsb_entry_container_identifiers"])
                        and ("polymer_entity_ids" in dD["rcsb_entry_container_identifiers"])
                        and dD["rcsb_entry_container_identifiers"]["polymer_entity_ids"]
                    ):
                        entryD[dD["rcsb_entry_container_identifiers"]["entry_id"]] = {
                            "polymer_entity_ids": dD["rcsb_entry_container_identifiers"]["polymer_entity_ids"]
                        }
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return entryD
def update(self, databaseName, collectionName, updateDL):
    """Apply each update operation in updateDL to the target collection (with upsert).

    Args:
        databaseName (str): Target database name
        collectionName (str): Target collection name
        updateDL (list): [{"selectD": {...}, "updateD": {...}}, ...] where selectD
            identifies the documents and updateD holds dotted-path attribute values.

    Returns:
        int: total number of documents updated
    """
    numUpdated = 0
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(databaseName, collectionName):
                logger.debug("%s %s document count is %d", databaseName, collectionName, mg.count(databaseName, collectionName))
                for uD in updateDL:
                    numUpdated += mg.update(databaseName, collectionName, uD["updateD"], uD["selectD"], upsertFlag=True)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return numUpdated
def getEntryInfo(self, **kwargs):
    """Return a dictionary of PDB entries satisfying the input conditions (e.g. method, resolution limit)."""
    resLimit = kwargs.get("resLimit", 3.5)
    expMethod = kwargs.get("expMethod", "X-ray")
    dbName = kwargs.get("dbName", "pdbx_core")
    collectionName = kwargs.get("collectionName", "pdbx_core_entry")
    #
    entryD = {}
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(dbName, collectionName):
                logger.info("%s %s document count is %d", dbName, collectionName, mg.count(dbName, collectionName))
                # Select by experimental method and high-resolution limit on the first refine record
                qD = {"rcsb_entry_info.experimental_method": expMethod, "refine.0.ls_d_res_high": {"$lte": resLimit}}
                selectL = ["rcsb_entry_container_identifiers", "rcsb_entry_info", "refine"]
                dL = mg.fetch(dbName, collectionName, selectL, queryD=qD)
                logger.info("Selection %r fetch result count %d", selectL, len(dL))
                #
                for docD in dL:
                    if "rcsb_entry_container_identifiers" not in docD:
                        continue
                    entryId = docD["rcsb_entry_container_identifiers"]["entry_id"]
                    entryD[entryId] = {}
                    infoD = docD.get("rcsb_entry_info", {})
                    if "polymer_composition" in infoD:
                        entryD[entryId] = {
                            "polymer_composition": infoD["polymer_composition"],
                            "experimental_method": infoD["experimental_method"],
                        }
                    refineL = docD.get("refine")
                    if refineL and "ls_d_res_high" in refineL[0]:
                        entryD[entryId]["ls_d_res_high"] = refineL[0]["ls_d_res_high"]
                        logger.debug("Got res %r", refineL[0]["ls_d_res_high"])
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return entryD
def __transform(self, databaseName, collectionName, docSelectList, logIncrement=100): """Return a list of object identifiers for the input selection query.""" # ok = True try: self.__valInst = self.__getValidator(databaseName, collectionName, schemaLevel="full") with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: mg = MongoDbUtil(client) if mg.collectionExists(databaseName, collectionName): numDoc = len(docSelectList) for ii, dD in enumerate(docSelectList, 1): if "_id" not in dD: continue rObj = mg.fetchOne(databaseName, collectionName, "_id", dD["_id"]) del rObj["_id"] # fOk = True if self.__oAdapt: self.__validateObj(databaseName, collectionName, rObj, label="Original") fOk, rObj = self.__oAdapt.filter(rObj) self.__validateObj(databaseName, collectionName, rObj, label="Updated") if fOk: rOk = mg.replace(databaseName, collectionName, rObj, dD) if rOk is None: tId = rObj[ "rcsb_id"] if rObj and "rcsb_id" in rObj else "anonymous" logger.error("%r %r (%r) failing", databaseName, collectionName, tId) # logger.info("rObj.keys() %r", list(rObj.keys())) # logger.info("rObj.items() %s", rObj.items()) rOk = False ok = ok and rOk # if ii % logIncrement == 0 or ii == numDoc: logger.info("Replace status %r object (%d of %d)", ok, ii, numDoc) # except Exception as e: logger.exception("Failing with %s", str(e)) return ok
def __removeCollection(self, dbName, collectionName):
    """Drop a collection within a database.

    Args:
        dbName (str): target database name
        collectionName (str): target collection name

    Returns:
        bool: True if the collection was dropped and no longer exists, False otherwise
    """
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            #
            logger.debug("Remove collection database %s collection %s", dbName, collectionName)
            logger.debug("Starting databases = %r", mg.getDatabaseNames())
            logger.debug("Starting collections = %r", mg.getCollectionNames(dbName))
            ok = mg.dropCollection(dbName, collectionName)
            logger.debug("Databases = %r", mg.getDatabaseNames())
            logger.debug("Post drop collections = %r", mg.getCollectionNames(dbName))
            # Fix: the drop status was previously overwritten with collectionExists(),
            # which returns True when the collection is STILL present - inverting the result.
            stillExists = mg.collectionExists(dbName, collectionName)
            logger.debug("Post drop collections = %r", mg.getCollectionNames(dbName))
            return ok and not stillExists
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return False
def count(self, databaseName, collectionName):
    """Return the number of documents in the named collection (0 if absent or on error)."""
    numTotal = 0
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            numTotal = mg.count(databaseName, collectionName) if mg.collectionExists(databaseName, collectionName) else 0
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return numTotal
def testCreateDatabase(self):
    """Test case - create database (creating the same database twice must also succeed)."""
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            # Creation is expected to be idempotent - check success on repeat
            for _ in range(2):
                self.assertTrue(mg.createDatabase(self.__dbName))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __select(self, **kwargs):
    """Return a dictionary of object content satisfying the input conditions
    (e.g. method, resolution limit) and selection options, keyed by the joined
    unique attribute values of each document.
    """
    databaseName = kwargs.get("databaseName", "pdbx_core")
    collectionName = kwargs.get("collectionName", "pdbx_core_entry")
    selectionQueryD = kwargs.get("selectionQuery", {})
    uniqueAttributes = kwargs.get("uniqueAttributes", ["rcsb_id"])
    selectL = kwargs.get("selectionList", [])
    stripObjectId = kwargs.get("stripObjectId", False)
    limitV = kwargs.get("objectLimit", None)
    objLimit = None if limitV is None else int(limitV)
    #
    objectD = {}
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(databaseName, collectionName):
                logger.info("%s %s document count is %d", databaseName, collectionName, mg.count(databaseName, collectionName))
                qD = dict(selectionQueryD) if selectionQueryD else {}
                dL = mg.fetch(databaseName, collectionName, selectL, queryD=qD, suppressId=True)
                logger.info("Selection %r fetch result count %d", selectL, len(dL))
                #
                for num, rObj in enumerate(dL, 1):
                    stKey = ".".join(rObj[ky] for ky in uniqueAttributes)
                    if stripObjectId and rObj and "_id" in rObj:
                        rObj.pop("_id")
                    objectD[stKey] = copy.copy(rObj)
                    if objLimit and num >= objLimit:
                        break
                    # logger.debug("Saving %d %s", num, stKey)
                # logger.debug("Current objectD keys %r", list(objectD.keys()))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return objectD
def testReplaceList(self):
    """Test case - create collection and insert document list - replace and upsert document list."""
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            nDocs = 10
            mg = MongoDbUtil(client)
            self.assertTrue(mg.createCollection(self.__dbName, self.__collectionName))
            self.assertTrue(mg.databaseExists(self.__dbName))
            self.assertTrue(mg.collectionExists(self.__dbName, self.__collectionName))
            #
            keyName = "DOC_ID"
            dList = [self.__makeDataObj(2, 5, 5, ii) for ii in range(nDocs)]
            rIdL = mg.insertList(self.__dbName, self.__collectionName, dList, keyNames=[keyName], salvage=True)
            self.assertEqual(len(rIdL), len(dList))
            #
            # Round-trip check of every inserted document by its object id
            for ii, rId in enumerate(rIdL):
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId)
                # logger.debug("Return Object %s", pprint.pformat(rObj))
                self.assertEqual(len(dList[ii]), len(rObj))
                self.assertEqual(dList[ii], rObj)
            #
            # Replace with 2x the list length - half are duplicates id's
            dList = [self.__makeDataObj(4, 10, 10, ii) for ii in range(nDocs + nDocs)]
            updL = mg.replaceList(self.__dbName, self.__collectionName, dList, ["DOC_ID"], upsertFlag=True)
            logger.info("Upserted id list length %d", len(updL))
            for ii in range(nDocs + nDocs):
                kVal = "DOC_%d" % ii
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", kVal)
                if not rObj:
                    logger.info("Failing to recover doc %s", kVal)
                # logger.debug("Return Object %s", pprint.pformat(rObj))
                rObj.pop("_id", None)
                dList[ii].pop("_id", None)
                self.assertEqual(len(dList[ii]), len(rObj))
                self.assertEqual(dList[ii], rObj)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testInsertList(self):
    """Test case - create collection and insert data -"""
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            self.assertTrue(mg.createCollection(self.__dbName, self.__collectionName))
            self.assertTrue(mg.databaseExists(self.__dbName))
            self.assertTrue(mg.collectionExists(self.__dbName, self.__collectionName))
            #
            keyName = "DOC_ID"
            dList = [self.__makeDataObj(2, 5, 5, ii) for ii in range(100)]
            rIdL = mg.insertList(self.__dbName, self.__collectionName, dList, keyNames=[keyName], salvage=True)
            self.assertEqual(len(rIdL), len(dList))
            #
            # Note that each dObj is mutated by the additional key '_id' that is added on insert -
            for rId in rIdL:
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId)
                logger.debug("Return Object %s", pprint.pformat(rObj))
                # Recover the original list position from the DOC_ID suffix ("DOC_<n>")
                jj = int(rObj["DOC_ID"][4:])
                self.assertEqual(len(dList[jj]), len(rObj))
                self.assertEqual(dList[jj], rObj)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testInsertSingle(self):
    """Test case - create collection, insert a single document, and read it back."""
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            self.assertTrue(mg.createCollection(self.__dbName, self.__collectionName))
            self.assertTrue(mg.databaseExists(self.__dbName))
            self.assertTrue(mg.collectionExists(self.__dbName, self.__collectionName))
            #
            dObj = self.__makeDataObj(2, 5, 5)
            rId = mg.insert(self.__dbName, self.__collectionName, dObj)
            self.assertTrue(rId is not None)
            # Note that dObj is mutated by additional key '_id' that is added on insert -
            rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId)
            logger.debug("Return Object %s", pprint.pformat(rObj))
            self.assertEqual(len(dObj), len(rObj))
            self.assertEqual(dObj, rObj)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def createCollection(self, databaseName, collectionName, indexAttributeNames=None, indexName="primary", checkExists=False, bsonSchema=None):
    """Create a collection, optionally with a named index and a BSON validation schema.

    Args:
        databaseName (str): target database name
        collectionName (str): target collection name
        indexAttributeNames (list, optional): attribute names for the named index. Defaults to None.
        indexName (str, optional): name of the index to create. Defaults to "primary".
        checkExists (bool, optional): reuse an existing collection if True. Defaults to False.
        bsonSchema (object, optional): BSON compatible validation schema. Defaults to None.

    Returns:
        (bool): True for success or False otherwise
    """
    try:
        logger.debug("Create database %s collection %s", databaseName, collectionName)
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            reuse = checkExists and mg.databaseExists(databaseName) and mg.collectionExists(databaseName, collectionName)
            okCreate = True if reuse else mg.createCollection(databaseName, collectionName, bsonSchema=bsonSchema)
            okDb = mg.databaseExists(databaseName)
            okCollection = mg.collectionExists(databaseName, collectionName)
            okIndex = True
            if indexAttributeNames:
                okIndex = mg.createIndex(databaseName, collectionName, indexAttributeNames, indexName=indexName, indexType="DESCENDING", uniqueFlag=False)
            return okCreate and okDb and okCollection and okIndex
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return False
def delete(self, databaseName, collectionName, selectD):
    """Remove documents satisfying the input selection details.

    Args:
        databaseName (str): Target database name
        collectionName (str): Target collection name
        selectD (dict): {'ky1': 'val1', 'ky2': 'val2', ...}

    Returns:
        int: number of documents removed (0 on error or if the collection is absent)
    """
    numDeleted = 0
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(databaseName, collectionName):
                logger.info("%s %s document count is %d", databaseName, collectionName, mg.count(databaseName, collectionName))
                numDeleted = mg.delete(databaseName, collectionName, selectD)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return numDeleted
def __selectObjects(self, **kwargs):
    """Return a dictionary of objects satisfying the input conditions (e.g. method, resolution limit)."""
    databaseName = kwargs.get("databaseName", "pdbx_core")
    collectionName = kwargs.get("collectionName", "pdbx_core_entry")
    selectionQueryD = kwargs.get("selectionQuery", {})
    uniqueAttributes = kwargs.get("uniqueAttributes", ["rcsb_id"])
    limitV = kwargs.get("objectLimit", None)
    objLimit = None if limitV is None else int(limitV)
    stripObjectId = kwargs.get("stripObjectId", False)
    logIncrement = kwargs.get("logIncrement", 10000)
    #
    objectD = {}
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(databaseName, collectionName):
                logger.info("%s %s document count is %d", databaseName, collectionName, mg.count(databaseName, collectionName))
                qD = dict(selectionQueryD) if selectionQueryD else {}
                # First pass - select matching object ids only
                idSelectL = ["_id"]
                idDocL = mg.fetch(databaseName, collectionName, idSelectL, queryD=qD)
                numDoc = len(idDocL) if idDocL else 0
                logger.info("Selection %r fetch result count %d", idSelectL, numDoc)
                #
                # Second pass - fetch the full object for each id
                for num, idDoc in enumerate(idDocL, 1):
                    if "_id" not in idDoc:
                        continue
                    rObj = mg.fetchOne(databaseName, collectionName, "_id", idDoc["_id"])
                    # Either remove the internal id or render it as a string
                    if stripObjectId and rObj and "_id" in rObj:
                        rObj.pop("_id")
                    else:
                        rObj["_id"] = str(rObj["_id"])
                    #
                    stKey = ".".join(rObj[ky] for ky in uniqueAttributes)
                    objectD[stKey] = copy.copy(rObj)
                    if objLimit and num >= objLimit:
                        break
                    logger.debug("Saving %d %s", num, stKey)
                    if num % logIncrement == 0 or num == numDoc:
                        logger.info("Extracting object (%d of %d)", num, numDoc)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return objectD
def testCreateCollectionDropDatabase(self):
    """Test case - create/drop collection -"""
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            self.assertTrue(mg.createCollection(self.__dbName, self.__collectionName))
            self.assertTrue(mg.databaseExists(self.__dbName))
            self.assertTrue(mg.collectionExists(self.__dbName, self.__collectionName))
            #
            # Dropping the database must remove both the database and its collection
            self.assertTrue(mg.dropDatabase(self.__dbName))
            self.assertFalse(mg.databaseExists(self.__dbName))
            self.assertFalse(mg.collectionExists(self.__dbName, self.__collectionName))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __createCollection(self, dbName, collectionName, indexAttributeNames=None, checkExists=False, bsonSchema=None):
    """Create database and collection and optionally a 'primary' index on the given attributes.

    Returns:
        bool: True when the collection (and any requested index) exists on completion
    """
    try:
        logger.debug("Create database %s collection %s", dbName, collectionName)
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            reuse = checkExists and mg.databaseExists(dbName) and mg.collectionExists(dbName, collectionName)
            okCreate = True if reuse else mg.createCollection(dbName, collectionName, bsonSchema=bsonSchema)
            okDb = mg.databaseExists(dbName)
            okCollection = mg.collectionExists(dbName, collectionName)
            okIndex = True
            if indexAttributeNames:
                okIndex = mg.createIndex(dbName, collectionName, indexAttributeNames, indexName="primary", indexType="DESCENDING", uniqueFlag=False)
            return okCreate and okDb and okCollection and okIndex
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return False
def __selectPolymerEntities(self, entryD, **kwargs):
    """Entity selector recovering essential biological sequence mapping features
    for macromolecules (default type = protein).

    For each entry in entryD, stores (under resultKey) a dictionary of selected
    polymer entities keyed by entity_id, e.g.:

        "1CP9": {
            "polymer_entity_ids": ["1", "2"],
            "selected_polymer_entities": {
                "1": {
                    "rcsb_multiple_source_flag": "N",
                    "asym_ids": ["A"],
                    "auth_asym_ids": ["A"],
                    "entity_id": "1",
                    "type": "polypeptide(L)",
                    "rcsb_entity_polymer_type": "Protein",
                    "rcsb_entity_source_organism": [{"ncbi_taxonomy_id": 587, "beg_seq_num": 1, ...}],
                    "struct_ref": [{"db_name": "UNP", "pdbx_db_accession": "Q7WZI9",
                                    "alignD": {"A": [{...struct_ref_seq records...}]}, ...}],
                    "original_taxonomy_ids": [...],
                },
                ...
            },
        }

    Args (kwargs):
        dbName (str): database name. Defaults to "pdbx_core".
        collectionName (str): collection name. Defaults to "pdbx_core_polymer_entity".
        resultKey (str): key under which selected entities are stored. Defaults to "selected_polymer_entities".
        entryLimit (int, optional): stop after this many entries. Defaults to None (no limit).
        entitySelectionQuery (dict): entity-level selection query. Defaults to protein polymers.

    Returns:
        dict: the input entryD updated in place
    """
    dbName = kwargs.get("dbName", "pdbx_core")
    collectionName = kwargs.get("collectionName", "pdbx_core_polymer_entity")
    resultKey = kwargs.get("resultKey", "selected_polymer_entities")
    entryLimit = kwargs.get("entryLimit", None)
    selectionQueryD = kwargs.get("entitySelectionQuery", {"entity_poly.rcsb_entity_polymer_type": "Protein"})
    #
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(dbName, collectionName):
                logger.info("%s %s document count is %d", dbName, collectionName, mg.count(dbName, collectionName))
                selectL = [
                    "rcsb_polymer_entity_container_identifiers",
                    "entity.rcsb_multiple_source_flag",
                    "entity_poly.type",
                    "entity_poly.rcsb_entity_polymer_type",
                    "entity_poly.pdbx_seq_one_letter_code_can",
                    "rcsb_entity_source_organism.ncbi_taxonomy_id",
                    "rcsb_entity_source_organism.ncbi_scientific_name",
                    "rcsb_entity_source_organism.beg_seq_num",
                    "rcsb_entity_source_organism.end_seq_num",
                    "struct_ref.id",
                    "struct_ref.pdbx_db_accession",
                    "struct_ref.db_name",
                    "struct_ref.entity_id",
                    "struct_ref.pdbx_seq_one_letter_code",
                    "struct_ref.pdbx_align_begin",
                    "struct_ref_seq",
                    #
                    "entity_src_nat.pdbx_ncbi_taxonomy_id",
                    "entity_src_gen.pdbx_gene_src_ncbi_taxonomy_id",
                    "entity_src_gen.pdbx_host_org_ncbi_taxonomy_id",
                    "pdbx_entity_src_syn.ncbi_taxonomy_id",
                ]
                iCount = 0
                for entryId in entryD:
                    # Skip entries already populated (supports incremental reruns)
                    if resultKey in entryD[entryId]:
                        continue
                    #
                    qD = {"rcsb_polymer_entity_container_identifiers.entry_id": entryId}
                    qD.update(selectionQueryD)
                    #
                    dL = mg.fetch(dbName, collectionName, selectL, queryD=qD)
                    logger.debug("%s query %r fetch result count %d", entryId, qD, len(dL))
                    eD = {}
                    for ii, dD in enumerate(dL, 1):
                        rD = {}
                        logger.debug("%s (%4d) d is %r", entryId, ii, dD)
                        if "entity" in dD:
                            rD["rcsb_multiple_source_flag"] = dD["entity"]["rcsb_multiple_source_flag"] if "rcsb_multiple_source_flag" in dD["entity"] else "N"
                        #
                        # Fix: read the fetched category "rcsb_polymer_entity_container_identifiers" -
                        # the previous code addressed "rcsb_entity_container_identifiers", which is
                        # never fetched here, so these accesses always raised KeyError.
                        if "rcsb_polymer_entity_container_identifiers" in dD:
                            ciD = dD["rcsb_polymer_entity_container_identifiers"]
                            rD["asym_ids"] = ciD["asym_ids"] if "asym_ids" in ciD else []
                            rD["auth_asym_ids"] = ciD["auth_asym_ids"] if "auth_asym_ids" in ciD else []
                            rD["entity_id"] = ciD["entity_id"]
                        #
                        if "entity_poly" in dD:
                            rD["type"] = dD["entity_poly"]["type"] if "type" in dD["entity_poly"] else None
                            rD["rcsb_entity_polymer_type"] = dD["entity_poly"]["rcsb_entity_polymer_type"] if "rcsb_entity_polymer_type" in dD["entity_poly"] else None
                            rD["entity_polymer_length"] = len(dD["entity_poly"]["pdbx_seq_one_letter_code_can"]) if "pdbx_seq_one_letter_code_can" in dD["entity_poly"] else 0
                        #
                        tL = []
                        if "rcsb_entity_source_organism" in dD:
                            for tD in dD["rcsb_entity_source_organism"]:
                                tL.append(tD)
                        rD["rcsb_entity_source_organism"] = copy.copy(tL)
                        #
                        refDL = []
                        if "struct_ref" in dD:
                            for tD in dD["struct_ref"]:
                                if "db_name" in tD:
                                    tD["db_name"] = str(tD["db_name"]).upper().strip()
                                    # TREMBL accessions are reported under the parent UniProt resource
                                    tD["db_name"] = "UNP" if tD["db_name"] in ["TREMBL"] else tD["db_name"]
                                refDL.append(tD)
                        if "struct_ref_seq" in dD:
                            # Group the alignment records by reference id and author chain id.
                            # (loop variable renamed from qD to avoid shadowing the query dict)
                            for refD in refDL:
                                refId = refD["id"]
                                alignL = [tD for tD in dD["struct_ref_seq"] if refId == tD["ref_id"]]
                                for align in alignL:
                                    authAsymId = align["pdbx_strand_id"]
                                    refD.setdefault("alignD", {}).setdefault(authAsymId, []).append(align)
                        rD["struct_ref"] = refDL
                        #
                        # Collect source/host taxonomy assignments from the underlying categories
                        taxIdL = []
                        if "entity_src_nat" in dD:
                            for tD in dD["entity_src_nat"]:
                                if "pdbx_ncbi_taxonomy_id" in tD:
                                    taxIdL.append(tD["pdbx_ncbi_taxonomy_id"])
                        if "entity_src_gen" in dD:
                            for tD in dD["entity_src_gen"]:
                                if "pdbx_gene_src_ncbi_taxonomy_id" in tD:
                                    taxIdL.append(tD["pdbx_gene_src_ncbi_taxonomy_id"])
                                if "pdbx_host_org_ncbi_taxonomy_id" in tD:
                                    taxIdL.append(tD["pdbx_host_org_ncbi_taxonomy_id"])
                        if "pdbx_entity_src_syn" in dD:
                            for tD in dD["pdbx_entity_src_syn"]:
                                if "ncbi_taxonomy_id" in tD:
                                    taxIdL.append(tD["ncbi_taxonomy_id"])
                        # Taxonomy ids may be comma-separated strings; coerce to str defensively
                        # so an integer-valued id does not abort the whole selection.
                        qL = []
                        for taxId in taxIdL:
                            qL.extend([int(t.strip()) for t in str(taxId).split(",") if t.strip().isdigit()])
                        logger.debug("TaxId list %r", qL)
                        rD["original_taxonomy_ids"] = copy.copy(list(set(qL)))
                        #
                        if "entity_id" in rD:
                            eD[rD["entity_id"]] = copy.copy(rD)
                    entryD[entryId][resultKey] = copy.copy(eD)
                    iCount += 1
                    if iCount % 1000 == 0:
                        logger.info("Completed fetch %d/%d entries", iCount, len(entryD))
                    if entryLimit and iCount >= entryLimit:
                        logger.info("Quitting after %d", iCount)
                        break
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return entryD
def __loadDocuments(self, dbName, collectionName, docList, loadType="full", readBackCheck=False, keyNames=None):
    """Load the database/collection with the input document list.

    Args:
        dbName (str): target database name
        collectionName (str): target collection name
        docList (list): documents to insert (mutated - insert adds an '_id' key)
        loadType (str, optional): "full" or "replace"; "replace" deletes matching
            documents (by keyNames) before inserting. Defaults to "full".
        readBackCheck (bool, optional): re-fetch and compare each inserted document. Defaults to False.
        keyNames (list, optional): attribute names uniquely identifying each document;
            required for per-document success/failure accounting. Defaults to None.

    Returns:
        (bool, list, list): (overall success status, successful documents, failed documents)
    """
    #
    # Load database/collection with input document list -
    #
    failList = []
    rIdL = []
    successList = []
    logger.debug("Loading dbName %s collectionName %s with document count %d keynames %r", dbName, collectionName, len(docList), keyNames)
    if keyNames:
        # map the document list to some document key if this is provided
        indD = {}
        indL = []
        try:
            for ii, doc in enumerate(docList):
                dIdTup = self.__getKeyValues(doc, keyNames)
                indD[dIdTup] = ii
            indL = list(range(len(docList)))
        except Exception as e:
            logger.exception("Failing ii %d d %r with %s", ii, doc, str(e))

    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            #
            if loadType == "replace" and keyNames:
                # Remove any existing documents matching the input key values before insert
                dTupL = mg.deleteList(dbName, collectionName, docList, keyNames)
                logger.debug("Deleted document status %r", (dTupL,))
            #
            rIdL = mg.insertList(dbName, collectionName, docList, keyNames=keyNames)
            logger.debug("Insert returns rIdL length %r", len(rIdL))
            # ---
            #  If there is a failure then determine the specific successes and failures -
            #
            successList = docList
            failList = []
            if len(rIdL) != len(docList):
                if keyNames:
                    # Re-fetch each inserted id and map it back to the input list position
                    successIndList = []
                    for rId in rIdL:
                        rObj = mg.fetchOne(dbName, collectionName, "_id", rId)
                        dIdTup = self.__getKeyValues(rObj, keyNames)
                        successIndList.append(indD[dIdTup])
                    failIndList = list(set(indL) - set(successIndList))
                    failList = [docList[ii] for ii in failIndList]
                    successList = [docList[ii] for ii in successIndList]
                else:
                    # fail the whole batch if we don't have visibility into each document
                    failList = docList
                    successList = []
            #
            rbStatus = True
            if readBackCheck and keyNames:
                #
                # Note that objects in docList are mutated by the insert operation with the additional key '_id',
                # hence, it is possible to compare the fetched object with the input object.
                #
                for ii, rId in enumerate(rIdL):
                    rObj = mg.fetchOne(dbName, collectionName, "_id", rId)
                    dIdTup = self.__getKeyValues(rObj, keyNames)
                    jj = indD[dIdTup]
                    if rObj != docList[jj]:
                        rbStatus = False
                        break
            #
            if readBackCheck and not rbStatus:
                return False, successList, failList
            #
            return len(rIdL) == len(docList), successList, failList
    except Exception as e:
        logger.exception("Failing %r %r (len=%d) %s with %s", dbName, collectionName, len(docList), keyNames, str(e))
    return False, [], docList
def testSchemaValidation3(self):
    """Test case - create collection and insert data with schema validation (warn mode) (integrated schema assignment)"""
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            # Start from a clean database
            if mg.databaseExists(self.__dbName):
                ok = mg.dropDatabase(self.__dbName)
                self.assertTrue(ok)
            #
            ok = mg.createDatabase(self.__dbName)
            self.assertTrue(ok)
            #
            # Schema is attached at creation time with validationAction="warn"
            ok = mg.createCollection(self.__dbName, self.__collectionName, overWrite=True, bsonSchema=self.__mongoSchema, validationAction="warn")
            self.assertTrue(ok)
            ok = mg.databaseExists(self.__dbName)
            self.assertTrue(ok)
            ok = mg.collectionExists(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            #
            # In warn mode this schema-invalid document is still expected to insert (rId not None)
            dObj = {"x": 1}
            rId = mg.insert(self.__dbName, self.__collectionName, dObj)
            logger.info("rId is %r", rId)
            self.assertNotEqual(rId, None)
            #
            # NOTE(review): literal below reconstructed from a garbled source line -
            # verify against the original character-reference test string
            s2 = unescapeXmlCharRef(' " Φ Ψ α £ ℅ ☆ 𝕫')
            dObj = {"strField1": "test value", "strField2": s2, "intField1": 50, "enumField1": "v3a", "dblField1": 100.1}
            rId = mg.insert(self.__dbName, self.__collectionName, dObj)
            self.assertNotEqual(rId, None)
            logger.info("rId is %r", rId)
            rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId)
            logger.debug("Return Object %s", pprint.pformat(rObj))
            self.assertEqual(len(dObj), len(rObj))
            self.assertEqual(dObj, rObj)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testSchemaValidation1(self): """Test case - create collection and insert data with schema validation (ext. schema assignment) """ # Example of a Mongo flavor of JsonSchema vexpr = {"$jsonSchema": self.__mongoSchema} query = [("collMod", self.__collectionName), ("validator", vexpr), ("validationLevel", "moderate")] query = OrderedDict(query) try: with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: mg = MongoDbUtil(client) if mg.databaseExists(self.__dbName): ok = mg.dropDatabase(self.__dbName) self.assertTrue(ok) # ok = mg.createDatabase(self.__dbName) self.assertTrue(ok) # ok = mg.createCollection(self.__dbName, self.__collectionName) self.assertTrue(ok) ok = mg.databaseExists(self.__dbName) self.assertTrue(ok) ok = mg.collectionExists(self.__dbName, self.__collectionName) self.assertTrue(ok) # mg.databaseCommand(self.__dbName, query) dObj = {"x": 1} rId = mg.insert(self.__dbName, self.__collectionName, dObj) logger.info("rId is %r", rId) self.assertEqual(rId, None) # s2 = unescapeXmlCharRef( " " Φ Ψ α £ ℅ ☆ 𝕫" ) dObj = { "strField1": "test value", "strField2": s2, "intField1": 50, "enumField1": "v3", "dblField1": 100.1 } rId = mg.insert(self.__dbName, self.__collectionName, dObj) logger.info("rId is %r", rId) rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId) logger.debug("Return Object %s", pprint.pformat(rObj)) self.assertEqual(len(dObj), len(rObj)) self.assertEqual(dObj, rObj) except Exception as e: logger.exception("Failing with %s", str(e)) self.fail()
def testSingleIndexSelect(self):
    """Test case - create collection, create simple single index, insert document list, read check documents.

    Covers: unique descending index creation before insert, bulk insertList() with salvage,
    per-document fetchOne() round-trip checks, drop/recreate/reIndex of the index, and
    fetch() selections over a fresh connection.
    """
    try:
        logger.debug("Starting testSingleIndexSelect")
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            nDocs = 100
            mg = MongoDbUtil(client)
            ok = mg.createCollection(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            ok = mg.databaseExists(self.__dbName)
            self.assertTrue(ok)
            ok = mg.collectionExists(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            #
            # Create before insert
            ok = mg.createIndex(self.__dbName, self.__collectionName, keyList=["DOC_ID"], indexName="primary", indexType="DESCENDING", uniqueFlag=True)
            self.assertTrue(ok)
            dList = []
            for ii in range(nDocs):
                dObj = self.__makeDataObj(2, 5, 5, ii)
                dList.append(dObj)
            #
            keyName = "DOC_ID"
            rIdL = mg.insertList(self.__dbName, self.__collectionName, dList, keyNames=[keyName], salvage=True)
            self.assertEqual(len(dList), len(rIdL))
            #
            # Read each document back by its DOC_ID key and compare with what was inserted.
            # insertList() mutated the dList entries with '_id', so both sides are popped
            # before comparison.
            for ii in range(nDocs):
                kVal = "DOC_%d" % ii
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", kVal)
                # logger.debug("Return Object %s" % pprint.pformat(rObj))
                rObj.pop("_id", None)
                dList[ii].pop("_id", None)
                self.assertEqual(len(dList[ii]), len(rObj))
                self.assertEqual(dList[ii], rObj)
            #
            # Exercise index maintenance: drop, recreate, and rebuild.
            ok = mg.dropIndex(self.__dbName, self.__collectionName, indexName="primary")
            self.assertTrue(ok)
            ok = mg.createIndex(self.__dbName, self.__collectionName, keyList=["DOC_ID"], indexName="primary", indexType="DESCENDING", uniqueFlag=True)
            self.assertTrue(ok)
            ok = mg.reIndex(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            #
        # Re-open a fresh connection to verify the data survives the first session.
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            ii = mg.count(self.__dbName, self.__collectionName)
            logger.debug("collection length %d", ii)
            #
            dList = mg.fetch(self.__dbName, self.__collectionName, ["DOC_ID"])
            self.assertEqual(len(dList), nDocs)
            logger.debug("Fetch length %d", len(dList))
            for ii, dD in enumerate(dList):
                logger.debug("Fetch num %d: %r", ii, dD)
            #
            # Selection query - every generated document carries category_0.attribute_0 == "val_0_0",
            # so the filtered fetch should return all nDocs documents.
            dList = mg.fetch(self.__dbName, self.__collectionName, ["category_0.attribute_0"], queryD={"category_0.attribute_0": "val_0_0"})
            self.assertEqual(len(dList), nDocs)
            logger.debug("Fetch length %d", len(dList))
            for ii, dD in enumerate(dList):
                logger.debug("Fetch num %d: %r", ii, dD)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testReplaceSingle(self):
    """Test case - create collection and insert document and then replace document -"""
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mongo = MongoDbUtil(client)
            status = mongo.createCollection(self.__dbName, self.__collectionName)
            self.assertTrue(status)
            status = mongo.databaseExists(self.__dbName)
            self.assertTrue(status)
            status = mongo.collectionExists(self.__dbName, self.__collectionName)
            self.assertTrue(status)
            #
            # Insert an initial document and read it back by its generated object id.
            docOne = self.__makeDataObj(2, 5, 5, 1)
            objectId = mongo.insert(self.__dbName, self.__collectionName, docOne)
            self.assertTrue(objectId is not None)
            # Note that docOne is mutated by additional key '_id' that is added on insert -
            #
            returned = mongo.fetchOne(self.__dbName, self.__collectionName, "_id", objectId)
            logger.debug("Return Object %s", pprint.pformat(returned))
            self.assertEqual(len(docOne), len(returned))
            self.assertEqual(docOne, returned)
            #
            # Now replace with a new document with the same document id
            docOne = self.__makeDataObj(3, 2, 2, 1)
            logger.debug("Replace Object %s", pprint.pformat(docOne))
            objectId = mongo.replace(self.__dbName, self.__collectionName, docOne, {"DOC_ID": "DOC_1"}, upsertFlag=True)
            # self.assertTrue(objectId is not None)
            returned = mongo.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", "DOC_1")
            returned.pop("_id", None)
            docOne.pop("_id", None)
            logger.debug("Return Object %s", pprint.pformat(returned))
            self.assertEqual(len(docOne), len(returned))
            self.assertEqual(docOne, returned)
            #
            # Now replace with a new document with a different key - the upsert inserts it.
            docTwo = self.__makeDataObj(5, 5, 5, 2)
            logger.debug("Replace Object %s", pprint.pformat(docOne))
            #
            objectId = mongo.replace(self.__dbName, self.__collectionName, docTwo, {"DOC_ID": "DOC_2"}, upsertFlag=True)
            returned = mongo.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", "DOC_2")
            returned.pop("_id", None)
            docTwo.pop("_id", None)
            logger.debug("Return Object %s", pprint.pformat(returned))
            self.assertEqual(len(docTwo), len(returned))
            self.assertEqual(docTwo, returned)
            #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def getEntityInstances(self, entryD, **kwargs):
    """Get the selected validation data for the instances in the input entry dictionary.

    entryD[entryId]['selected_polymer_entities'][entityId]['validation'] = {}

    Add keys: 'pdbx_vrpt_instance_results' and 'pdbx_unobs_or_zero_occ_residues' to the
    validation dictionary above.

    For each asym_id of each selected polymer entity, fetches 'pdbx_vrpt_instance_results'
    documents, derives per-residue sequence and occupancy summaries, runs analEntity() on
    the collected data, and stores the result under 'anal_instances'.  Progress is
    periodically exported to savePath.

    Args:
        entryD (dict): entry dictionary carrying 'selected_polymer_entities' with per-entity 'asym_ids'
        **kwargs: dbName, collectionName, savePath, saveKwargs, entryLimit (defaults below)

    Returns:
        dict: the input entryD, updated in place
    """
    dbName = kwargs.get("dbName", "pdbx_core")
    collectionName = kwargs.get("collectionName", "pdbx_core_polymer_entity_instance")
    savePath = kwargs.get("savePath", "entry-data.pic")
    saveKwargs = kwargs.get("saveKwargs", {"fmt": "pickle"})
    entryLimit = kwargs.get("entryLimit", None)
    #
    try:
        optF = False  # gates the optional raw/unobserved-residue extraction paths below (disabled)
        iCount = 0
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(dbName, collectionName):
                logger.info("%s %s total document count is %d", dbName, collectionName, mg.count(dbName, collectionName))
                #
                for entryId, dV in entryD.items():
                    for entityId, peD in dV["selected_polymer_entities"].items():
                        # if 'anal_instances' in peD:
                        #    continue
                        vD = {}
                        for asymId in peD["asym_ids"]:
                            qD = {
                                "rcsb_polymer_entity_instance_container_identifiers.entry_id": entryId,
                                "rcsb_polymer_entity_instance_container_identifiers.asym_id": asymId,
                            }
                            # qD = {'rcsb_entity_instance_container_validation_identifiers.entity_type': 'polymer'}
                            # selectL = ['pdbx_vrpt_instance_results', 'pdbx_unobs_or_zero_occ_residues']
                            selectL = ["pdbx_vrpt_instance_results"]
                            tL = mg.fetch(dbName, collectionName, selectL, queryD=qD)
                            # NOTE(review): this rebinding shadows the outer loop variable dV (the per-entry
                            # dict).  Iteration is unaffected because the .items() iterators are already
                            # bound, but a distinct name would be safer.
                            dV = {}
                            if not tL:
                                logger.info("No validation data for %s %s %s(%s)", dbName, collectionName, entryId, asymId)
                                continue
                            # logger.debug(">>> %s %s (%s) dict key length %d ", collectionName, entryId, asymId, len(tL[0]))
                            #
                            if optF:
                                dV["pdbx_vrpt_instance_results"] = tL[0]["pdbx_vrpt_instance_results"] if "pdbx_vrpt_instance_results" in tL[0] else []
                                dV["pdbx_unobs_or_zero_occ_residues"] = tL[0]["pdbx_unobs_or_zero_occ_residues"] if "pdbx_unobs_or_zero_occ_residues" in tL[0] else []
                            #
                            if optF:
                                urdL = tL[0]["pdbx_unobs_or_zero_occ_residues"] if "pdbx_unobs_or_zero_occ_residues" in tL[0] else []
                                oL = [{"label_seq_id": urd["label_seq_id"], "label_comp_id": urd["label_comp_id"]} for urd in urdL]
                                dV["pdbx_unobs_or_zero_occ_residues"] = oL
                            #
                            # Per-residue sequence summary - errors here indicate malformed validation data.
                            try:
                                irdL = tL[0]["pdbx_vrpt_instance_results"] if "pdbx_vrpt_instance_results" in tL[0] else []
                                oL = [{"label_seq_id": ird["label_seq_id"], "label_comp_id": ird["label_comp_id"]} for ird in irdL]
                                dV["pdbx_vrpt_instance_results_seq"] = oL
                            except Exception as e:
                                logger.error("Failing with entryId %s entityId %s asymId %s bad validation data %s", entryId, entityId, asymId, str(e))
                            #
                            # Occupancy-weighted B (OWAB) summary - missing OWAB keys are expected for
                            # some records, hence the debug-level log.
                            try:
                                irdL = tL[0]["pdbx_vrpt_instance_results"] if "pdbx_vrpt_instance_results" in tL[0] else []
                                oL = [{"OWAB": ird["OWAB"], "label_seq_id": ird["label_seq_id"], "label_comp_id": ird["label_comp_id"]} for ird in irdL]
                                dV["pdbx_vrpt_instance_results_occ"] = oL
                            except Exception as e:
                                logger.debug("Failing with entryId %s entityId %s asymId %s bad validation data %s", entryId, entityId, asymId, str(e))
                            vD[asymId] = copy.copy(dV)
                        #
                        analD = self.analEntity(entryId, peD, vD)
                        entryD[entryId]["selected_polymer_entities"][entityId]["anal_instances"] = copy.copy(analD)
                    # Per-entry bookkeeping: progress logging, periodic checkpointing, and the
                    # optional entry limit.
                    iCount += 1
                    if iCount % 500 == 0:
                        logger.info("Completed %d/%d entries", iCount, len(entryD))
                    if iCount % 2000 == 0:
                        ok = self.__mU.doExport(savePath, entryD, **saveKwargs)
                        logger.info("Saved polymer entity instance results (%d) status %r in %s", iCount, ok, savePath)
                    if entryLimit and iCount >= entryLimit:
                        break
        # Final export of whatever was accumulated.
        ok = self.__mU.doExport(savePath, entryD, **saveKwargs)
        logger.info("Saved polymer instance results (%d) entries %d status %r in %s", iCount, len(entryD), ok, savePath)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return entryD
def testCreateDropCollection(self):
    """Test case - create/drop collection -"""
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mongo = MongoDbUtil(client)
            status = mongo.createCollection(self.__dbName, self.__collectionName)
            self.assertTrue(status)
            status = mongo.databaseExists(self.__dbName)
            self.assertTrue(status)
            status = mongo.collectionExists(self.__dbName, self.__collectionName)
            self.assertTrue(status)
            #
            logger.debug("Databases = %r", mongo.getDatabaseNames())
            logger.debug("Collections = %r", mongo.getCollectionNames(self.__dbName))
            status = mongo.dropCollection(self.__dbName, self.__collectionName)
            self.assertTrue(status)
            logger.debug("Databases = %r", mongo.getDatabaseNames())
            logger.debug("Collections = %r", mongo.getCollectionNames(self.__dbName))
            # Removing the last collection will remove the database (results appear differ between mac and linux - )
            status = mongo.databaseExists(self.__dbName)
            # self.assertFalse(status)
            #
            # The collection itself must be gone after the drop.
            status = mongo.collectionExists(self.__dbName, self.__collectionName)
            self.assertFalse(status)
            logger.debug("Collections = %r", mongo.getCollectionNames(self.__dbName))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def getPolymerEntities(self, entryD, **kwargs):
    """Add 'selected_polymer_entities' satisfying the input conditions and add this to the input entry dictionary.

    For each entry id, fetches protein polymer entities (single-source only), extracts
    identifiers, polymer type, canonical sequence, source-organism taxonomy, and (for
    single-reference entities) the sequence database reference.  UniProt reference
    sequences are fetched on demand and memoized in self.__seqCache.  Progress is
    periodically exported to savePath.

    Args:
        entryD (dict): dictionary keyed by entry id; updated in place
        **kwargs: dbName, collectionName, resultKey, savePath, entryLimit, saveKwargs (defaults below)

    Returns:
        dict: the input entryD, updated in place
    """
    dbName = kwargs.get("dbName", "pdbx_core")
    collectionName = kwargs.get("collectionName", "pdbx_core_polymer_entity")
    resultKey = kwargs.get("resultKey", "selected_polymer_entities")
    savePath = kwargs.get("savePath", "entry-data.pic")
    entryLimit = kwargs.get("entryLimit", None)
    saveKwargs = kwargs.get("saveKwargs", {"fmt": "pickle"})
    #
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            # NOTE(review): iCount is only bound inside this branch; if the collection is
            # missing, the final logger.info below would raise NameError (caught by the
            # enclosing except).
            if mg.collectionExists(dbName, collectionName):
                logger.info("%s %s document count is %d", dbName, collectionName, mg.count(dbName, collectionName))
                selectL = [
                    "rcsb_polymer_entity_container_identifiers",
                    "entity_poly.type",
                    "entity_poly.pdbx_seq_one_letter_code_can",
                    "rcsb_entity_source_organism.ncbi_taxonomy_id",
                    "rcsb_entity_source_organism.ncbi_scientific_name",
                    "struct_ref.pdbx_seq_one_letter_code",
                    "struct_ref.pdbx_db_accession",
                    "struct_ref.db_name",
                    "struct_ref.entity_id",
                ]
                iCount = 0
                for entryId in entryD:
                    #
                    # Skip entries already processed (supports resuming from a checkpoint).
                    if resultKey in entryD[entryId]:
                        continue
                    #
                    # Restrict to single-source protein polymer entities of this entry.
                    qD = {
                        "rcsb_polymer_entity_container_identifiers.entry_id": entryId,
                        "entity_poly.rcsb_entity_polymer_type": "Protein",
                        "entity.rcsb_multiple_source_flag": "N",
                    }
                    #
                    dL = mg.fetch(dbName, collectionName, selectL, queryD=qD)
                    logger.debug("%s query %r fetch result count %d", entryId, qD, len(dL))
                    eD = {}
                    for ii, dV in enumerate(dL, 1):
                        rD = {}
                        logger.debug("%s (%4d) d is %r", entryId, ii, dV)
                        if "rcsb_polymer_entity_container_identifiers" in dV and "asym_ids" in dV["rcsb_polymer_entity_container_identifiers"]:
                            rD["asym_ids"] = dV["rcsb_polymer_entity_container_identifiers"]["asym_ids"]
                            rD["entity_id"] = dV["rcsb_polymer_entity_container_identifiers"]["entity_id"]
                        if "entity_poly" in dV and "type" in dV["entity_poly"]:
                            rD["type"] = dV["entity_poly"]["type"]
                            rD["seq_one_letter_code_can"] = dV["entity_poly"]["pdbx_seq_one_letter_code_can"]
                        # Taxonomy from the first (only, given the single-source filter) source organism.
                        if "rcsb_entity_source_organism" in dV:
                            rD["ncbi_taxonomy_id"] = dV["rcsb_entity_source_organism"][0]["ncbi_taxonomy_id"] if "ncbi_taxonomy_id" in dV["rcsb_entity_source_organism"][0] else None
                            rD["ncbi_scientific_name"] = (
                                dV["rcsb_entity_source_organism"][0]["ncbi_scientific_name"] if "ncbi_scientific_name" in dV["rcsb_entity_source_organism"][0] else None
                            )
                        # Only single-reference entities get sequence-database details.
                        if "struct_ref" in dV and len(dV["struct_ref"]) == 1:
                            rD["seq_one_letter_code_ref"] = dV["struct_ref"][0]["pdbx_seq_one_letter_code"] if "pdbx_seq_one_letter_code" in dV["struct_ref"][0] else None
                            rD["db_accession"] = dV["struct_ref"][0]["pdbx_db_accession"] if "pdbx_db_accession" in dV["struct_ref"][0] else None
                            rD["db_name"] = dV["struct_ref"][0]["db_name"] if "db_name" in dV["struct_ref"][0] else None
                            #
                            # Resolve the UniProt reference sequence, memoized in self.__seqCache.
                            refDbName = rD["db_name"]
                            dbAccession = rD["db_accession"]
                            dbRefSeq = self.__seqCache[dbAccession] if dbAccession in self.__seqCache else None
                            if refDbName in ["UNP"] and not dbRefSeq:
                                dbRefSeq = self.__fetchUniprot(dbAccession)
                                self.__seqCache[dbAccession] = dbRefSeq
                                logger.debug("Fetch uniprot %r", dbRefSeq)
                            rD["ref_db_seq"] = dbRefSeq
                        else:
                            rD["seq_one_letter_code_ref"] = rD["db_accession"] = rD["db_name"] = None
                        #
                        if "entity_id" in rD:
                            eD[rD["entity_id"]] = copy.copy(rD)
                    entryD[entryId][resultKey] = copy.copy(eD)
                    iCount += 1
                    if iCount % 10 == 0:
                        logger.info("Completed polymer entities fetch %d/%d entries", iCount, len(entryD))
                    if iCount % 2000 == 0:
                        ok = self.__mU.doExport(savePath, entryD, **saveKwargs)
                        logger.info("Saved polymer entity results (%d) status %r in %s", iCount, ok, savePath)
                    if entryLimit and iCount >= entryLimit:
                        logger.info("Quitting after %d", iCount)
                        break
        #
        # for entryId in entryD:
        #    logger.debug(">> %s docD %r" % (entryId, entryD[entryId]))
        # Final export of whatever was accumulated.
        ok = self.__mU.doExport(savePath, entryD, **saveKwargs)
        logger.info("Saved polymer entity results (%d) entries %d status %r in %s", iCount, len(entryD), ok, savePath)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return entryD