def testReplaceList(self):
    """Test case - create collection and insert document list - replace and upsert document list.

    The first pass inserts ``nDocs`` documents and reads each one back by its
    returned ``_id``.  The second pass calls ``replaceList()`` with twice as many
    documents (``upsertFlag=True``) keyed on ``DOC_ID`` and reads every document
    back by its ``DOC_ID`` value.
    """
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            nDocs = 10
            mg = MongoDbUtil(client)
            ok = mg.createCollection(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            ok = mg.databaseExists(self.__dbName)
            self.assertTrue(ok)
            ok = mg.collectionExists(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            #
            dList = [self.__makeDataObj(2, 5, 5, ii) for ii in range(nDocs)]
            #
            keyName = "DOC_ID"
            rIdL = mg.insertList(self.__dbName, self.__collectionName, dList, keyNames=[keyName], salvage=True)
            self.assertEqual(len(rIdL), len(dList))
            #
            for ii, rId in enumerate(rIdL):
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId)
                self.assertEqual(len(dList[ii]), len(rObj))
                self.assertEqual(dList[ii], rObj)
            #
            # Replace with 2x the list length - half are duplicate ids
            dList = [self.__makeDataObj(4, 10, 10, ii) for ii in range(nDocs + nDocs)]
            #
            mg.replaceList(self.__dbName, self.__collectionName, dList, ["DOC_ID"], upsertFlag=True)
            for ii in range(nDocs + nDocs):
                kVal = "DOC_%d" % ii
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", kVal)
                if not rObj:
                    logger.info("Failing to recover doc %s", kVal)
                # Fail on the missing document itself rather than on the
                # AttributeError that rObj.pop() would raise for a None result.
                self.assertTrue(rObj, "Failing to recover doc %s" % kVal)
                # Compare content only - drop the '_id' key from both copies
                rObj.pop("_id", None)
                dList[ii].pop("_id", None)
                self.assertEqual(len(dList[ii]), len(rObj))
                self.assertEqual(dList[ii], rObj)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testReplaceSingle(self):
    """Test case - create collection and insert document and then replace document.

    Three steps are checked by reading the stored document back:
      1. insert a document and fetch it by the returned ``_id``;
      2. replace it with a new document selected by ``{"DOC_ID": "DOC_1"}``;
      3. replace/upsert a second document selected by ``{"DOC_ID": "DOC_2"}``.
    """
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            ok = mg.createCollection(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            ok = mg.databaseExists(self.__dbName)
            self.assertTrue(ok)
            ok = mg.collectionExists(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            #
            dObj = self.__makeDataObj(2, 5, 5, 1)
            rId = mg.insert(self.__dbName, self.__collectionName, dObj)
            self.assertTrue(rId is not None)
            # Note that dObj is mutated by additional key '_id' that is added on insert -
            #
            rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId)
            logger.debug("Return Object %s", pprint.pformat(rObj))
            self.assertEqual(len(dObj), len(rObj))
            self.assertEqual(dObj, rObj)
            #
            # Now replace with a new document with the same document id
            dObj = self.__makeDataObj(3, 2, 2, 1)
            logger.debug("Replace Object %s", pprint.pformat(dObj))
            # The return value of replace() is not checked here; the read back below is the test.
            mg.replace(self.__dbName, self.__collectionName, dObj, {"DOC_ID": "DOC_1"}, upsertFlag=True)
            rObj = mg.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", "DOC_1")
            # Compare content only - drop the '_id' key from both copies
            rObj.pop("_id", None)
            dObj.pop("_id", None)
            logger.debug("Return Object %s", pprint.pformat(rObj))
            self.assertEqual(len(dObj), len(rObj))
            self.assertEqual(dObj, rObj)
            #
            # Now replace with a new document with a different key
            dObj2 = self.__makeDataObj(5, 5, 5, 2)
            # Log the document that is actually being written in this step
            logger.debug("Replace Object %s", pprint.pformat(dObj2))
            #
            mg.replace(self.__dbName, self.__collectionName, dObj2, {"DOC_ID": "DOC_2"}, upsertFlag=True)
            rObj = mg.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", "DOC_2")
            rObj.pop("_id", None)
            dObj2.pop("_id", None)
            logger.debug("Return Object %s", pprint.pformat(rObj))
            self.assertEqual(len(dObj2), len(rObj))
            self.assertEqual(dObj2, rObj)
            #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testInsertSingle(self):
    """Test case - create a collection, insert one document and read it back by its id."""
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mongoUtil = MongoDbUtil(client)
            self.assertTrue(mongoUtil.createCollection(self.__dbName, self.__collectionName))
            self.assertTrue(mongoUtil.databaseExists(self.__dbName))
            self.assertTrue(mongoUtil.collectionExists(self.__dbName, self.__collectionName))
            #
            docD = self.__makeDataObj(2, 5, 5)
            docId = mongoUtil.insert(self.__dbName, self.__collectionName, docD)
            self.assertTrue(docId is not None)
            #
            # insert() adds the key '_id' to docD, so the fetched copy can be
            # compared with the input object directly.
            fetchedD = mongoUtil.fetchOne(self.__dbName, self.__collectionName, "_id", docId)
            logger.debug("Return Object %s", pprint.pformat(fetchedD))
            self.assertEqual(len(docD), len(fetchedD))
            self.assertEqual(docD, fetchedD)
    except Exception as err:
        logger.exception("Failing with %s", str(err))
        self.fail()
def testInsertList(self):
    """Test case - create a collection, insert a list of documents and read each one back."""
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mongoUtil = MongoDbUtil(client)
            self.assertTrue(mongoUtil.createCollection(self.__dbName, self.__collectionName))
            self.assertTrue(mongoUtil.databaseExists(self.__dbName))
            self.assertTrue(mongoUtil.collectionExists(self.__dbName, self.__collectionName))
            #
            docL = [self.__makeDataObj(2, 5, 5, num) for num in range(100)]
            #
            keyName = "DOC_ID"
            insertedIdL = mongoUtil.insertList(self.__dbName, self.__collectionName, docL, keyNames=[keyName], salvage=True)
            self.assertEqual(len(insertedIdL), len(docL))
            #
            # The input documents gain the key '_id' on insert, so a fetched
            # document can be compared with its input object directly.
            for insertedId in insertedIdL:
                fetchedD = mongoUtil.fetchOne(self.__dbName, self.__collectionName, "_id", insertedId)
                logger.debug("Return Object %s", pprint.pformat(fetchedD))
                # The text after the first four characters of DOC_ID is the index of the input document
                srcIndex = int(fetchedD["DOC_ID"][4:])
                srcD = docL[srcIndex]
                self.assertEqual(len(srcD), len(fetchedD))
                self.assertEqual(srcD, fetchedD)
    except Exception as err:
        logger.exception("Failing with %s", str(err))
        self.fail()
def testSchemaValidation1(self):
    """Test case - create collection and insert data with schema validation (ext. schema assignment).

    The collection is created first and the validator is attached afterwards with a
    ``collMod`` database command.  A document that does not follow the schema is then
    expected to be rejected (``insert()`` returns None), and a second document is
    inserted and compared with its read back copy.
    """
    # Example of a Mongo flavor of JsonSchema
    vexpr = {"$jsonSchema": self.__mongoSchema}
    # OrderedDict keeps "collMod" as the first key of the command document
    query = OrderedDict([("collMod", self.__collectionName), ("validator", vexpr), ("validationLevel", "moderate")])
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            # Start from an empty database
            if mg.databaseExists(self.__dbName):
                ok = mg.dropDatabase(self.__dbName)
                self.assertTrue(ok)
            #
            ok = mg.createDatabase(self.__dbName)
            self.assertTrue(ok)
            #
            ok = mg.createCollection(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            ok = mg.databaseExists(self.__dbName)
            self.assertTrue(ok)
            ok = mg.collectionExists(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            #
            mg.databaseCommand(self.__dbName, query)
            dObj = {"x": 1}
            rId = mg.insert(self.__dbName, self.__collectionName, dObj)
            logger.info("rId is %r", rId)
            self.assertIsNone(rId)
            #
            # The sample text is spelled with numeric XML character references
            # (double quote, Greek letters, pound sign, care-of sign, star and a
            # double-struck z) so the literal stays ASCII and the embedded double
            # quote cannot terminate the string.
            s2 = unescapeXmlCharRef(" &#x22; &#x3A6; &#x3A8; &#x3B1; &#xA3; &#x2105; &#x2606; &#x1D56B;")
            dObj = {"strField1": "test value", "strField2": s2, "intField1": 50, "enumField1": "v3", "dblField1": 100.1}
            rId = mg.insert(self.__dbName, self.__collectionName, dObj)
            logger.info("rId is %r", rId)
            rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId)
            logger.debug("Return Object %s", pprint.pformat(rObj))
            self.assertEqual(len(dObj), len(rObj))
            self.assertEqual(dObj, rObj)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __transform(self, databaseName, collectionName, docSelectList, logIncrement=100):
    """Filter and replace each selected document in the input collection.

    Every document named in docSelectList is fetched by its "_id".  When an object
    adapter is configured the document is validated, filtered and validated again.
    Documents that pass the filter are written back with replace() using the
    selection dictionary as the replacement selector.

    Args:
        databaseName (str): database name
        collectionName (str): collection name
        docSelectList (list): selection dictionaries; entries without an "_id" key are skipped
        logIncrement (int, optional): a progress message is logged every logIncrement documents
                                      and for the last document. Defaults to 100.

    Returns:
        bool: True if no fetch or replace operation failed, False if any of these
              failed or if an exception was raised while processing.
    """
    ok = True
    try:
        self.__valInst = self.__getValidator(databaseName, collectionName, schemaLevel="full")
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(databaseName, collectionName):
                numDoc = len(docSelectList)
                for ii, dD in enumerate(docSelectList, 1):
                    if "_id" not in dD:
                        continue
                    rObj = mg.fetchOne(databaseName, collectionName, "_id", dD["_id"])
                    if rObj:
                        # The "_id" key is not part of the object that is filtered and written back
                        rObj.pop("_id", None)
                        #
                        fOk = True
                        if self.__oAdapt:
                            self.__validateObj(databaseName, collectionName, rObj, label="Original")
                            fOk, rObj = self.__oAdapt.filter(rObj)
                            self.__validateObj(databaseName, collectionName, rObj, label="Updated")
                        if fOk:
                            rOk = mg.replace(databaseName, collectionName, rObj, dD)
                            if rOk is None:
                                tId = rObj["rcsb_id"] if rObj and "rcsb_id" in rObj else "anonymous"
                                logger.error("%r %r (%r) failing", databaseName, collectionName, tId)
                            # Keep the accumulated status a plain boolean (None counts as a failure)
                            ok = ok and bool(rOk)
                    else:
                        # A missing document is one failure; keep processing the remaining selections
                        logger.error("%r %r (%r) fetch failing", databaseName, collectionName, dD["_id"])
                        ok = False
                    #
                    if ii % logIncrement == 0 or ii == numDoc:
                        logger.info("Replace status %r object (%d of %d)", ok, ii, numDoc)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        # An aborted run must not be reported as a success
        ok = False
    return ok
def __selectObjects(self, **kwargs):
    """Return a dictionary of objects satisfying the input conditions (e.g. method, resolution limit).

    Keyword Args:
        databaseName (str): database name. Defaults to "pdbx_core".
        collectionName (str): collection name. Defaults to "pdbx_core_entry".
        selectionQuery (dict): query passed to fetch() as queryD. Defaults to {}.
        uniqueAttributes (list): attribute names whose values are joined with "." to
                                 build the key of each returned object. Defaults to ["rcsb_id"].
        objectLimit (int): stop after this number of selected documents has been processed.
                           Defaults to None (no limit).
        stripObjectId (bool): remove the "_id" key from each returned object; otherwise
                              the "_id" value is converted to a string. Defaults to False.
        logIncrement (int): a progress message is logged every logIncrement documents
                            and for the last document. Defaults to 10000.

    Returns:
        dict: {<joined unique attribute values>: <shallow copy of the object>, ...}.
              The objects collected so far are returned if an exception is raised.
    """
    databaseName = kwargs.get("databaseName", "pdbx_core")
    collectionName = kwargs.get("collectionName", "pdbx_core_entry")
    selectionQueryD = kwargs.get("selectionQuery", {})
    #
    uniqueAttributes = kwargs.get("uniqueAttributes", ["rcsb_id"])
    #
    tV = kwargs.get("objectLimit", None)
    objLimit = int(tV) if tV is not None else None
    stripObjectId = kwargs.get("stripObjectId", False)
    logIncrement = kwargs.get("logIncrement", 10000)
    #
    objectD = {}
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            if mg.collectionExists(databaseName, collectionName):
                logger.info("%s %s document count is %d", databaseName, collectionName, mg.count(databaseName, collectionName))
                # Work on a copy so that the caller's query dictionary is never modified
                qD = dict(selectionQueryD) if selectionQueryD else {}
                selectL = ["_id"]
                # Treat an empty/None fetch result as "no documents selected"
                dL = mg.fetch(databaseName, collectionName, selectL, queryD=qD) or []
                numDoc = len(dL)
                logger.info("Selection %r fetch result count %d", selectL, numDoc)
                #
                for ii, dD in enumerate(dL, 1):
                    if "_id" not in dD:
                        continue
                    rObj = mg.fetchOne(databaseName, collectionName, "_id", dD["_id"])
                    if not rObj:
                        # Skip a document that cannot be fetched rather than abandoning the whole selection
                        logger.error("%s %s unable to fetch object %r", databaseName, collectionName, dD["_id"])
                        continue
                    if stripObjectId:
                        rObj.pop("_id", None)
                    elif "_id" in rObj:
                        rObj["_id"] = str(rObj["_id"])
                    #
                    stKey = ".".join([rObj[ky] for ky in uniqueAttributes])
                    objectD[stKey] = copy.copy(rObj)
                    if objLimit and ii >= objLimit:
                        break
                    logger.debug("Saving %d %s", ii, stKey)
                    if ii % logIncrement == 0 or ii == numDoc:
                        logger.info("Extracting object (%d of %d)", ii, numDoc)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return objectD
def testSchemaValidation3(self):
    """Test case - create collection and insert data with schema validation (warn mode) (integrated schema assignment).

    The schema is passed to createCollection() together with validationAction="warn".
    Both inserts in this test are expected to return an identifier, and the second
    document is compared with its read back copy.
    """
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            # Start from an empty database
            if mg.databaseExists(self.__dbName):
                ok = mg.dropDatabase(self.__dbName)
                self.assertTrue(ok)
            #
            ok = mg.createDatabase(self.__dbName)
            self.assertTrue(ok)
            #
            ok = mg.createCollection(self.__dbName, self.__collectionName, overWrite=True, bsonSchema=self.__mongoSchema, validationAction="warn")
            self.assertTrue(ok)
            ok = mg.databaseExists(self.__dbName)
            self.assertTrue(ok)
            ok = mg.collectionExists(self.__dbName, self.__collectionName)
            self.assertTrue(ok)
            #
            dObj = {"x": 1}
            rId = mg.insert(self.__dbName, self.__collectionName, dObj)
            logger.info("rId is %r", rId)
            self.assertIsNotNone(rId)
            #
            # The sample text is spelled with numeric XML character references
            # (double quote, Greek letters, pound sign, care-of sign, star and a
            # double-struck z) so the literal stays ASCII and the embedded double
            # quote cannot terminate the string.
            s2 = unescapeXmlCharRef(" &#x22; &#x3A6; &#x3A8; &#x3B1; &#xA3; &#x2105; &#x2606; &#x1D56B;")
            dObj = {"strField1": "test value", "strField2": s2, "intField1": 50, "enumField1": "v3a", "dblField1": 100.1}
            rId = mg.insert(self.__dbName, self.__collectionName, dObj)
            self.assertIsNotNone(rId)
            logger.info("rId is %r", rId)
            rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId)
            logger.debug("Return Object %s", pprint.pformat(rObj))
            self.assertEqual(len(dObj), len(rObj))
            self.assertEqual(dObj, rObj)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testSingleIndex(self):
    """Test case - build a unique single-key index, load a document list and verify each document.

    The index is created before the documents are inserted; afterwards it is
    dropped, created again and the collection is reindexed.
    """
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            nDocs = 100
            mongoUtil = MongoDbUtil(client)
            self.assertTrue(mongoUtil.createCollection(self.__dbName, self.__collectionName))
            self.assertTrue(mongoUtil.databaseExists(self.__dbName))
            self.assertTrue(mongoUtil.collectionExists(self.__dbName, self.__collectionName))
            #
            # The index exists before any document is inserted
            status = mongoUtil.createIndex(self.__dbName, self.__collectionName, keyList=["DOC_ID"], indexName="primary", indexType="DESCENDING", uniqueFlag=True)
            self.assertTrue(status)
            docL = [self.__makeDataObj(2, 5, 5, num) for num in range(nDocs)]
            #
            keyName = "DOC_ID"
            insertedIdL = mongoUtil.insertList(self.__dbName, self.__collectionName, docL, keyNames=[keyName], salvage=True)
            self.assertEqual(len(docL), len(insertedIdL))
            #
            for num, srcD in enumerate(docL):
                docKey = "DOC_%d" % num
                fetchedD = mongoUtil.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", docKey)
                # Compare content only - drop the '_id' key from both copies
                fetchedD.pop("_id", None)
                srcD.pop("_id", None)
                self.assertEqual(len(srcD), len(fetchedD))
                self.assertEqual(srcD, fetchedD)
            #
            # Drop, recreate and rebuild the index on the populated collection
            self.assertTrue(mongoUtil.dropIndex(self.__dbName, self.__collectionName, indexName="primary"))
            status = mongoUtil.createIndex(self.__dbName, self.__collectionName, keyList=["DOC_ID"], indexName="primary", indexType="DESCENDING", uniqueFlag=True)
            self.assertTrue(status)
            self.assertTrue(mongoUtil.reIndex(self.__dbName, self.__collectionName))
    except Exception as err:
        logger.exception("Failing with %s", str(err))
        self.fail()
def testSingleIndexSelect(self):
    """Test case - build a unique single-key index, load a document list, then exercise selections.

    The first connection loads and verifies the documents and rebuilds the index.
    A second connection checks count(), fetch() with and without a query, and distinct().
    """
    try:
        logger.debug("Starting testSingleIndexSelect")
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            nDocs = 100
            mongoUtil = MongoDbUtil(client)
            self.assertTrue(mongoUtil.createCollection(self.__dbName, self.__collectionName))
            self.assertTrue(mongoUtil.databaseExists(self.__dbName))
            self.assertTrue(mongoUtil.collectionExists(self.__dbName, self.__collectionName))
            #
            # The index exists before any document is inserted
            status = mongoUtil.createIndex(self.__dbName, self.__collectionName, keyList=["DOC_ID"], indexName="primary", indexType="DESCENDING", uniqueFlag=True)
            self.assertTrue(status)
            nRows = 5
            docL = [self.__makeDataObj(2, 5, nRows, num) for num in range(nDocs)]
            #
            keyName = "DOC_ID"
            insertedIdL = mongoUtil.insertList(self.__dbName, self.__collectionName, docL, keyNames=[keyName], salvage=True)
            self.assertEqual(len(docL), len(insertedIdL))
            #
            for num, srcD in enumerate(docL):
                docKey = "DOC_%d" % num
                fetchedD = mongoUtil.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", docKey)
                # Compare content only - drop the '_id' key from both copies
                fetchedD.pop("_id", None)
                srcD.pop("_id", None)
                self.assertEqual(len(srcD), len(fetchedD))
                self.assertEqual(srcD, fetchedD)
            #
            # Drop, recreate and rebuild the index on the populated collection
            self.assertTrue(mongoUtil.dropIndex(self.__dbName, self.__collectionName, indexName="primary"))
            status = mongoUtil.createIndex(self.__dbName, self.__collectionName, keyList=["DOC_ID"], indexName="primary", indexType="DESCENDING", uniqueFlag=True)
            self.assertTrue(status)
            self.assertTrue(mongoUtil.reIndex(self.__dbName, self.__collectionName))
        #
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mongoUtil = MongoDbUtil(client)
            collectionLength = mongoUtil.count(self.__dbName, self.__collectionName)
            logger.debug("collection length %d", collectionLength)
            #
            # Selection without a query
            fetchL = mongoUtil.fetch(self.__dbName, self.__collectionName, ["DOC_ID"])
            self.assertEqual(len(fetchL), nDocs)
            logger.debug("Fetch length %d", len(fetchL))
            for num, fetchD in enumerate(fetchL):
                logger.debug("Fetch num %d: %r", num, fetchD)
            #
            # Selection with a query on a nested attribute
            fetchL = mongoUtil.fetch(self.__dbName, self.__collectionName, ["category_0.attribute_0"], queryD={"category_0.attribute_0": "val_0_0"})
            self.assertEqual(len(fetchL), nDocs)
            logger.debug("Fetch length %d", len(fetchL))
            for num, fetchD in enumerate(fetchL):
                logger.debug("Fetch num %d: %r", num, fetchD)
            #
            atName = "category_0.attribute_0"
            vL0 = mongoUtil.distinct(self.__dbName, self.__collectionName, atName)
            self.assertEqual(len(vL0), nRows + 2)
            logger.debug("vL0 %r", vL0)
            vL1 = mongoUtil.distinct(self.__dbName, self.__collectionName, "category_1.attribute_0")
            self.assertEqual(len(vL1), nRows + 2)
            # Count the documents carrying each distinct value
            for distinctValue in vL0:
                matchCount = mongoUtil.count(self.__dbName, self.__collectionName, countFilter={atName: distinctValue})
                logger.debug("%s value %s (%d)", atName, distinctValue, matchCount)
                self.assertGreaterEqual(matchCount, 100)
            #
    except Exception as err:
        logger.exception("Failing with %s", str(err))
        self.fail()
def __loadDocuments(self, dbName, collectionName, docList, loadType="full", readBackCheck=False, keyNames=None):
    """Load the input document list into a collection and report the stored and failed documents.

    Args:
        dbName: target database name
        collectionName: target collection name
        docList: list of documents to insert
        loadType: for "replace" (and only when keyNames is provided) the documents
                  matching keyNames are deleted before the insert
        readBackCheck: when True (and keyNames is provided) each inserted document is
                       fetched and compared with the corresponding input document
        keyNames: document key names used to match stored documents with input documents

    Returns:
        tuple: (status, successList, failList).  status is True when every document was
               inserted and any requested read back check passed.  (False, [], docList)
               is returned if an exception is raised while loading.
    """
    logger.debug("Loading dbName %s collectionName %s with document count %d keynames %r", dbName, collectionName, len(docList), keyNames)
    if keyNames:
        # Map the key values of each document to the position of that document in docList
        indD = {}
        indL = []
        try:
            for pos, inputDoc in enumerate(docList):
                indD[self.__getKeyValues(inputDoc, keyNames)] = pos
            indL = list(range(len(docList)))
        except Exception as err:
            logger.exception("Failing ii %d d %r with %s", pos, inputDoc, str(err))
    try:
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            mg = MongoDbUtil(client)
            #
            if loadType == "replace" and keyNames:
                deleteStatus = mg.deleteList(dbName, collectionName, docList, keyNames)
                logger.debug("Deleted document status %r", (deleteStatus,))
            #
            rIdL = mg.insertList(dbName, collectionName, docList, keyNames=keyNames)
            logger.debug("Insert returns rIdL length %r", len(rIdL))
            #
            # Assume a complete load, then work out the specific successes and
            # failures only when the insert count falls short.
            successList, failList = docList, []
            if len(rIdL) != len(docList):
                if not keyNames:
                    # fail the whole batch if we don't have visibility into each document
                    successList, failList = [], docList
                else:
                    successIndList = []
                    for storedId in rIdL:
                        storedDoc = mg.fetchOne(dbName, collectionName, "_id", storedId)
                        successIndList.append(indD[self.__getKeyValues(storedDoc, keyNames)])
                    failIndList = list(set(indL) - set(successIndList))
                    failList = [docList[pos] for pos in failIndList]
                    successList = [docList[pos] for pos in successIndList]
            #
            status = len(rIdL) == len(docList)
            if readBackCheck and keyNames:
                #
                # Note that objects in docList are mutated by the insert operation with the additional key '_id',
                # hence, it is possible to compare the fetched object with the input object.
                #
                for storedId in rIdL:
                    storedDoc = mg.fetchOne(dbName, collectionName, "_id", storedId)
                    srcPos = indD[self.__getKeyValues(storedDoc, keyNames)]
                    if storedDoc != docList[srcPos]:
                        # The first mismatch fails the load status
                        status = False
                        break
            #
            return status, successList, failList
    except Exception as err:
        logger.exception("Failing %r %r (len=%d) %s with %s", dbName, collectionName, len(docList), keyNames, str(err))
    return False, [], docList