def testGetReferencedFields(self):
    """Check that getReferencedFields() returns the sorted tuple of query keys.

    Builds an aggregate query operation with four distinct keys, each
    referenced by ten range predicates, and verifies that
    ``workload.getReferencedFields`` returns a tuple holding each key
    exactly once, in sorted order.
    """
    op = {
        'collection': 'blah',
        'predicates': {},
        'query_aggregate': True,
        'query_content': [],
        'resp_content': [{'n': 16, 'ok': 1}],
        'type': constants.OP_TYPE_QUERY,
    }

    expected = set()
    for i in range(4):
        keyName = 'key%02d' % i
        # Reference the same key several times so the test also covers
        # de-duplication of repeated fields.
        for ii in range(10):
            op['query_content'].append({"#query": {keyName: {"#gt": i * ii}}})
        expected.add(keyName)
        op['predicates'][keyName] = constants.PRED_TYPE_RANGE
    expected = sorted(expected)

    fields = workload.getReferencedFields(op)

    self.assertIsNotNone(fields)
    self.assertIsInstance(fields, tuple)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(len(expected), len(fields))
    for wanted, actual in zip(expected, fields):
        self.assertEqual(wanted, actual)
def generateCollectionHistograms(self):
    """Build, per collection, a histogram of referenced field combinations.

    Walks every operation of every session in ``self.workload``. Operations
    whose collection name contains ``$cmd`` are skipped, and operations on a
    collection that is not in ``self.collections`` are logged and skipped.
    For each remaining operation, every non-empty combination of the fields
    returned by ``workload.getReferencedFields(op)`` is put into that
    collection's histogram.

    Returns:
        A dict mapping each name in ``self.collections`` to its Histogram.
    """
    col_keys = dict((col_name, Histogram()) for col_name in self.collections)

    for sess in self.workload:
        for op in sess["operations"]:
            col_name = op["collection"]
            if "$cmd" in col_name:
                continue
            if col_name not in col_keys:
                LOG.warn("Missing: " + col_name)
                continue

            fields = workload.getReferencedFields(op)
            h = col_keys[col_name]
            # Use explicit loops instead of map(): map() is lazy on Python 3,
            # so calling it only for its side effect would record nothing.
            for size in range(1, len(fields) + 1):
                for combo in itertools.combinations(fields, size):
                    h.put(combo)
        ## FOR (op)
    ## FOR (sess)
    return col_keys
def generateCollectionHistograms(self):
    """Build, per collection, a histogram of referenced field combinations.

    Walks every operation of every session in ``self.workload``. Operations
    whose collection name contains ``$cmd`` are skipped, and operations on a
    collection that is not in ``self.collections`` are logged and skipped.
    For each remaining operation, every non-empty combination of the fields
    returned by ``workload.getReferencedFields(op)`` is put into that
    collection's histogram.

    Returns:
        A dict mapping each name in ``self.collections`` to its Histogram.
    """
    col_keys = dict((col_name, Histogram()) for col_name in self.collections)

    for sess in self.workload:
        for op in sess["operations"]:
            col_name = op["collection"]
            if "$cmd" in col_name:
                continue
            if col_name not in col_keys:
                LOG.warn("Missing: " + col_name)
                continue

            fields = workload.getReferencedFields(op)
            h = col_keys[col_name]
            # Use explicit loops instead of map(): map() is lazy on Python 3,
            # so calling it only for its side effect would record nothing.
            for size in range(1, len(fields) + 1):
                for combo in itertools.combinations(fields, size):
                    h.put(combo)
        ## FOR (op)
    ## FOR (sess)
    return col_keys
def fixInvalidCollections(self):
    """Try to repair operations whose collection is the invalid marker.

    For every session containing at least one operation whose collection
    equals ``constants.INVALID_COLLECTION_MARKER``, each such operation is
    matched against the known collections by counting how many of its
    referenced fields appear in each collection's ``fields``. When exactly
    one collection is returned by ``getMaxCountKeys()`` the operation is
    reassigned to it and ``self.fix_ctr`` is incremented; otherwise a
    warning is logged and the operation is left untouched. A session is
    saved once, after all of its operations were examined, if any of them
    was changed.
    """
    searchKey = {"operations.collection": constants.INVALID_COLLECTION_MARKER}

    for session in self.metadata_db.Session.find(searchKey):
        # Track modifications per session. Resetting this flag for every
        # operation would lose a fix whenever a later operation in the same
        # session was skipped, and the session would never be saved.
        dirty = False

        for op in session["operations"]:
            if op["collection"] != constants.INVALID_COLLECTION_MARKER:
                continue

            if self.debug:
                LOG.debug("Attempting to fix corrupted Operation:\n%s" % pformat(op))

            # For each field referenced in the query, build a histogram of
            # which collections have a field with the same name
            fields = workload.getReferencedFields(op)
            h = Histogram()
            for c in self.metadata_db.Collection.find():
                for f in c['fields']:
                    if f in fields:
                        h.put(c['name'])
                ## FOR
            ## FOR

            # presumably the collection name(s) with the highest count --
            # TODO confirm against Histogram.getMaxCountKeys
            matches = h.getMaxCountKeys()
            if len(matches) == 0:
                LOG.warn("No matching collection was found for corrupted operation\n%s" % pformat(op))
                continue
            if len(matches) > 1:
                LOG.warn("More than one matching collection was found for corrupted operation %s\n%s" % (matches, pformat(op)))
                continue

            # Exactly one candidate: safe to reassign.
            op["collection"] = matches[0]
            dirty = True
            self.fix_ctr += 1
            LOG.info("Fix corrupted collection in operation\n%s" % pformat(op))
        ## FOR (operations)

        if dirty:
            session.save()
def fixInvalidCollections(self):
    """Try to repair operations whose collection is the invalid marker.

    For every session containing at least one operation whose collection
    equals ``constants.INVALID_COLLECTION_MARKER``, each such operation is
    matched against the known collections by counting how many of its
    referenced fields appear in each collection's ``fields``. When exactly
    one collection is returned by ``getMaxCountKeys()`` the operation is
    reassigned to it and ``self.fix_ctr`` is incremented; otherwise a
    warning is logged and the operation is left untouched. A session is
    saved once, after all of its operations were examined, if any of them
    was changed.
    """
    searchKey = {"operations.collection": constants.INVALID_COLLECTION_MARKER}

    for session in self.metadata_db.Session.find(searchKey):
        # Track modifications per session. Resetting this flag for every
        # operation would lose a fix whenever a later operation in the same
        # session was skipped, and the session would never be saved.
        dirty = False

        for op in session["operations"]:
            if op["collection"] != constants.INVALID_COLLECTION_MARKER:
                continue

            if self.debug:
                LOG.debug("Attempting to fix corrupted Operation:\n%s" % pformat(op))

            # For each field referenced in the query, build a histogram of
            # which collections have a field with the same name
            fields = workload.getReferencedFields(op)
            h = Histogram()
            for c in self.metadata_db.Collection.find():
                for f in c["fields"]:
                    if f in fields:
                        h.put(c["name"])
                ## FOR
            ## FOR

            # presumably the collection name(s) with the highest count --
            # TODO confirm against Histogram.getMaxCountKeys
            matches = h.getMaxCountKeys()
            if len(matches) == 0:
                LOG.warn("No matching collection was found for corrupted operation\n%s" % pformat(op))
                continue
            if len(matches) > 1:
                LOG.warn(
                    "More than one matching collection was found for corrupted operation %s\n%s"
                    % (matches, pformat(op))
                )
                continue

            # Exactly one candidate: safe to reassign.
            op["collection"] = matches[0]
            dirty = True
            self.fix_ctr += 1
            LOG.info("Fix corrupted collection in operation\n%s" % pformat(op))
        ## FOR (operations)

        if dirty:
            session.save()