def testGetReferencedFields(self):
     """Check that workload.getReferencedFields() reports every queried key.

     Builds a single aggregate query operation that references four keys
     ('key00' .. 'key03'), each of them in ten '#gt' query entries, and
     verifies that the result is a tuple holding exactly those keys in
     sorted order.
     """
     op = {
         'collection': 'blah',
         'predicates': { },
         'query_aggregate': True,
         'query_content': [ ],
         'resp_content': [{'n': 16, 'ok': 1}],
         'type': constants.OP_TYPE_QUERY,
     }
     expected = set()
     for i in range(4):
         keyName = 'key%02d' % i
         # Reference the same key several times; it must still be
         # expected only once, hence the set.
         for ii in range(10):
             op['query_content'].append({"#query": {keyName: {"#gt": i*ii}}})
         expected.add(keyName)
         op['predicates'][keyName] = constants.PRED_TYPE_RANGE
     expected = sorted(expected)

     fields = workload.getReferencedFields(op)
     self.assertIsNotNone(fields)
     self.assertIsInstance(fields, tuple)
     # Compare lengths first so that the pairwise check below cannot
     # silently ignore missing or surplus entries.
     self.assertEqual(len(expected), len(fields))

     for expected_key, actual_key in zip(expected, fields):
         self.assertEqual(expected_key, actual_key)
 def generateCollectionHistograms(self):
     """Build one histogram of referenced field combinations per collection.

     Every operation of every session in self.workload is inspected.
     Operations whose collection name contains "$cmd" are ignored, and
     operations on a collection that is not listed in self.collections
     are skipped with a warning. For the remaining operations, every
     non-empty combination of the fields returned by
     workload.getReferencedFields(op) is put into the histogram of the
     operation's collection.

     Returns a dict mapping each name in self.collections to its Histogram.
     """
     col_keys = dict((name, Histogram()) for name in self.collections)
     for sess in self.workload:
         for op in sess["operations"]:
             col_name = op["collection"]
             if "$cmd" in col_name:
                 continue
             if col_name not in col_keys:
                 LOG.warn("Missing: " + col_name)
                 continue
             fields = workload.getReferencedFields(op)
             h = col_keys[col_name]
             # Record combinations of every size from 1 up to len(fields).
             # An explicit loop is used instead of map(): map() is lazy on
             # Python 3, so the puts would never be executed there.
             for size in range(1, len(fields) + 1):
                 for combo in itertools.combinations(fields, size):
                     h.put(combo)
         ## FOR (op)
     ## FOR (sess)
     return col_keys
Example #3
0
 def generateCollectionHistograms(self):
     """Return a per-collection histogram of referenced field combinations.

     Walks all operations of all sessions in self.workload. An operation
     is skipped when its collection name contains "$cmd", and skipped
     with a warning when its collection is not one of self.collections.
     Otherwise each non-empty combination of the fields reported by
     workload.getReferencedFields(op) is put into that collection's
     histogram.

     The result is a dict keyed by the names in self.collections, each
     mapped to its own Histogram instance.
     """
     col_keys = dict((name, Histogram()) for name in self.collections)
     for sess in self.workload:
         for op in sess["operations"]:
             target = op["collection"]
             if "$cmd" in target:
                 continue
             if target not in col_keys:
                 LOG.warn("Missing: " + target)
                 continue
             fields = workload.getReferencedFields(op)
             h = col_keys[target]
             # Iterate explicitly rather than calling map() for its side
             # effect: on Python 3 map() returns a lazy iterator and
             # h.put would never run.
             for length in range(1, len(fields) + 1):
                 for field_combo in itertools.combinations(fields, length):
                     h.put(field_combo)
         ## FOR (op)
     ## FOR (sess)
     return col_keys
Example #4
0
    def fixInvalidCollections(self):
        """Reassign operations carrying the invalid-collection marker.

        Looks up every session that has at least one operation whose
        collection equals constants.INVALID_COLLECTION_MARKER. For each
        such operation, the fields it references are compared with the
        field names of every known collection and the hits are counted
        in a histogram keyed by collection name. If
        Histogram.getMaxCountKeys() yields exactly one collection, the
        operation is assigned to it and self.fix_ctr is incremented;
        with zero or several candidates a warning is logged and the
        operation is left unchanged.

        A session is saved once, after all of its operations have been
        examined, if at least one of them was fixed.
        """
        searchKey = {
            "operations.collection": constants.INVALID_COLLECTION_MARKER
        }
        for session in self.metadata_db.Session.find(searchKey):
            # The flag belongs to the session, not to a single operation:
            # resetting it per operation would discard an earlier fix as
            # soon as a later operation is skipped, and the session would
            # never be saved.
            dirty = False
            for op in session["operations"]:
                if op["collection"] != constants.INVALID_COLLECTION_MARKER:
                    continue

                if self.debug:
                    LOG.debug("Attempting to fix corrupted Operation:\n%s" %
                              pformat(op))

                # For each field referenced in the query, build a histogram of
                # which collections have a field with the same name
                fields = workload.getReferencedFields(op)
                h = Histogram()
                for c in self.metadata_db.Collection.find():
                    for f in c['fields']:
                        if f in fields:
                            h.put(c['name'])
                    ## FOR
                ## FOR

                matches = h.getMaxCountKeys()
                if len(matches) == 0:
                    LOG.warn(
                        "No matching collection was found for corrupted operation\n%s"
                        % pformat(op))
                    continue
                if len(matches) > 1:
                    # Ambiguous: do not guess between several candidates.
                    LOG.warn(
                        "More than one matching collection was found for corrupted operation %s\n%s"
                        % (matches, pformat(op)))
                    continue

                op["collection"] = matches[0]
                dirty = True
                self.fix_ctr += 1
                LOG.info("Fix corrupted collection in operation\n%s" %
                         pformat(op))
            ## FOR (operations)

            if dirty:
                session.save()
    def fixInvalidCollections(self):
        """Repair operations whose collection is the invalid marker.

        Every session containing an operation with
        constants.INVALID_COLLECTION_MARKER as its collection is loaded.
        For each marked operation, a histogram counts, per collection
        name, how many of that collection's fields are referenced by the
        operation. When Histogram.getMaxCountKeys() returns a single
        collection, the operation is pointed at it and self.fix_ctr is
        incremented. When it returns none or more than one, a warning is
        logged and the operation stays as it is.

        Each session is saved at most once, and only if one or more of
        its operations were fixed.
        """
        searchKey = {"operations.collection": constants.INVALID_COLLECTION_MARKER}
        for session in self.metadata_db.Session.find(searchKey):
            # Initialise once per session. If this were reset for every
            # operation, a successful fix followed by any skipped
            # operation would leave the flag False and the change unsaved.
            dirty = False
            for op in session["operations"]:
                if op["collection"] != constants.INVALID_COLLECTION_MARKER:
                    continue

                if self.debug:
                    LOG.debug("Attempting to fix corrupted Operation:\n%s" % pformat(op))

                # For each field referenced in the query, build a histogram of
                # which collections have a field with the same name
                fields = workload.getReferencedFields(op)
                h = Histogram()
                for c in self.metadata_db.Collection.find():
                    for f in c["fields"]:
                        if f in fields:
                            h.put(c["name"])
                    ## FOR
                ## FOR

                matches = h.getMaxCountKeys()
                if len(matches) == 0:
                    LOG.warn("No matching collection was found for corrupted operation\n%s" % pformat(op))
                    continue
                if len(matches) > 1:
                    # Several equally good candidates: leave the operation alone.
                    LOG.warn(
                        "More than one matching collection was found for corrupted operation %s\n%s"
                        % (matches, pformat(op))
                    )
                    continue

                op["collection"] = matches[0]
                dirty = True
                self.fix_ctr += 1
                LOG.info("Fix corrupted collection in operation\n%s" % pformat(op))
            ## FOR (operations)

            if dirty:
                session.save()