def generateDesignCandidates(self, collections, isShardingEnabled=True, isIndexesEnabled=True, isDenormalizationEnabled=True):
    """
    Build a DesignCandidates object describing, for every collection, the
    candidate shard keys, index keys and denormalization parents.

    collections -- mapping whose values are per-collection dicts. Each dict
        is read through the keys 'name', 'interesting' (list of field names)
        and 'fields' (mapping of field name -> dict with a 'parent_col' entry).
    isShardingEnabled -- when true, every interesting field is a shard key
        candidate.
    isIndexesEnabled -- when true, every permutation of the interesting
        fields up to constants.MAX_INDEX_SIZE entries is an index candidate.
    isDenormalizationEnabled -- when true, every parent collection referenced
        by a field (and present in `collections`) is a denormalization
        candidate.

    Returns the populated DesignCandidates instance.
    Raises AssertionError if an interesting key starts with
    constants.REPLACE_KEY_DOLLAR_PREFIX.
    """
    dc = DesignCandidates()

    # Register every collection name *before* building candidates. Filling
    # this set inside the main loop would make the denormalization result
    # depend on iteration order: a parent collection visited later than its
    # child would be wrongly rejected.
    valid_collection = set()
    for col_info in collections.itervalues():
        valid_collection.add(col_info['name'])
    ## FOR

    for col_info in collections.itervalues():
        shardKeys = []
        indexKeys = []
        denorm = []

        interesting = self.__remove_heuristicaly_bad_key__(col_info, col_info['interesting'])

        # Make sure that none of our interesting fields start with
        # the character that we used to convert $ commands
        for key in interesting:
            assert not key.startswith(constants.REPLACE_KEY_DOLLAR_PREFIX), \
                "Unexpected candidate key '%s.%s'" % (col_info["name"], key)

        if constants.SKIP_MONGODB_ID_FIELD and "_id" in interesting:
            # Work on a copy so the caller's list keeps its "_id" entry
            interesting = interesting[:]
            interesting.remove("_id")

        # deal with shards
        if isShardingEnabled:
            LOG.debug("Sharding is enabled")
            shardKeys = interesting

        # deal with indexes
        if isIndexesEnabled:
            LOG.debug("Indexes is enabled")
            for size in xrange(1, len(interesting) + 1):
                if size > constants.MAX_INDEX_SIZE:
                    break
                # Order matters for compound indexes, hence permutations
                indexKeys.extend(itertools.permutations(interesting, size))
            ## FOR

        if len(indexKeys) > 10:
            LOG.warn("Too many index keys: %s", len(indexKeys))

        # deal with de-normalization
        if isDenormalizationEnabled:
            LOG.debug("Denormalization is enabled")
            for field in col_info['fields'].itervalues():
                parent = field['parent_col']
                if parent is not None and parent not in denorm and parent in valid_collection:
                    denorm.append(parent)
            ## FOR

        dc.addCollection(col_info['name'], indexKeys, shardKeys, denorm)
    ## FOR

    return dc
def generateDesignCandidates(
    self, collections, isShardingEnabled=True, isIndexesEnabled=True, isDenormalizationEnabled=True
):
    """
    Build a DesignCandidates object describing, for every collection, the
    candidate shard keys, index keys and denormalization parents.

    collections -- mapping whose values are per-collection dicts. Each dict
        is read through the keys "name", "interesting" (list of field names)
        and "fields" (mapping of field name -> dict with "cardinality",
        "query_use_count" and "parent_col" entries).
    isShardingEnabled -- when true, interesting fields are scored by
        log2(cardinality) * query_use_count, ordered best-first, and kept as
        shard key candidates if they are used by more than 10% as many
        queries as the most-used interesting field.
    isIndexesEnabled -- when true, every permutation of the interesting
        fields up to constants.MAX_INDEX_SIZE entries is an index candidate.
    isDenormalizationEnabled -- when true, every parent collection referenced
        by a field (and present in `collections`) is a denormalization
        candidate.

    Returns the populated DesignCandidates instance.
    Raises AssertionError if an interesting key starts with
    constants.REPLACE_KEY_DOLLAR_PREFIX.
    """
    dc = DesignCandidates()

    # Register every collection name first so the denormalization check
    # below does not depend on the iteration order of `collections`.
    valid_collection = set()
    for col_info in collections.itervalues():
        valid_collection.add(col_info["name"])
    ## FOR

    for col_info in collections.itervalues():
        shardKeys = []
        indexKeys = []
        denorm = []

        # NOTE: the heuristic pruning step (__remove_heuristicaly_bad_key__)
        # is disabled in this version; all interesting fields are considered.
        interesting = col_info["interesting"]

        # Make sure that none of our interesting fields start with
        # the character that we used to convert $ commands
        for key in interesting:
            assert not key.startswith(constants.REPLACE_KEY_DOLLAR_PREFIX), \
                "Unexpected candidate key '%s.%s'" % (col_info["name"], key)

        if constants.SKIP_MONGODB_ID_FIELD and "_id" in interesting:
            # Work on a copy so the caller's list keeps its "_id" entry
            interesting = interesting[:]
            interesting.remove("_id")

        # deal with shards
        if isShardingEnabled:
            LOG.debug("Sharding is enabled")
            fields = col_info["fields"]
            max_query_use_count = 1
            interesting_scores = []
            for field_name in interesting:
                cardinality = fields[field_name]["cardinality"]
                # log2 dampens very large cardinalities; non-positive
                # cardinalities fall back to a neutral weight of 1
                weight = math.log(cardinality, 2) if cardinality > 0 else 1

                query_use_count = fields[field_name]["query_use_count"]
                if query_use_count > max_query_use_count:
                    max_query_use_count = query_use_count

                interesting_scores.append((field_name, weight * query_use_count, query_use_count))
            ## FOR

            # Sort in place, best score first. (A bare sorted(...) call
            # returns a new list and would leave this one unordered.)
            interesting_scores.sort(key=itemgetter(1), reverse=True)

            # Keep only fields used by more than 10% as many queries as the
            # most frequently used interesting field
            shardKeys = [
                field_name
                for field_name, _score, use_count in interesting_scores
                if (use_count / float(max_query_use_count)) > 0.1
            ]

        # deal with indexes
        if isIndexesEnabled:
            LOG.debug("Indexes is enabled")
            for size in xrange(1, len(interesting) + 1):
                if size > constants.MAX_INDEX_SIZE:
                    break
                # Order matters for compound indexes, hence permutations
                indexKeys.extend(itertools.permutations(interesting, size))
            ## FOR

        if len(indexKeys) > 10:
            LOG.warn("Too many index keys: %s", len(indexKeys))

        # deal with de-normalization
        if isDenormalizationEnabled:
            LOG.debug("Denormalization is enabled")
            for field in col_info["fields"].itervalues():
                parent = field["parent_col"]
                if parent is not None and parent not in denorm and parent in valid_collection:
                    denorm.append(parent)
            ## FOR

        dc.addCollection(col_info["name"], indexKeys, shardKeys, denorm)
    ## FOR

    return dc