def MRsimple(collection, FunMap, FunReduce=None, query=None, out=None,
             finalize=None, scope=None, sort=None, jsMode=False, verbose=1):
    """Run a simplified, generic MongoDB map/reduce on ``collection``.

    See http://docs.mongodb.org/manual/reference/method/db.collection.mapReduce/
    Sorting on the emitted fields can speed the job up, see
    http://edgystuff.tumblr.com/post/7624019777/optimizing-map-reduce-with-mongodb

    NOTE(review): another ``MRsimple`` definition is visible later in this
    file; the later one is what stays bound after import -- confirm which
    should be kept.

    :param collection: collection object exposing ``map_reduce``, ``name``
        and ``database``.
    :param FunMap: JavaScript source of the map function.
    :param FunReduce: JavaScript source of the reduce function; defaults to
        one that sums the emitted values for each key.
    :param query: filter passed to the map/reduce; defaults to ``{}``.
    :param out: output specification; defaults to ``{"replace": 'mr_tmp'}``.
        When it has more than one key it is rebuilt as an ordered ``SON``
        (action first, then the optional ``db``, then ``nonAtomic``).
    :param finalize: optional finalize function, forwarded as given.
    :param scope: variables forwarded as ``scope``; defaults to ``{}``.
    :param sort: optional sort specification, e.g. ``{"_id": 1}``; wrapped
        in ``SON`` when truthy.
    :param jsMode: forwarded to the map/reduce; should stay False when more
        than 500K distinct ids are expected.
    :param verbose: ``> 1`` prints the response, ``> 2`` also prints the
        request.
    :returns: ``(response, results)`` where ``response`` is the full
        map/reduce response and ``results`` is a list when the output is
        inline, otherwise the output collection.
    """
    # Fresh containers per call instead of shared mutable defaults.
    query = {} if query is None else query
    out = {"replace": 'mr_tmp'} if out is None else out
    scope = {} if scope is None else scope

    if len(out) > 1:
        # Key order matters for the command, hence SON.
        # MRCommand_ is defined outside this block; presumably it returns the
        # output action key ('replace', 'merge', 'reduce') -- TODO confirm.
        command = MRCommand_(out)
        out_spec = [(command, out[command])]
        # Only send 'db' when the caller supplied one; a None value would
        # otherwise be sent to the server and would also force the
        # cross-database result lookup below.
        if out.get('db') is not None:
            out_spec.append(('db', out['db']))
        # nonAtomic not allowed on replace
        out_spec.append(('nonAtomic', out.get('nonAtomic', False)))
        out = SON(out_spec)

    FunMap = Code(FunMap, {})
    if FunReduce is None:
        FunReduce = u"""function (key, values) {var total = 0; for (var i = 0; i < values.length; i++) { total += values[i]; } return total;} """
    FunReduce = Code(FunReduce, {})

    if verbose > 2:
        print("Start MRsimple collection = %s out = %s\n"
              "query = %s\nMap=\n%s\nReduce=\n%s\nFinalize=%s\nscope=%s sort=%s"
              % tuple(map(str, (collection.name, out, query, FunMap, FunReduce,
                                finalize, scope, sort))))

    if sort:
        sort = SON(sort)

    r = collection.map_reduce(FunMap, FunReduce, out=out, query=query,
                              finalize=finalize, scope=scope, sort=sort,
                              jsMode=jsMode, full_response=True)

    if verbose > 1:
        print("End MRsimple collection=%s, query=%s\nresulsts=\n %s"
              % (collection.name, str(query), str(r)))

    # out can be a dict or a SON; both support len(), `in` and iteration.
    if 'db' in out:
        # Output went to another database: the response names db and collection.
        target = r['result']
        results = collection.database.connection[target['db']][target['collection']]
    elif next(iter(out)) == 'inline':
        # Inline output: the documents come back in the response itself.
        results = r['results']
    else:
        results = collection.database[r['result']]
    return r, results
class MemoryStorage(AbstractStorage):
    """Storage that keeps its databases in an in-process ``SON`` mapping."""

    def __init__(self, repository, storage_config):
        """Initialise the base storage and start with no databases."""
        super(MemoryStorage, self).__init__(repository, storage_config)
        # Maps database name -> SON created for that database.
        self._repo = SON()

    def database_create(self, db_name):
        """Bind ``db_name`` to a new empty ``SON``, replacing any existing value."""
        self._repo[db_name] = SON()

    def database_drop(self, db_name):
        """Remove ``db_name`` from the repository; unknown names are ignored."""
        if db_name not in self._repo:
            return
        del self._repo[db_name]

    def database_list(self):
        """Return the known database names as a new list."""
        return list(self._repo)
def MRsimple(collection, FunMap, FunReduce=None, query=None, out=None,
             finalize=None, scope=None, sort=None, jsMode=False, verbose=1):
    """Run a simplified, generic MongoDB map/reduce on ``collection``.

    See http://docs.mongodb.org/manual/reference/method/db.collection.mapReduce/
    Sorting on the emitted fields can speed the job up, see
    http://edgystuff.tumblr.com/post/7624019777/optimizing-map-reduce-with-mongodb

    NOTE(review): another ``MRsimple`` definition is visible earlier in this
    file; this later one is what stays bound after import -- confirm which
    should be kept.

    :param collection: collection object exposing ``map_reduce``, ``name``
        and ``database``.
    :param FunMap: JavaScript source of the map function.
    :param FunReduce: JavaScript source of the reduce function; defaults to
        one that sums the emitted values for each key.
    :param query: filter passed to the map/reduce; defaults to ``{}``.
    :param out: output specification; defaults to ``{"replace": 'mr_tmp'}``.
        When it has more than one key it is rebuilt as an ordered ``SON``
        (action first, then the optional ``db``, then ``nonAtomic``).
    :param finalize: optional finalize function, forwarded as given.
    :param scope: variables forwarded as ``scope``; defaults to ``{}``.
    :param sort: optional sort specification, e.g. ``{"_id": 1}``; wrapped
        in ``SON`` when truthy.
    :param jsMode: forwarded to the map/reduce; should stay False when more
        than 500K distinct ids are expected.
    :param verbose: ``> 1`` prints the response, ``> 2`` also prints the
        request.
    :returns: ``(response, results)`` where ``response`` is the full
        map/reduce response and ``results`` is a list when the output is
        inline, otherwise the output collection.
    """
    # Fresh containers per call instead of shared mutable defaults.
    query = {} if query is None else query
    out = {"replace": 'mr_tmp'} if out is None else out
    scope = {} if scope is None else scope

    if len(out) > 1:
        # Key order matters for the command, hence SON.
        # MRCommand_ is defined outside this block; presumably it returns the
        # output action key ('replace', 'merge', 'reduce') -- TODO confirm.
        command = MRCommand_(out)
        out_spec = [(command, out[command])]
        # Only send 'db' when the caller supplied one; a None value would
        # otherwise be sent to the server and would also force the
        # cross-database result lookup below.
        if out.get('db') is not None:
            out_spec.append(('db', out['db']))
        # nonAtomic not allowed on replace
        out_spec.append(('nonAtomic', out.get('nonAtomic', False)))
        out = SON(out_spec)

    FunMap = Code(FunMap, {})
    if FunReduce is None:
        FunReduce = u"""function (key, values) {var total = 0; for (var i = 0; i < values.length; i++) { total += values[i]; } return total;} """
    FunReduce = Code(FunReduce, {})

    if verbose > 2:
        print("Start MRsimple collection = %s out = %s\n"
              "query = %s\nMap=\n%s\nReduce=\n%s\nFinalize=%s\nscope=%s sort=%s"
              % tuple(map(str, (collection.name, out, query, FunMap, FunReduce,
                                finalize, scope, sort))))

    if sort:
        sort = SON(sort)

    r = collection.map_reduce(FunMap, FunReduce, out=out, query=query,
                              finalize=finalize, scope=scope, sort=sort,
                              jsMode=jsMode, full_response=True)

    if verbose > 1:
        print("End MRsimple collection=%s, query=%s\nresulsts=\n %s"
              % (collection.name, str(query), str(r)))

    # out can be a dict or a SON; both support len(), `in` and iteration.
    if 'db' in out:
        # Output went to another database: the response names db and collection.
        target = r['result']
        results = collection.database.connection[target['db']][target['collection']]
    elif next(iter(out)) == 'inline':
        # Inline output: the documents come back in the response itself.
        results = r['results']
    else:
        results = collection.database[r['result']]
    return r, results