def test_unique_filter(self):
    """Test unique_filter filter"""
    # trailing duplicate record is dropped
    rows = [{'k': 1}, {'r': 1}, {'r': 1}]
    self.assertEqual(list(unique_filter(rows)), [{'k': 1}, {'r': 1}])
    # leading duplicate record is dropped
    rows = [{'k': 1}, {'k': 1}, {'r': 1}]
    self.assertEqual(list(unique_filter(rows)), [{'k': 1}, {'r': 1}])
    # records differing only by _id collapse to the first occurrence
    rows = [{'k': 1, '_id': 1}, {'k': 1, '_id': 2}, {'r': 1}]
    self.assertEqual(list(unique_filter(rows)),
                     [{'k': 1, '_id': 1}, {'r': 1}])
    # same, with a longer run of _id-only duplicates
    rows = [{'k': 1, '_id': 1}, {'k': 1, '_id': 2},
            {'k': 1, '_id': 3}, {'r': 1}]
    self.assertEqual(list(unique_filter(rows)),
                     [{'k': 1, '_id': 1}, {'r': 1}])
def get_records(self, col, spec, fields, skeys, idx, limit, unique=False):
    """
    Generator to get records from MongoDB collection *col*.

    It correctly applies sorting (skeys), pagination (idx/limit) and,
    when requested, the unique filter to the result set.

    :param col: MongoDB collection object
    :param spec: query spec dictionary (mutated: an ``$exists`` clause
        is added for every requested field not already constrained)
    :param fields: list of field names to project, or None
    :param skeys: sort keys passed to cursor.sort(), or None
    :param idx: number of records to skip
    :param limit: maximum number of records to return (0 = no limit)
    :param unique: if True, pass records through unique_filter; in that
        case skip/limit must be applied AFTER de-duplication, via islice
    """
    if fields:
        for key in fields:  # ensure that fields keys will be presented
            # NOTE: original code used dict.has_key(), which does not
            # exist in Python 3; the `in` operator is the portable form
            if key not in self.das_internal_keys and key not in spec:
                spec.update({key: {'$exists': True}})
    try:
        res = col.find(spec=spec, fields=fields)
        if skeys:
            res = res.sort(skeys)
        if not unique:
            # plain query: let MongoDB paginate
            if idx:
                res = res.skip(idx)
            if limit:
                res = res.limit(limit)
    except Exception as exp:
        print_exc(exp)
        row = {'exception': str(exp)}
        res = []  # empty result set so the loops below yield nothing more
        yield row
    if unique:
        # pagination happens after duplicates are removed, so slice the
        # de-duplicated stream rather than the raw cursor
        if limit:
            gen = itertools.islice(unique_filter(res), idx, idx + limit)
        else:
            gen = unique_filter(res)
        for row in gen:
            yield row
    else:
        for row in res:
            yield row
def nresults(self, dasquery, collection='merge'):
    """Return number of results for given query."""
    if dasquery.aggregators:
        return len(dasquery.aggregators)
    # Two use cases must be distinguished: with the unique filter we
    # have to count distinct records ourselves, while for a general
    # query MongoDB's count() is reliable.  Please keep in mind that
    # "fields" in find() is only a projection over records matched by
    # spec, so it plays no role in counting and is not passed here.
    col = self.mdb[collection]
    fields, filter_cond = self.get_fields(dasquery)
    if fields:
        spec = {'qhash': dasquery.qhash, 'das.empty_record': 0}
    else:
        spec = dasquery.mongo_query.get('spec', {})
    if filter_cond:
        spec.update(filter_cond)
    if dasquery.unique_filter:
        skeys = self.mongo_sort_keys(collection, dasquery)
        cursor = col.find(spec=spec)
        if skeys:
            cursor = cursor.sort(skeys)
        res = sum(1 for _ in unique_filter(cursor))
    else:
        res = col.find(spec=spec).count()
    self.logger.info("%s" % res)
    return res
def get_records(self, coll, spec, fields, skeys, idx, limit, unique=False):
    "Generator to get records from MongoDB."
    try:
        dbconn = db_connection(self.dburi)
        database = dbconn[self.dbname]
        database.add_son_manipulator(self.das_son_manipulator)
        collection = database[coll]
        # skip/limit is pointless when the whole result set fits into
        # a single page (or is a single record)
        total = collection.find(spec, exhaust=True).count()
        if total == 1 or total <= limit:
            limit = 0
        if limit:
            cursor = collection.find(spec=spec, fields=fields,
                                     sort=skeys, skip=idx, limit=limit)
        else:
            cursor = collection.find(spec=spec, fields=fields,
                                     sort=skeys, exhaust=True)
        records = unique_filter(cursor) if unique else cursor
        for record in records:
            yield record
    except Exception as exc:
        print_exc(exc)
        yield {'exception': str(exc)}
def test_unique_filter(self):
    """Test unique_filter filter"""
    # each case is (input rows, expected de-duplicated rows); records
    # differing only by _id collapse to the first occurrence
    cases = [
        ([{'k':1}, {'r':1}, {'r':1}],
         [{'k':1}, {'r':1}]),
        ([{'k':1}, {'k':1}, {'r':1}],
         [{'k':1}, {'r':1}]),
        ([{'k':1, '_id':1}, {'k':1, '_id':2}, {'r':1}],
         [{'k':1, '_id':1}, {'r':1}]),
        ([{'k':1, '_id':1}, {'k':1, '_id':2}, {'k':1, '_id':3}, {'r':1}],
         [{'k':1, '_id':1}, {'r':1}]),
    ]
    for rows, expect in cases:
        result = list(unique_filter(rows))
        self.assertEqual(result, expect)
def nresults(self, dasquery, collection='merge'):
    """Return number of results for given query."""
    if dasquery.aggregators:
        return len(dasquery.aggregators)
    # Two use cases must be distinguished: with the unique filter we
    # have to count distinct records ourselves, while for a general
    # query MongoDB's count() is reliable.  Please keep in mind that
    # "fields" in find() is only a projection over records matched by
    # spec, so it plays no role in counting and is not passed here.
    fields, filter_cond = self.get_fields(dasquery)
    if not fields:
        spec = dasquery.mongo_query.get('spec', {})
    elif dasquery.hashes:
        spec = {'qhash': {'$in': dasquery.hashes},
                'das.record': spec4data_records()}
    else:
        spec = {'qhash': dasquery.qhash,
                'das.record': spec4data_records()}
    if filter_cond:
        spec.update(filter_cond)
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    mdb.add_son_manipulator(self.das_son_manipulator)
    col = mdb[collection]
    if dasquery.unique_filter:
        skeys = self.mongo_sort_keys(collection, dasquery)
        cursor = col.find(spec, **PYMONGO_OPTS)
        if skeys:
            cursor = cursor.sort(skeys)
        res = sum(1 for _ in unique_filter(cursor))
    else:
        res = col.find(spec, **PYMONGO_OPTS).count()
        if not res:
            # double check that this is really the case
            time.sleep(1)
            res = col.find(spec, **PYMONGO_OPTS).count()
    self.logger.info("%s" % res)
    return res
def nresults(self, dasquery, collection='merge'):
    """Return number of results for given query."""
    if dasquery.aggregators:
        return len(dasquery.aggregators)
    # Two use cases must be distinguished: with the unique filter we
    # have to count distinct records ourselves, while for a general
    # query MongoDB's count() is reliable.  Please keep in mind that
    # "fields" in find() is only a projection over records matched by
    # spec, so it plays no role in counting and is not passed here.
    fields, filter_cond = self.get_fields(dasquery)
    if not fields:
        spec = dasquery.mongo_query.get('spec', {})
    elif dasquery.hashes:
        spec = {'qhash': {'$in': dasquery.hashes},
                'das.record': spec4data_records()}
    else:
        spec = {'qhash': dasquery.qhash,
                'das.record': spec4data_records()}
    if filter_cond:
        spec.update(filter_cond)
    self.check_filters(collection, spec, fields)
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    mdb.add_son_manipulator(self.das_son_manipulator)
    col = mdb[collection]
    if dasquery.unique_filter:
        skeys = self.mongo_sort_keys(collection, dasquery)
        cursor = col.find(spec=spec, exhaust=True)
        if skeys:
            cursor = cursor.sort(skeys)
        res = sum(1 for _ in unique_filter(cursor))
    else:
        res = col.find(spec=spec, exhaust=True).count()
        if not res:
            # double check that this is really the case
            time.sleep(1)
            res = col.find(spec=spec, exhaust=True).count()
    self.logger.info("%s" % res)
    return res
def get_records(self, coll, spec, fields, skeys, idx, limit, unique=False):
    "Generator to get records from MongoDB."
    try:
        dbconn = db_connection(self.dburi)
        database = dbconn[self.dbname]
        database.add_son_manipulator(self.das_son_manipulator)
        collection = database[coll]
        # skip/limit is pointless when the whole result set fits into
        # a single page (or is a single record)
        total = collection.find(spec, **PYMONGO_OPTS).count()
        if total == 1 or total <= limit:
            limit = 0
        if limit:
            cursor = collection.find(spec, fields,
                                     sort=skeys, skip=idx, limit=limit)
        else:
            cursor = collection.find(spec, fields,
                                     sort=skeys, **PYMONGO_OPTS)
        records = unique_filter(cursor) if unique else cursor
        for record in records:
            yield record
    except Exception as exc:
        print_exc(exc)
        yield {'exception': str(exc)}