Exemple #1
0
    def test_unique_filter(self):
        """
        Test unique_filter filter.

        Each case pairs input rows with the rows the filter is expected
        to yield: repeated neighbours collapse into the first occurrence,
        and rows which differ only by their '_id' value are expected to be
        treated as duplicates as well.
        """
        cases = (
            # duplicate pair at the end of the stream
            ([{'k': 1}, {'r': 1}, {'r': 1}],
             [{'k': 1}, {'r': 1}]),
            # duplicate pair at the beginning of the stream
            ([{'k': 1}, {'k': 1}, {'r': 1}],
             [{'k': 1}, {'r': 1}]),
            # two rows which differ only by '_id'
            ([{'k': 1, '_id': 1}, {'k': 1, '_id': 2}, {'r': 1}],
             [{'k': 1, '_id': 1}, {'r': 1}]),
            # three rows which differ only by '_id'
            ([{'k': 1, '_id': 1}, {'k': 1, '_id': 2},
              {'k': 1, '_id': 3}, {'r': 1}],
             [{'k': 1, '_id': 1}, {'r': 1}]),
        )
        for rows, expect in cases:
            result = list(unique_filter(rows))
            self.assertEqual(result, expect)
Exemple #2
0
 def get_records(self, col, spec, fields, skeys, idx, limit, unique=False):
     """
     Generator to get records from MongoDB. It applies the sort keys and
     the idx/limit window, either on DB side (plain look-up) or after the
     unique filter (unique look-up).

     :param col: collection object which provides find method
     :param spec: query spec dictionary; it is updated in place with
                  {'$exists': True} condition for every requested field
                  which is neither a DAS internal key nor already in spec
     :param fields: list of fields to retrieve (or None)
     :param skeys: sort keys passed to cursor sort method (or None)
     :param idx: number of records to skip
     :param limit: maximum number of records to yield, 0/None means no limit
     :param unique: if True pass records through unique_filter and apply
                    idx/limit to the filtered stream
     :return: generator of records; if the query set-up fails a single
              {'exception': <message>} row is yielded instead
     """
     if  fields:
         for key in fields: # ensure that fields keys will be presented
             # use "in" operator, dict.has_key does not exist in python 3
             if  key not in self.das_internal_keys and key not in spec:
                 spec[key] = {'$exists': True}
     try:
         res = col.find(spec=spec, fields=fields)
         if  skeys:
             res = res.sort(skeys)
         if  not unique:
             # skip/limit on DB side is only valid without unique filter,
             # otherwise dropped duplicates would shrink the window
             if  idx:
                 res = res.skip(idx)
             if  limit:
                 res = res.limit(limit)
     except Exception as exp:
         print_exc(exp)
         row = {'exception': str(exp)}
         res = [] # nothing left to iterate over below
         yield row
     if  unique:
         if  limit:
             gen = itertools.islice(unique_filter(res), idx, idx+limit)
         else:
             gen = unique_filter(res)
         for row in gen:
             yield row
     else:
         for row in res:
             yield row
Exemple #3
0
 def nresults(self, dasquery, collection='merge'):
     """
     Return number of results for given query.

     :param dasquery: query object; its aggregators, mongo_query, qhash
                      and unique_filter attributes are used here
     :param collection: name of the collection to look at
     :return: number of aggregators if the query has them, number of
              records left after unique_filter for unique queries,
              otherwise the count reported by the DB cursor
     """
     if  dasquery.aggregators:
         return len(dasquery.aggregators)
     # Distinguish 2 use cases, unique filter and general query
     # in first one we should count only unique records, in later
     # we can rely on DB count() method. Please keep in mind that
     # usage of fields in find doesn't account for counting, since it
     # is a view over records found with spec, so we don't need to use it.
     col  = self.mdb[collection]
     fields, filter_cond = self.get_fields(dasquery)
     if  not fields:
         # work on a copy, otherwise the update below would leak filter
         # conditions into the spec stored in dasquery.mongo_query
         spec = dict(dasquery.mongo_query.get('spec', {}))
     else:
         spec = {'qhash':dasquery.qhash, 'das.empty_record':0}
     if  filter_cond:
         spec.update(filter_cond)
     if  dasquery.unique_filter:
         skeys = self.mongo_sort_keys(collection, dasquery)
         gen = col.find(spec=spec)
         if  skeys:
             gen = gen.sort(skeys)
         # count records on the fly, no need to keep them in memory
         res = sum(1 for _ in unique_filter(gen))
     else:
         res = col.find(spec=spec).count()
     msg = "%s" % res
     self.logger.info(msg)
     return res
Exemple #4
0
 def get_records(self, coll, spec, fields, skeys, idx, limit, unique=False):
     """
     Generator to get records from MongoDB.

     The matching records are counted first; when there is a single
     record or their number does not exceed limit, the limit is reset
     and all records are read in exhaust mode (idx is not applied in
     this case). Otherwise the idx/limit window is applied on DB side.
     If unique is set the records are passed through unique_filter.
     Any failure is reported as a single {'exception': <message>} row.
     """
     try:
         database = db_connection(self.dburi)[self.dbname]
         database.add_son_manipulator(self.das_son_manipulator)
         collection = database[coll]
         total = collection.find(spec, exhaust=True).count()
         if  total == 1 or total <= limit:
             limit = 0
         # exhaust mode is only used when no window is requested
         if  limit:
             opts = {'skip': idx, 'limit': limit}
         else:
             opts = {'exhaust': True}
         cursor = collection.find(spec=spec, fields=fields,
                 sort=skeys, **opts)
         records = unique_filter(cursor) if unique else cursor
         for record in records:
             yield record
     except Exception as exp:
         print_exc(exp)
         yield {'exception': str(exp)}
Exemple #5
0
    def test_unique_filter(self):
        """
        Test unique_filter filter.

        The filter is expected to keep the first row of every run of
        duplicates; rows which differ only by '_id' are expected to be
        considered duplicates too.
        """
        def check(rows, expect):
            "Run unique_filter over rows and compare with expected rows"
            self.assertEqual(list(unique_filter(rows)), expect)

        # duplicates at the tail
        check([{'k':1}, {'r':1}, {'r':1}],
              [{'k':1}, {'r':1}])

        # duplicates at the head
        check([{'k':1}, {'k':1}, {'r':1}],
              [{'k':1}, {'r':1}])

        # a pair of rows with different _id
        check([{'k':1, '_id':1}, {'k':1, '_id':2}, {'r':1}],
              [{'k':1, '_id':1}, {'r':1}])

        # a triple of rows with different _id
        check([{'k':1, '_id':1}, {'k':1, '_id':2}, {'k':1, '_id':3}, {'r':1}],
              [{'k':1, '_id':1}, {'r':1}])
Exemple #6
0
 def nresults(self, dasquery, collection='merge'):
     """
     Return number of results for given query.

     :param dasquery: query object; its aggregators, mongo_query, hashes,
                      qhash and unique_filter attributes are used here
     :param collection: name of the collection to look at
     :return: number of aggregators if the query has them, number of
              records left after unique_filter for unique queries,
              otherwise the count reported by the DB cursor
     """
     if dasquery.aggregators:
         return len(dasquery.aggregators)
     # Distinguish 2 use cases, unique filter and general query
     # in first one we should count only unique records, in later
     # we can rely on DB count() method. Please keep in mind that
     # usage of fields in find doesn't account for counting, since it
     # is a view over records found with spec, so we don't need to use it.
     fields, filter_cond = self.get_fields(dasquery)
     if not fields:
         # work on a copy, otherwise the update below would leak filter
         # conditions into the spec stored in dasquery.mongo_query
         spec = dict(dasquery.mongo_query.get('spec', {}))
     elif dasquery.hashes:
         spec = {
             'qhash': {
                 '$in': dasquery.hashes
             },
             'das.record': spec4data_records()
         }
     else:
         spec = {'qhash': dasquery.qhash, 'das.record': spec4data_records()}
     if filter_cond:
         spec.update(filter_cond)
     conn = db_connection(self.dburi)
     mdb = conn[self.dbname]
     mdb.add_son_manipulator(self.das_son_manipulator)
     col = mdb[collection]
     if dasquery.unique_filter:
         skeys = self.mongo_sort_keys(collection, dasquery)
         gen = col.find(spec, **PYMONGO_OPTS)
         if skeys:
             gen = gen.sort(skeys)
         # count records on the fly, no need to keep them in memory
         res = sum(1 for _ in unique_filter(gen))
     else:
         res = col.find(spec, **PYMONGO_OPTS).count()
         if not res:  # double check that this is really the case
             time.sleep(1)
             res = col.find(spec, **PYMONGO_OPTS).count()
     msg = "%s" % res
     self.logger.info(msg)
     return res
Exemple #7
0
 def nresults(self, dasquery, collection='merge'):
     """
     Return number of results for given query.

     :param dasquery: query object; its aggregators, mongo_query, hashes,
                      qhash and unique_filter attributes are used here
     :param collection: name of the collection to look at
     :return: number of aggregators if the query has them, number of
              records left after unique_filter for unique queries,
              otherwise the count reported by the DB cursor
     """
     if  dasquery.aggregators:
         return len(dasquery.aggregators)
     # Distinguish 2 use cases, unique filter and general query
     # in first one we should count only unique records, in later
     # we can rely on DB count() method. Please keep in mind that
     # usage of fields in find doesn't account for counting, since it
     # is a view over records found with spec, so we don't need to use it.
     fields, filter_cond = self.get_fields(dasquery)
     if  not fields:
         # work on a copy, otherwise the update below would leak filter
         # conditions into the spec stored in dasquery.mongo_query
         spec = dict(dasquery.mongo_query.get('spec', {}))
     elif dasquery.hashes:
         spec = {'qhash':{'$in':dasquery.hashes},
                 'das.record': spec4data_records()}
     else:
         spec = {'qhash':dasquery.qhash,
                 'das.record': spec4data_records()}
     if  filter_cond:
         spec.update(filter_cond)
     self.check_filters(collection, spec, fields)
     conn = db_connection(self.dburi)
     mdb  = conn[self.dbname]
     mdb.add_son_manipulator(self.das_son_manipulator)
     col  = mdb[collection]
     if  dasquery.unique_filter:
         skeys = self.mongo_sort_keys(collection, dasquery)
         gen = col.find(spec=spec, exhaust=True)
         if  skeys:
             gen = gen.sort(skeys)
         # count records on the fly, no need to keep them in memory
         res = sum(1 for _ in unique_filter(gen))
     else:
         res = col.find(spec=spec, exhaust=True).count()
         if  not res: # double check that this is really the case
             time.sleep(1)
             res = col.find(spec=spec, exhaust=True).count()
     msg = "%s" % res
     self.logger.info(msg)
     return res
Exemple #8
0
 def get_records(self, coll, spec, fields, skeys, idx, limit, unique=False):
     """
     Generator to get records from MongoDB.

     The matching records are counted first; when there is a single
     record or their number does not exceed limit, the limit is reset
     and all records are read with PYMONGO_OPTS (idx is not applied in
     this case). Otherwise the idx/limit window is applied on DB side.
     If unique is set the records are passed through unique_filter.
     Any failure is reported as a single {'exception': <message>} row.
     """
     try:
         database = db_connection(self.dburi)[self.dbname]
         database.add_son_manipulator(self.das_son_manipulator)
         collection = database[coll]
         total = collection.find(spec, **PYMONGO_OPTS).count()
         if total == 1 or total <= limit:
             limit = 0
         # default options are only used when no window is requested
         opts = {'skip': idx, 'limit': limit} if limit else PYMONGO_OPTS
         cursor = collection.find(spec, fields, sort=skeys, **opts)
         records = unique_filter(cursor) if unique else cursor
         for record in records:
             yield record
     except Exception as exp:
         print_exc(exp)
         yield {'exception': str(exp)}