Code example #1
File: utils.py    Project: zdenekmaxa/DAS
def translate(notations, api, rec):
    """
    Translate given row to DAS notations according to provided notations
    and api. Each entry in notations list is a form of

    .. doctest::

        {"notation":"site.resource.name", "map":"site.name", "api":""}
    """
    for row in notations:
        count    = 0
        notation = row['notation']
        dasmap   = row['map']
        api2use  = row['api']
        if  not api2use or api2use == api:
            record = dict(rec)
            rows = access(rec, notation)
            keys = dasmap.split(".")
            keys.reverse()
            for val in rows:
                item, newval = convert_dot_notation(dasmap, val)
                recval = record[item]
                if  isinstance(recval, dict):
                    recval.update(newval)
                else: 
                    record[item] = newval
                count += 1
                delete_elem(record, notation)
            yield record
        if  not count:
            yield rec
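
The helpers access, convert_dot_notation, and delete_elem that translate relies on are not shown in this listing. As a rough, standalone illustration of the renaming a single notation entry describes (a provider key "site.resource.name" mapped to the DAS key "site.name"), the sketch below uses a hypothetical rename_key helper and sample data in place of the real DAS utilities:

# Minimal sketch (assumption): one notation entry moves a value from the
# provider-specific key path to the DAS key path within a record.
notations = [
    {"notation": "site.resource.name", "map": "site.name", "api": ""},
]
record = {"site": {"resource": {"name": "T2_XY_Site"}}}

def rename_key(rec, notation, dasmap):
    """Hypothetical stand-in for access/convert_dot_notation/delete_elem."""
    src, dst = notation.split("."), dasmap.split(".")
    node = rec
    for part in src[:-1]:
        node = node[part]
    value = node.pop(src[-1])        # drop the provider-specific leaf
    target = rec
    for part in dst[:-1]:
        target = target.setdefault(part, {})
    target[dst[-1]] = value          # attach it under the DAS key
    return rec

for row in notations:
    print(rename_key(record, row["notation"], row["map"]))
# -> {'site': {'resource': {}, 'name': 'T2_XY_Site'}}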
Code example #2
File: utils.py    Project: zdenekmaxa/DAS
def row2das(mapper, system, api, row):
    """
    Transform keys of row into DAS notations, e.g. bytes to size
    If compound key found, e.g. block.replica.name, it will
    be converted into appropriate dict, e.g. {'block':{'replica':{'name':val}}
    """
    if  not isinstance(row, dict):
        return
    for key in list(row.keys()):  # copy keys: row is mutated below
        newkey = mapper(system, key, api)
        val = row[key]
        if  newkey != key:
            row.pop(key)
            nkey, nval = convert_dot_notation(newkey, val)
            row.update({nkey:nval})
        if  isinstance(val, dict):
            row2das(mapper, system, api, val)
        elif isinstance(val, list):
            for item in val:
                if  isinstance(item, dict):
                    row2das(mapper, system, api, item)
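
convert_dot_notation itself is not part of this listing; from the docstring above, its contract appears to be: split a dotted key and return the top-level key together with a nested dict wrapping the value, which row2das then merges back into the record. A minimal sketch under that assumption:

def convert_dot_notation(key, val):
    """Sketch (assumed behavior): 'block.replica.name', v ->
    ('block', {'replica': {'name': v}})."""
    parts = key.split(".")
    nested = val
    for part in reversed(parts[1:]):   # wrap from the innermost key outwards
        nested = {part: nested}
    return parts[0], nested

print(convert_dot_notation("block.replica.name", 123))
# -> ('block', {'replica': {'name': 123}})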
Code example #3
File: das_mongocache.py    Project: zdenekmaxa/DAS
    def merge_records(self, dasquery):
        """
        Merge DAS records for the provided query. We perform the following
        steps:
        1. get all queries from das.cache, ordered by primary key
        2. run the aggregator function to merge neighbors
        3. insert records into das.merge
        """
        self.logger.debug(dasquery)
        id_list = []
        expire  = 9999999999 # future
        # get all API records for given DAS query
        spec    = {'qhash':dasquery.qhash, 'query':{'$exists':True}}
        records = self.col.find(spec)
        for row in records:
            # find smallest expire timestamp to be used by aggregator
            if  row['das']['expire'] < expire:
                expire = row['das']['expire']
            if  row['_id'] not in id_list:
                id_list.append(row['_id'])
        inserted = 0
        lookup_keys = set()
        fields = dasquery.mongo_query.get('fields')
        if  not fields: # Mongo
            fields = []
        for key in fields:
            for pkey in self.mapping.mapkeys(key):
                lookup_keys.add(pkey)
        for pkey in lookup_keys:
            skey = [(pkey, DESCENDING)]
            # lookup all service records
            spec = {'das_id': {'$in': id_list}, 'das.primary_key': pkey}
            if  self.verbose:
                nrec = self.col.find(spec).sort(skey).count()
                msg  = "merging %s records, for %s key" % (nrec, pkey)
            else:
                msg  = "merging records, for %s key" % pkey
            self.logger.debug(msg)
            records = self.col.find(spec).sort(skey)
            # aggregate all records
            agen = aggregator(dasquery, records, expire)
            # diff aggregated records
            gen  = das_diff(agen, self.mapping.diff_keys(pkey.split('.')[0]))
            # insert all records into das.merge using bulk insert
            size = self.cache_size
            try:
                while True:
                    nres = self.merge.insert(
                        itertools.islice(gen, size), safe=True)
                    if  nres and isinstance(nres, list):
                        inserted += len(nres)
                    else:
                        break
            except InvalidDocument as exp:
                msg = "Caught bson error: " + str(exp)
                self.logger.info(msg)
                records = self.col.find(spec).sort(skey)
                gen = aggregator(dasquery, records, expire)
                genrows = parse2gridfs(self.gfs, pkey, gen, self.logger)
                das_dict = {'das':{'expire':expire, 'empty_record': 0,
                        'primary_key':[k for k in lookup_keys],
                        'system': ['gridfs']}, 'qhash':dasquery.qhash,
                        'cache_id':[], 'das_id': id_list}
                for row in genrows:
                    row.update(das_dict)
                    self.merge.insert(row, safe=True)
            except InvalidOperation:
                pass
        if  inserted:
            self.logdb.insert('merge', {'insert': inserted})
        elif  not lookup_keys: # we get query w/o fields
            pass
        else: # we didn't merge anything, it is DB look-up failure
            empty_expire = time.time() + 20 # secs, short enough to expire
            empty_record = {'das':{'expire':empty_expire,
                                   'primary_key':list(lookup_keys),
                                   'empty_record': 1},
                            'cache_id':[], 'das_id': id_list}
            for key, val in dasquery.mongo_query['spec'].iteritems():
                if  key.find('.') == -1:
                    empty_record[key] = []
                else: # it is compound key, e.g. site.name
                    newkey, newval = convert_dot_notation(key, val)
                    empty_record[newkey] = adjust_mongo_keyvalue(newval)
            self.merge.insert(empty_record, safe=True)
            # update DAS records (both meta and data ones, by using qhash)
            nval = {'$set': {'das.expire':empty_expire}}
            spec = {'qhash':dasquery.qhash}
            self.col.update(spec, nval, multi=True, safe=True)
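
The try block above drains the das_diff generator in fixed-size batches: itertools.islice(gen, size) hands the (pymongo 2.x) insert call at most cache_size documents at a time, and the loop stops once an insert returns nothing because the generator is exhausted. A standalone sketch of that batching pattern, with a plain list standing in for the das.merge collection and a hypothetical insert_in_batches helper:

import itertools

def insert_in_batches(gen, size, sink):
    """Sketch of the islice-based bulk insert; sink stands in for das.merge."""
    inserted = 0
    while True:
        batch = list(itertools.islice(gen, size))   # next <= size documents
        if not batch:                               # generator exhausted
            break
        sink.extend(batch)                          # merge.insert(batch) in DAS
        inserted += len(batch)
    return inserted

sink = []
docs = ({'qhash': 'abc123', 'n': i} for i in range(10))
print(insert_in_batches(docs, 4, sink))   # -> 10, inserted as batches of 4 + 4 + 2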
Code example #4
File: das_mongocache.py    Project: dmwm/DAS
    def merge_records(self, dasquery, attempt=0):
        """
        Merge DAS records for the provided query. We perform the following
        steps:
        1. get all queries from das.cache, ordered by primary key
        2. run the aggregator function to merge neighbors
        3. insert records into das.merge
        """
        ### TMP for asyncio
#         time.sleep(attempt+3) # pymongo 3.2 don't yet flush in time

        # remove any entries in merge collection for this query
        self.merge.delete_many({'qhash':dasquery.qhash})
        # proceed
        self.logger.debug(dasquery)
        id_list = []
        expire  = 9999999999 # future
        # get all API records for given DAS query
        spec    = {'qhash':dasquery.qhash,
                   'das.expire':{'$gt':time.time()},
                   'das.record':record_codes('query_record')}
        records = self.col.find(spec, **PYMONGO_OPTS)
        for row in records:
            # find smallest expire timestamp to be used by aggregator
            rexpire = row.get('das', {}).get('expire', expire)
            if  rexpire < expire:
                expire = rexpire
            if  row['_id'] not in id_list:
                id_list.append(row['_id'])
        inserted = 0
        lookup_keys = set()
        fields = dasquery.mongo_query.get('fields')
        if  not fields: # Mongo
            fields = []
        for key in fields:
            for pkey in self.mapping.mapkeys(key):
                lookup_keys.add(pkey)
        for pkey in lookup_keys:
            skey = [(pkey, DESCENDING)]
            # lookup all service records
            spec = {'das_id': {'$in': id_list}, 'das.primary_key': pkey}
            if  self.verbose:
                nrec = self.col.find(spec, **PYMONGO_OPTS).sort(skey).count()
                msg  = "merging %s records, for %s key" % (nrec, pkey)
            else:
                msg  = "merging records, for %s key" % pkey
            self.logger.debug(msg)
            # use exhaust=False since we process all records in the aggregator
            # and there can be a delay in processing
            records = self.col.find(spec, **PYMONGO_NOEXHAUST).sort(skey)
            # aggregate all records
            agen = aggregator(dasquery, records, expire)
            # diff aggregated records
            gen = das_diff(agen, self.mapping.diff_keys(pkey.split('.')[0]))
            # insert all records into das.merge using bulk insert
            size = self.cache_size
            try:
                res = self.merge.insert_many(gen)
                inserted += len(res.inserted_ids)
            except InvalidDocument as exp:
                print(dastimestamp('DAS WARNING'), 'InvalidDocument during merge', str(exp))
                msg = "Caught bson error: " + str(exp)
                self.logger.info(msg)
                records = self.col.find(spec, **PYMONGO_OPTS).sort(skey)
                gen = aggregator(dasquery, records, expire)
                genrows = parse2gridfs(self.gfs, pkey, gen, self.logger)
                das_dict = {'das':{'expire':expire,
                        'das.record': record_codes('gridfs_record'),
                        'primary_key':[k for k in lookup_keys],
                        'system': ['gridfs']}, 'qhash':dasquery.qhash,
                        'cache_id':[], 'das_id': id_list}
                for row in genrows:
                    row.update(das_dict)
                    self.merge.insert(row)
            except InvalidOperation as exp:
                pass
            except DuplicateKeyError as err:
                print(dastimestamp('DAS WARNING'), 'DuplicateKeyError during merge')
                if  not isinstance(gen, list):
                    raise err
        status = 'fail'
        if  inserted:
            status = 'ok'
        elif  not lookup_keys: # we get query w/o fields
            msg = 'qhash %s, no lookup_keys' % dasquery.qhash
            print(dastimestamp('DAS WARNING'), msg)
            status = 'ok'
        else: # we didn't merge anything, it is DB look-up failure
            msg  = 'qhash %s, did not insert into das.merge, attempt %s' \
                    % (dasquery.qhash, attempt)
            print(dastimestamp('DAS WARNING'), msg)
            empty_expire = etstamp()
            lkeys = list(lookup_keys)
            das = dict(expire=empty_expire, primary_key=lkeys[0],
                       condition_keys=lkeys,
                       instance=dasquery.instance,
                       system=['das'], services=dasquery.services,
                       record=record_codes('empty_record'),
                       ts=time.time(), api=[])
            empty_record = {'das':das, 'qhash': dasquery.qhash,
                            'cache_id':[], 'das_id': id_list}
            for key in lkeys:
                empty_record.update({key.split('.')[0]:[]})
            for key, val in dasquery.mongo_query['spec'].items():
                if  key.find('.') == -1:
                    empty_record[key] = []
                else: # it is compound key, e.g. site.name
                    newkey, newval = convert_dot_notation(key, val)
                    empty_record[newkey] = adjust_mongo_keyvalue(newval)
            self.merge.insert(empty_record)
            # update DAS records (both meta and data ones, by using qhash)
            nval = {'$set': {'das.expire':empty_expire}}
            spec = {'qhash':dasquery.qhash}
            self.col.update_many(spec, nval)
        return status
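
Relative to the older version above, this one targets the pymongo 3 API: insert_many returns a result whose inserted_ids are counted directly, and delete_many/update_many replace the update(multi=True, safe=True) style calls, with write acknowledgement handled by the collection's write concern instead of safe=True. Before merging, the method also scans the matching, non-expired query records and keeps the smallest das.expire so the merged documents never outlive the shortest-lived API result. A small standalone sketch of that scan over hypothetical cache rows:

import time

# Hypothetical cache rows; only _id and das.expire matter for this scan.
rows = [
    {'_id': 1, 'das': {'expire': time.time() + 600}},
    {'_id': 2, 'das': {'expire': time.time() + 60}},   # shortest-lived API result
    {'_id': 3, 'das': {}},                             # no expire recorded
]

expire = 9999999999   # far-future sentinel, as in the code above
id_list = []
for row in rows:
    rexpire = row.get('das', {}).get('expire', expire)
    if rexpire < expire:
        expire = rexpire
    if row['_id'] not in id_list:
        id_list.append(row['_id'])

print(id_list)                       # -> [1, 2, 3]
print(expire <= time.time() + 60)    # True: merge output expires with row 2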
Code example #5
    def merge_records(self, dasquery, attempt=0):
        """
        Merge DAS records for the provided query. We perform the following
        steps:
        1. get all queries from das.cache, ordered by primary key
        2. run the aggregator function to merge neighbors
        3. insert records into das.merge
        """
        ### TMP for asyncio
        #         time.sleep(attempt+3) # pymongo 3.2 don't yet flush in time

        # remove any entries in merge collection for this query
        self.merge.delete_many({'qhash': dasquery.qhash})
        # proceed
        self.logger.debug(dasquery)
        id_list = []
        expire = 9999999999  # future
        # get all API records for given DAS query
        spec = {
            'qhash': dasquery.qhash,
            'das.expire': {
                '$gt': time.time()
            },
            'das.record': record_codes('query_record')
        }
        records = self.col.find(spec, **PYMONGO_OPTS)
        for row in records:
            # find smallest expire timestamp to be used by aggregator
            rexpire = row.get('das', {}).get('expire', expire)
            if rexpire < expire:
                expire = rexpire
            if row['_id'] not in id_list:
                id_list.append(row['_id'])
        inserted = 0
        lookup_keys = set()
        fields = dasquery.mongo_query.get('fields')
        if not fields:  # Mongo
            fields = []
        for key in fields:
            for pkey in self.mapping.mapkeys(key):
                lookup_keys.add(pkey)
        for pkey in lookup_keys:
            skey = [(pkey, DESCENDING)]
            # lookup all service records
            spec = {'das_id': {'$in': id_list}, 'das.primary_key': pkey}
            if self.verbose:
                nrec = self.col.find(spec, **PYMONGO_OPTS).sort(skey).count()
                msg = "merging %s records, for %s key" % (nrec, pkey)
            else:
                msg = "merging records, for %s key" % pkey
            self.logger.debug(msg)
            # use exhaust=False since we process all records in the aggregator
            # and there can be a delay in processing
            records = self.col.find(spec, **PYMONGO_NOEXHAUST).sort(skey)
            # aggregate all records
            agen = aggregator(dasquery, records, expire)
            # diff aggregated records
            gen = das_diff(agen, self.mapping.diff_keys(pkey.split('.')[0]))
            # insert all records into das.merge using bulk insert
            size = self.cache_size
            try:
                res = self.merge.insert_many(gen)
                inserted += len(res.inserted_ids)
            except InvalidDocument as exp:
                print(dastimestamp('DAS WARNING'),
                      'InvalidDocument during merge', str(exp))
                msg = "Caught bson error: " + str(exp)
                self.logger.info(msg)
                records = self.col.find(spec, **PYMONGO_OPTS).sort(skey)
                gen = aggregator(dasquery, records, expire)
                genrows = parse2gridfs(self.gfs, pkey, gen, self.logger)
                das_dict = {
                    'das': {
                        'expire': expire,
                        'das.record': record_codes('gridfs_record'),
                        'primary_key': [k for k in lookup_keys],
                        'system': ['gridfs']
                    },
                    'qhash': dasquery.qhash,
                    'cache_id': [],
                    'das_id': id_list
                }
                for row in genrows:
                    row.update(das_dict)
                    self.merge.insert(row)
            except InvalidOperation as exp:
                pass
            except DuplicateKeyError as err:
                print(dastimestamp('DAS WARNING'),
                      'DuplicateKeyError during merge')
                if not isinstance(gen, list):
                    raise err
        status = 'fail'
        if inserted:
            status = 'ok'
        elif not lookup_keys:  # we get query w/o fields
            msg = 'qhash %s, no lookup_keys' % dasquery.qhash
            print(dastimestamp('DAS WARNING'), msg)
            status = 'ok'
        else:  # we didn't merge anything, it is DB look-up failure
            msg  = 'qhash %s, did not insert into das.merge, attempt %s' \
                    % (dasquery.qhash, attempt)
            print(dastimestamp('DAS WARNING'), msg)
            empty_expire = etstamp()
            lkeys = list(lookup_keys)
            das = dict(expire=empty_expire,
                       primary_key=lkeys[0],
                       condition_keys=lkeys,
                       instance=dasquery.instance,
                       system=['das'],
                       services=dasquery.services,
                       record=record_codes('empty_record'),
                       ts=time.time(),
                       api=[])
            empty_record = {
                'das': das,
                'qhash': dasquery.qhash,
                'cache_id': [],
                'das_id': id_list
            }
            for key in lkeys:
                empty_record.update({key.split('.')[0]: []})
            for key, val in dasquery.mongo_query['spec'].items():
                if key.find('.') == -1:
                    empty_record[key] = []
                else:  # it is compound key, e.g. site.name
                    newkey, newval = convert_dot_notation(key, val)
                    empty_record[newkey] = adjust_mongo_keyvalue(newval)
            self.merge.insert(empty_record)
            # update DAS records (both meta and data ones, by using qhash)
            nval = {'$set': {'das.expire': empty_expire}}
            spec = {'qhash': dasquery.qhash}
            self.col.update_many(spec, nval)
        return status