def translate(notations, api, rec):
    """
    Translate given row to DAS notations according to provided
    notations and api. Each entry in notations list is a form of

    .. doctest::

        {"notation":"site.resource.name", "map":"site.name", "api":""}
    """
    for row in notations:
        count = 0
        notation = row['notation']
        dasmap = row['map']
        api2use = row['api']
        if not api2use or api2use == api:
            record = dict(rec)
            rows = access(rec, notation)
            keys = dasmap.split(".")
            keys.reverse()
            for val in rows:
                item, newval = convert_dot_notation(dasmap, val)
                recval = record[item]
                if isinstance(recval, dict):
                    recval.update(newval)
                else:
                    record[item] = newval
                count += 1
                delete_elem(record, notation)
                yield record
        if not count:
            yield rec
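
# --- Illustrative sketch (not part of DAS) ------------------------------
# translate() relies on DAS utility helpers such as access(), delete_elem()
# and convert_dot_notation(). The stand-in below only illustrates the
# behaviour assumed of convert_dot_notation by the call sites above: a
# compound dot-notation key is folded into a nested dict keyed by its
# first component.

def convert_dot_notation_sketch(key, val):
    "Turn ('a.b.c', val) into ('a', {'b': {'c': val}})"
    parts = key.split('.')
    for part in reversed(parts[1:]):
        val = {part: val}
    return parts[0], val

# Example:
#   convert_dot_notation_sketch('site.name', 'T1_US_FNAL')
#   -> ('site', {'name': 'T1_US_FNAL'})
#   convert_dot_notation_sketch('block.replica.name', '/a/b#1')
#   -> ('block', {'replica': {'name': '/a/b#1'}})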
def row2das(mapper, system, api, row):
    """
    Transform keys of row into DAS notations, e.g. bytes to size.
    If a compound key is found, e.g. block.replica.name, it is converted
    into an appropriate dict, e.g. {'block': {'replica': {'name': val}}}
    """
    if not isinstance(row, dict):
        return
    for key in list(row.keys()):  # snapshot keys, row is modified below
        newkey = mapper(system, key, api)
        val = row[key]
        if newkey != key:
            row.pop(key)
            nkey, nval = convert_dot_notation(newkey, val)
            row.update({nkey: nval})
        if isinstance(val, dict):
            row2das(mapper, system, api, val)
        elif isinstance(val, list):
            for item in val:
                if isinstance(item, dict):
                    row2das(mapper, system, api, item)
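
# --- Illustrative sketch (not part of DAS) ------------------------------
# Hedged usage example for row2das(). The mapper callable is assumed to
# have the signature mapper(system, key, api) -> new key (returning the
# key unchanged when no notation applies); the toy mapping below is
# invented for illustration and relies on convert_dot_notation behaving
# as in the sketch above.

def _toy_mapper(system, key, api):
    "Map raw service keys to DAS notations (invented mapping)"
    notations = {'bytes': 'size', 'se': 'block.replica.name'}
    return notations.get(key, key)

# Example (within the module context):
#   row = {'bytes': 1024, 'se': 'srm.example.org', 'name': '/a/b/c'}
#   row2das(_toy_mapper, 'phedex', 'blockReplicas', row)
#   row is now:
#     {'name': '/a/b/c', 'size': 1024,
#      'block': {'replica': {'name': 'srm.example.org'}}}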
def merge_records(self, dasquery):
    """
    Merge DAS records for provided query. We perform the following steps:

    1. get all queries from das.cache by ordering them by primary key
    2. run aggregator function to merge neighbors
    3. insert records into das.merge
    """
    self.logger.debug(dasquery)
    id_list = []
    expire = 9999999999 # future
    # get all API records for given DAS query
    spec = {'qhash': dasquery.qhash, 'query': {'$exists': True}}
    records = self.col.find(spec)
    for row in records:
        # find smallest expire timestamp to be used by aggregator
        if row['das']['expire'] < expire:
            expire = row['das']['expire']
        if row['_id'] not in id_list:
            id_list.append(row['_id'])
    inserted = 0
    lookup_keys = set()
    fields = dasquery.mongo_query.get('fields')
    if not fields: # Mongo
        fields = []
    for key in fields:
        for pkey in self.mapping.mapkeys(key):
            lookup_keys.add(pkey)
    for pkey in lookup_keys:
        skey = [(pkey, DESCENDING)]
        # lookup all service records
        spec = {'das_id': {'$in': id_list}, 'das.primary_key': pkey}
        if self.verbose:
            nrec = self.col.find(spec).sort(skey).count()
            msg = "merging %s records, for %s key" % (nrec, pkey)
        else:
            msg = "merging records, for %s key" % pkey
        self.logger.debug(msg)
        records = self.col.find(spec).sort(skey)
        # aggregate all records
        agen = aggregator(dasquery, records, expire)
        # diff aggregated records
        gen = das_diff(agen, self.mapping.diff_keys(pkey.split('.')[0]))
        # insert all records into das.merge using bulk insert
        size = self.cache_size
        try:
            while True:
                nres = self.merge.insert(
                    itertools.islice(gen, size), safe=True)
                if nres and isinstance(nres, list):
                    inserted += len(nres)
                else:
                    break
        except InvalidDocument as exp:
            msg = "Caught bson error: " + str(exp)
            self.logger.info(msg)
            records = self.col.find(spec).sort(skey)
            gen = aggregator(dasquery, records, expire)
            genrows = parse2gridfs(self.gfs, pkey, gen, self.logger)
            das_dict = {'das': {'expire': expire, 'empty_record': 0,
                                'primary_key': [k for k in lookup_keys],
                                'system': ['gridfs']},
                        'qhash': dasquery.qhash,
                        'cache_id': [], 'das_id': id_list}
            for row in genrows:
                row.update(das_dict)
                self.merge.insert(row, safe=True)
        except InvalidOperation:
            pass
    if inserted:
        self.logdb.insert('merge', {'insert': inserted})
    elif not lookup_keys: # we got a query w/o fields
        pass
    else: # we didn't merge anything, it is a DB look-up failure
        empty_expire = time.time() + 20 # secs, short enough to expire
        empty_record = {'das': {'expire': empty_expire,
                                'primary_key': list(lookup_keys),
                                'empty_record': 1},
                        'cache_id': [], 'das_id': id_list}
        for key, val in dasquery.mongo_query['spec'].iteritems():
            if key.find('.') == -1:
                empty_record[key] = []
            else: # it is a compound key, e.g. site.name
                newkey, newval = convert_dot_notation(key, val)
                empty_record[newkey] = adjust_mongo_keyvalue(newval)
        self.merge.insert(empty_record, safe=True)
        # update DAS records (both meta and data ones, by using qhash)
        nval = {'$set': {'das.expire': empty_expire}}
        spec = {'qhash': dasquery.qhash}
        self.col.update(spec, nval, multi=True, safe=True)
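
# --- Illustrative sketch (not part of DAS) ------------------------------
# The try/while loop above feeds das.merge in chunks of self.cache_size
# documents by handing itertools.islice(gen, size) to the pymongo 2.x
# insert() call, which (as the code relies on) accepts an iterable of
# documents and returns the list of inserted _ids; an empty result signals
# that the generator is exhausted. A standalone sketch of the same
# chunking pattern, with a plain list standing in for the collection:

import itertools

def insert_in_chunks(gen, size, sink):
    "Drain gen in chunks of `size` items, appending each chunk to sink"
    while True:
        chunk = list(itertools.islice(gen, size))
        if not chunk:
            break
        sink.extend(chunk)

# Example:
#   sink = []
#   insert_in_chunks(({'idx': i} for i in range(10)), 3, sink)
#   len(sink) -> 10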
def merge_records(self, dasquery, attempt=0):
    """
    Merge DAS records for provided query. We perform the following steps:

    1. get all queries from das.cache by ordering them by primary key
    2. run aggregator function to merge neighbors
    3. insert records into das.merge
    """
    ### TMP for asyncio
    # time.sleep(attempt+3) # pymongo 3.2 doesn't yet flush in time
    # remove any entries in merge collection for this query
    self.merge.delete_many({'qhash': dasquery.qhash})
    # proceed
    self.logger.debug(dasquery)
    id_list = []
    expire = 9999999999 # future
    # get all API records for given DAS query
    spec = {'qhash': dasquery.qhash,
            'das.expire': {'$gt': time.time()},
            'das.record': record_codes('query_record')}
    records = self.col.find(spec, **PYMONGO_OPTS)
    for row in records:
        # find smallest expire timestamp to be used by aggregator
        rexpire = row.get('das', {}).get('expire', expire)
        if rexpire < expire:
            expire = rexpire
        if row['_id'] not in id_list:
            id_list.append(row['_id'])
    inserted = 0
    lookup_keys = set()
    fields = dasquery.mongo_query.get('fields')
    if not fields: # Mongo
        fields = []
    for key in fields:
        for pkey in self.mapping.mapkeys(key):
            lookup_keys.add(pkey)
    for pkey in lookup_keys:
        skey = [(pkey, DESCENDING)]
        # lookup all service records
        spec = {'das_id': {'$in': id_list}, 'das.primary_key': pkey}
        if self.verbose:
            nrec = self.col.find(spec, **PYMONGO_OPTS).sort(skey).count()
            msg = "merging %s records, for %s key" % (nrec, pkey)
        else:
            msg = "merging records, for %s key" % pkey
        self.logger.debug(msg)
        # use exhaust=False since we process all records in aggregator
        # and there can be a delay in processing
        records = self.col.find(spec, **PYMONGO_NOEXHAUST).sort(skey)
        # aggregate all records
        agen = aggregator(dasquery, records, expire)
        # diff aggregated records
        gen = das_diff(agen, self.mapping.diff_keys(pkey.split('.')[0]))
        # insert all records into das.merge using bulk insert
        size = self.cache_size
        try:
            res = self.merge.insert_many(gen)
            inserted += len(res.inserted_ids)
        except InvalidDocument as exp:
            print(dastimestamp('DAS WARNING'),
                  'InvalidDocument during merge', str(exp))
            msg = "Caught bson error: " + str(exp)
            self.logger.info(msg)
            records = self.col.find(spec, **PYMONGO_OPTS).sort(skey)
            gen = aggregator(dasquery, records, expire)
            genrows = parse2gridfs(self.gfs, pkey, gen, self.logger)
            das_dict = {'das': {'expire': expire,
                                'das.record': record_codes('gridfs_record'),
                                'primary_key': [k for k in lookup_keys],
                                'system': ['gridfs']},
                        'qhash': dasquery.qhash,
                        'cache_id': [], 'das_id': id_list}
            for row in genrows:
                row.update(das_dict)
                self.merge.insert(row)
        except InvalidOperation as exp:
            pass
        except DuplicateKeyError as err:
            print(dastimestamp('DAS WARNING'),
                  'DuplicateKeyError during merge')
            if not isinstance(gen, list):
                raise err
    status = 'fail'
    if inserted:
        status = 'ok'
    elif not lookup_keys: # we got a query w/o fields
        msg = 'qhash %s, no lookup_keys' % dasquery.qhash
        print(dastimestamp('DAS WARNING'), msg)
        status = 'ok'
    else: # we didn't merge anything, it is a DB look-up failure
        msg = 'qhash %s, did not insert into das.merge, attempt %s' \
                % (dasquery.qhash, attempt)
        print(dastimestamp('DAS WARNING'), msg)
        empty_expire = etstamp()
        lkeys = list(lookup_keys)
        das = dict(expire=empty_expire, primary_key=lkeys[0],
                   condition_keys=lkeys,
                   instance=dasquery.instance,
                   system=['das'], services=dasquery.services,
                   record=record_codes('empty_record'),
                   ts=time.time(), api=[])
        empty_record = {'das': das, 'qhash': dasquery.qhash,
                        'cache_id': [], 'das_id': id_list}
        for key in lkeys:
            empty_record.update({key.split('.')[0]: []})
        for key, val in dasquery.mongo_query['spec'].items():
            if key.find('.') == -1:
                empty_record[key] = []
            else: # it is a compound key, e.g. site.name
                newkey, newval = convert_dot_notation(key, val)
                empty_record[newkey] = adjust_mongo_keyvalue(newval)
        self.merge.insert(empty_record)
        # update DAS records (both meta and data ones, by using qhash)
        nval = {'$set': {'das.expire': empty_expire}}
        spec = {'qhash': dasquery.qhash}
        self.col.update_many(spec, nval)
    return status
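
# --- Illustrative sketch (not part of DAS) ------------------------------
# Minimal pymongo 3 sketch of the "clear then bulk insert" pattern used by
# the newer merge_records(): stale merge results for a query hash are
# removed with delete_many() and the freshly aggregated records are written
# with insert_many(). The connection URI, database and collection names
# below are invented for illustration; the error handling mirrors the
# InvalidDocument/DuplicateKeyError branches only in spirit.

from bson.errors import InvalidDocument
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError

def replace_merge_records(qhash, records, uri='mongodb://localhost:27017'):
    "Replace merged records for given query hash (assumes local MongoDB)"
    coll = MongoClient(uri)['das_sketch']['merge']
    coll.delete_many({'qhash': qhash})      # drop stale merge results
    try:
        res = coll.insert_many(records)     # bulk insert aggregated rows
        return len(res.inserted_ids)
    except (InvalidDocument, DuplicateKeyError) as exp:
        print('merge insert failed:', exp)
        return 0

# Example:
#   replace_merge_records('deadbeef',
#       [{'qhash': 'deadbeef', 'dataset': {'name': '/a/b/c'}}])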