def add_members(self, system, urn, members):
    """
    Add a list of data members for a given API (system, urn, url)
    and generate their stems, which are stored as separate records.
    """
    msg = "system=%s, urn=%s, members=%s" % (system, urn, members)
    self.logger.info(msg)

    result = self.col.find_one({'system': system, 'urn': urn})
    if result:
        self.col.update({'_id': result['_id']},
                        {'$addToSet': {'members': {'$each': members}}})
    else:
        keys = self.mapping.api2daskey(system, urn)
        self.col.insert({'system': system, 'urn': urn,
                         'keys': keys, 'members': members})

    for member in members:
        if not self.col.find_one({'member': member}):
            self.col.insert({'member': member, 'stems': self.stem(member)})

    index_list = [('system', 1), ('urn', 1), ('members', 1), ('stems', 1)]
    create_indexes(self.col, index_list)
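# Minimal standalone sketch (not part of the DAS code base; db/collection
# names are illustrative) showing the two record layouts add_members writes,
# using pymongo directly against a throw-away collection. member.split('.')
# stands in for self.stem(member), whose exact output depends on the stemmer.
from pymongo import MongoClient

def demo_add_members(uri='mongodb://localhost:27017'):
    col = MongoClient(uri)['das_demo']['keylearning']
    system, urn, members = 'dbs', 'listBlocks', ['block.name', 'block.size']
    # per-API record: which data members a given (system, urn) API returned
    col.insert_one({'system': system, 'urn': urn, 'members': members})
    # per-member records: member name plus its stems, for keyword look-up
    for member in members:
        col.insert_one({'member': member, 'stems': member.split('.')})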
def init(self): """ Establish connection to MongoDB back-end and create DB. """ col = None try: conn = db_connection(self.dburi) if conn: dbc = conn[self.dbname] col = dbc[self.colname] # print "### DASMapping:init started successfully" except ConnectionFailure as _err: tstamp = dastimestamp("") thread = threading.current_thread() print "### MongoDB connection failure thread=%s, id=%s, time=%s" % (thread.name, thread.ident, tstamp) except Exception as exc: print_exc(exc) if col: index = [ ("type", DESCENDING), ("system", DESCENDING), ("urn", DESCENDING), ("das_map.das_key", DESCENDING), ("das_map.rec_key", DESCENDING), ("das_map.api_arg", DESCENDING), ] create_indexes(col, index)
def __init__(self, dburi, dbname='das', dbcoll='requests', lifetime=86400):
    self.con  = db_connection(dburi)
    self.col  = self.con[dbname][dbcoll]
    self.hold = self.con[dbname][dbcoll + '_onhold']
    create_indexes(self.col , [('ts', ASCENDING)])
    create_indexes(self.hold, [('ts', ASCENDING)])
    self.lifetime = lifetime # default 1 day
def add_summary(self, identifier, start, finish, **payload):
    """
    Add an analyzer summary, with given analyzer identifier, start and
    finish times and payload. It is intended that a summary document is
    deposited on each run of an analyzer (if desirable) and is thereafter
    immutable.
    """
    msg = '(%s, %s->%s, %s)' % (identifier, start, finish, payload)
    self.logger.debug(msg)

    # clean-up analyzer records whose start timestamp is too old
    spec = {'start': {'$lt': time.time() - self.history},
            'analyzer': {'$exists': True}}
    self.col.remove(spec)

    # insert new analyzer record
    record = {'analyzer': identifier, 'start': start, 'finish': finish}
    payload.update(record) # ensure key fields are set correctly
    self.col.insert(payload)

    # ensure summary items are indexed for quick extract
    create_indexes(self.col, [('analyzer', DESCENDING), ('start', ASCENDING)])
def init(self): """Takes care of MongoDB connection""" try: indexes = [('dataset', DESCENDING), ('site', DESCENDING), ('ts', DESCENDING)] for index in indexes: create_indexes(self.coll, [index]) dasmapping = DASMapping(self.dasconfig) service_name = self.config.get('name', 'combined') service_api = self.config.get('api', 'dataset4site_release') mapping = dasmapping.servicemap(service_name) self.urls = mapping[service_api]['services'] self.expire = mapping[service_api]['expire'] services = self.dasconfig['services'] if not self.wthr: # Worker thread which update dbs/phedex DB self.wthr = start_new_thread('dbs_phedex_worker', worker, \ (self.urls, which_dbs, self.uri, \ self.dbname, self.collname, self.expire)) msg = "### DBSPhedexService:init started" print(msg) except Exception as exc: print("### Fail DBSPhedexService:init\n", str(exc)) self.urls = None self.expire = 60 self.wthr = None
def add_api(self, system, query, api, args):
    """
    Add API info to analytics DB. Here args is a dict of API parameters.
    """
    orig_query = query
    if isinstance(query, dict):
        query = encode_mongo_query(query)
    msg = '(%s, %s, %s, %s)' % (system, query, api, args)
    self.logger.debug(msg)
    # find query record
    qhash = genkey(query)
    record = self.col.find_one({'qhash': qhash}, fields=['dasquery'])
    if not record:
        self.add_query("", orig_query)
    # find api record
    record = self.col.find_one({'qhash': qhash, 'system': system,
                                'api.name': api, 'api.params': args})
    apidict = dict(name=api, params=args)
    if record:
        self.col.update({'_id': record['_id']}, {'$inc': {'counter': 1}})
    else:
        record = dict(system=system, api=apidict, qhash=qhash, counter=1)
        self.col.insert(record)
    index = [('system', DESCENDING), ('dasquery', DESCENDING),
             ('api.name', DESCENDING), ('qhash', DESCENDING)]
    create_indexes(self.col, index)
def add_query(self, query, mongoquery):
    """
    Add DAS-QL/MongoDB-QL queries into analytics. A unique record is kept
    for each (qhash, dhash) pair; each record holds an array of call times.
    """
    if isinstance(mongoquery, dict):
        mongoquery = encode_mongo_query(mongoquery)
    msg = 'query=%s, mongoquery=%s' % (query, mongoquery)
    self.logger.debug(msg)
    dhash = genkey(query)
    qhash = genkey(mongoquery)
    now = time.time()
    existing = self.col.find_one({'qhash': qhash, 'dhash': dhash})
    if existing:
        # check if times contains very old timestamps; use find_one here
        # (not find) since a cursor object is always truthy
        rec = self.col.find_one({'_id': ObjectId(existing['_id']),
                                 'times': {'$lt': now - self.history}})
        if rec:
            self.col.update({'_id': ObjectId(existing['_id'])},
                            {'$pull': {'times': {'$lt': now - self.history}}})
        # update times array with new timestamp
        self.col.update({'_id': ObjectId(existing['_id'])},
                        {'$push': {'times': now}})
    else:
        record = dict(query=query, mongoquery=mongoquery,
                      qhash=qhash, dhash=dhash, times=[now])
        self.col.insert(record)
    index = [('qhash', DESCENDING), ('dhash', DESCENDING)]
    create_indexes(self.col, index)
def __new__(cls, config):
    """
    Create a new instance of the class and cache it, or return the
    existing instance if one exists (only when the params match).
    Only the last instance is cached, but this simplifies the
    implementation as the param 'config' might be a complex
    unhashable object.
    """
    # check if we can reuse an existing instance
    if cls.__cached_inst and cls.__cached_params == config:
        if config['verbose']:
            print("DASMapping::__new__: returning a cached instance")
        return cls.__cached_inst

    # otherwise create and initialize a new instance
    if config['verbose']:
        print("DASMapping::__new__: creating a new instance")
    self = object.__new__(cls)

    self.verbose  = config['verbose']
    self.logger   = PrintManager('DASMapping', self.verbose)
    self.services = config['services']
    self.dburi    = config['mongodb']['dburi']
    self.dbname   = config['mappingdb']['dbname']
    self.colname  = config['mappingdb']['collname']
    self.map_test = config.get('map_test', True)
    self.main_dbs = config['das'].get('main_dbs', 'dbs3')
    self.dbsinsts = config['das'].get('dbs_instances', [])

    msg = "%s@%s" % (self.dburi, self.dbname)
    self.logger.info(msg)

    self.das_son_manipulator = DAS_SONManipulator()

    # establish MongoDB connection for the mapping collection
    self.conn = db_connection(self.dburi)
    self.col  = self.conn[self.dbname][self.colname]

    index = [('type', DESCENDING),
             ('system', DESCENDING),
             ('urn', DESCENDING),
             ('das_map.das_key', DESCENDING),
             ('das_map.rec_key', DESCENDING),
             ('das_map.api_arg', DESCENDING)]
    create_indexes(self.col, index)

    self.daskeyscache         = {}    # to be filled at run time
    self.systems              = []    # to be filled at run time
    self.dasmapscache         = {}    # to be filled at run time
    self.keymap               = {}    # to be filled at run time
    self.presentationcache    = {}    # to be filled at run time
    self.reverse_presentation = {}    # to be filled at run time
    self.notationcache        = {}    # to be filled at run time
    self.diffkeycache         = {}    # to be filled at run time
    self.apicache             = {}    # to be filled at run time
    self.dbs_global_url       = None  # to be determined at run time
    self.dbs_inst_names       = None  # to be determined at run time

    self.load_maps()

    # cache the instance and return it
    DASMapping.__cached_inst = self
    DASMapping.__cached_params = config
    return self
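# Standalone sketch (not part of DAS) of the single-slot instance caching
# pattern used in DASMapping.__new__ above: the last (params, instance) pair
# is remembered and reused when the constructor is called with equal params,
# which works even when the params object is unhashable (e.g. a dict).
class CachedService(object):
    "Return the previously built instance when params compare equal."
    __cached_params = None
    __cached_inst = None

    def __new__(cls, params):
        if cls.__cached_inst is not None and cls.__cached_params == params:
            return cls.__cached_inst
        self = object.__new__(cls)
        self.params = params
        cls.__cached_params = params
        cls.__cached_inst = self
        return self

# usage: two calls with equal configs share one instance
svc1 = CachedService({'verbose': 0})
svc2 = CachedService({'verbose': 0})
assert svc1 is svc2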
def add_mapreduce(self, name, fmap, freduce): """ Add mapreduce record and assign it to given name. """ print("Add %s map/reduce function" % name) exists = find_one(self.mapreduce, {'name':name}) if exists: raise Exception('Map/reduce functions for %s already exists' % name) self.mapreduce.insert(dict(name=name, map=fmap, reduce=freduce)) create_indexes(self.mapreduce, [('name', DESCENDING)])
def create_db(self):
    """
    Create db collection
    """
    conn = db_connection(self.dburi)
    dbn  = conn[self.dbname]
    if self.colname not in dbn.collection_names():
        dbn.create_collection(self.colname, capped=True, size=self.sizecap)
    col = dbn[self.colname]
    index_list = [('qhash', DESCENDING)]
    create_indexes(col, index_list)
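# Standalone sketch (not part of DAS; db/collection names are illustrative)
# of why the collection above is created as capped: MongoDB pre-allocates
# `size` bytes and silently evicts the oldest documents once the cap is
# reached, giving a fixed-footprint, insertion-ordered cache with no
# explicit cleanup code.
from pymongo import MongoClient
from pymongo.errors import CollectionInvalid

def demo_capped(uri='mongodb://localhost:27017'):
    dbn = MongoClient(uri)['das_demo']
    try:
        dbn.create_collection('capped_demo', capped=True, size=4096)
    except CollectionInvalid:
        pass  # already created on a previous run
    col = dbn['capped_demo']
    for idx in range(1000):
        col.insert_one({'qhash': 'hash-%s' % idx})
    # only the newest documents remain; older ones were silently evicted
    return col.estimated_document_count()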
def init(self): """Takes care of MongoDB connection""" try: conn = db_connection(self.uri) self.coll = conn[self.dbname][self.collname] indexes = [('name', DESCENDING), ('site', DESCENDING), ('timestamp', DESCENDING)] for index in indexes: create_indexes(self.coll, [index]) except Exception, _exp: self.coll = None
def init(self): """ Init db connection and check that it is alive """ try: indexes = [('value', ASCENDING), ('ts', ASCENDING)] create_indexes(self.col, indexes) if not KEEP_EXISTING_RECORDS_ON_RESTART: self.col.remove() except Exception as exc: print(dastimestamp(), exc)
def init(self): """ Init db connection and check that it is alive """ try: indexes = [('dataset', ASCENDING), ('ts', ASCENDING)] create_indexes(self.col, indexes) if not self.preserve_on_restart: self.col.delete_many({}) except Exception as _exp: pass
def init(self): """ Init db connection and check that it is alive """ try: indexes = [("dataset", ASCENDING), ("ts", ASCENDING)] create_indexes(self.col, indexes) if not self.preserve_on_restart: self.col.remove() except Exception as _exp: pass
def init(self): """ Init db connection and check that it is alive """ try: indexes = [('value', ASCENDING), ('ts', ASCENDING)] create_indexes(self.col, indexes) if not KEEP_EXISTING_RECORDS_ON_RESTART: self.col.remove() except Exception as exc: print(dastimestamp(), exc)
def init(self): """ Init db connection and check that it is alive """ try: conn = db_connection(self.dburi) self.col = conn[self.dbname][self.dbcoll] indexes = [('dataset', ASCENDING), ('ts', ASCENDING)] create_indexes(self.col, indexes) self.col.remove() except Exception as _exp: self.col = None if not is_db_alive(self.dburi): self.col = None
def __init__(self, config):
    self.verbose  = config['verbose']
    self.logger   = PrintManager('DASKeyLearning', self.verbose)
    self.services = config['services']
    self.dburi    = config['mongodb']['dburi']
    self.dbname   = config['keylearningdb']['dbname']
    self.colname  = config['keylearningdb']['collname']
    self.mapping  = config['dasmapping']

    msg = "%s@%s" % (self.dburi, self.dbname)
    self.logger.info(msg)

    self.das_son_manipulator = DAS_SONManipulator()

    # establish MongoDB connection for the keylearning collection
    self.conn = db_connection(self.dburi)
    self.col  = self.conn[self.dbname][self.colname]

    index_list = [('system', ASCENDING), ('urn', ASCENDING),
                  ('members', ASCENDING), ('stems', ASCENDING)]
    create_indexes(self.col, index_list)
def add(self, record):
    """
    Add new record into mapping DB. Example of URI record

    .. doctest::

        {
         system: dbs,
         urn: listBlocks,
         url: "http://a.b.com/api",
         params: [{"apiversion": 1_2_2, test: "*"}],
         daskeys: [{"key": "block", "map": "block.name", "pattern": ""}],
         das2api: [{"das_key": "site", "api_param": "se",
                    "pattern": "re.compile('^T[0-3]_')"}]
        }

    Example of notation record:

    .. doctest::

        notations: [
            {"notation": "storage_element_name", "map": "site", "api": ""},
        ]
    """
    msg = "record=%s" % record
    self.logger.debug(msg)
    self.col.insert(record)
    index = None
    if "urn" in record:
        index = [("system", DESCENDING), ("daskeys", DESCENDING),
                 ("urn", DESCENDING)]
    elif "notations" in record:
        index = [("system", DESCENDING),
                 ("notations.api_param", DESCENDING)]
    elif "presentation" in record:
        index = []
    else:
        msg = "Invalid record %s" % record
        raise Exception(msg)
    if index:
        create_indexes(self.col, index)
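# Hypothetical usage sketch (record values are illustrative, not taken from a
# real DAS map): given an already constructed mapping-DB instance `mgr`, an
# API record and a notation record would be passed to add() like this.
api_record = {
    'system': 'dbs',
    'urn': 'listBlocks',
    'url': 'http://a.b.com/api',
    'params': [{'apiversion': '1_2_2', 'test': '*'}],
    'daskeys': [{'key': 'block', 'map': 'block.name', 'pattern': ''}],
    'das2api': [{'das_key': 'site', 'api_param': 'se',
                 'pattern': "re.compile('^T[0-3]_')"}],
}
notation_record = {
    'system': 'phedex',
    'notations': [{'notation': 'storage_element_name', 'map': 'site', 'api': ''}],
}
# mgr.add(api_record)       # indexed on system/daskeys/urn
# mgr.add(notation_record)  # indexed on system/notations.api_param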
def insert_apicall(self, system, query, url, api, api_params, expire):
    """
    Remove obsolete apicall records and insert the provided information
    about an API call into the Analytics DB. Moved from AbstractService.

    Updated so that we do not create multiple records when performing
    forced updates (i.e. when the old record has not yet expired): we
    first look for an existing record with the same parameters (the fact
    that some of the fields are indexed should keep this fast even though
    not all of them are) and, if found, just update its expiry.
    Otherwise a new record is inserted.
    """
    msg  = 'query=%s, url=%s,' % (query, url)
    msg += 'api=%s, args=%s, expire=%s' % (api, api_params, expire)
    self.logger.debug(msg)
    expire = expire_timestamp(expire)
    query = encode_mongo_query(query)
    qhash = genkey(query)
    self.remove_expired()
    existing = self.col.find_one({'apicall.system': system,
                                  'apicall.url': url,
                                  'apicall.api': api,
                                  'apicall.api_params': api_params,
                                  'apicall.qhash': qhash})
    if existing:
        self.logger.debug("updating")
        self.col.update({'_id': existing['_id']},
                        {'$set': {'apicall.expire': expire}})
    else:
        self.col.insert({'apicall': {'api_params': api_params,
                                     'url': url,
                                     'api': api,
                                     'system': system,
                                     'expire': expire,
                                     'qhash': qhash}})
    index_list = [('apicall.url', DESCENDING),
                  ('apicall.api', DESCENDING),
                  ('qhash', DESCENDING)]
    create_indexes(self.col, index_list)
def __init__(self, config):
    self.emptyset_expire = expire_timestamp(
            config['das'].get('emptyset_expire', 5))
    self.dburi      = config['mongodb']['dburi']
    self.cache_size = config['mongodb']['bulkupdate_size']
    self.dbname     = config['dasdb']['dbname']
    self.verbose    = config['verbose']
    self.logger     = PrintManager('DASMongocache', self.verbose)
    self.mapping    = config['dasmapping']

    self.conn    = db_connection(self.dburi)
    self.mdb     = self.conn[self.dbname]
    self.col     = self.mdb[config['dasdb']['cachecollection']]
    self.mrcol   = self.mdb[config['dasdb']['mrcollection']]
    self.merge   = self.mdb[config['dasdb']['mergecollection']]
    self.gfs     = db_gridfs(self.dburi)

    self.logdb   = DASLogdb(config)

    self.das_internal_keys = ['das_id', 'das', 'cache_id', 'qhash']

    msg = "%s@%s" % (self.dburi, self.dbname)
    self.logger.info(msg)

    self.add_manipulator()

    # ensure that we have the following indexes
    index_list = [('das.expire', ASCENDING), ('das_id', ASCENDING),
                  ('das.system', ASCENDING), ('qhash', DESCENDING),
                  ('das.empty_record', ASCENDING)]
    create_indexes(self.col, index_list)
    index_list = [('das.expire', ASCENDING), ('das_id', ASCENDING),
                  ('qhash', DESCENDING), ('das.empty_record', ASCENDING),
                  ('das.ts', ASCENDING)]
    create_indexes(self.merge, index_list)
def __init__(self, config):
    self.config = config
    self.emptyset_expire = \
            expire_timestamp(config['das'].get('emptyset_expire', 5))
    self.dburi      = config['mongodb']['dburi']
    self.cache_size = config['mongodb']['bulkupdate_size']
    self.dbname     = config['dasdb']['dbname']
    self.verbose    = config['verbose']
    self.logger     = PrintManager('DASMongocache', self.verbose)
    self.mapping    = config['dasmapping']
    self.logging    = config['dasdb'].get('logging', False)
    self.rec_ttl    = config['dasdb'].get('record_ttl', 24 * 60 * 60)
    self.del_ttl    = config['dasdb'].get('delta_ttl', 60)
    self.cleanup_del_ttl = config['dasdb'].get('cleanup_delta_ttl', 3600)
    self.retry      = config['dasdb'].get('retry', 3)
    self.das_son_manipulator = DAS_SONManipulator()

    # Initialize MongoDB connection
    self.col_    = self.config['dasdb']['cachecollection']
    self.mrcol_  = self.config['dasdb']['mrcollection']
    self.merge_  = self.config['dasdb']['mergecollection']
    self.gfs     = db_gridfs(self.dburi)

    msg = "%s@%s" % (self.dburi, self.dbname)
    self.logger.info(msg)

    # ensure that we have the following indexes
    common_idx = [('file.name', DESCENDING),
                  ('dataset.name', DESCENDING),
                  ('block.name', DESCENDING),
                  ('run.run_number', DESCENDING)]
    index_list = [('das.expire', ASCENDING), ('das_id', ASCENDING),
                  ('das.system', ASCENDING), ('qhash', DESCENDING),
                  ('das.record', ASCENDING)]
    create_indexes(self.col, index_list + common_idx)
    index_list = [('das.expire', ASCENDING), ('das_id', ASCENDING),
                  ('qhash', DESCENDING), ('das.record', ASCENDING),
                  ('das.ts', ASCENDING)]
    create_indexes(self.merge, index_list)
    # NOTE: creating indexes on the merge collection can lead to a MongoDB
    # error when records contain multiple arrays on indexed keys. For
    # example, when we query file,run,lumi both file and run are arrays in
    # MongoDB, and the final sort in MongoDB fails with:
    #     cannot sort with keys that are parallel arrays
    # It looks like there is no fix for that yet, see
    # http://stackoverflow.com/questions/6516725/how-do-i-index-two-arrays-in-mongodb
    # Therefore the create_indexes call on the merge collection which was
    # used to ease the final sort (especially when many records correspond
    # to the initial query, e.g. file records) is temporarily disabled.
    # On the other hand, the most common use case where the sort fails is
    # getting file records, so one compound key is added to ease that sort,
    # but another compound key on an array field (e.g. run) cannot be added.
    common_idx = [[('qhash', DESCENDING), ('file.name', DESCENDING)]]
    create_indexes(self.merge, index_list + common_idx)

    # thread which cleans up DAS collections
    thname = 'mongocache_cleanup'
    cols   = [config['dasdb']['cachecollection'],
              config['dasdb']['mrcollection'],
              config['dasdb']['mergecollection']]
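# Standalone sketch (not part of DAS; db/collection names are illustrative)
# reproducing the "parallel arrays" limitation described in the NOTE above:
# MongoDB refuses to maintain a compound index over two array-valued fields
# in the same document, which is why only the (qhash, file.name) compound
# key is created on the merge collection and not one involving run as well.
from pymongo import MongoClient
from pymongo.errors import OperationFailure

def demo_parallel_arrays(uri='mongodb://localhost:27017'):
    col = MongoClient(uri)['das_demo']['parallel_arrays']
    col.drop()
    col.create_index([('file.name', 1), ('run.run_number', 1)])
    try:
        # both 'file' and 'run' are arrays here, as in file,run,lumi records
        col.insert_one({'qhash': 'abc',
                        'file': [{'name': '/store/f1'}, {'name': '/store/f2'}],
                        'run': [{'run_number': 1}, {'run_number': 2}]})
    except OperationFailure as err:
        print("cannot index parallel arrays:", err)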