Example #1
 def add_members(self, system, urn, members):
     """
     Add a list of data members for a given API (system, urn, url),
     and generate the stems of those members, which are stored as
     separate records.
     """
     msg = "system=%s, urn=%s, members=%s)" % (system, urn, members)
     self.logger.info(msg)
     
     result = self.col.find_one({'system': system, 'urn': urn})
     if result:       
         self.col.update({'_id': result['_id']},
                         {'$addToSet': {'members': {'$each': members}}})
     else:
         keys = self.mapping.api2daskey(system, urn)
         self.col.insert({'system': system,
                          'urn': urn,
                          'keys': keys,
                          'members': members})
             
     for member in members:
         if not self.col.find_one({'member': member}):
             self.col.insert({'member': member,
                              'stems': self.stem(member)})
             
     index_list = [('system', 1), ('urn', 1), ('members', 1), ('stems', 1)]
     create_indexes(self.col, index_list)
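All of these snippets funnel their index definitions through the project's `create_indexes` helper. As a rough sketch (an assumption for illustration, not the actual DAS implementation), such a helper could simply ensure one index per (field, direction) pair on top of pymongo:

    # Illustrative sketch only: assumes create_indexes ensures one single-field
    # index per (field, direction) pair; the real DAS helper may behave differently.
    from pymongo import MongoClient, ASCENDING

    def create_indexes(coll, index_list):
        """Make sure every (field, direction) pair in index_list is indexed."""
        for field, direction in index_list:
            # create_index is idempotent: re-creating an existing index is a no-op
            coll.create_index([(field, direction)])

    if __name__ == '__main__':
        coll = MongoClient('mongodb://localhost:27017')['das']['keylearning']
        create_indexes(coll, [('system', ASCENDING), ('urn', ASCENDING),
                              ('members', ASCENDING), ('stems', ASCENDING)])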
Example #2
 def init(self):
     """
     Establish connection to MongoDB back-end and create DB.
     """
     col = None
     try:
         conn = db_connection(self.dburi)
         if conn:
             dbc = conn[self.dbname]
             col = dbc[self.colname]
             # print("### DASMapping:init started successfully")
     except ConnectionFailure as _err:
         tstamp = dastimestamp("")
         thread = threading.current_thread()
         print "### MongoDB connection failure thread=%s, id=%s, time=%s" % (thread.name, thread.ident, tstamp)
     except Exception as exc:
         print_exc(exc)
     if col:
         index = [
             ("type", DESCENDING),
             ("system", DESCENDING),
             ("urn", DESCENDING),
             ("das_map.das_key", DESCENDING),
             ("das_map.rec_key", DESCENDING),
             ("das_map.api_arg", DESCENDING),
         ]
         create_indexes(col, index)
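`db_connection` is a project helper used throughout these examples. A minimal sketch of what it might look like, assuming it is a thin wrapper around `pymongo.MongoClient` that returns `None` when the server is unreachable:

    # Hypothetical sketch of db_connection; the real DAS helper may differ.
    from pymongo import MongoClient
    from pymongo.errors import ConnectionFailure

    def db_connection(dburi):
        try:
            conn = MongoClient(dburi, serverSelectionTimeoutMS=5000)
            conn.admin.command('ping')  # force a round trip to verify connectivity
            return conn
        except ConnectionFailure:
            return None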
Example #3
 def __init__(self, dburi, dbname='das', dbcoll='requests', lifetime=86400):
     self.con  = db_connection(dburi)
     self.col  = self.con[dbname][dbcoll]
     self.hold = self.con[dbname][dbcoll + '_onhold']
     create_indexes(self.col , [('ts', ASCENDING)])
     create_indexes(self.hold, [('ts', ASCENDING)])
     self.lifetime = lifetime # default 1 day (86400 seconds)
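Since `lifetime` drives manual expiry of request records keyed on `ts`, a MongoDB TTL index is an alternative worth noting. The sketch below assumes `ts` is stored as a datetime (TTL indexes do not act on plain numeric epoch values):

    # Sketch of server-side expiry via a TTL index; assumes 'ts' holds a datetime.
    import datetime
    from pymongo import MongoClient, ASCENDING

    col = MongoClient('mongodb://localhost:27017')['das']['requests']
    col.create_index([('ts', ASCENDING)], expireAfterSeconds=86400)
    col.insert_one({'ts': datetime.datetime.utcnow(), 'request': 'example'})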
Example #4
    def add_summary(self, identifier, start, finish, **payload):
        """
        Add an analyzer summary, with given analyzer identifier,
        start and finish times and payload.
        
        It is intended that a summary document is deposited on
        each run of an analyzer (if desirable) and is thereafter
        immutable.
        """
        msg = '(%s, %s->%s, %s)' % (identifier, start, finish, payload)
        self.logger.debug(msg)
        
        # clean-up analyzer records whose start timestamp is too old
        spec = {'start':{'$lt':time.time()-self.history},
                'analyzer': {'$exists': True}}
        self.col.remove(spec)

        # insert new analyzer record
        record = {'analyzer':identifier,
                  'start': start,
                  'finish': finish}
        payload.update(record) #ensure key fields are set correctly
        self.col.insert(payload)
        # ensure summary items are indexed for quick extract
        create_indexes(self.col, [('analyzer', DESCENDING), ('start', ASCENDING)])
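These snippets use the legacy pymongo 2.x write API (`insert`/`update`/`remove`). For reference, a hedged pymongo 3+ translation of the writes in `add_summary` might read as follows; field names and `self.history` come from the snippet above, everything else is an assumption:

    # Rough pymongo 3+ translation of add_summary's writes (illustrative only).
    import time
    from pymongo import ASCENDING, DESCENDING

    def add_summary_v3(self, identifier, start, finish, **payload):
        # drop summaries whose start timestamp is older than the history window
        self.col.delete_many({'start': {'$lt': time.time() - self.history},
                              'analyzer': {'$exists': True}})
        payload.update({'analyzer': identifier, 'start': start, 'finish': finish})
        self.col.insert_one(payload)
        # create_indexes is the project helper shown throughout these examples
        create_indexes(self.col, [('analyzer', DESCENDING), ('start', ASCENDING)])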
Example #5
 def init(self):
     """Takes care of MongoDB connection"""
     try:
         indexes = [('dataset', DESCENDING), ('site', DESCENDING),
                    ('ts', DESCENDING)]
         for index in indexes:
             create_indexes(self.coll, [index])
         dasmapping   = DASMapping(self.dasconfig)
         service_name = self.config.get('name', 'combined')
         service_api  = self.config.get('api', 'dataset4site_release')
         mapping      = dasmapping.servicemap(service_name)
         self.urls    = mapping[service_api]['services']
         self.expire  = mapping[service_api]['expire']
         services     = self.dasconfig['services']
         if  not self.wthr:
         # Worker thread which updates the dbs/phedex DB
             self.wthr = start_new_thread('dbs_phedex_worker', worker, \
                  (self.urls, which_dbs, self.uri, \
                  self.dbname, self.collname, self.expire))
         msg = "### DBSPhedexService:init started"
         print(msg)
     except Exception as exc:
         print("### Fail DBSPhedexService:init\n", str(exc))
         self.urls       = None
         self.expire     = 60
         self.wthr       = None
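`start_new_thread` here is another project helper; note its `(name, func, args)` signature. A plausible sketch, assuming it simply wraps `threading.Thread` and daemonizes the worker:

    # Hypothetical sketch of the start_new_thread helper used above.
    import threading

    def start_new_thread(name, func, args, daemon=True):
        thr = threading.Thread(target=func, name=name, args=args)
        thr.daemon = daemon
        thr.start()
        return thr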
Example #6
 def add_api(self, system, query, api, args):
     """
     Add API info to analytics DB. 
     Here args is a dict of API parameters.
     """
     orig_query = query
     if  isinstance(query, dict):
         query = encode_mongo_query(query)
     msg = '(%s, %s, %s, %s)' % (system, query, api, args)
     self.logger.debug(msg)
     # find query record
     qhash = genkey(query)
     record = self.col.find_one({'qhash':qhash}, fields=['dasquery'])
     if  not record:
         self.add_query("", orig_query)
     # find api record
     record = self.col.find_one({'qhash':qhash, 'system':system,
                     'api.name':api, 'api.params':args}) 
     apidict = dict(name=api, params=args)
     if  record:
         self.col.update({'_id':record['_id']}, {'$inc':{'counter':1}})
     else:
         record = dict(system=system, api=apidict, qhash=qhash, counter=1)
         self.col.insert(record)
     index = [('system', DESCENDING), ('dasquery', DESCENDING),
              ('api.name', DESCENDING), ('qhash', DESCENDING) ]
     create_indexes(self.col, index)
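`genkey` turns the encoded query into the `qhash` used throughout these collections. A minimal sketch, assuming a stable digest over the query string (the real helper may normalise the input differently):

    # Hypothetical sketch of genkey; assumes an md5 digest over the encoded query.
    import hashlib
    import json

    def genkey(query):
        if isinstance(query, dict):
            query = json.dumps(query, sort_keys=True)
        return hashlib.md5(query.encode('utf-8')).hexdigest()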
Example #7
    def add_query(self, query, mongoquery):
        """
        Add DAS-QL/MongoDB-QL queries into analytics.
        
        A unique record is maintained for each (qhash, dhash) pair;
        each record holds an array of call times.
        """
        if  isinstance(mongoquery, dict):
            mongoquery = encode_mongo_query(mongoquery)
        msg = 'query=%s, mongoquery=%s' % (query, mongoquery)
        self.logger.debug(msg)
        dhash = genkey(query)
        qhash = genkey(mongoquery)

        now = time.time()

        existing = self.col.find_one({'qhash': qhash, 'dhash': dhash})
        if existing:
            # check if times contains very old timestamps
            # a pymongo cursor is always truthy, so use find_one to check
            # whether any stale timestamps actually exist before pulling them
            rec = self.col.find_one({'_id': ObjectId(existing['_id']),
                                     'times': {'$lt': now - self.history}})
            if  rec:
                self.col.update({'_id': ObjectId(existing['_id'])},
                    {'$pull': {'times': {'$lt': now - self.history}}})
            # update times array with new timestamp
            self.col.update({'_id': ObjectId(existing['_id'])},
                            {'$push': {'times': now}})
        else:
            record = dict(query=query, mongoquery=mongoquery,
                        qhash=qhash, dhash=dhash, times=[now])
            self.col.insert(record)

        index = [('qhash', DESCENDING),
                 ('dhash', DESCENDING)]
        create_indexes(self.col, index)
Example #8
    def __new__(cls, config):
        """
        Creates a new instance of the class and caches it, or returns the
        existing instance if one exists and the params match.

        Only the last instance is cached; this simplifies the implementation,
        since the 'config' param may be a complex, unhashable object.
        """
        # check if we can reuse an existing instance
        if cls.__cached_inst and cls.__cached_params == config:
            if  config['verbose']:
                print("DASMapping::__new__: returning a cached instance")
            return cls.__cached_inst

        # otherwise create and initialize a new instance
        if  config['verbose']:
            print("DASMapping::__new__: creating a new instance")
        self = object.__new__(cls)

        self.verbose  = config['verbose']
        self.logger   = PrintManager('DASMapping', self.verbose)
        self.services = config['services']
        self.dburi    = config['mongodb']['dburi']
        self.dbname   = config['mappingdb']['dbname']
        self.colname  = config['mappingdb']['collname']
        self.map_test = config.get('map_test', True)
        self.main_dbs = config['das'].get('main_dbs', 'dbs3')
        self.dbsinsts = config['das'].get('dbs_instances', [])

        msg = "%s@%s" % (self.dburi, self.dbname)
        self.logger.info(msg)

        self.das_son_manipulator = DAS_SONManipulator()
        index = [('type', DESCENDING),\
                 ('system', DESCENDING),\
                 ('urn', DESCENDING),\
                 ('das_map.das_key', DESCENDING),\
                 ('das_map.rec_key', DESCENDING),\
                 ('das_map.api_arg', DESCENDING),\
                 ]
        create_indexes(self.col, index)

        self.daskeyscache = {}         # to be filled at run time
        self.systems = []              # to be filled at run time
        self.dasmapscache = {}         # to be filled at run time
        self.keymap = {}               # to be filled at run time
        self.presentationcache = {}    # to be filled at run time
        self.reverse_presentation = {} # to be filled at run time
        self.notationcache = {}        # to be filled at run time
        self.diffkeycache = {}         # to be filled at run time
        self.apicache = {}             # to be filled at run time
        self.dbs_global_url = None     # to be determined at run time
        self.dbs_inst_names = None     # to be determined at run time
        self.load_maps()

        # cache the instance and return it
        DASMapping.__cached_inst = self
        DASMapping.__cached_params = config
        return self
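The caching in `__new__` above boils down to remembering only the last `(config, instance)` pair, which sidesteps using an unhashable dict as a cache key. A stand-alone illustration of the same pattern:

    # Minimal, self-contained version of the cached-instance pattern.
    class CachedByConfig(object):
        __cached_inst = None
        __cached_params = None

        def __new__(cls, config):
            # reuse the previous instance only when the configs compare equal
            if cls.__cached_inst is not None and cls.__cached_params == config:
                return cls.__cached_inst
            self = object.__new__(cls)
            self.config = config
            cls.__cached_inst = self
            cls.__cached_params = config
            return self

    a = CachedByConfig({'verbose': 0})
    b = CachedByConfig({'verbose': 0})
    assert a is b  # identical config -> the cached instance is returned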
Example #9
 def add_mapreduce(self, name, fmap, freduce):
     """
     Add mapreduce record and assign it to given name.
     """
     print("Add %s map/reduce function" % name)
     exists = find_one(self.mapreduce, {'name':name})
     if  exists:
         raise Exception('Map/reduce functions for %s already exist' % name)
     self.mapreduce.insert(dict(name=name, map=fmap, reduce=freduce))
     create_indexes(self.mapreduce, [('name', DESCENDING)])
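The `fmap`/`freduce` arguments are MongoDB map/reduce bodies, i.e. JavaScript source stored as strings. A hypothetical call (the names, functions and the `mgr` instance are illustrative only):

    # Illustrative only: a simple per-system counter stored under a chosen name.
    fmap = "function() { emit(this.system, 1); }"
    freduce = "function(key, values) { return Array.sum(values); }"
    mgr.add_mapreduce('count_by_system', fmap, freduce)  # mgr owns self.mapreduce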
Example #10
 def create_db(self):
     """
     Create db collection
     """
     conn = db_connection(self.dburi)
     dbn = conn[self.dbname]
     if self.colname not in dbn.collection_names():
         dbn.create_collection(self.colname, capped=True, size=self.sizecap)
     col = dbn[self.colname]
     index_list = [('qhash', DESCENDING)]
     create_indexes(col, index_list)
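`collection_names()` was deprecated in pymongo 3.7 in favour of `list_collection_names()`. A hedged modern variant of `create_db`, keeping the same logic:

    # Same logic as create_db above, using the non-deprecated listing call.
    from pymongo import DESCENDING

    def create_db(self):
        conn = db_connection(self.dburi)
        dbn = conn[self.dbname]
        if self.colname not in dbn.list_collection_names():
            dbn.create_collection(self.colname, capped=True, size=self.sizecap)
        create_indexes(dbn[self.colname], [('qhash', DESCENDING)])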
Example #11
 def add_mapreduce(self, name, fmap, freduce):
     """
     Add mapreduce record and assign it to given name.
     """
     print("Add %s map/reduce function" % name)
     exists = find_one(self.mapreduce, {'name': name})
     if exists:
         raise Exception('Map/reduce functions for %s already exist' %
                         name)
     self.mapreduce.insert(dict(name=name, map=fmap, reduce=freduce))
     create_indexes(self.mapreduce, [('name', DESCENDING)])
Example #12
 def init(self):
     """Takes care of MongoDB connection"""
     try:
         conn = db_connection(self.uri)
         self.coll = conn[self.dbname][self.collname]
         indexes = [('name', DESCENDING), ('site', DESCENDING), 
                    ('timestamp', DESCENDING)]
         for index in indexes:
             create_indexes(self.coll, [index])
     except Exception as _exp:
         self.coll = None
Example #13
 def create_db(self):
     """
     Create db collection
     """
     conn = db_connection(self.dburi)
     dbn  = conn[self.dbname]
     if  self.colname not in dbn.collection_names():
         dbn.create_collection(self.colname, capped=True, size=self.sizecap)
     col = dbn[self.colname]
     index_list = [('qhash', DESCENDING)]
     create_indexes(col, index_list)
Example #14
    def init(self):
        """
        Init db connection and check that it is alive
        """
        try:
            indexes = [('value', ASCENDING), ('ts', ASCENDING)]
            create_indexes(self.col, indexes)

            if not KEEP_EXISTING_RECORDS_ON_RESTART:
                self.col.remove()
        except Exception as exc:
            print(dastimestamp(), exc)
Example #15
    def init(self):
        """
        Init db connection and check that it is alive
        """
        try:
            indexes = [('dataset', ASCENDING), ('ts', ASCENDING)]
            create_indexes(self.col, indexes)

            if not self.preserve_on_restart:
                self.col.delete_many({})
        except Exception as _exp:
            pass
Example #16
    def init(self):
        """
        Init db connection and check that it is alive
        """
        try:
            indexes = [("dataset", ASCENDING), ("ts", ASCENDING)]
            create_indexes(self.col, indexes)

            if not self.preserve_on_restart:
                self.col.remove()
        except Exception as _exp:
            pass
Example #17
    def init(self):
        """
        Init db connection and check that it is alive
        """
        try:
            indexes = [('value', ASCENDING), ('ts', ASCENDING)]
            create_indexes(self.col, indexes)

            if not KEEP_EXISTING_RECORDS_ON_RESTART:
                self.col.remove()
        except Exception as exc:
            print(dastimestamp(), exc)
Example #18
 def init(self):
     """
     Init db connection and check that it is alive
     """
     try:
         conn = db_connection(self.dburi)
         self.col = conn[self.dbname][self.dbcoll]
         indexes = [('dataset', ASCENDING), ('ts', ASCENDING)]
         create_indexes(self.col, indexes)
         self.col.remove()
     except Exception as _exp:
         self.col = None
     if  not is_db_alive(self.dburi):
         self.col = None
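`is_db_alive` is another project helper; a plausible sketch, assuming it just pings the server behind `dburi`:

    # Hypothetical sketch of is_db_alive.
    from pymongo import MongoClient
    from pymongo.errors import PyMongoError

    def is_db_alive(dburi):
        try:
            MongoClient(dburi, serverSelectionTimeoutMS=2000).admin.command('ping')
            return True
        except PyMongoError:
            return False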
Example #19
    def __init__(self, config):
        self.verbose  = config['verbose']
        self.logger   = PrintManager('DASKeyLearning', self.verbose)
        self.services = config['services']
        self.dburi    = config['mongodb']['dburi']
        self.dbname   = config['keylearningdb']['dbname']
        self.colname  = config['keylearningdb']['collname']

        self.mapping  = config['dasmapping']

        msg = "%s@%s" % (self.dburi, self.dbname)
        self.logger.info(msg)

        self.das_son_manipulator = DAS_SONManipulator()
        index_list = [('system', ASCENDING), ('urn', ASCENDING), \
                ('members', ASCENDING), ('stems', ASCENDING)]
        create_indexes(self.col, index_list)
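This constructor indexes a `stems` field, which `add_members` (Example #1) fills via a `stem()` helper. A hypothetical sketch of such a helper, assuming stems are simply the dotted components of a member key:

    # Hypothetical stem() sketch; the real DAS helper may apply more normalisation.
    def stem(member):
        # e.g. 'block.replica.site' -> ['block', 'replica', 'site']
        return member.split('.')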
Example #20
    def __init__(self, config):
        self.verbose = config['verbose']
        self.logger = PrintManager('DASKeyLearning', self.verbose)
        self.services = config['services']
        self.dburi = config['mongodb']['dburi']
        self.dbname = config['keylearningdb']['dbname']
        self.colname = config['keylearningdb']['collname']

        self.mapping = config['dasmapping']

        msg = "%s@%s" % (self.dburi, self.dbname)
        self.logger.info(msg)

        self.das_son_manipulator = DAS_SONManipulator()
        index_list = [('system', ASCENDING), ('urn', ASCENDING), \
                ('members', ASCENDING), ('stems', ASCENDING)]
        create_indexes(self.col, index_list)
Example #21
    def add(self, record):
        """
        Add new record into mapping DB. Example of URI record

        .. doctest::

            {
             system : dbs,
             urn : listBlocks,
             url : "http://a.b.com/api",
             params : [
                 {"apiversion": 1_2_2, test: "*"}
             ],
             daskeys : [
                 {"key" : "block", "map" : "block.name", "pattern" : ""}
             ],
             das2api : [
                 {"das_key" : "site", "api_param" : "se",
                       "pattern" : "re.compile('^T[0-3]_')"}
             ]
            }

        Example of notation record:

        .. doctest::

             notations: [
                 {"notation" : "storage_element_name", "map":"site", "api": ""},
             ]
        """
        msg = "record=%s" % record
        self.logger.debug(msg)
        self.col.insert(record)
        index = None
        if "urn" in record:
            index = [("system", DESCENDING), ("daskeys", DESCENDING),
                     ("urn", DESCENDING)]
        elif "notations" in record:
            index = [("system", DESCENDING), ("notations.api_param", DESCENDING)]
        elif "presentation" in record:
            index = []
            index = []
        else:
            msg = "Invalid record %s" % record
            raise Exception(msg)
        if index:
            create_indexes(self.col, index)
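For illustration, a URN-style record shaped like the docstring example above could be passed to `add()` as follows; the values are placeholders and `mappingdb` is a hypothetical instance of this class:

    # Placeholder record matching the documented URN layout.
    record = {
        'system': 'dbs',
        'urn': 'listBlocks',
        'url': 'http://a.b.com/api',
        'params': [{'apiversion': '1_2_2', 'test': '*'}],
        'daskeys': [{'key': 'block', 'map': 'block.name', 'pattern': ''}],
        'das2api': [{'das_key': 'site', 'api_param': 'se',
                     'pattern': "re.compile('^T[0-3]_')"}],
    }
    mappingdb.add(record)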
Example #22
 def insert_apicall(self, system, query, url, api, api_params, expire):
     """
     Remove obsolete apicall records and
     insert into Analytics DB provided information about API call.
     Moved from AbstractService.
     
     Updated so that we do not create multiple records when performing
     forced updates (i.e. when the old record has not yet expired): we now
     look for an existing record with the same parameters (the indexed
     fields should keep this lookup reasonably fast) and, if one exists,
     just update its expiry; otherwise we insert a new record.
     """
     msg = 'query=%s, url=%s,' % (query, url)
     msg += ' api=%s, args=%s, expire=%s' % (api, api_params, expire)
     self.logger.debug(msg)
     expire = expire_timestamp(expire)
     query = encode_mongo_query(query)
     qhash = genkey(query)
     self.remove_expired()
     existing = self.col.find_one({'apicall.system':     system,
                                   'apicall.url':        url,
                                   'apicall.api':        api,
                                   'apicall.api_params': api_params,
                                   'apicall.qhash':      qhash})
     if existing:
         self.logger.debug("updating")
         self.col.update({'_id': existing['_id']},
                         {'$set':{'apicall.expire': expire}})
     else:
         self.col.insert({'apicall':{'api_params':   api_params,
                                     'url':          url,
                                     'api':          api,
                                     'system':       system,
                                     'expire':       expire,
                                     'qhash':        qhash}})
     index_list = [('apicall.url', DESCENDING),
                   ('apicall.api', DESCENDING),
                   ('qhash', DESCENDING)]
     create_indexes(self.col, index_list)
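`expire_timestamp` normalises the `expire` argument before it is stored. A rough sketch, under the assumption that small values are relative offsets in seconds while large values are already absolute epoch times:

    # Hypothetical sketch of expire_timestamp; the cut-off below is an assumption.
    import time

    def expire_timestamp(expire):
        expire = float(expire)
        # treat values below an arbitrary cut-off as "seconds from now"
        if expire < 10 * 365 * 24 * 3600:
            return time.time() + expire
        return expire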
Example #23
    def __init__(self, config):
        self.emptyset_expire = expire_timestamp(\
            config['das'].get('emptyset_expire', 5))
        self.dburi   = config['mongodb']['dburi']
        self.cache_size = config['mongodb']['bulkupdate_size']
        self.dbname  = config['dasdb']['dbname']
        self.verbose = config['verbose']
        self.logger  = PrintManager('DASMongocache', self.verbose)
        self.mapping = config['dasmapping']

        self.conn    = db_connection(self.dburi)
        self.mdb     = self.conn[self.dbname]
        self.col     = self.mdb[config['dasdb']['cachecollection']]
        self.mrcol   = self.mdb[config['dasdb']['mrcollection']]
        self.merge   = self.mdb[config['dasdb']['mergecollection']]
        self.gfs     = db_gridfs(self.dburi)

        self.logdb   = DASLogdb(config)

        self.das_internal_keys = ['das_id', 'das', 'cache_id', 'qhash']

        msg = "%s@%s" % (self.dburi, self.dbname)
        self.logger.info(msg)

        self.add_manipulator()

        # ensure that we have the following indexes
        index_list = [('das.expire', ASCENDING), ('das_id', ASCENDING),
                      ('das.system', ASCENDING),
                      ('qhash', DESCENDING),
                      ('das.empty_record', ASCENDING)]
        create_indexes(self.col, index_list)
        index_list = [('das.expire', ASCENDING), ('das_id', ASCENDING),
                      ('qhash', DESCENDING),
                      ('das.empty_record', ASCENDING), ('das.ts', ASCENDING)]
        create_indexes(self.merge, index_list)
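For comparison, stock pymongo 3+ exposes its own `Collection.create_indexes`, which takes `IndexModel` objects. The cache-collection indexes above could be declared like this (the database and collection names are placeholders):

    # Equivalent declaration with pymongo's own create_indexes/IndexModel API.
    from pymongo import MongoClient, IndexModel, ASCENDING, DESCENDING

    col = MongoClient('mongodb://localhost:27017')['das']['cache']
    col.create_indexes([
        IndexModel([('das.expire', ASCENDING)]),
        IndexModel([('das_id', ASCENDING)]),
        IndexModel([('das.system', ASCENDING)]),
        IndexModel([('qhash', DESCENDING)]),
        IndexModel([('das.empty_record', ASCENDING)]),
    ])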
Example #24
    def __init__(self, config):
        self.config = config
        self.emptyset_expire = \
                expire_timestamp(config['das'].get('emptyset_expire', 5))
        self.dburi = config['mongodb']['dburi']
        self.cache_size = config['mongodb']['bulkupdate_size']
        self.dbname = config['dasdb']['dbname']
        self.verbose = config['verbose']
        self.logger = PrintManager('DASMongocache', self.verbose)
        self.mapping = config['dasmapping']
        self.logging = config['dasdb'].get('logging', False)
        self.rec_ttl = config['dasdb'].get('record_ttl', 24 * 60 * 60)
        self.del_ttl = config['dasdb'].get('delta_ttl', 60)
        self.cleanup_del_ttl = config['dasdb'].get('cleanup_delta_ttl', 3600)
        self.retry = config['dasdb'].get('retry', 3)
        self.das_son_manipulator = DAS_SONManipulator()

        # Initialize MongoDB connection
        self.col_ = self.config['dasdb']['cachecollection']
        self.mrcol_ = self.config['dasdb']['mrcollection']
        self.merge_ = self.config['dasdb']['mergecollection']
        self.gfs = db_gridfs(self.dburi)

        msg = "%s@%s" % (self.dburi, self.dbname)
        self.logger.info(msg)

        # ensure that we have the following indexes
        common_idx = [
            ('file.name', DESCENDING),
            ('dataset.name', DESCENDING),
            ('block.name', DESCENDING),
            ('run.run_number', DESCENDING),
        ]
        index_list = [('das.expire', ASCENDING), ('das_id', ASCENDING),
                      ('das.system', ASCENDING), ('qhash', DESCENDING),
                      ('das.record', ASCENDING)]
        create_indexes(self.col, index_list + common_idx)
        index_list = [('das.expire', ASCENDING), ('das_id', ASCENDING),
                      ('qhash', DESCENDING), ('das.record', ASCENDING),
                      ('das.ts', ASCENDING)]
        create_indexes(self.merge, index_list)
        # NOTE: I found that creating an index on the merge collection leads
        # to a MongoDB error when records contain multiple arrays on indexed
        # keys. For example, when we query file,run,lumi both file and run
        # are arrays in MongoDB. In this case the final sort in MongoDB
        # fails with the following message:
        # cannot sort with keys that are parallel arrays
        # It looks like there is no fix for this yet, see
        # http://stackoverflow.com/questions/6516725/how-do-i-index-two-arrays-in-mongodb
        # Therefore I temporarily disabled the create_indexes call on the
        # merge collection, which was used to create an index to ease the
        # final sort, especially when a lot of records correspond to the
        # initial query, e.g. file records.
        # On the other hand, the most common case where the sort fails is
        # getting file records, so I can add one compound key to ease the
        # sort, but I can't add another compound key on an array field,
        # e.g. run.
        common_idx = [[('qhash', DESCENDING), ('file.name', DESCENDING)]]
        create_indexes(self.merge, index_list + common_idx)

        # thread which cleans up DAS collections
        thname = 'mongocache_cleanup'
        cols = [
            config['dasdb']['cachecollection'],
            config['dasdb']['mrcollection'], config['dasdb']['mergecollection']
        ]
Example #25
    def __init__(self, config):
        self.config  = config
        self.emptyset_expire = \
                expire_timestamp(config['das'].get('emptyset_expire', 5))
        self.dburi   = config['mongodb']['dburi']
        self.cache_size = config['mongodb']['bulkupdate_size']
        self.dbname  = config['dasdb']['dbname']
        self.verbose = config['verbose']
        self.logger  = PrintManager('DASMongocache', self.verbose)
        self.mapping = config['dasmapping']
        self.logging = config['dasdb'].get('logging', False)
        self.rec_ttl = config['dasdb'].get('record_ttl', 24*60*60)
        self.del_ttl = config['dasdb'].get('delta_ttl', 60)
        self.cleanup_del_ttl = config['dasdb'].get('cleanup_delta_ttl', 3600)
        self.retry   = config['dasdb'].get('retry', 3)
        self.das_son_manipulator = DAS_SONManipulator()

        # Initialize MongoDB connection
        self.col_    = self.config['dasdb']['cachecollection']
        self.mrcol_  = self.config['dasdb']['mrcollection']
        self.merge_  = self.config['dasdb']['mergecollection']
        self.gfs     = db_gridfs(self.dburi)

        msg = "%s@%s" % (self.dburi, self.dbname)
        self.logger.info(msg)

        # ensure that we have the following indexes
        common_idx = [
                      ('file.name', DESCENDING),
                      ('dataset.name', DESCENDING),
                      ('block.name', DESCENDING),
                      ('run.run_number', DESCENDING),
                      ]
        index_list = [('das.expire', ASCENDING), ('das_id', ASCENDING),
                      ('das.system', ASCENDING),
                      ('qhash', DESCENDING),
                      ('das.record', ASCENDING)]
        create_indexes(self.col, index_list + common_idx)
        index_list = [('das.expire', ASCENDING), ('das_id', ASCENDING),
                      ('qhash', DESCENDING),
                      ('das.record', ASCENDING),
                      ('das.ts', ASCENDING)]
        create_indexes(self.merge, index_list)
        # NOTE: I found that creating an index on the merge collection leads
        # to a MongoDB error when records contain multiple arrays on indexed
        # keys. For example, when we query file,run,lumi both file and run
        # are arrays in MongoDB. In this case the final sort in MongoDB
        # fails with the following message:
        # cannot sort with keys that are parallel arrays
        # It looks like there is no fix for this yet, see
        # http://stackoverflow.com/questions/6516725/how-do-i-index-two-arrays-in-mongodb
        # Therefore I temporarily disabled the create_indexes call on the
        # merge collection, which was used to create an index to ease the
        # final sort, especially when a lot of records correspond to the
        # initial query, e.g. file records.
        # On the other hand, the most common case where the sort fails is
        # getting file records, so I can add one compound key to ease the
        # sort, but I can't add another compound key on an array field,
        # e.g. run.
        common_idx = [[('qhash', DESCENDING), ('file.name', DESCENDING)]]
        create_indexes(self.merge, index_list + common_idx)

        # thread which cleans up DAS collections
        thname = 'mongocache_cleanup'
        cols   = [config['dasdb']['cachecollection'],
                  config['dasdb']['mrcollection'],
                  config['dasdb']['mergecollection']]
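The snippet ends where a clean-up thread over these collections would be started. A hypothetical sketch of such a worker, assuming it periodically removes records whose `das.expire` timestamp has passed:

    # Hypothetical clean-up worker over the DAS cache/merge collections.
    import time

    def cleanup_worker(dbn, collections, sleep=600):
        while True:
            for name in collections:
                dbn[name].delete_many({'das.expire': {'$lt': time.time()}})
            time.sleep(sleep)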