Example #1
 def init(self):
     """Init DAS web server, connect to DAS Core"""
     try:
         self.logcol     = DASLogdb(self.dasconfig)
         self.reqmgr     = RequestManager(self.dburi, lifetime=self.lifetime)
         self.dasmgr     = DASCore(engine=self.engine)
         self.repmgr     = CMSRepresentation(self.dasconfig, self.dasmgr)
         self.daskeys    = self.dasmgr.das_keys()
         self.gfs        = db_gridfs(self.dburi)
         self.daskeys.sort()
         self.dasmapping = self.dasmgr.mapping
         self.dasmapping.init_presentationcache()
         self.colors = {}
         for system in self.dasmgr.systems:
             self.colors[system] = gen_color(system)
         self.sitedbmgr   = SiteDBService(self.dasconfig)
     except Exception as exc:
         print_exc(exc)
         self.dasmgr = None
         self.daskeys = []
         self.colors = {}
         return
     # Start Onhold_request daemon
     if  self.dasconfig['web_server'].get('onhold_daemon', False):
         self.process_requests_onhold()
Example #2
 def get_status(self, dasquery):
     """
     Look up the status of the provided query in a cache.
     Return the status of the query request along with any error and reason.
     """
     status = None
     error = None
     reason = None
     if dasquery and "fields" in dasquery.mongo_query:
         fields = dasquery.mongo_query["fields"]
         if fields and isinstance(fields, list) and "queries" in fields:
             return "ok", error, reason
     record = self.rawcache.find(dasquery)
     error, reason = self.rawcache.is_error_in_records(dasquery)
     try:
         if record and "das" in record and "status" in record["das"]:
             status = record["das"]["status"]
             if not error:
                 error = record["das"].get("error", error)
             if not reason:
                 reason = record["das"].get("reason", reason)
             return status, error, reason
     except Exception as exc:
         print_exc(exc)
         status = error = reason = None
         self.rawcache.remove_from_cache(dasquery)
     return status, error, reason
Example #3
def quote(data):
    """
    Sanitize the data using cgi.escape.
    """
    if  isinstance(data, int) or isinstance(data, float):
        res = data
    elif  isinstance(data, dict):
        res = data
    elif  isinstance(data, list):
        res = data
    elif  isinstance(data, long) or isinstance(data, int) or\
          isinstance(data, float):
        res = data
    elif  isinstance(data, ObjectId):
        res = str(data)
    else:
        try:
            if  data:
                res = cgi.escape(data, quote=True)
            else:
                res = ""
        except Exception as exc:
            print_exc(exc)
            print("Unable to cgi.escape(%s, quote=True)" % data)
            res = ""
    return res
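A minimal usage sketch (not from the original source), assuming Python 2 with the cgi module and bson's ObjectId available as the snippet above expects:

import cgi
from bson.objectid import ObjectId

print(quote('<b onclick="x()">bold</b>'))   # markup gets HTML-escaped
print(quote(42))                            # numbers pass through unchanged
print(quote(ObjectId('0123456789ab0123456789ab')))  # ObjectId is stringified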
Example #4
 def get_status(self, dasquery):
     """
     Look up the status of the provided query in a cache.
     Return the status of the query request along with any error and reason.
     """
     status = None
     error  = None
     reason = None
     for col in ['merge', 'cache']:
         self.rawcache.remove_expired(dasquery, col)
     if  dasquery and 'fields' in dasquery.mongo_query:
         fields = dasquery.mongo_query['fields']
         if  fields and isinstance(fields, list) and 'queries' in fields:
             return 'ok', error, reason
     record = self.rawcache.find(dasquery)
     error, reason = self.rawcache.is_error_in_records(dasquery)
     try:
         if  record and 'das' in record and 'status' in record['das']:
             status = record['das']['status']
             if  not error:
                 error = record['das'].get('error', error)
             if  not reason:
                 reason = record['das'].get('reason', reason)
             return status, error, reason
     except Exception as exc:
         print_exc(exc)
         status = error = reason = None
         self.rawcache.remove_from_cache(dasquery)
     return status, error, reason
Example #5
    def apicall(self, dasquery, url, api, args, dformat, expire):
        """
        Data-service API method; it can be redefined by a data-service
        class. It parses the input query and invokes the appropriate
        data-service API call. All results are stored into the DAS cache
        along with the API call record inserted into the Analytics DB.

        We explicitly invoke the close call on our datastream instead
        of using a context manager, since this method as well as
        getdata/parser can be overridden by child classes.
        """
        datastream  = None
        try:
            args    = self.inspect_params(api, args)
            time0   = time.time()
            headers = make_headers(dformat)
            datastream, expire = self.getdata(url, args, expire, headers)
            self.logger.info("%s expire %s" % (api, expire))
            rawrows = self.parser(dasquery, dformat, datastream, api)
            dasrows = self.translator(api, rawrows)
            ctime   = time.time() - time0
            self.write_to_cache(dasquery, expire, url, api, args,
                    dasrows, ctime)
        except Exception as exc:
            msg  = 'Fail to process: url=%s, api=%s, args=%s' \
                    % (url, api, args)
            print(msg)
            print_exc(exc)
        close(datastream)
Example #6
def create_indexes(coll, index_list):
    """
    Create indexes for provided collection/index_list and
    ensure that they are in place
    """
    index_info = coll.index_information().values()
    for pair in index_list:
        index_exists = 0
        for item in index_info:
            if  item['key'] == [pair]:
                index_exists = 1
        if  not index_exists:
            try:
                if  isinstance(pair, list):
                    coll.create_index(pair)
                else:
                    coll.create_index([pair])
            except Exception as exp:
                print_exc(exp)
        try:
            spec = pair
            if  not isinstance(pair, list):
                spec = [pair]
            coll.create_index(spec)
        except Exception as exp:
            print_exc(exp)
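A hedged usage sketch for create_indexes, assuming pymongo and a local MongoDB; the database, collection, and key names below are placeholders:

from pymongo import MongoClient, DESCENDING

coll = MongoClient('mongodb://localhost:27017')['das']['cache']
# each entry is a (key, direction) pair; bare pairs get wrapped into a list
create_indexes(coll, [('das.expire', DESCENDING), ('qhash', DESCENDING)])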
Example #7
 def check_pid(self, pid, ahash):
     """
     Check status of given pid and return appropriate page content.
     This is a server callback function for ajaxCheckPid, see
     js/ajax_utils.js
     """
     cherrypy.response.headers['Cache-Control'] = 'no-cache'
     cherrypy.response.headers['Pragma'] = 'no-cache'
     img  = '<img src="%s/images/loading.gif" alt="loading"/>' % self.base
     page = ''
     try:
         if  self.taskmgr.is_alive(pid):
             page = img + " processing PID=%s" % pid
         else:
             kwargs = self.reqmgr.get(pid)
             if  kwargs and kwargs.has_key('dasquery'):
                 del kwargs['dasquery']
             # if no kwargs (another request deleted it),
             # use the logging DB to look up the user request via ahash
             if  not kwargs:
                 spec = {'ahash':ahash}
                 skey = [('ts', DESCENDING)]
                 res  = [r for r in self.logcol.find(spec).sort(skey)]
                 kwargs = res[0]['args']
                 self.adjust_input(kwargs)
             self.reqmgr.remove(pid)
             page = self.get_page_content(kwargs)
     except Exception as err:
         msg = 'check_pid fails for pid=%s' % pid
         print dastimestamp('DAS WEB ERROR '), msg
         print_exc(err)
         self.reqmgr.remove(pid)
         self.taskmgr.remove(pid)
         return self.error(gen_error_msg({'pid':pid}), wrap=False)
     return page
Example #8
def onhold_worker(dasmgr, taskmgr, reqmgr, limit):
    "Worker daemon to process onhold requests"
    if  not dasmgr or not taskmgr or not reqmgr:
        return
    print "### START onhold_worker", time.time()
    jobs = []
    while True:
        try:
            while jobs:
                try:
                    reqmgr.remove(jobs.pop(0))
                except:
                    break
            nrequests = reqmgr.size()
            for rec in reqmgr.items_onhold():
                dasquery  = DASQuery(rec['uinput'])
                addr      = rec['ip']
                kwargs    = {'input':rec['uinput']}
                if  (nrequests - taskmgr.nworkers()) < limit:
                    _evt, pid = taskmgr.spawn(\
                        dasmgr.call, dasquery, \
                            addr, pid=dasquery.qhash)
                    jobs.append(pid)
                    reqmgr.remove_onhold(str(rec['_id']))
        except AutoReconnect:
            pass
        except Exception as err:
            print_exc(err)
            pass
        time.sleep(5)
    print "### END onhold_worker", time.time()
Example #9
 def getdata_helper(self, url, params, expire, headers=None, post=None):
     "Helper function to get data from SiteDB or local cache"
     cname = url.split('/')[-1].replace('-', '_')
     col   = self.localcache.conn[self.name][cname]
     local = col.find_one({'expire':{'$gt':expire_timestamp(time.time())}})
     data  = None
     if  local:
         msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
         self.logger.info(msg)
         try: # get data from local cache
             data = [r for r in col.find() if not r.has_key('expire')][0]
             del data['_id']
         except Exception as exc:
             print_exc(exc)
             data = {}
     if  not data or not local:
         headers = {'Accept':'application/json'}
         datastream, expire = getdata(\
                 url, params, headers, expire, post,
                 self.error_expire, self.verbose, self.ckey, self.cert,
                 system=self.name)
         try: # read data and write it to local cache
             data = json.load(datastream)
             datastream.close()
             col.remove()
             col.insert(data)
             col.insert({'expire':expire_timestamp(expire)})
         except Exception as exc:
             print_exc(exc)
     return data, expire
Example #10
 def get_records(self, col, spec, fields, skeys, idx, limit, unique=False):
     "Generator to get records from MongoDB. It correctly applies"
     if  fields:
         for key in fields: # ensure that fields keys will be presented
             if  key not in self.das_internal_keys and \
                 not spec.has_key(key):
                 spec.update({key: {'$exists':True}})
     try:
         res = col.find(spec=spec, fields=fields)
         if  skeys:
             res = res.sort(skeys)
         if  not unique:
             if  idx:
                 res = res.skip(idx)
             if  limit:
                 res = res.limit(limit)
     except Exception as exp:
         print_exc(exp)
         row = {'exception': str(exp)}
         res = []
         yield row
     if  unique:
         if  limit:
             gen = itertools.islice(unique_filter(res), idx, idx+limit)
         else:
             gen = unique_filter(res)
         for row in gen:
             yield row
     else:
         for row in res:
             yield row
Example #11
 def dbs_daemon(self, config):
     """Start DBS daemon if it is requested via DAS configuration"""
     try:
         main_dbs_url = self.dasconfig['dbs']['dbs_global_url']
         self.dbs_urls = []
         for inst in self.dbs_instances:
             self.dbs_urls.append(\
                     main_dbs_url.replace(self.dbs_global, inst))
         interval  = config.get('dbs_daemon_interval', 3600)
         dbsexpire = config.get('dbs_daemon_expire', 3600)
         self.dbsmgr = {} # dbs_urls vs dbs_daemons
         if  self.dataset_daemon:
             for dbs_url in self.dbs_urls:
                 dbsmgr = DBSDaemon(dbs_url, self.dburi, expire=dbsexpire)
                 self.dbsmgr[dbs_url] = dbsmgr
                 def dbs_updater(_dbsmgr, interval):
                     """DBS updater daemon"""
                     while True:
                         try:
                             _dbsmgr.update()
                         except:
                             pass
                         time.sleep(interval)
                 print "Start DBSDaemon for %s" % dbs_url
                 thread.start_new_thread(dbs_updater, (dbsmgr, interval, ))
     except Exception as exc:
         print_exc(exc)
Example #12
    def apicall(self, dasquery, url, api, args, dformat, expire):
        """
        Data-service API method; it can be redefined by a data-service
        class. It parses the input query and invokes the appropriate
        data-service API call. All results are stored into the DAS cache
        along with the API call record inserted into the Analytics DB.

        We explicitly invoke the close call on our datastream instead
        of using a context manager, since this method as well as
        getdata/parser can be overridden by child classes.
        """
        datastream = None
        try:
            args = self.inspect_params(api, args)
            time0 = time.time()
            headers = make_headers(dformat)
            datastream, expire = self.getdata(url, args, expire, headers)
            self.logger.info("%s expire %s" % (api, expire))
            rawrows = self.parser(dasquery, dformat, datastream, api)
            dasrows = self.translator(api, rawrows)
            ctime = time.time() - time0
            self.write_to_cache(dasquery, expire, url, api, args, dasrows,
                                ctime)
        except Exception as exc:
            msg  = 'Fail to process: url=%s, api=%s, args=%s' \
                    % (url, api, args)
            print(msg)
            print_exc(exc)
        close(datastream)
Example #13
    def get_page_content(self, kwargs, complete_msg=True):
        """Retrieve page content for provided set of parameters"""
        html_views = ['list', 'table']
        page = ''
        try:
            view = kwargs.get('view', 'list')
            if  view == 'plain':
                if  'limit' in kwargs:
                    del kwargs['limit']
            if  view in ['json', 'xml', 'plain'] and complete_msg:
                page = 'Request completed. Reload the page ...'
            else:
                head, data = self.get_data(kwargs)

                allowed_views = ['list', 'table', 'plain', 'xml', 'json']
                if view not in allowed_views:
                    raise Exception('unsupported view: %s' % view)

                func = getattr(self, view + "view")
                page = func(head, data)
        except HTTPError as _err:
            raise
        except Exception as exc:
            print_exc(exc)
            msg  = gen_error_msg(kwargs)
            page = self.templatepage('das_error', msg=msg)
        return page
Example #14
    def stop(self):
        """
        Stop the daemon
        """
        # Get the pid from the pidfile
        try:
            pidf = file(self.pidfile, 'r')
            pid = int(pidf.read().strip())
            pidf.close()
        except IOError:
            pid = None

        if not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            return # not an error in a restart

        # Try killing the daemon process
        try:
            while 1:
                os.kill(pid, SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            if str(err).find("No such process") > 0:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print_exc(err)
                sys.exit(1)
Example #15
 def datasets_dbs(self):
     """
     Retrieve a list of DBS datasets (DBS2)
     """
     query = "find dataset,dataset.status"
     params = {"api": "executeQuery", "apiversion": "DBS_2_0_9", "query": query}
     encoded_data = urllib.urlencode(params, doseq=True)
     url = self.dbs_url + "?" + encoded_data
     req = urllib2.Request(url)
     try:
         stream = urllib2.urlopen(req)
     except urllib2.HTTPError:
         msg = "Fail to contact %s" % url
         print dastimestamp("DAS ERROR"), msg
         raise Exception(msg)
     except Exception as exc:
         print_exc(exc)
         msg = "Fail to contact %s" % url
         print dastimestamp("DAS ERROR"), msg
         raise Exception(msg)
     gen = qlxml_parser(stream, "dataset")
     for row in gen:
         dataset = row["dataset"]["dataset"]
         rec = {"dataset": dataset}
         if self.write_hash:
             storage_query = {
                 "fields": ["dataset"],
                 "spec": [{"key": "dataset.name", "value": '"%s"' % dataset}],
                 "instance": self.dbcoll,
             }
             rec.update({"qhash": genkey(storage_query)})
         if row["dataset"]["dataset.status"] == "VALID":
             yield rec
     stream.close()
Example #17
 def getdata_helper(self, url, params, expire, headers=None, post=None):
     "Helper function to get data from SiteDB or local cache"
     cname = url.split('/')[-1].replace('-', '_')
     conn  = db_connection(self.dburi)
     col   = conn[self.name][cname]
     local = find_one(col, {'expire':{'$gt':expire_timestamp(time.time())}})
     data  = None
     if  local:
         msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
         self.logger.info(msg)
         try: # get data from local cache
             data = [r for r in col.find() if 'expire' not in r][0]
             del data['_id']
         except Exception as exc:
             print_exc(exc)
             data = {}
     if  not data or not local:
         headers = {'Accept':'application/json'}
         datastream, expire = getdata(\
                 url, params, headers, expire, post,
                 self.error_expire, self.verbose, self.ckey, self.cert,
                 system=self.name)
         try: # read data and write it to local cache
             data = json.load(datastream)
             datastream.close()
             col.remove()
             col.insert(data)
             col.insert({'expire':expire_timestamp(expire)})
         except Exception as exc:
             print_exc(exc)
     return data, expire
Example #18
 def fltpage(self, row):
     """Prepare filter snippet for a given query"""
     rowkeys = []
     page = ''
     if  row and row.has_key('das') and row['das'].has_key('primary_key'):
         pkey = row['das']['primary_key']
         if  pkey and (isinstance(pkey, str) or isinstance(pkey, unicode)):
             try:
                 mkey = pkey.split('.')[0]
                 if  isinstance(row[mkey], list):
                     # take first five or less entries from the list to cover
                     # possible aggregated records and extract row keys
                     lmax    = len(row[mkey]) if len(row[mkey]) < 5 else 5
                     sublist = [row[mkey][i] for i in range(0, lmax)]
                     ndict   = DotDict({mkey:sublist})
                     rowkeys = [k for k in ndict.get_keys(mkey)]
                 else:
                     rowkeys = [k for k in DotDict(row).get_keys(mkey)]
                 rowkeys.sort()
                 rowkeys += ['das.conflict']
                 dflt = das_filters() + das_aggregators()
                 dflt.remove('unique')
                 page = self.templatepage('das_filters', \
                         filters=dflt, das_keys=rowkeys)
             except Exception as exc:
                 msg = "Fail to pkey.split('.') for pkey=%s" % pkey
                 print msg
                 print_exc(exc)
                 pass
     return page
Example #19
    def __init__(self, name, config):
        self.name = name
        try:
            self.verbose      = config['verbose']
            title             = 'DASAbstactService_%s' % self.name
            self.logger       = PrintManager(title, self.verbose)
            self.dasmapping   = config['dasmapping']
            self.analytics    = config['dasanalytics']
            self.write2cache  = config.get('write_cache', True)
            self.multitask    = config['das'].get('multitask', True)
            self.error_expire = config['das'].get('error_expire', 300) 
            if  config.has_key('dbs'):
                self.dbs_global = config['dbs'].get('dbs_global_instance', None)
            else:
                self.dbs_global = None
            dburi             = config['mongodb']['dburi']
            engine            = config.get('engine', None)
            self.gfs          = db_gridfs(dburi)
        except Exception as exc:
            print_exc(exc)
            raise Exception('fail to parse DAS config')

        # read key/cert info
        try:
            self.ckey, self.cert = get_key_cert()
        except Exception as exc:
            print_exc(exc)
            self.ckey = None
            self.cert = None

        if  self.multitask:
            nworkers = config['das'].get('api_workers', 3)
            thr_weights = config['das'].get('thread_weights', [])
            for system_weight in thr_weights:
                system, weight = system_weight.split(':')
                if  system == self.name:
                    nworkers *= int(weight)
            if  engine:
                thr_name = 'DASAbstractService:%s:PluginTaskManager' % self.name
                self.taskmgr = PluginTaskManager(\
                        engine, nworkers=nworkers, name=thr_name)
                self.taskmgr.subscribe()
            else:
                thr_name = 'DASAbstractService:%s:TaskManager' % self.name
                self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
        else:
            self.taskmgr = None

        self.map        = {}   # to be defined by data-service implementation
        self._keys      = None # to be defined at run-time in self.keys
        self._params    = None # to be defined at run-time in self.parameters
        self._notations = {}   # to be defined at run-time in self.notations

        self.logger.info('initialized')
        # define internal cache manager to put 'raw' results into cache
        if  config.has_key('rawcache') and config['rawcache']:
            self.localcache   = config['rawcache']
        else:
            msg = 'Undefined rawcache, please check your configuration'
            raise Exception(msg)
Example #20
 def get_records(self, coll, spec, fields, skeys, idx, limit, unique=False):
     "Generator to get records from MongoDB."
     try:
         conn = db_connection(self.dburi)
         mdb  = conn[self.dbname]
         mdb.add_son_manipulator(self.das_son_manipulator)
         col = mdb[coll]
         nres = col.find(spec, exhaust=True).count()
         if  nres == 1 or nres <= limit:
             limit = 0
         if  limit:
             res = col.find(spec=spec, fields=fields,
                     sort=skeys, skip=idx, limit=limit)
         else:
             res = col.find(spec=spec, fields=fields,
                     sort=skeys, exhaust=True)
         if  unique:
             res = unique_filter(res)
         for row in res:
             yield row
     except Exception as exp:
         print_exc(exp)
         row = {'exception': str(exp)}
         res = []
         yield row
Example #21
 def get_status(self, dasquery):
     """
     Look up the status of the provided query in a cache.
     Return the status of the query request along with any error and reason.
     """
     status = None
     error = None
     reason = None
     if dasquery and 'fields' in dasquery.mongo_query:
         fields = dasquery.mongo_query['fields']
         if fields and isinstance(fields, list) and 'queries' in fields:
             return 'ok', error, reason
     record = self.rawcache.find(dasquery)
     error, reason = self.rawcache.is_error_in_records(dasquery)
     try:
         if record and 'das' in record and 'status' in record['das']:
             status = record['das']['status']
             if not error:
                 error = record['das'].get('error', error)
             if not reason:
                 reason = record['das'].get('reason', reason)
             return status, error, reason
     except Exception as exc:
         print_exc(exc)
         status = error = reason = None
         self.rawcache.remove_from_cache(dasquery)
     return status, error, reason
Example #22
 def pass_apicall(self, dasquery, url, api, api_params):
     """
     Filter provided apicall wrt existing apicall records in Analytics DB.
     """
     self.analytics.remove_expired()
     msg  = 'API=%s, args=%s' % (api, api_params)
     for row in self.analytics.list_apicalls(url=url, api=api):
         input_query = {'spec':api_params}
         exist_query = {'spec':row['apicall']['api_params']}
         if  compare_specs(input_query, exist_query):
             msg += '\nwill re-use existing api call with args=%s, query=%s'\
             % (row['apicall']['api_params'], exist_query)
             self.logger.info(msg)
             try:
                 # update DAS cache with empty result set
                 args = self.inspect_params(api, api_params)
                 cond   = {'das.qhash': row['apicall']['qhash']}
                 record = self.localcache.col.find_one(cond)
                 if  record and record.has_key('das') and \
                     record['das'].has_key('expire'):
                     expire = record['das']['expire']
                     self.write_to_cache(\
                             dasquery, expire, url, api, args, [], 0)
             except Exception as exc:
                 print_exc(exc)
                 msg  = 'failed api %s\n' % api
                 msg += 'input query %s\n' % input_query
                 msg += 'existing query %s\n' % exist_query
                 msg += 'Unable to look-up existing query and extract '
                 msg += 'expire timestamp'
                 raise Exception(msg)
             return False
     return True
Example #23
    def get_page_content(self, kwargs, complete_msg=True):
        """Retrieve page content for provided set of parameters"""
        page = ""
        try:
            view = kwargs.get("view", "list")
            if view == "plain":
                if "limit" in kwargs:
                    del kwargs["limit"]
            if view in ["json", "xml", "plain"] and complete_msg:
                page = "Request completed. Reload the page ..."
            else:
                head, data = self.get_data(kwargs)

                allowed_views = ["list", "table", "plain", "xml", "json"]
                if view not in allowed_views:
                    raise Exception("unsupported view: %s" % view)

                func = getattr(self, view + "view")
                page = func(head, data)
        except HTTPError as _err:
            raise
        except Exception as exc:
            print_exc(exc)
            msg = gen_error_msg(kwargs)
            page = self.templatepage("das_error", msg=msg)
        return page
Example #24
    def dbs_daemon(self, config):
        """Start DBS daemon if it is requested via DAS configuration"""
        try:
            main_dbs_url = self.dbs_url
            dbs_urls = []
            print "### DBS URL:", self.dbs_url
            print "### DBS instances:", self.dbs_instances
            if not self.dbs_url or not self.dbs_instances:
                return  # just quit
            for inst in self.dbs_instances:
                dbs_urls.append((main_dbs_url.replace(self.dbs_global, inst), inst))
            interval = config.get("dbs_daemon_interval", 3600)
            dbsexpire = config.get("dbs_daemon_expire", 3600)
            preserve_dbs_col = config.get("preserve_on_restart", False)
            dbs_config = {"expire": dbsexpire, "preserve_on_restart": preserve_dbs_col}
            if self.dataset_daemon:
                for dbs_url, inst in dbs_urls:
                    dbsmgr = DBSDaemon(dbs_url, self.dburi, dbs_config)
                    self.dbsmgr[(dbs_url, inst)] = dbsmgr

                    def dbs_updater(_dbsmgr, interval):
                        """DBS updater daemon"""
                        while True:
                            try:
                                _dbsmgr.update()
                            except:
                                pass
                            time.sleep(interval)

                    print "### Start DBSDaemon for %s" % dbs_url
                    thname = "dbs_updater:%s" % dbs_url
                    start_new_thread(thname, dbs_updater, (dbsmgr, interval))
        except Exception as exc:
            print_exc(exc)
Example #25
def sitedb_parser(source):
    """SiteDB parser"""
    if  isinstance(source, str) or isinstance(source, unicode):
        data = json.loads(source)
#     elif hasattr(source, "close") or isinstance(source, file):
    elif hasattr(source, "close"):
        # got data descriptor
        try:
            data = json.load(source)
        except Exception as exc:
            print_exc(exc)
            source.close()
            raise
        source.close()
    else:
        data = source
    if  not isinstance(data, dict):
        raise Exception('Wrong data type, %s' % type(data))
    if  'desc' in data:
        columns = data['desc']['columns']
        for row in data['result']:
            yield rowdict(columns, row)
    else:
        for row in data['result']:
            yield row
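A small usage sketch (assumed, not from the source); rowdict is expected to come from the same module and pairs column names with row values:

payload = '{"desc": {"columns": ["site", "name"]}, "result": [["T1", "FNAL"]]}'
for row in sitedb_parser(payload):
    print(row)   # e.g. a dict pairing 'site'/'name' with 'T1'/'FNAL'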
Example #26
 def init(self):
     """
     Establish connection to MongoDB back-end and create DB.
     """
     col = None
     try:
         conn = db_connection(self.dburi)
         if conn:
             dbc = conn[self.dbname]
             col = dbc[self.colname]
     #            print "### DASMapping:init started successfully"
     except ConnectionFailure as _err:
         tstamp = dastimestamp("")
         thread = threading.current_thread()
         print "### MongoDB connection failure thread=%s, id=%s, time=%s" % (thread.name, thread.ident, tstamp)
     except Exception as exc:
         print_exc(exc)
     if col:
         index = [
             ("type", DESCENDING),
             ("system", DESCENDING),
             ("urn", DESCENDING),
             ("das_map.das_key", DESCENDING),
             ("das_map.rec_key", DESCENDING),
             ("das_map.api_arg", DESCENDING),
         ]
         create_indexes(col, index)
Example #27
 def makepy(self, dataset, instance):
     """
     Request to create CMSSW py snippet for a given dataset
     """
     pat = re.compile('/.*/.*/.*')
     if  not pat.match(dataset):
         msg = 'Invalid dataset name'
         return self.error(msg)
     query = "file dataset=%s instance=%s | grep file.name" \
             % (dataset, instance)
     try:
         data   = self.dasmgr.result(query, idx=0, limit=0)
     except Exception as exc:
         print_exc(exc)
         msg    = 'Exception: %s\n' % str(exc)
         msg   += 'Unable to retrieve data for query=%s' % query
         return self.error(msg)
     lfns = []
     for rec in data:
         filename = DotDict(rec).get('file.name')
         if  filename not in lfns:
             lfns.append(filename)
     page = self.templatepage('das_files_py', lfnList=lfns, pfnList=[], isinstance=isinstance, list=list)
     cherrypy.response.headers['Content-Type'] = "text/plain"
     return page
Example #28
def create_indexes(coll, index_list):
    """
    Create indexes for provided collection/index_list and
    ensure that they are in place
    """
    index_info = coll.index_information().values()
    for pair in index_list:
        index_exists = 0
        for item in index_info:
            if item['key'] == [pair]:
                index_exists = 1
        if not index_exists:
            try:
                if isinstance(pair, list):
                    coll.create_index(pair)
                else:
                    coll.create_index([pair])
            except Exception as exp:
                print_exc(exp)
        try:
            spec = pair
            if not isinstance(pair, list):
                spec = [pair]
            coll.create_index(spec)
        except Exception as exp:
            print_exc(exp)
Example #29
    def __init__(self, query, **flags):
        """
        Accepts general form of DAS query, supported formats are
        DAS input query, DAS mongo query, DAS storage query. The
        supplied flags can carry any query attributes, e.g.
        filters, aggregators, system, instance, etc.
        """
        self._mongoparser   = None
        self._params        = {}
        self._service_apis_map = {}
        self._str           = ''
        self._query         = ''
        self._storage_query = {}
        self._mongo_query   = {}
        self._qhash         = None
        self._system        = None
        self._instance      = None
        self._loose_query   = None
        self._pattern_query = None
        self._sortkeys      = []
        self._filters       = {}
        self._mapreduce     = []
        self._aggregators   = []
        self._flags         = flags

        # loop over flags and set available attributes
        for key, val in flags.iteritems():
            setattr(self, '_%s' % key, val)

        # test data type of input query and apply appropriate initialization
        if  isinstance(query, basestring):
            self._query = query
            try:
                self._mongo_query = self.mongoparser.parse(query)
                for key, val in flags.iteritems():
                    if  key in ['mongoparser']:
                        continue
                    if  not self._mongo_query.has_key(key):
                        self._mongo_query[key] = val
            except Exception as exp:
                msg = "Fail to parse DAS query='%s'" % query
                print_exc(msg, print_traceback=False)
                raise exp
        elif isinstance(query, dict):
            newquery = {}
            for key, val in query.iteritems():
                newquery[key] = val
            if  isinstance(newquery.get('spec'), dict): # mongo query
                self._mongo_query = newquery
            else: # storage query
                self._storage_query = newquery
        elif isinstance(query, object) and hasattr(query, '__class__')\
            and query.__class__.__name__ == 'DASQuery':
            self._query = query.query
            self._mongo_query = query.mongo_query
            self._storage_query = query.storage_query
        else:
            raise Exception('Unsupported data type of DAS query')
        self.update_attr()
Example #30
 def process_requests_onhold(self):
     "Process requests which are on hold"
     try:
         limit = self.queue_limit/2
         thread.start_new_thread(onhold_worker, \
             (self.dasmgr, self.taskmgr, self.reqmgr, limit))
     except Exception as exc:
         print_exc(exc)
Example #31
    def __init__(self, name, config):
        self.name = name
        try:
            self.verbose = config['verbose']
            title = 'DASAbstactService_%s' % self.name
            self.logger = PrintManager(title, self.verbose)
            self.dasmapping = config['dasmapping']
            self.write2cache = config.get('write_cache', True)
            self.multitask = config['das'].get('multitask', True)
            self.error_expire = config['das'].get('error_expire', 300)
            self.dbs_global = None  # to be configured at run time
            self.dburi = config['mongodb']['dburi']
            engine = config.get('engine', None)
            self.gfs = db_gridfs(self.dburi)
        except Exception as exc:
            print_exc(exc)
            raise Exception('fail to parse DAS config')

        # read key/cert info
        try:
            self.ckey, self.cert = get_key_cert()
        except Exception as exc:
            print_exc(exc)
            self.ckey = None
            self.cert = None

        if self.multitask:
            nworkers = config['das'].get('api_workers', 3)
            thr_weights = config['das'].get('thread_weights', [])
            for system_weight in thr_weights:
                system, weight = system_weight.split(':')
                if system == self.name:
                    nworkers *= int(weight)
#             if  engine:
#                 thr_name = 'DASAbstractService:%s:PluginTaskManager' % self.name
#                 self.taskmgr = PluginTaskManager(\
#                         engine, nworkers=nworkers, name=thr_name)
#                 self.taskmgr.subscribe()
#             else:
#                 thr_name = 'DASAbstractService:%s:TaskManager' % self.name
#                 self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
            thr_name = 'DASAbstractService:%s:TaskManager' % self.name
            self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
        else:
            self.taskmgr = None

        self.map = {}  # to be defined by data-service implementation
        self._keys = None  # to be defined at run-time in self.keys
        self._params = None  # to be defined at run-time in self.parameters
        self._notations = {}  # to be defined at run-time in self.notations

        self.logger.info('initialized')
        # define internal cache manager to put 'raw' results into cache
        if 'rawcache' in config and config['rawcache']:
            self.localcache = config['rawcache']
        else:
            msg = 'Undefined rawcache, please check your configuration'
            raise Exception(msg)
Example #32
    def records(self, *args, **kwargs):
        """
        Retrieve all record ids.
        """
        try:
            recordid = None
            if  args:
                recordid = args[0]
                spec = {'_id':ObjectId(recordid)}
                fields = None
                query = dict(fields=fields, spec=spec)
            elif  kwargs and kwargs.has_key('_id'):
                spec = {'_id': ObjectId(kwargs['_id'])}
                fields = None
                query = dict(fields=fields, spec=spec)
            else: # return all ids
                query = dict(fields=None, spec={})

            res      = ''
            time0    = time.time()
            idx      = getarg(kwargs, 'idx', 0)
            limit    = getarg(kwargs, 'limit', 10)
            coll     = kwargs.get('collection', 'merge')
            inst     = kwargs.get('instance', self.dbs_global)
            form     = self.form(uinput="")
            check, content = self.generate_dasquery(query, inst)
            if  check:
                return self.page(form + content, ctime=time.time()-time0)
            dasquery = content # returned content is valid DAS query
            nresults = self.dasmgr.rawcache.nresults(dasquery, coll)
            gen      = self.dasmgr.rawcache.get_from_cache\
                (dasquery, idx=idx, limit=limit, collection=coll)
            if  recordid: # we got id
                for row in gen:
                    res += das_json(row)
            else:
                for row in gen:
                    rid  = row['_id']
                    del row['_id']
                    res += self.templatepage('das_record', \
                            id=rid, collection=coll, daskeys=', '.join(row))
            if  recordid:
                page  = res
            else:
                url   = '/das/records?'
                if  nresults:
                    page = self.templatepage('das_pagination', \
                        nrows=nresults, idx=idx, limit=limit, url=url)
                else:
                    page = 'No results found, nresults=%s' % nresults
                page += res

            ctime   = (time.time()-time0)
            page = self.page(form + page, ctime=ctime)
            return page
        except Exception as exc:
            print_exc(exc)
            return self.error(gen_error_msg(kwargs))
Example #33
 def run(self):
     """Run thread loop."""
     while True:
         func, args, kargs = self.tasks.get()
         try:
             func(*args, **kargs)
         except Exception as exp:
             print_exc(exp)
         self.tasks.task_done()
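For context, a minimal sketch (assumed names, not from the source) of the pool this run() loop belongs to: daemon threads consume (func, args, kwargs) tuples from a shared queue, with print_exc providing the same error reporting as above:

import threading
from Queue import Queue   # Python 2; use 'queue' on Python 3

tasks = Queue()

def worker():
    while True:
        func, args, kargs = tasks.get()
        try:
            func(*args, **kargs)
        except Exception as exp:
            print_exc(exp)
        tasks.task_done()

for _ in range(3):
    thr = threading.Thread(target=worker)
    thr.daemon = True
    thr.start()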
Example #35
 def generate_dasquery(self, uinput, inst, html_error=True):
     """
     Check that the provided input is a valid DAS query.
     Returns status and content (either an error message or a valid DASQuery).
     """
     def helper(msg, html_error=None):
         """Helper function which provide error template"""
         if  not html_error:
             return msg
         guide = self.templatepage('dbsql_vs_dasql', 
                     operators=', '.join(das_operators()))
         page = self.templatepage('das_ambiguous', msg=msg, base=self.base,
                     guide=guide)
         return page
     if  not uinput:
         return 1, helper('No input query')
     # Generate DASQuery object, if it fails we catch the exception and
     # wrap it for upper layer (web interface)
     try:
         dasquery = DASQuery(uinput, instance=inst)
     except Exception as err:
         return 1, helper(das_parser_error(uinput, str(err)), html_error)
     fields = dasquery.mongo_query.get('fields', [])
     if  not fields:
         fields = []
     spec   = dasquery.mongo_query.get('spec', {})
     for word in fields+spec.keys():
         found = 0
         if  word in DAS_DB_KEYWORDS:
             found = 1
         for key in self.daskeys:
             if  word.find(key) != -1:
                 found = 1
         if  not found:
             msg = 'Provided input does not contain a valid DAS key'
             return 1, helper(msg, html_error)
     if  isinstance(uinput, dict): # DASQuery w/ {'spec':{'_id:id}}
         pass
     elif uinput.find('queries') != -1:
         pass
     elif uinput.find('records') != -1:
         pass
     else: # normal user DAS query
         try:
             service_map = dasquery.service_apis_map()
         except Exception as exc:
             msg = 'Fail to lookup DASQuery service API map'
             print msg
             print_exc(exc)
             return 1, helper(msg, html_error)
         if  not service_map:
             msg  = "None of the API's registered in DAS "
             msg += "can resolve this query"
             return 1, helper(msg, html_error)
     return 0, dasquery
Example #36
    def __init__(self, name, config):
        self.name = name
        try:
            self.verbose = config["verbose"]
            title = "DASAbstactService_%s" % self.name
            self.logger = PrintManager(title, self.verbose)
            self.dasmapping = config["dasmapping"]
            self.write2cache = config.get("write_cache", True)
            self.multitask = config["das"].get("multitask", True)
            self.error_expire = config["das"].get("error_expire", 300)
            self.dbs_global = None  # to be configured at run time
            self.dburi = config["mongodb"]["dburi"]
            engine = config.get("engine", None)
            self.gfs = db_gridfs(self.dburi)
        except Exception as exc:
            print_exc(exc)
            raise Exception("fail to parse DAS config")

        # read key/cert info
        try:
            self.ckey, self.cert = get_key_cert()
        except Exception as exc:
            print_exc(exc)
            self.ckey = None
            self.cert = None

        if self.multitask:
            nworkers = config["das"].get("api_workers", 3)
            thr_weights = config["das"].get("thread_weights", [])
            for system_weight in thr_weights:
                system, weight = system_weight.split(":")
                if system == self.name:
                    nworkers *= int(weight)
            if engine:
                thr_name = "DASAbstractService:%s:PluginTaskManager" % self.name
                self.taskmgr = PluginTaskManager(engine, nworkers=nworkers, name=thr_name)
                self.taskmgr.subscribe()
            else:
                thr_name = "DASAbstractService:%s:TaskManager" % self.name
                self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
        else:
            self.taskmgr = None

        self.map = {}  # to be defined by data-service implementation
        self._keys = None  # to be defined at run-time in self.keys
        self._params = None  # to be defined at run-time in self.parameters
        self._notations = {}  # to be defined at run-time in self.notations

        self.logger.info("initialized")
        # define internal cache manager to put 'raw' results into cache
        if "rawcache" in config and config["rawcache"]:
            self.localcache = config["rawcache"]
        else:
            msg = "Undefined rawcache, please check your configuration"
            raise Exception(msg)
Example #37
 def add_onhold(self, pid, uinput, addr, future_tstamp):
     """Add user input to onhold collection"""
     tstamp = time.strftime("%Y%m%d %H:%M:%S", time.localtime())
     doc = dict(_id=pid, ip=addr, uinput=uinput, \
                     ts=future_tstamp, timestamp=tstamp)
     try:
         self.hold.insert(doc, safe=True)
     except DuplicateKeyError:
         pass
     except Exception as err:
         print_exc(err)
Example #38
 def filter_bar(self, dasquery):
     "Construct filter bar UI element and returned for given input"
     if  dasquery.filters:
         # if we have filter/aggregator get one row from the given query
         try:
             if  dasquery.mongo_query:
                 fltpage = self.fltpage(dasquery)
         except Exception as exc:
             fltpage = 'N/A, please check DAS record for errors'
             msg = 'Fail to apply filter to query=%s' % dasquery.query
             print(msg)
             print_exc(exc)
     else:
         fltpage = ''
     return fltpage
Example #39
    def init(self):
        """Init DAS web server, connect to DAS Core"""
        try:
            self.reqmgr     = RequestManager(lifetime=self.lifetime)
            self.dasmgr     = DASCore(engine=self.engine)
            self.repmgr     = CMSRepresentation(self.dasconfig, self.dasmgr)
            self.daskeys    = self.dasmgr.das_keys()
            self.gfs        = db_gridfs(self.dburi)
            self.daskeys.sort()
            self.dasmapping = self.dasmgr.mapping
            self.dbs_url    = self.dasmapping.dbs_url()
            self.dbs_global = self.dasmapping.dbs_global_instance()
            self.dbs_instances = self.dasmapping.dbs_instances()
            self.dasmapping.init_presentationcache()
            self.colors = {'das':gen_color('das')}
            for system in self.dasmgr.systems:
                self.colors[system] = gen_color(system)
            if  not self.daskeyslist:
                keylist = [r for r in self.dasmapping.das_presentation_map()]
                keylist.sort(key=lambda r: r['das'])
                self.daskeyslist = keylist

        except ConnectionFailure as _err:
            tstamp = dastimestamp('')
            mythr  = threading.current_thread()
            print("### MongoDB connection failure thread=%s, id=%s, time=%s" \
                    % (mythr.name, mythr.ident, tstamp))
        except Exception as exc:
            print_exc(exc)
            self.dasmgr  = None
            self.reqmgr  = None
            self.dbs_url = None
            self.dbs_global = None
            self.dbs_instances = []
            self.daskeys = []
            self.colors  = {}
            self.q_rewriter = None
            return

        # KWS and Query Rewriting failures are not fatal
        try:
            # init query rewriter, if needed
            if self.dasconfig['query_rewrite']['pk_rewrite_on']:
                self.q_rewriter = CMSQueryRewrite(self.repmgr,
                                                  self.templatepage)
        except Exception as exc:
            print_exc(exc)
            self.q_rewriter = None
Example #40
def das_populator_helper(dasmgr, query, expire):
    """Process DAS query through DAS Core and sets new expire tstamp for it"""
    try:
        # To allow re-use of queries fed by the DAS populator
        # we need to ensure that the instance is present in the DAS query,
        # since the web interface adds it by default.
        dasquery = dasmgr.adjust_query(query)
        if 'instance' not in dasquery:
            raise Exception('Supplied query does not have DBS instance')
        newts = expire_timestamp(expire)
        # process DAS query
        dasmgr.call(dasquery)
        # update DAS expire timestamp
        dasmgr.rawcache.update_das_expire(dasquery, newts)
        print("\n### DAS populator", query, dasquery, expire, newts)
    except Exception as exc:
        print_exc(exc)
Example #41
 def init(self):
     """Init DAS web server, connect to DAS Core"""
     try:
         self.dasmgr = DASCore(multitask=False)
         self.dbs_instances = self.dasmgr.mapping.dbs_instances()
         self.dbs_global = self.dasmgr.mapping.dbs_global_instance()
         if KeywordSearchHandler:
             self.kws = KeywordSearchHandler(self.dasmgr)
     except ConnectionFailure:
         tstamp = dastimestamp('')
         mythr = threading.current_thread()
         print("### MongoDB connection failure thread=%s, id=%s, time=%s" \
               % (mythr.name, mythr.ident, tstamp))
     except Exception as exc:
         print_exc(exc)
         self.dasmgr = None
         self.kws = None
Example #42
def query_db(dbname, dbcol, query, idx=0, limit=10):
    """
    query a given db collection
    """

    conn = db_connection(get_db_uri())
    col = conn[dbname][dbcol]

    if col:
        try:
            if limit == -1:
                for row in col.find(query, **PYMONGO_OPTS):
                    yield row
            else:
                for row in col.find(query).skip(idx).limit(limit):
                    yield row
        except Exception as exc:  # we shall not catch GeneratorExit
            print_exc(exc)
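A hedged usage sketch; get_db_uri and PYMONGO_OPTS are assumed to be defined in the same module, and the database/collection/spec below are placeholders:

for rec in query_db('das', 'cache', {'das.system': 'dbs3'}, idx=0, limit=5):
    print(rec.get('qhash'))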
Example #43
def quote(data):
    """
    Sanitize the data using cgi.escape.
    """
    if isinstance(data, (int, long, float, dict, list)):
        res = data
    elif isinstance(data, ObjectId):
        res = str(data)
    else:
        try:
            if data:
                res = cgi.escape(data, quote=True)
            else:
                res = ""
        except Exception as exc:
            print_exc(exc)
            print("Unable to cgi.escape(%s, quote=True)" % data)
            res = ""
    return res
Example #44
 def delete(self, system=None):
     """
     Delete expired documents in das.cache.
     """
     spec = {'das.expire':{'$lt':time.time()}}
     if  system:
         spec['das.system'] = system
     msg = "Found %s expired documents" % self.cache.find(spec).count()
     try:
         if  pymongo.version.startswith('3.'): # pymongo 3.X
              self.cache.delete_many(spec)
         else:
             self.cache.remove(spec)
         msg += ", delete operation [OK]"
         print(msg)
     except Exception as exc:
         msg += ", delete operation [FAIL]"
         print(msg)
         print_exc(exc)
Example #45
 def gridfs(self, **kwargs):
     """
     Retrieve records from GridFS.
     """
     time0 = time.time()
     if  'fid' not in kwargs:
         code = web_code('No file id')
         raise HTTPError(500, 'DAS error, code=%s' % code)
     fid  = kwargs.get('fid')
     data = {'status':'requested', 'fid':fid}
     try:
         fds = self.gfs.get(ObjectId(fid))
         return fds.read()
     except Exception as exc:
         print_exc(exc)
         code = web_code('Exception')
         raise HTTPError(500, 'DAS error, code=%s' % code)
     data['ctime'] = time.time() - time0
     return json.dumps(data)
Example #46
def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if  entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire  = 600
    dataset = kwds.get('dataset', None)
    block   = kwds.get('block_name', None)
    if  not block:
        # TODO: this should go away when DBS is retired (used in combined srv)
        block = kwds.get('block', None)
    lfn     = kwds.get('file', None)
    runs    = kwds.get('runs', [])
    if  not (dataset or block or lfn):
        return
    url = '%s/%ss' % (url, entity) # DBS3 APIs use plural entity value
    if  dataset:
        params = {'dataset':dataset}
    elif block:
        params = {'block_name': block}
    elif lfn:
        params = {'logical_file_name': lfn}
    if  runs:
        params.update({'run_num': runs})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if  isinstance(rec, basestring):
                    print(dastimestamp('DBS3 ERROR:'), row)
                elif  entity == 'file':
                    yield rec['logical_file_name']
                elif  entity == 'block':
                    yield rec['block_name']
                elif  entity == 'run':
                    # assumption: the source duplicated the 'file' test here;
                    # 'run' is the remaining supported entity and DBS3 run
                    # records carry 'run_num'
                    yield rec['run_num']
            except Exception as exp:
                msg = 'Fail to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)
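A usage sketch under assumptions: CKEY/CERT are module-level globals (e.g. read via get_key_cert()), and the DBS3 URL and dataset name below are placeholders:

url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
for lfn in dbs_find('file', url, {'dataset': '/A/B/RAW'}):
    print(lfn)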
Example #47
    def check_pid(self, pid):
        """
        Check status of given pid. This is a server callback
        function for ajaxCheckPid, see js/ajax_utils.js
        """
        # do not allow caching
        set_no_cache_flags()

        img  = '<img src="%s/images/loading.gif" alt="loading"/>' % self.base
        page = ''
        try:
            if  self.taskmgr.is_alive(pid):
                page = img + " processing PID=%s" % pid
            else:
                # at this point we don't know if request arrived to this host
                # or it was processed. To distinguish the case we'll ask
                # request manager for that pid
                if  self.reqmgr.has_pid(pid):
                    self.reqmgr.remove(pid)
                    self.taskmgr.remove(pid)
                    page  = 'Request PID=%s is completed' % pid
                    page += ', please wait for results to load'
                else:
                    # there's no request on this server, re-initiate it
                    ref = cherrypy.request.headers.get('Referer', None)
                    if  ref:
                        url = urlparse(ref)
                        params = dict(parse_qsl(url.query))
                        return self.request(**params)
                    else:
                        msg  = 'No referer in cherrypy.request.headers'
                        msg += '\nHeaders: %s' % cherrypy.request.headers
                        dasprint(dastimestamp('DAS WEB ERROR '), msg)
        except Exception as err:
            msg = 'check_pid fails for pid=%s' % pid
            dasprint(dastimestamp('DAS WEB ERROR '), msg)
            print_exc(err)
            self.reqmgr.remove(pid)
            self.taskmgr.remove(pid)
            return self.error(gen_error_msg({'pid':pid}), wrap=False)
        return page
Example #48
    def apicall(self, dasquery, url, api, args, dformat, expire):
        """
        A service worker. It parses the input query, invokes the service
        API and returns the results in a list with the provided row.
        """
        # NOTE: I use helper function since it is 2 step process
        # therefore the expire time stamp will not be changed, since
        # helper function will yield results
        time0 = time.time()
        if  api == 'dataset4site_release' or \
            api == 'site4dataset' or api == 'files4dataset_runs_site':
            genrows = self.helper(api, args, expire)
        # here I call the service directly since it returns a proper
        # expire timestamp. Moreover, I use the HTTP Expires header to
        # adjust my expire parameter accordingly


# NOTE: disable dataset4site, lumi4site since they take too much load
#       see combined.yml
#        if  api == 'dataset4site':
#            headers = {'Accept': 'application/json;text/json'}
#            datastream, expire = \
#                    getdata(url, args, headers, expire, system='combined')
#            genrows = parse_data(datastream)
#        if  api == 'lumi4dataset':
#            headers = {'Accept': 'application/json;text/json'}
#            data, expire = \
#                    getdata(url, args, headers, expire, system='combined')
#            genrows = json_parser(data, None)

# proceed with standard workflow
        ctime = time.time() - time0
        try:
            if isinstance(url, dict):
                url = "combined: %s" % url.values()
            self.write_to_cache(dasquery, expire, url, api, \
                    args, genrows, ctime)
        except Exception as exc:
            print_exc(exc)
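The commented-out branches above rely on the service's HTTP Expires header; a minimal sketch of that adjustment (the helper name and default TTL are mine, not DAS code):

# Sketch: derive an expire timestamp from an HTTP Expires header.
import time
import calendar
from email.utils import parsedate

def expire_from_header(headers, default_ttl=600):
    "Return a unix timestamp at which the response expires."
    parsed = parsedate(headers.get('Expires', ''))
    if parsed:
        return calendar.timegm(parsed)
    return time.time() + default_ttl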
Example #49
 def get_new_connection(self, uri):
     "Get new MongoDB connection"
     key = self.genkey(uri)
     for idx in range(0, self.retry):
         try:
             dbinst = MongoClient(host=uri, **self.mongo_opts)
             #                dbinst = MongoConnection(uri, **self.mongo_opts).client()
             gfs = dbinst.gridfs
             fsinst = gridfs.GridFS(gfs)
             self.conndict[key] = (dbinst, fsinst)
             self.timedict[key] = time.time()
             return (dbinst, fsinst)
         except (ConnectionFailure, AutoReconnect) as exc:
             tstamp = dastimestamp('')
             thread = threading.current_thread()
             print("### MongoDB connection failure thread=%s, id=%s, time=%s" \
                     % (thread.name, thread.ident, tstamp))
             print_exc(exc)
         except Exception as exc:
             print_exc(exc)
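         # NB: linear back-off; sleeps 0s after the first attempt, then 1s, 2s, ...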
         time.sleep(idx)
     return self.conndict.get(key, (None, None))
Example #50
 def get_records(self, coll, spec, fields, skeys, idx, limit, unique=False):
     "Generator to get records from MongoDB."
     try:
         conn = db_connection(self.dburi)
         mdb = conn[self.dbname]
         mdb.add_son_manipulator(self.das_son_manipulator)
         col = mdb[coll]
         nres = col.find(spec, **PYMONGO_OPTS).count()
         if nres == 1 or nres <= limit:
             limit = 0
         if limit:
             res = col.find(spec, fields, sort=skeys, skip=idx, limit=limit)
         else:
             res = col.find(spec, fields, sort=skeys, **PYMONGO_OPTS)
         if unique:
             res = unique_filter(res)
         for row in res:
             yield row
     except Exception as exp:
         print_exc(exp)
         row = {'exception': str(exp)}
         res = []
         yield row
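For reference, the same skip/limit pagination pattern with bare pymongo; the connection URI and collection names here are assumptions, not DAS configuration:

# Standalone sketch of paginated reads from MongoDB.
from pymongo import MongoClient

client = MongoClient('mongodb://localhost:27017')
coll = client['das']['cache']
for row in coll.find({'das.status': 'ok'}).sort('_id', 1).skip(0).limit(10):
    print(row.get('_id'))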
Example #51
 def run(self):
     """Run thread loop."""
     while True:
         if  self.exit:
             return
         if  isinstance(self._tasks, PriorityQueue):
             _, uid, task = self._tasks.get()
         else:
             task = self._tasks.get()
         if  task is None:
             return
         evt, pid, func, args, kwargs = task
         try:
             if  isinstance(self._tasks, PriorityQueue):
                 self._uids.discard(uid)
             func(*args, **kwargs)
             self._pids.discard(pid)
         except Exception as err:
             self._pids.discard(pid)
             print_exc(err)
             print("\n### args", func, args, kwargs)
         self._tasks.task_done()
         evt.set()
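The producer side implied by this loop, as a sketch (names are assumptions): each task is an (evt, pid, func, args, kwargs) tuple, and a None task acts as a stop sentinel:

# Hypothetical producer for the worker loop above.
import threading
from Queue import Queue  # Python 2 spelling, matching this code base

def greet(name):
    print("hello %s" % name)

tasks = Queue()
evt = threading.Event()
tasks.put((evt, 'pid-1', greet, ('DAS',), {}))  # consumed by run() above
tasks.put(None)                                 # tells the worker to exit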
Example #52
 def get_result_fieldlist(self, row):
     rowkeys = []
     if  row and 'das' in row  and 'primary_key' in row['das']:
         pkey = row['das']['primary_key']
         if  pkey and (isinstance(pkey, str) or isinstance(pkey, unicode)):
             try:
                 mkey = pkey.split('.')[0]
                 if  mkey not in row:
                     return []
                 if  isinstance(row[mkey], list):
                     # take the first ten or fewer entries from the list to
                     # cover possible aggregated records and extract row keys
                     ndict   = DotDict({mkey: row[mkey][:10]})
                     rowkeys = list(ndict.get_keys(mkey))
                 else:
                     rowkeys = list(DotDict(row).get_keys(mkey))
                 rowkeys.sort()
                 rowkeys += ['das.conflict']
             except Exception as exc:
                 # TODO: pkey.split fails only if called on a non-string
                 msg = "Fail to pkey.split('.') for pkey=%s" % pkey
                 print(msg)
                 print_exc(exc)
     return rowkeys
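A plain-Python approximation of the dotted-key extraction DotDict.get_keys performs above (a sketch, not the DAS implementation):

def dotted_keys(rec, base=''):
    "Yield dotted key paths for a nested dictionary."
    for key, val in rec.items():
        name = '%s.%s' % (base, key) if base else key
        yield name
        if isinstance(val, dict):
            for sub in dotted_keys(val, name):
                yield sub

print(sorted(dotted_keys({'block': {'name': 'x', 'size': 1}})))
# ['block', 'block.name', 'block.size']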
Example #53
def das_json(dasquery, record, pad='', full=False):
    """
    Wrap provided jsonhtml code snippet into div/pre blocks. Provided jsonhtml
    snippet is sanitized by json2html function.
    """
    error = None
    if full:
        return das_json_full(record, pad)
    mquery = dasquery.mongo_query
    daskeys = ['das_id', 'cache_id', 'qhash', 'das', '_id']
    fields = mquery.get('fields', None)
    if fields:
        lkeys = [l for l in fields if l not in daskeys]
    else:
        lkeys = []
    # get das.systems and primary key
    das = record['das']
    if 'error' in record:
        error = {
            'error': record.get('error'),
            'reason': record.get('reason', '')
        }
    srvs = das.get('system', [])
    apis = das.get('api', [])
    prim_key = das.get('primary_key', '').split('.')[0]
    if not srvs or not prim_key or len(apis) != len(srvs):
        return das_json_full(record, pad)
    try:
        pval = record[prim_key]
    except Exception as exc:
        return das_json_full(record, pad)
    if isinstance(pval, list) and len(pval) != len(srvs):
        return das_json_full(record, pad)
    if not isinstance(pval, list):
        return das_json_full(record, pad)
    try:
        page = '<div class="code">'
        for idx in range(0, len(srvs)):
            srv = srvs[idx]
            api = apis[idx]
            if lkeys:
                rec = {prim_key: pval[idx]}
                for lkey in [l for l in lkeys if l != prim_key]:
                    if lkey != 'error' and lkey != 'reason':
                        rec[lkey] = record[lkey][idx]
                val = das_json_full(rec)
            else:
                val = das_json_full(pval[idx])
            style = 'background-color:%s;color:%s;' % gen_color(srv)
            page += '\n<b>DAS service:</b> '
            page += '<span style="%s;padding:3px">%s</span> ' % (style, srv)
            if srv == 'combined':
                page += das_json_services(srv, das)
            page += '<b>DAS api:</b> %s' % api
            page += '\n<pre style="%s">%s</pre>' % (style, val)
        page += '\n<b>DAS part:</b><pre>%s</pre>' % das_json_full(das)
        if error:
            page += '\n<b>Errors:</b><pre>%s</pre>' % das_json_full(error)
        rhash = {
            'qhash': record.get('qhash', None),
            'das_id': record.get('das_id', None),
            'cache_id': record.get('cache_id', None)
        }
        page += '<b>Hashes</b>: <pre>%s</pre>' % das_json_full(rhash)
        rlink = '/das/records/%s?collection=merge&view=json' % record['_id']
        page += '<br/>Download <a href="%s">raw record</a>' % rlink
        page += '</div>'
    except Exception as exc:
        print_exc(exc)
        return das_json_full(record, pad)
    return page
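das_json assumes gen_color(srv) returns a (background, foreground) pair; one plausible hash-based implementation, shown only as a sketch:

# Sketch of a gen_color-style helper (not the DAS implementation):
import hashlib

def gen_color(system):
    "Return a stable (background, foreground) CSS color pair for a service."
    bgcolor = hashlib.md5(system.encode('utf-8')).hexdigest()[:6]
    fgcolor = 'white' if int(bgcolor, 16) < 0x7fffff else 'black'
    return '#%s' % bgcolor, fgcolor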
Example #54
    def apicall(self, dasquery, url, api, args, dformat, expire):
        """
        A service worker. It parses the input query, invokes the service API
        and returns results in a list with the provided row.
        """
        cond = dasquery.mongo_query['spec']
        count = 0
        for key, value in cond.items():
            err = 'JobSummary does not support key=%s, value=%s' \
                    % (key, value)
            if not isinstance(value, dict):  # we got equal condition
                if key == 'date':
                    if isinstance(value, list) and len(value) != 2:
                        msg = 'Dashboard service requires 2 time stamps. '
                        msg += 'Please use either the "date last XXh" format '
                        msg += 'or a date in [YYYYMMDD, YYYYMMDD]'
                        raise Exception(msg)
                    if isinstance(value, str) or isinstance(value, unicode):
                        value = convert2date(value)
                    else:
                        value = [value, value + 24 * 60 * 60]
                    args['date1'] = convert_datetime(value[0])
                    args['date2'] = convert_datetime(value[1])
                    count += 1
                else:
                    for param in self.dasmapping.das2api(self.name, api, key):
                        args[param] = value
                        count += 1
            else:  # we got some operator, e.g. key :{'$in' : [1,2,3]}
                if key == 'date' or key == 'jobsummary':
                    if '$in' in value:
                        vallist = value['$in']
                    elif '$lte' in value and '$gte' in value:
                        vallist = (value['$gte'], value['$lte'])
                    else:
                        raise Exception(err)
                    args['date1'] = convert_datetime(vallist[0])
                    args['date2'] = convert_datetime(vallist[-1])
                    count += 1
                else:
                    raise Exception(err)
        if not count:
            # if no parameters are given, don't call the API
            msg = 'DashboardService::api\n\n'
            msg += "--- %s reject API %s, parameters don't match, args=%s" \
                    % (self.name, api, args)
            self.logger.info(msg)
            return
        else:
            if not args['date1']:
                args['date1'] = convert_datetime(time.time() - 24 * 60 * 60)
            if not args['date2']:
                args['date2'] = convert_datetime(time.time())
        # drop date argument, since it's used by DAS not by dashboard data srv
        if 'date' in args:
            args.pop('date')

        time0 = time.time()
        res, expire = self.getdata(url, args, expire, headers=self.headers)
        rawrows = self.parser(res, api, args)
        dasrows = self.translator(api, rawrows)
        ctime = time.time() - time0
        try:
            self.write_to_cache(\
                dasquery, expire, url, api, args, dasrows, ctime)
        except Exception as exc:
            print_exc(exc)
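convert_datetime and convert2date are DAS helpers not shown here; a hedged sketch of the conversion assumed above (the exact output format the dashboard service expects is an assumption):

import time

def convert_datetime(sec):
    "Render epoch seconds as a date string for the data service."
    return time.strftime('%Y-%m-%d', time.gmtime(sec))

# default window used above: the last 24 hours
args = {'date1': convert_datetime(time.time() - 24 * 60 * 60),
        'date2': convert_datetime(time.time())}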
Example #55
 def helper(self, api, args, expire):
     """
     Class helper function which yields results for the given
     set of input parameters. It yields data records which
     must contain a combined attribute corresponding to the systems
     used to produce the record content.
     """
     dbs_url = self.map[api]['services'][self.dbs]
     phedex_url = self.map[api]['services']['phedex']
     # make phedex_api from url, but use xml version for processing
     phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
     if  api == 'dataset4site_release' or \
         api == 'dataset4site_release_parent' or \
         api == 'child4site_release_dataset':
         # DBS part
         datasets = set()
         release = args['release']
         parent = args.get('parent', None)
         for row in dbs_dataset4release_parent(dbs_url, release, parent):
             datasets.add(row)
         # Phedex part
         if args['site'].find('.') != -1:  # it is SE
             phedex_args = {
                 'dataset': list(datasets),
                 'se': '%s' % args['site']
             }
         else:
             phedex_args = {
                 'dataset': list(datasets),
                 'node': '%s*' % args['site']
             }
         headers = {'Accept': 'text/xml'}
         source, expire = \
             getdata(phedex_api, phedex_args, headers, expire, system='phedex')
         prim_key = 'block'
         tags = 'block.replica.node'
         found = {}
         for rec in xml_parser(source, prim_key, tags):
             ddict = DotDict(rec)
             block = ddict.get('block.name')
             bbytes = ddict.get('block.bytes')
             files = ddict.get('block.files')
             found_dataset = block.split('#')[0]
             if found_dataset in found:
                 val = found[found_dataset]
                 found[found_dataset] = {
                     'bytes': val['bytes'] + bbytes,
                     'files': val['files'] + files
                 }
             else:
                 found[found_dataset] = {'bytes': bbytes, 'files': files}
         for name, val in found.items():
             record = dict(name=name, size=val['bytes'], files=val['files'])
             if api == 'child4site_release_dataset':
                 yield {'child': record}
             else:
                 yield {'dataset': record}
         del datasets
         del found
     if api == 'site4dataset':
         try:
             gen = site4dataset(dbs_url, phedex_api, args, expire)
             for row in gen:
                 sname = row.get('site', {}).get('name', '')
                 skind = self.site_info(phedex_url, sname)
                 row['site'].update({'kind': skind})
                 yield row
         except Exception as err:
             print_exc(err)
             tstamp = dastimestamp('')
             msg = tstamp + ' Exception while processing DBS/Phedex info:'
             msg += str(err)
             row = {
                 'site': {
                     'name': 'Fail to look-up site info',
                     'error': msg,
                     'dataset_fraction': 'N/A',
                     'block_fraction': 'N/A',
                     'block_completion': 'N/A'
                 },
                 'error': msg
             }
             yield row
     if  api == 'files4dataset_runs_site' or \
         api == 'files4block_runs_site':
         run_value = args.get('run', [])
         if isinstance(run_value, dict) and '$in' in run_value:
             runs = run_value['$in']
         elif isinstance(run_value, list):
             runs = run_value
         else:
             if int_number_pattern.match(str(run_value)):
                 runs = [run_value]
             else:
                 runs = []
         args.update({'runs': runs})
         files = dbs_find('file', dbs_url, args)
         site = args.get('site')
         phedex_api = phedex_url.replace('/json/',
                                         '/xml/') + '/fileReplicas'
         for fname in files4site(phedex_api, files, site):
             yield {'file': {'name': fname}}
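A standalone sketch of the block-to-dataset aggregation used in the dataset4site_release branch above: block names follow '<dataset>#<uuid>', so summing per prefix yields per-dataset totals (the sample values are made up):

blocks = [('/A/B/RAW#abc', 10, 2), ('/A/B/RAW#def', 5, 1)]
found = {}
for name, nbytes, nfiles in blocks:
    dataset = name.split('#')[0]
    val = found.setdefault(dataset, {'bytes': 0, 'files': 0})
    val['bytes'] += nbytes
    val['files'] += nfiles
print(found)  # {'/A/B/RAW': {'bytes': 15, 'files': 3}}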
Example #56
    def __init__(self, query, **flags):
        """
        Accepts general form of DAS query, supported formats are
        DAS input query, DAS mongo query, DAS storage query. The
        supplied flags can carry any query attributes, e.g.
        filters, aggregators, system, instance, etc.
        """
        check_query(query)
        self._mongoparser = None
        self._params = {}
        self._service_apis_map = {}
        self._str = ''
        self._query = ''
        self._query_pat = ''
        self._query_full = ''
        self._storage_query = {}
        self._mongo_query = {}
        self._qhash = None
        self._hashes = None
        self._system = None
        self._instance = None
        self._loose_query = None
        self._pattern_query = None
        self._sortkeys = []
        self._filters = {}
        self._mapreduce = []
        self._aggregators = []
        self._qcache = 0
        self._flags = flags
        self._error = ''

        # loop over flags and set available attributes
        for key, val in flags.items():
            setattr(self, '_%s' % key, val)

        # test data type of input query and apply appropriate initialization
        if isinstance(query, basestring):
            self._query = query
            try:
                self._mongo_query = self.mongoparser.parse(query)
                for key, val in flags.items():
                    if key in self.NON_CACHEABLE_FLAGS:
                        continue
                    if key not in self._mongo_query:
                        self._mongo_query[key] = val
            except Exception as exp:
                msg = "Fail to parse DAS query='%s', %s" % (query, str(exp))
                print_exc(msg, print_traceback=True)
                self._mongo_query = {'error': msg, 'spec': {}, 'fields': []}
                self._storage_query = {'error': msg}
                self._error = msg
#                 raise exp
        elif isinstance(query, dict):
            newquery = {}
            for key, val in query.items():
                newquery[key] = val
            if isinstance(newquery.get('spec'), dict):  # mongo query
                self._mongo_query = newquery
            else:  # storage query
                self._storage_query = newquery
        elif isinstance(query, object) and hasattr(query, '__class__')\
            and query.__class__.__name__ == 'DASQuery':
            self._query = query.query
            self._query_pat = query.query_pat
            self._hashes = query.hashes
            self._mongo_query = query.mongo_query
            self._storage_query = query.storage_query
        else:
            #             raise Exception('Unsupported data type of DAS query')
            self._error = 'Unsupported data type of DAS query'
        if self._error:
            return
        self.update_attr()

        # check dataset wild-cards
        for key, val in self._mongo_query['spec'].items():
            if key == 'dataset.name':
                if isinstance(val, dict):  # we get {'$in':[a,b]}
                    continue
                # only match dataset.name, not primary_dataset.name
                if not RE_3SLASHES.match(val):

                    # TODO: we currently do not support wildcard matching
                    #       from command line interface
                    if not self._instance:
                        continue

                    # apply the 3-slash pattern look-up, continuing only if
                    # exactly one interpretation exists here; see ticket #3071
                    self._handle_dataset_slashes(key, val)
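A hedged sketch of the RE_3SLASHES check used above (the exact pattern is an assumption): a fully qualified CMS dataset path has exactly three '/'-separated parts:

import re

RE_3SLASHES = re.compile(r'^/[^/]+/[^/]+/[^/]+$')
print(bool(RE_3SLASHES.match('/Primary/Processed-v1/RAW')))  # True
print(bool(RE_3SLASHES.match('/Primary*')))  # False -> wildcard look-up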
Example #57
    def __init__(self,
                 config=None,
                 debug=0,
                 nores=False,
                 logger=None,
                 engine=None,
                 multitask=True):
        if config:
            dasconfig = config
        else:
            dasconfig = das_readconfig()
        verbose = dasconfig['verbose']
        self.stdout = debug
        if isinstance(debug, int) and debug:
            self.verbose = debug
            dasconfig['verbose'] = debug
        else:
            self.verbose = verbose
        das_timer('DASCore::init', self.verbose)
        self.operators = das_operators()
        self.collect_wait_time = dasconfig['das'].get('collect_wait_time', 120)

        # set noresults option
        self.noresults = False
        if nores:
            dasconfig['write_cache'] = True
            self.noresults = nores

        self.init_expire = dasconfig['das'].get('init_expire', 5 * 60)
        self.multitask = dasconfig['das'].get('multitask', True)
        if debug or self.verbose:
            self.multitask = False  # in verbose mode do not use multitask
            dasconfig['das']['multitask'] = False
        if not multitask:  # explicitly call DASCore ctor
            self.multitask = False
            dasconfig['das']['multitask'] = False
        dasconfig['engine'] = engine
        if self.multitask:
            nworkers = dasconfig['das'].get('core_workers', 5)
            #             if  engine:
            #                 thr_name = 'DASCore:PluginTaskManager'
            #                 self.taskmgr = PluginTaskManager(\
            #                         engine, nworkers=nworkers, name=thr_name)
            #                 self.taskmgr.subscribe()
            #             else:
            #                 thr_name = 'DASCore:TaskManager'
            #                 self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
            thr_name = 'DASCore:TaskManager'
            self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
        else:
            self.taskmgr = None

        if logger:
            self.logger = logger
        else:
            self.logger = PrintManager('DASCore', self.verbose)

        # define Mapping/Analytics/Parser in this order since Parser depends
        # on first two
        dasmapping = DASMapping(dasconfig)
        dasconfig['dasmapping'] = dasmapping
        self.mapping = dasmapping

        self.keylearning = DASKeyLearning(dasconfig)
        dasconfig['keylearning'] = self.keylearning

        # init DAS cache
        self.rawcache = DASMongocache(dasconfig)
        dasconfig['rawcache'] = self.rawcache

        # plug-in architecture: loop over registered data-services in
        # dasconfig; load appropriate module/class; register data
        # service with DASCore.
        self.systems = dasmapping.list_systems()
        # pointer to the DAS top level directory
        dasroot = '/'.join(__file__.split('/')[:-3])
        for name in self.systems:
            try:
                klass  = 'DAS/services/%s/%s_service.py' \
                    % (name, name)
                srvfile = os.path.join(dasroot, klass)
                with open(srvfile) as srvclass:
                    for line in srvclass:
                        if line.find('(DASAbstractService)') != -1:
                            klass = line.split('(DASAbstractService)')[0]
                            klass = klass.split('class ')[-1]
                            break
                mname = 'DAS.services.%s.%s_service' % (name, name)
                module = __import__(mname, fromlist=[klass])
                obj = getattr(module, klass)(dasconfig)
                setattr(self, name, obj)
            except IOError as err:
                if debug > 1:
                    # we have virtual services, so an IOError can be expected
                    print_exc(err)
                try:
                    mname = 'DAS.services.generic_service'
                    module = __import__(mname, fromlist=['GenericService'])
                    obj = module.GenericService(name, dasconfig)
                    setattr(self, name, obj)
                except Exception as exc:
                    print_exc(exc)
                    msg = "Unable to load %s data-service plugin" % name
                    raise Exception(msg)
            except Exception as exc:
                print_exc(exc)
                msg = "Unable to load %s data-service plugin" % name
                raise Exception(msg)

        # loop over systems and get system keys, add mapping keys to final list
        self.service_keys = {}
        self.service_parameters = {}
        for name in self.systems:
            skeys = list(getattr(self, name).keys())
            self.service_keys[getattr(self, name).name] = skeys
            sparams = getattr(self, name).parameters()
            self.service_parameters[getattr(self, name).name] = sparams

        self.service_keys['special'] = das_special_keys()
        self.dasconfig = dasconfig
        das_timer('DASCore::init', self.verbose)
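Finally, a minimal usage sketch of this constructor; it assumes a valid DAS configuration is available, and the import path is the conventional one:

from DAS.core.das_core import DASCore  # assumed location

core = DASCore(debug=0, multitask=False)
print(core.systems)       # data-services registered as plugins
print(core.service_keys)  # DAS keys supported by each service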