Example 1
File: das_core.py Project: ktf/DAS
    def insert_query_records(self, dasquery):
        """
        Insert DAS query records into DAS cache and return list of services
        which will answer this query
        """
        services = dasquery.services
        self.logger.info('Potential services = %s' % services)
        if  not services:
            msg  = 'No data-services for query %s' % dasquery
            msg += 'mongo_query: %s' % dasquery.mongo_query
            msg += 'params: %s' % dasquery.params()
            print dastimestamp('DAS WARNING '), msg

        # get list of URI which can answer this query
        ack_services = []
        for srv in services:
            gen = [t for t in getattr(getattr(self, srv), 'apimap')(dasquery)]
            for url, api, args, iformat, expire in gen:
                header = dasheader(srv, dasquery, expire, api, url, ctime=0)
                self.rawcache.insert_query_record(dasquery, header)
                if  srv not in ack_services:
                    ack_services.append(srv)
        if  not ack_services:
            ack_services = services
        if  dasquery.query.find('records ') != -1:
            srv_status = True # skip DAS queries w/ records request
        expire = 2*60 # 2 minutes, it should be overwritten by data-srv
        header = dasheader("das", dasquery, expire, api='das_core',
                services=dict(das=ack_services))
        header['lookup_keys'] = []
        self.rawcache.insert_query_record(dasquery, header)
        das_timer('das_record', self.verbose)
        return ack_services
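All of the snippets in this listing log through a dastimestamp helper that prefixes messages with a human-readable timestamp. Its exact signature lives in the DAS utilities; the stand-in below is only a minimal sketch of the assumed behaviour, useful when reading the examples:

import time

def dastimestamp(msg='DAS'):
    # illustrative stand-in: prefix the given tag with the current time;
    # the real DAS helper may format or order the fields differently
    tstamp = time.strftime('%Y%m%d %H:%M:%S GMT', time.gmtime())
    return '%s %s' % (msg, tstamp)

print(dastimestamp('DAS WARNING '), 'no data-services for query')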
Example 2
 def insert_query_record(self, dasquery, header):
     """
     Insert query record into DAS cache.
     """
     # check presence of API record in a cache
     dasheader   = header['das']
     system      = dasheader['system']
     api         = dasheader['api']
     collection  = 'cache'
     check_query = True
     expire = dasheader.get('expire', None)
     if  expire:
         dasheader['expire'] = adjust_expire(expire)
     if  not self.incache(dasquery, collection, system, api, check_query):
         msg = "query=%s, header=%s" % (dasquery, header)
         self.logger.debug(msg)
         q_record = dict(das=dasheader, query=dasquery.storage_query)
         q_record['das']['record'] = record_codes('query_record')
         q_record['das']['status'] = "requested"
         q_record['qhash'] = dasquery.qhash
         q_record['das']['ctime'] = [time.time()]
         res = self.col.insert_one(q_record)
         if  not res:
             msg = 'unable to insert query record'
             print(dastimestamp('DAS ERROR '), dasquery, msg, ', will retry')
             time.sleep(1)
             res = self.col.insert(q_record)
             if  not res:
                 print(dastimestamp('DAS ERROR '), dasquery, msg)
Example 3
 def insert_query_record(self, dasquery, header):
     """
     Insert query record into DAS cache.
     """
     # check presence of API record in a cache
     dasheader = header['das']
     system = dasheader['system']
     api = dasheader['api']
     collection = 'cache'
     check_query = True
     expire = dasheader.get('expire', None)
     if expire:
         dasheader['expire'] = adjust_expire(expire)
     if not self.incache(dasquery, collection, system, api, check_query):
         msg = "query=%s, header=%s" % (dasquery, header)
         self.logger.debug(msg)
         q_record = dict(das=dasheader, query=dasquery.storage_query)
         q_record['das']['record'] = record_codes('query_record')
         q_record['das']['status'] = "requested"
         q_record['qhash'] = dasquery.qhash
         q_record['das']['ctime'] = [time.time()]
         res = self.col.insert_one(q_record)
         if not res:
             msg = 'unable to insert query record'
             print(dastimestamp('DAS ERROR '), dasquery, msg,
                   ', will retry')
             time.sleep(1)
             res = self.col.insert(q_record)
             if not res:
                 print(dastimestamp('DAS ERROR '), dasquery, msg)
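Examples 2 and 3 hand-roll the same insert-then-retry-once pattern around the MongoDB insert call. A generic sketch of that pattern follows; the helper name and its arguments are illustrative, not part of DAS:

import time

def insert_with_retry(insert_func, record, retries=1, delay=1.0):
    # try the insert; on a falsy result or an exception wait and retry,
    # mirroring the structure of insert_query_record above
    res = None
    for attempt in range(retries + 1):
        try:
            res = insert_func(record)
        except Exception as exc:
            print('insert failed: %s' % exc)
            res = None
        if res:
            return res
        if attempt < retries:
            time.sleep(delay)
    print('unable to insert record after %s attempt(s)' % (retries + 1))
    return res

# usage with a stub that fails once and then succeeds
calls = {'n': 0}
def flaky_insert(doc):
    calls['n'] += 1
    return None if calls['n'] == 1 else {'inserted_id': 1}

print(insert_with_retry(flaky_insert, {'qhash': 'abc'}))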
Example 4
 def datasets_dbs(self):
     """
     Retrieve a list of DBS datasets (DBS2)
     """
     query = "find dataset,dataset.status"
     params = {"api": "executeQuery", "apiversion": "DBS_2_0_9", "query": query}
     encoded_data = urllib.urlencode(params, doseq=True)
     url = self.dbs_url + "?" + encoded_data
     req = urllib2.Request(url)
     try:
         stream = urllib2.urlopen(req)
     except urllib2.HTTPError:
         msg = "Fail to contact %s" % url
         print dastimestamp("DAS ERROR"), msg
         raise Exception(msg)
     except Exception as exc:
         print_exc(exc)
         msg = "Fail to contact %s" % url
         print dastimestamp("DAS ERROR"), msg
         raise Exception(msg)
     gen = qlxml_parser(stream, "dataset")
     for row in gen:
         dataset = row["dataset"]["dataset"]
         rec = {"dataset": dataset}
         if self.write_hash:
             storage_query = {
                 "fields": ["dataset"],
                 "spec": [{"key": "dataset.name", "value": '"%s"' % dataset}],
                 "instance": self.dbcoll,
             }
             rec.update({"qhash": genkey(storage_query)})
         if row["dataset"]["dataset.status"] == "VALID":
             yield rec
     stream.close()
Example 5
def dataset_info(urls, datasetdict, verbose=0):
    """
    Request blockReplicas information from Phedex for a given
    dataset or a list of dataset (use POST request in later case).
    Update MongoDB with aggregated information about dataset:
    site, size, nfiles, nblocks.
    """
    url      = urls.get('phedex') + '/blockReplicas'
    params   = {'dataset': [d for d in datasetdict.keys()]}
    headers  = {'Accept':'application/json;text/json'}
    data, _  = getdata(url, params, headers, post=True, \
            ckey=CKEY, cert=CERT, verbose=verbose, system='dbs_phedex')
    if  isinstance(data, basestring): # no response
        dastimestamp('DBS_PHEDEX ERROR: %s' % data)
        return
    jsondict = json.load(data)
    data.close()
    for row in jsondict['phedex']['block']:
        dataset = row['name'].split('#')[0]
        for rep in row['replica']:
            rec = dict(dataset=dataset,
                        nfiles=row['files'],
                        size=row['bytes'],
                        site=rep['node'],
                        se=rep['se'],
                        custodial=rep['custodial'])
            rec.update(datasetdict[dataset])
            yield rec
    data.close()
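Example 5 recovers the dataset name by splitting the PhEDEx block name on '#' and emits one record per replica. A self-contained illustration of that transformation over a hand-made (hypothetical) blockReplicas payload:

sample = {'phedex': {'block': [
    {'name': '/Prim/Proc/TIER#abc-123', 'files': 10, 'bytes': 2048,
     'replica': [{'node': 'T2_XX_Site', 'se': 'se.example.org',
                  'custodial': 'n'}]}]}}

for row in sample['phedex']['block']:
    dataset = row['name'].split('#')[0]      # block name -> parent dataset
    for rep in row['replica']:
        print(dict(dataset=dataset, nfiles=row['files'], size=row['bytes'],
                   site=rep['node'], se=rep['se'], custodial=rep['custodial']))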
Example 6
 def check_pid(self, pid, ahash):
     """
     Check status of given pid and return appropriate page content.
     This is a server callback function for ajaxCheckPid, see
     js/ajax_utils.js
     """
     cherrypy.response.headers['Cache-Control'] = 'no-cache'
     cherrypy.response.headers['Pragma'] = 'no-cache'
     img  = '<img src="%s/images/loading.gif" alt="loading"/>' % self.base
     page = ''
     try:
         if  self.taskmgr.is_alive(pid):
             page = img + " processing PID=%s" % pid
         else:
             kwargs = self.reqmgr.get(pid)
             if  kwargs and kwargs.has_key('dasquery'):
                 del kwargs['dasquery']
             # if no kwargs (another request deleted it),
             # use the logging DB to look up the user request via ahash
             if  not kwargs:
                 spec = {'ahash':ahash}
                 skey = [('ts', DESCENDING)]
                 res  = [r for r in self.logcol.find(spec).sort(skey)]
                 kwargs = res[0]['args']
                 self.adjust_input(kwargs)
             self.reqmgr.remove(pid)
             page = self.get_page_content(kwargs)
     except Exception as err:
         msg = 'check_pid fails for pid=%s' % pid
         print dastimestamp('DAS WEB ERROR '), msg
         print_exc(err)
         self.reqmgr.remove(pid)
         self.taskmgr.remove(pid)
         return self.error(gen_error_msg({'pid':pid}), wrap=False)
     return page
Example 7
    def get_das_records(self, dasquery, idx=0, limit=0, collection='merge'):
        "Generator which retrieves DAS records from the cache"
        msg = "(%s, %s, %s, coll=%s)" % (dasquery, idx, limit, collection)
        self.logger.info(msg)

        idx = int(idx)
        fields, filter_cond = self.get_fields(dasquery)
        if fields == None:
            fields = []
        if not fields:
            spec = dasquery.mongo_query.get('spec', {})
        elif dasquery.hashes:
            spec = {
                'qhash': {
                    '$in': dasquery.hashes
                },
                'das.record': spec4data_records()
            }
        else:
            spec = {'qhash': dasquery.qhash, 'das.record': spec4data_records()}
        if filter_cond:
            spec.update(filter_cond)
        if 'records' in dasquery.query:
            fields = None  # retrieve all fields for records DAS query
        else:
            # be sure to extract das internal keys
            fields += das_record_keys()
        # try to get sort keys all the time to get ordered list of
        # docs which allow unique_filter to apply afterwards
        skeys = self.mongo_sort_keys(collection, dasquery)
        res     = self.get_records(collection, spec, fields, skeys, \
                        idx, limit, dasquery.unique_filter)
        counter = 0
        for row in res:
            counter += 1
            yield row
        msg = 'qhash %s, found %s record(s) in %s collection' \
                % (dasquery.qhash, counter, collection)
        print(dastimestamp('DAS INFO '), msg)

        if counter:
            msg = "yield %s record(s)" % counter
            self.logger.info(msg)

        # if no raw records were yielded we look up possible error records
        # and reset timestamp for record with system:['das']
        if not counter:
            spec = {'qhash': dasquery.qhash}
            nrec = self.col.find(spec, **PYMONGO_OPTS).count()
            if nrec:
                msg = "for query %s, found %s non-result record(s)" \
                        % (dasquery, nrec)
                print(dastimestamp('DAS WARNING'), msg)
                for rec in self.col.find(spec, **PYMONGO_OPTS):
                    if 'query' in rec:
                        print(dastimestamp('DAS das record'), rec)
            self.update_das_expire(dasquery, etstamp())
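Because get_das_records is a generator, the counter bookkeeping and the fallback look-up after the yield loop only run once the caller has exhausted the generator. A tiny self-contained illustration of that behaviour:

def records(rows):
    counter = 0
    for row in rows:
        counter += 1
        yield row
    # this part runs only after the caller has consumed every row
    print('found %s record(s)' % counter)

gen = records([1, 2, 3])
print(next(gen))   # 1 -- the trailing message has not fired yet
rest = list(gen)   # exhausting the generator prints 'found 3 record(s)'
print(rest)        # [2, 3]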
Example 8
def getdata_pycurl(url,
                   params,
                   headers=None,
                   expire=3600,
                   post=None,
                   error_expire=300,
                   verbose=0,
                   ckey=None,
                   cert=None,
                   doseq=True,
                   system=None):
    "Fetch data via pycurl library"
    contact = 'data-service.'
    if system:
        contact = system + ' ' + contact
    if isinstance(params, dict):
        timer_key = '%s?%s' % (url, urllib.urlencode(params, doseq=True))
    else:
        timer_key = '%s?%s' % (url, params)
    das_timer(timer_key, verbose)
    handler = REQUEST_HANDLER
    try:
        data, expire = handler.getdata(url, params, headers, expire, post, \
                    error_expire, verbose, ckey, cert, doseq)
    except urllib2.HTTPError as httperror:
        msg  = 'urllib2.HTTPError, system=%s, url=%s, args=%s, headers=%s' \
                    % (system, url, json.dumps(params), json.dumps(headers))
        data = {
            'error': 'Received HTTP error from %s data-service' % contact,
            'reason': msg,
            'ts': time.time()
        }
        try:
            reason = extract_http_error(httperror.read())
            data.update({'reason': reason, 'request': msg})
            # TODO: err variable did not exist in this function!
            msg += '\n' + reason
        except Exception as exp:
            data.update({'httperror': None})
            msg += '\n' + str(exp)
        print(dastimestamp('getdata_pycurl'), msg)
        data = json.dumps(data)
        expire = expire_timestamp(error_expire)
    except Exception as exp:
        msg  = 'HTTPError, system=%s, url=%s, args=%s, headers=%s' \
                    % (system, url, json.dumps(params), json.dumps(headers))
        print(dastimestamp('getdata_pycurl'), msg + '\n' + str(exp))
        data = {
            'error': 'Received generic error from %s data-service' % contact,
            'reason': msg,
            'ts': time.time()
        }
        data = json.dumps(data)
        expire = expire_timestamp(error_expire)
    das_timer(timer_key, verbose)
    return data, expire
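On failure getdata_pycurl returns a JSON string (rather than the raw data handle) carrying 'error', 'reason' and 'ts' keys, together with a shortened error_expire window. A hedged sketch of how a caller might tell the two outcomes apart; the payload below simply reproduces the error branch above:

import json
import time

# simulate the error branch of getdata_pycurl
data = json.dumps({'error': 'Received HTTP error from dbs data-service.',
                   'reason': 'urllib2.HTTPError, system=dbs, url=...',
                   'ts': time.time()})
expire = time.time() + 300           # error_expire window

if isinstance(data, str):            # error payloads come back as JSON text
    payload = json.loads(data)
    if 'error' in payload:
        print('short-lived error record, reason:', payload['reason'])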
Example 9
    def get_das_records(self, dasquery, idx=0, limit=0, collection='merge'):
        "Generator which retrieves DAS records from the cache"
        msg = "(%s, %s, %s, coll=%s)" % (dasquery, idx, limit, collection)
        self.logger.info(msg)

        idx = int(idx)
        fields, filter_cond = self.get_fields(dasquery)
        if  fields == None:
            fields = []
        if  not fields:
            spec = dasquery.mongo_query.get('spec', {})
        elif dasquery.hashes:
            spec = {'qhash':{'$in':dasquery.hashes},
                    'das.record': spec4data_records()}
        else:
            spec = {'qhash':dasquery.qhash,
                    'das.record': spec4data_records()}
        if  filter_cond:
            spec.update(filter_cond)
        if  'records' in dasquery.query:
            fields  = None # retrieve all fields for records DAS query
        else:
            # be sure to extract das internal keys
            fields += das_record_keys()
        # try to get sort keys all the time to get ordered list of
        # docs which allow unique_filter to apply afterwards
        skeys   = self.mongo_sort_keys(collection, dasquery)
        res     = self.get_records(collection, spec, fields, skeys, \
                        idx, limit, dasquery.unique_filter)
        counter = 0
        for row in res:
            counter += 1
            yield row
        msg = 'qhash %s, found %s record(s) in %s collection' \
                % (dasquery.qhash, counter, collection)
        print(dastimestamp('DAS INFO '), msg)

        if  counter:
            msg = "yield %s record(s)" % counter
            self.logger.info(msg)

        # if no raw records were yielded we look up possible error records
        # and reset timestamp for record with system:['das']
        if  not counter:
            spec = {'qhash':dasquery.qhash}
            nrec = self.col.find(spec, **PYMONGO_OPTS).count()
            if  nrec:
                msg = "for query %s, found %s non-result record(s)" \
                        % (dasquery, nrec)
                print(dastimestamp('DAS WARNING'), msg)
                for rec in self.col.find(spec, **PYMONGO_OPTS):
                    if  'query' in rec:
                        print(dastimestamp('DAS das record'), rec)
            self.update_das_expire(dasquery, etstamp())
Example 10
 def busy(self):
     """
     Check server load and report busy status if
     nrequests - nworkers > queue limit
     """
     nrequests = self.reqmgr.size()
     if (nrequests - self.taskmgr.nworkers()) > self.queue_limit:
         msg = "#request=%s, queue_limit=%s, #workers=%s" % (nrequests, self.taskmgr.nworkers(), self.queue_limit)
         print dastimestamp("DAS WEB SERVER IS BUSY "), msg
         return True
     return False
Example 11
def process(gen):
    "Process generator from getdata"
    for row in gen:
        if "error" in row:
            error = row.get("error")
            reason = row.get("reason", "")
            print dastimestamp("DAS ERROR"), error, reason
            yield row
            continue
        if "data" in row:
            yield json.loads(row["data"])
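The process() generator in Example 11 passes error rows through untouched and JSON-decodes the 'data' field of everything else. A hedged usage sketch with hand-made input rows (the rows and their values are hypothetical, and process() is assumed to be in scope):

import json

rows = [{'error': 'timeout', 'reason': 'no reply from upstream'},
        {'data': json.dumps({'dataset': '/A/B/C'})}]

for item in process(iter(rows)):
    print(item)
# the error row is yielded unchanged, the second row is decoded to a dict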
Example 12
File: url_utils.py Project: ktf/DAS
def getdata_pycurl(
    url,
    params,
    headers=None,
    expire=3600,
    post=None,
    error_expire=300,
    verbose=0,
    ckey=None,
    cert=None,
    doseq=True,
    system=None,
):
    "Fetch data via pycurl library"
    contact = "data-service."
    if system:
        contact = system + " " + contact
    timer_key = "%s?%s" % (url, urllib.urlencode(params, doseq=True))
    das_timer(timer_key, verbose)
    handler = REQUEST_HANDLER
    try:
        data, expire = handler.getdata(url, params, headers, expire, post, error_expire, verbose, ckey, cert, doseq)
    except urllib2.HTTPError as httperror:
        msg = "urllib2.HTTPError, system=%s, url=%s, args=%s, headers=%s" % (
            system,
            url,
            json.dumps(params),
            json.dumps(headers),
        )
        data = {"error": "Unable to contact %s" % contact, "reason": msg, "ts": time.time()}
        try:
            reason = extract_http_error(httperror.read())
            data.update({"reason": reason, "request": msg})
            msg += "\n" + reason
        except Exception as exp:
            data.update({"httperror": None})
            msg += "\n" + str(exp)
        print dastimestamp("getdata_pycurl"), msg
        data = json.dumps(data)
        expire = expire_timestamp(error_expire)
    except Exception as exp:
        msg = "HTTPError, system=%s, url=%s, args=%s, headers=%s" % (
            system,
            url,
            json.dumps(params),
            json.dumps(headers),
        )
        print dastimestamp("getdata_pycurl"), msg + "\n" + str(exp)
        data = {"error": "Unable to contact %s" % contact, "reason": msg, "ts": time.time()}
        data = json.dumps(data)
        expire = expire_timestamp(error_expire)
    das_timer(timer_key, verbose)
    return data, expire
Example 13
    def update(self):
        """
        Update the DBS collection with a fresh copy of datasets. Upon the first
        insert of datasets we add a dataset:__POPULATED__ record to be used as
        a flag that this cache has been populated.
        """
        if SKIP_UPDATES:
            return None

        dbc = self.col
        if not dbc:
            print "%s DBSDaemon %s, no connection to DB" % (dastimestamp(), self.dbcoll)
            return

        try:
            time0 = round(time.time())
            udict = {"$set": {"ts": time0}}
            cdict = {"dataset": "__POPULATED__"}
            gen = self.datasets()
            msg = ""
            if not dbc.count():
                try:  # perform bulk insert operation
                    while True:
                        if not dbc.insert(itertools.islice(gen, self.cache_size)):
                            break
                except InvalidOperation as err:
                    # please note we need to inspect the error message to
                    # distinguish InvalidOperation from generator exhaustion
                    if str(err) == "cannot do an empty bulk insert":
                        dbc.insert(cdict)
                    pass
                except Exception as err:
                    pass
                # remove records with old ts
                dbc.remove({"ts": {"$lt": time0 - self.expire}})
                msg = "inserted new"
            else:  # we already have records, update their ts
                for row in gen:
                    spec = dict(dataset=row["dataset"])
                    dbc.update(spec, udict, upsert=True)
                msg = "updated old"

            if find_one(dbc, cdict):
                dbc.update(cdict, udict)
            print "%s DBSDaemon %s, %s %s records in %s sec" % (
                dastimestamp(),
                self.dbcoll,
                msg,
                dbc.count(),
                round(time.time() - time0),
            )
        except Exception as exc:
            print "%s DBSDaemon %s, fail to update, reason %s" % (dastimestamp(), self.dbcoll, str(exc))
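Example 13 bulk-inserts the dataset generator in chunks by repeatedly passing itertools.islice(gen, cache_size) to insert(); the loop stops once a slice comes back empty. A self-contained sketch of that chunking pattern with a stub insert function:

import itertools

def bulk_insert(gen, chunk_size, insert):
    # drain `gen` in chunks of `chunk_size`, handing each chunk to `insert`
    while True:
        chunk = list(itertools.islice(gen, chunk_size))
        if not chunk:              # generator exhausted
            break
        insert(chunk)

# usage with a stub insert that only reports the chunk sizes
bulk_insert(iter(range(10)), 4, lambda docs: print('inserted', len(docs)))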
Example 14
    def init(self):
        """
        Init db connection and check that it is alive
        """
        try:
            indexes = [('value', ASCENDING), ('ts', ASCENDING)]
            create_indexes(self.col, indexes)

            if not KEEP_EXISTING_RECORDS_ON_RESTART:
                self.col.remove()
        except Exception as exc:
            print dastimestamp(), exc
Example 15
    def update(self):
        """
        Update the DBS collection with a fresh copy of datasets. Upon the first
        insert of datasets we add a dataset:__POPULATED__ record to be used as
        a flag that this cache has been populated.
        """
        if SKIP_UPDATES:
            return None

        dbc = self.col
        if  not dbc:
            print("%s DBSDaemon %s, no connection to DB" \
                % (dastimestamp(), self.dbcoll))
            return

        try:
            time0 = round(time.time())
            udict = {'$set':{'ts':time0}}
            cdict = {'dataset':'__POPULATED__'}
            gen = self.datasets()
            msg = ''
            if  not dbc.count():
                try: # perform bulk insert operation
                    res = dbc.insert_many(gen)
                except InvalidOperation as err:
                    # please note we need to inspect the error message to
                    # distinguish InvalidOperation from generator exhaustion
                    if  str(err) == 'cannot do an empty bulk insert':
                        dbc.insert(cdict)
                    pass
                except Exception as err:
                    pass
                # remove records with old ts
                spec = {'ts':{'$lt':time0-self.expire}}
                dbc.delete_many(spec)
                msg = 'inserted'
            else: # we already have records, update their ts
                for row in gen:
                    spec = dict(dataset=row['dataset'])
                    dbc.update(spec, udict, upsert=True)
                msg = 'updated'

            if  find_one(dbc, cdict):
                dbc.update(cdict, udict)
            print("%s DBSDaemon %s, %s %s records in %s sec" \
            % (dastimestamp(), self.dbcoll, msg, dbc.count(),
                    round(time.time()-time0)))
        except Exception as exc:
            print("%s DBSDaemon %s, fail to update, reason %s" \
                % (dastimestamp(), self.dbcoll, str(exc)))
Example 16
 def init(self):
     """
     Establish connection to MongoDB back-end and create DB.
     """
     col = None
     try:
         conn = db_connection(self.dburi)
         if conn:
             dbc = conn[self.dbname]
             col = dbc[self.colname]
     #            print "### DASMapping:init started successfully"
     except ConnectionFailure as _err:
         tstamp = dastimestamp("")
         thread = threading.current_thread()
         print "### MongoDB connection failure thread=%s, id=%s, time=%s" % (thread.name, thread.ident, tstamp)
     except Exception as exc:
         print_exc(exc)
     if col:
         index = [
             ("type", DESCENDING),
             ("system", DESCENDING),
             ("urn", DESCENDING),
             ("das_map.das_key", DESCENDING),
             ("das_map.rec_key", DESCENDING),
             ("das_map.api_arg", DESCENDING),
         ]
         create_indexes(col, index)
Example 17
    def update(self):
        """
        Update the input values collection for the current input field
        """
        if SKIP_UPDATES:
            return None

        time0 = time.time()
        values = self.fetch_values()
        #print gen
        if not self.col.count():
            try:  # perform bulk insert operation
                self.col.insert(itertools.islice(values, self.cache_size))
                #   break
            except InvalidOperation:
                pass
        else:  # we already have records, update their ts
            for val in values:
                spec = dict(value=val['value'])
                self.col.update(spec, {'$set': {'ts': time0}}, upsert=True)
                # remove records with old ts
        self.col.remove({'ts': {'$lt': time0 - self.expire}})
        print("%s InputValuesTracker updated" \
              " %s collection in %s sec, nrec=%s" \
              % (dastimestamp(), self.dbcoll, time.time() - time0,
                 self.col.count()))
Example 18
    def dasmap_reload_handler(self):
        """ reload KWS after DASMaps reloaded """
        print dastimestamp('KWS reloading on DASMaps reload')

        try:
            self.dbs_instances = self.dasmgr.mapping.dbs_instances()
            self.dbs_global = self.dasmgr.mapping.dbs_global_instance()
            self.kws = KeywordSearchHandler(self.dasmgr)
        except ConnectionFailure:
            tstamp = dastimestamp('')
            mythr = threading.current_thread()
            print "### MongoDB connection failure thread=%s, id=%s, time=%s" \
                  % (mythr.name, mythr.ident, tstamp)
        except Exception as exc:
            print_exc(exc)
            self.kws = None
Example 19
    def update(self):
        """
        Update the input values collection for the current input field
        """
        if SKIP_UPDATES:
            return None

        time0 = time.time()
        values = self.fetch_values()
        #print gen
        if not self.col.count():
            try:  # perform bulk insert operation
                self.col.insert(
                    itertools.islice(values, self.cache_size))
                #   break
            except InvalidOperation:
                pass
        else:  # we already have records, update their ts
            for val in values:
                spec = dict(value=val['value'])
                self.col.update(spec, {'$set': {'ts': time0}}, upsert=True)
                # remove records with old ts
        self.col.remove({'ts': {'$lt': time0 - self.expire}})
        print("%s InputValuesTracker updated" \
              " %s collection in %s sec, nrec=%s" \
              % (dastimestamp(), self.dbcoll, time.time() - time0,
                 self.col.count()))
Example 20
File: das_core.py Project: dmwm/DAS
    def insert_query_records(self, dasquery):
        """
        Insert DAS query records into DAS cache and return list of services
        which will answer this query
        """
        services = dasquery.services
        self.logger.info("Potential services = %s" % services)
        if not services:
            msg = "No data-services for query %s" % dasquery
            msg += "mongo_query: %s" % dasquery.mongo_query
            msg += "params: %s" % dasquery.params()
            print(dastimestamp("DAS WARNING "), msg)

        # get list of URI which can answer this query
        ack_services = []
        for srv in services:
            gen = [t for t in getattr(getattr(self, srv), "apimap")(dasquery)]
            for url, api, args, iformat, expire in gen:
                header = dasheader(srv, dasquery, expire, api, url, ctime=0)
                self.rawcache.insert_query_record(dasquery, header)
                if srv not in ack_services:
                    ack_services.append(srv)
        if not ack_services:
            ack_services = services
        if dasquery.query.find("records ") != -1:
            srv_status = True  # skip DAS queries w/ records request
        # create das record with initial expire tstamp 2 min in the future
        # it should be sufficient for processing data-srv records
        expire = time.time() + 2 * 60
        header = dasheader("das", dasquery, expire, api="das_core", services=dict(das=ack_services))
        header["lookup_keys"] = []
        self.rawcache.insert_query_record(dasquery, header)
        das_timer("das_record", self.verbose)
        return ack_services
Example 21
 def remove(self, pid):
     """Remove given pid"""
     self.clean()
     attempts = 0
     while True:
         try:
             self.col.remove(dict(_id=pid), safe=True)
             break
         except Exception as err:
             print_exc(err)
             time.sleep(0.01)
         attempts += 1
         if  attempts > 2:
             msg = '%s unable to remove pid=%s' % (self.col, pid)
             print dastimestamp('DAS ERROR '), msg
             break
Example 22
def error(query, idx, msg='DAS ERROR: parsing failure'):
    "Form error message and raise appropriate exception message"
    out = ' '.join(query)
    where = ''
    for jdx in range(0, idx):
        where += '-' * (len(query[jdx]) + 1)
    where += '^'
    msg = dastimestamp(msg) + '\n' + out + '\n' + where
    raise Exception(msg)
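The error() helper draws a caret under the token at position idx by padding with one dash per character (plus the separating space) of every preceding token. A hand-worked illustration of the message body it builds, using a hypothetical token list:

query = ['file', 'dataset=/a/b/c']     # hypothetical query tokens
idx = 1                                # point at the second token
out = ' '.join(query)
where = '-' * (len(query[0]) + 1) + '^'
print(out)      # file dataset=/a/b/c
print(where)    # -----^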
Example 23
def error(query, idx, msg='DAS ERROR: parsing failure'):
    "Form error message and raise appropriate exception message"
    out = ' '.join(query)
    where = ''
    for jdx in range(0, idx):
        where += '-'*(len(query[jdx])+1)
    where += '^'
    msg = dastimestamp(msg) + '\n' + out + '\n' + where
    raise Exception(msg)
Example 24
def ply_parse_query(query, keys, services, pdir='/tmp', verbose=False):
    """Get ply object for given query."""
    dasply = DASPLY(pdir, keys, services, verbose=verbose)
    dasply.build()
#    ply_query = dasply.parser.parse(query)
#    ply_query = spawn(dasply.parser.parse, query)
#    return ply_query
    error = None
    for trial in xrange(1, 3):
        try:
            ply_query = dasply.parser.parse(query)
            return ply_query
        except Exception as exc:
            msg = "Fail to parse query=%s, trial=%s, exception=%s" \
                    % (query, trial, str(exc))
            print dastimestamp('DAS WARNING ') + ' ' + msg
            error = exc
        time.sleep(trial/10.)
    raise error
Example 25
def process(gen):
    "Process generator from getdata"
    for row in gen:
        if  'error' in row:
            error = row.get('error')
            reason = row.get('reason', '')
            print(dastimestamp('DAS ERROR'), error, reason)
            yield row
            continue
        if  'data' in row:
            yield json.loads(row['data'])
Example 26
    def status(self):
        """Return list of all current requests in DAS queue"""
        requests = [r for r in self.reqmgr.items()]
        page = self.templatepage('das_status', requests=requests, time=time)

        sdict = self.dasmgr.status()
        sdict['web'] = self.taskmgr.status()
        dasprint(dastimestamp('DAS INFO '), "web TaskManager", sdict['web'])
        for key, val in sdict.items():
            dasprint(dastimestamp('DAS INFO '), "%s TaskManager %s" % (key, val))
        page += '<h3>Services</h3>'
        def dump(idict):
            "Dump input dict"
            return ', '.join(['<em>%s:</em> %s' % (k, idict[k]) for k in sorted(idict)])
        for key, val in sdict.items():
            page += '<div>'
            stats = ', '.join([dump(v) for v in val.values()])
            page += '<b>%s</b>: %s' % (key, stats)
            page += '</div>'
        return self.page(page)
Example 27
 def empty_return(self, dasquery, status='busy', reason=None):
     "Return header/data when DAS server is busy"
     if  not reason:
         reason  = 'DAS server is busy'
         reason += ', #requests=%s, #workers=%s, queue size=%s' \
             % (self.reqmgr.size(), self.taskmgr.nworkers(), self.queue_limit)
     head = dict(timestamp=time.time())
     head.update({'status': status, 'reason': reason, 'ctime':0})
     data = []
     dasprint(dastimestamp('DAS INFO '), dasquery, 'server status=%s'%status, reason)
     return self.datastream(dict(head=head, data=data))
Example 28
def getdata_pycurl(url, params, headers=None, expire=3600, post=None,
    error_expire=300, verbose=0, ckey=None, cert=None, doseq=True, system=None):
    "Fetch data via pycurl library"
    contact = 'data-service.'
    if  system:
        contact = system + ' ' + contact
    if  isinstance(params, dict):
        timer_key = '%s?%s' % (url, urllib.urlencode(params, doseq=True))
    else:
        timer_key = '%s?%s' % (url, params)
    das_timer(timer_key, verbose)
    handler = REQUEST_HANDLER
    try:
        data, expire = handler.getdata(url, params, headers, expire, post, \
                    error_expire, verbose, ckey, cert, doseq)
    except urllib2.HTTPError as httperror:
        msg  = 'urllib2.HTTPError, system=%s, url=%s, args=%s, headers=%s' \
                    % (system, url, json.dumps(params), json.dumps(headers))
        data = {'error': 'Received HTTP error from %s data-service' % contact,
                'reason': msg, 'ts':time.time()}
        try:
            reason = extract_http_error(httperror.read())
            data.update({'reason': reason, 'request': msg})
            # TODO: err variable did not exist in this function!
            msg += '\n' + reason
        except Exception as exp:
            data.update({'httperror': None})
            msg += '\n' + str(exp)
        print(dastimestamp('getdata_pycurl'), msg)
        data = json.dumps(data)
        expire = expire_timestamp(error_expire)
    except Exception as exp:
        msg  = 'HTTPError, system=%s, url=%s, args=%s, headers=%s' \
                    % (system, url, json.dumps(params), json.dumps(headers))
        print(dastimestamp('getdata_pycurl'), msg + '\n' + str(exp))
        data = {'error': 'Received generic error from %s data-service' % contact,
                'reason': msg, 'ts':time.time()}
        data = json.dumps(data)
        expire = expire_timestamp(error_expire)
    das_timer(timer_key, verbose)
    return data, expire
Example 29
    def check_pid(self, pid):
        """
        Check status of given pid. This is a server callback
        function for ajaxCheckPid, see js/ajax_utils.js
        """
        # do not allow caching
        set_no_cache_flags()

        img  = '<img src="%s/images/loading.gif" alt="loading"/>' % self.base
        page = ''
        try:
            if  self.taskmgr.is_alive(pid):
                page = img + " processing PID=%s" % pid
            else:
                # at this point we don't know if request arrived to this host
                # or it was processed. To distinguish the case we'll ask
                # request manager for that pid
                if  self.reqmgr.has_pid(pid):
                    self.reqmgr.remove(pid)
                    self.taskmgr.remove(pid)
                    page  = 'Request PID=%s is completed' % pid
                    page += ', please wait for results to load'
                else:
                    # there is no request on this server, re-initiate it
                    ref = cherrypy.request.headers.get('Referer', None)
                    if  ref:
                        url = urlparse(ref)
                        params = dict(parse_qsl(url.query))
                        return self.request(**params)
                    else:
                        msg  = 'No referer in cherrypy.request.headers'
                        msg += '\nHeaders: %s' % cherrypy.request.headers
                        dasprint(dastimestamp('DAS WEB ERROR '), msg)
        except Exception as err:
            msg = 'check_pid fails for pid=%s' % pid
            dasprint(dastimestamp('DAS WEB ERROR '), msg)
            print_exc(err)
            self.reqmgr.remove(pid)
            self.taskmgr.remove(pid)
            return self.error(gen_error_msg({'pid':pid}), wrap=False)
        return page
Example 30
    def init(self):
        """
        Init db connection and check that it is alive
        """
        try:
            indexes = [('value', ASCENDING), ('ts', ASCENDING)]
            create_indexes(self.col, indexes)

            if not KEEP_EXISTING_RECORDS_ON_RESTART:
                self.col.remove()
        except Exception as exc:
            print(dastimestamp(), exc)
Example 31
 def busy(self):
     """
     Check server load and report busy status if
     nrequests - nworkers > queue limit
     """
     nrequests = self.reqmgr.size()
     if  (nrequests - self.taskmgr.nworkers()) > self.queue_limit:
         msg = '#request=%s, queue_limit=%s, #workers=%s' \
                 % (nrequests, self.taskmgr.nworkers(), self.queue_limit)
         dasprint(dastimestamp('DAS WEB SERVER IS BUSY '), msg)
         return True
     return False
Example 32
def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if entity not in ["run", "file", "block"]:
        msg = "Unsupported entity key=%s" % entity
        raise Exception(msg)
    expire = 600
    dataset = kwds.get("dataset", None)
    block = kwds.get("block_name", None)
    if not block:
        # TODO: this should go away when DBS is retired (used in combined srv)
        block = kwds.get("block", None)
    lfn = kwds.get("file", None)
    runs = kwds.get("runs", [])
    if not (dataset or block or lfn):
        return
    url = "%s/%ss" % (url, entity)  # DBS3 APIs use plural entity value
    if dataset:
        params = {"dataset": dataset}
    elif block:
        params = {"block_name": block}
    elif lfn:
        params = {"logical_file_name": lfn}
    if runs:
        params.update({"run_num": runrange(runs[0], runs[-1], False)})
    headers = {"Accept": "application/json;text/json"}
    source, expire = getdata(url, params, headers, expire, ckey=CKEY, cert=CERT, verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if isinstance(rec, basestring):
                    print dastimestamp("DBS3 ERROR:"), row
                elif entity == "file":
                    yield rec["logical_file_name"]
                elif entity == "block":
                    yield rec["block_name"]
                elif entity == "file":
                    yield rec["dataset"]
            except Exception as exp:
                msg = 'Fail to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)
Example 33
def block_run_lumis(url, blocks, runs=None):
    """
    Find block, run, lumi tuple for given set of files and (optional) runs.
    """
    headers = {'Accept': 'text/xml'}
    urls = []
    for blk in blocks:
        if  not blk:
            continue
        query   = 'find block,run,lumi where block=%s' % blk
        if  runs and isinstance(runs, list):
            val = ' or '.join(['run=%s' % r for r in runs])
            query += ' and (%s)' % val
        params  = {'api':'executeQuery', 'apiversion':'DBS_2_0_9',
                   'query':query}
        dbs_url = url + '?' + urllib.urlencode(params)
        urls.append(dbs_url)
    if  not urls:
        return
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    prim_key = 'row'
    odict = {} # output dict
    for rec in gen:
        if  'error' in rec:
            error  = rec.get('error')
            reason = rec.get('reason', '')
            print dastimestamp('DAS ERROR'), error, reason
            yield {'error': error, 'reason': reason}
        else:
            source   = StringIO.StringIO(rec['data'])
            lumis    = []
            for row in qlxml_parser(source, prim_key):
                run  = row['row']['run']
                blk  = row['row']['block']
                lumi = row['row']['lumi']
                key  = (blk, run)
                odict.setdefault(key, []).append(lumi)
    for key, lumis in odict.iteritems():
        blk, run = key
        yield blk, run, lumis
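Example 33 groups lumi numbers by (block, run) with dict.setdefault before yielding the aggregated triples. A self-contained illustration of that grouping step over hypothetical (block, run, lumi) rows:

rows = [('/A/B/C#1', 100, 1), ('/A/B/C#1', 100, 2), ('/A/B/C#1', 101, 5)]

odict = {}
for blk, run, lumi in rows:
    odict.setdefault((blk, run), []).append(lumi)

for (blk, run), lumis in odict.items():
    print(blk, run, lumis)
# /A/B/C#1 100 [1, 2]
# /A/B/C#1 101 [5]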
Example 34
File: das_core.py Project: ktf/DAS
 def update_das_query(dasquery, status, reason=None):
     "Update DAS query record with given status and reason"
     self.rawcache.update_query_record(dasquery, status, reason=reason)
     self.rawcache.add_to_record(\
             dasquery, {'das.timer': get_das_timer()}, system='das')
     # make sure that the das record is updated; we use up to 7 iterations which
     # sum up to roughly 1 minute to cover the default syncdelay value of the
     # mongo server (in the future it would be better to find this syncdelay
     # value programmatically, but it seems the pymongo driver does not
     # provide any API for it).
     for idx in xrange(1, 7):
         spec = {'qhash':dasquery.qhash, 'das.system':['das']}
         res = self.rawcache.col.find_one(spec)
         if  res:
             dbstatus = res.get('das', {}).get('status', None)
             if  dbstatus == status:
                 break
             msg = 'qhash %s, das.status=%s, status=%s, wait for update' \
                     % (dasquery.qhash, dbstatus, status)
             print dastimestamp('DAS WARNING'), msg
         time.sleep(idx*idx)
         self.rawcache.update_query_record(dasquery, status, reason=reason)
Example 35
    def init(self):
        """Init DAS web server, connect to DAS Core"""
        try:
            self.reqmgr = RequestManager(lifetime=self.lifetime)
            self.dasmgr = DASCore(engine=self.engine)
            self.repmgr = CMSRepresentation(self.dasconfig, self.dasmgr)
            self.daskeys = self.dasmgr.das_keys()
            self.gfs = db_gridfs(self.dburi)
            self.daskeys.sort()
            self.dasmapping = self.dasmgr.mapping
            self.dbs_url = self.dasmapping.dbs_url()
            self.dbs_global = self.dasmapping.dbs_global_instance()
            self.dbs_instances = self.dasmapping.dbs_instances()
            self.dasmapping.init_presentationcache()
            self.colors = {"das": gen_color("das")}
            for system in self.dasmgr.systems:
                self.colors[system] = gen_color(system)
            # get SiteDB from global scope
            self.sitedbmgr = SERVICES.get("sitedb2", None)
            # Start DBS daemon
            if self.dataset_daemon:
                self.dbs_daemon(self.dasconfig["web_server"])
            if not self.daskeyslist:
                keylist = [r for r in self.dasmapping.das_presentation_map()]
                keylist.sort(key=lambda r: r["das"])
                self.daskeyslist = keylist

        except ConnectionFailure as _err:
            tstamp = dastimestamp("")
            mythr = threading.current_thread()
            print "### MongoDB connection failure thread=%s, id=%s, time=%s" % (mythr.name, mythr.ident, tstamp)
        except Exception as exc:
            print_exc(exc)
            self.dasmgr = None
            self.reqmgr = None
            self.dbs_url = None
            self.dbs_global = None
            self.dbs_instances = []
            self.daskeys = []
            self.colors = {}
            self.q_rewriter = None
            return

        # KWS and Query Rewriting failures are not fatal
        try:
            # init query rewriter, if needed
            if self.dasconfig["query_rewrite"]["pk_rewrite_on"]:
                self.q_rewriter = CMSQueryRewrite(self.repmgr, self.templatepage)
        except Exception as exc:
            print_exc(exc)
            self.q_rewriter = None
Example 36
def db_monitor(uri, func, sleep, reload_map, reload_time, check_maps, reload_time_bad_maps):
    """
    Check status of MongoDB connection and reload DAS maps once in a while.
    """
    time0 = time.time()
    valid_maps = False
    try:
        valid_maps = check_maps()
    except Exception as err:
        print_exc(err)
    while True:
        conn = db_connection(uri)
        if not conn or not is_db_alive(uri):
            try:
                conn = db_connection(uri, verbose=False)
                func()
                if conn:
                    print "### db_monitor re-established connection %s" % conn
                    valid_maps = check_maps()
                else:
                    print "### db_monitor, lost connection"
            except Exception as err:
                print_exc(err)
        if conn:
            # reload invalid maps more quickly
            reload_intervl = reload_time if valid_maps else reload_time_bad_maps
            if time.time() - time0 > reload_intervl:
                map_state = "INVALID" if not valid_maps else ""
                msg = "reload %s DAS maps %s" % (map_state, reload_map)
                print dastimestamp(), msg
                try:
                    reload_map()
                    valid_maps = check_maps()
                except Exception as err:
                    print_exc(err)
                time0 = time.time()

        time.sleep(sleep)
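db_monitor never returns, so it is meant to run in a background thread alongside the server. A hedged sketch of launching it as a daemon thread; the URI and the callables passed here are illustrative stubs, not DAS defaults:

import threading

# assuming db_monitor from the snippet above is importable
monitor = threading.Thread(
    target=db_monitor,
    args=('mongodb://localhost:27017',   # uri
          lambda: None,                  # func: re-init hook
          5,                             # sleep between checks (sec)
          lambda: None,                  # reload_map
          3600,                          # reload_time for valid maps (sec)
          lambda: True,                  # check_maps
          120),                          # reload_time_bad_maps (sec)
    name='db_monitor')
monitor.daemon = True
monitor.start()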
Example 37
    def get_das_records(self, dasquery, idx=0, limit=0, collection='merge'):
        "Generator which retrieves DAS records from the cache"
        col = self.mdb[collection]
        msg = "(%s, %s, %s, coll=%s)" % (dasquery, idx, limit, collection)
        self.logger.info(msg)

        idx = int(idx)
        fields, filter_cond = self.get_fields(dasquery)
        if  not fields:
            spec = dasquery.mongo_query.get('spec', {})
        else:
            spec = {'qhash':dasquery.qhash, 'das.empty_record':0}
        if  filter_cond:
            spec.update(filter_cond)
        if  fields: # be sure to extract das internal keys
            fields += self.das_internal_keys
        # try to get sort keys all the time to get ordered list of
        # docs which allow unique_filter to apply afterwards
        skeys   = self.mongo_sort_keys(collection, dasquery)
        res     = self.get_records(col, spec, fields, skeys, \
                        idx, limit, dasquery.unique_filter)
        counter = 0
        for row in res:
            counter += 1
            yield row

        if  counter:
            msg = "yield %s record(s)" % counter
            self.logger.info(msg)

        # if no raw records were yielded we look up possible error records
        if  not counter:
            nrec = self.col.find({'qhash':dasquery.qhash}).count()
            if  nrec:
                msg = "for query %s, found %s non-result record(s)" \
                        % (dasquery, nrec)
                prf = 'DAS WARNING, mongocache:get_from_cache '
                print dastimestamp(prf), msg
Example 38
 def add(self, pid, kwds):
     """Add new pid/kwds"""
     self.clean()
     if  not kwds:
         return
     tstamp = time.strftime("%Y%m%d %H:%M:%S", time.localtime())
     doc = dict(_id=pid, kwds=json.dumps(kwds),
             ts=time.time(), timestamp=tstamp)
     attempts = 0
     while True:
         try:
             self.col.insert(doc, safe=True)
             break
         except DuplicateKeyError as err:
             break
         except Exception as err:
             print_exc(err)
             time.sleep(0.01)
         attempts += 1
         if  attempts > 2:
             msg = '%s unable to add pid=%s' % (self.col, pid)
             print dastimestamp('DAS ERROR '), msg
             break
Example 39
    def init(self):
        """Init DAS web server, connect to DAS Core"""
        try:
            self.reqmgr     = RequestManager(lifetime=self.lifetime)
            self.dasmgr     = DASCore(engine=self.engine)
            self.repmgr     = CMSRepresentation(self.dasconfig, self.dasmgr)
            self.daskeys    = self.dasmgr.das_keys()
            self.gfs        = db_gridfs(self.dburi)
            self.daskeys.sort()
            self.dasmapping = self.dasmgr.mapping
            self.dbs_url    = self.dasmapping.dbs_url()
            self.dbs_global = self.dasmapping.dbs_global_instance()
            self.dbs_instances = self.dasmapping.dbs_instances()
            self.dasmapping.init_presentationcache()
            self.colors = {'das':gen_color('das')}
            for system in self.dasmgr.systems:
                self.colors[system] = gen_color(system)
            if  not self.daskeyslist:
                keylist = [r for r in self.dasmapping.das_presentation_map()]
                keylist.sort(key=lambda r: r['das'])
                self.daskeyslist = keylist

        except ConnectionFailure as _err:
            tstamp = dastimestamp('')
            mythr  = threading.current_thread()
            print("### MongoDB connection failure thread=%s, id=%s, time=%s" \
                    % (mythr.name, mythr.ident, tstamp))
        except Exception as exc:
            print_exc(exc)
            self.dasmgr  = None
            self.reqmgr  = None
            self.dbs_url = None
            self.dbs_global = None
            self.dbs_instances = []
            self.daskeys = []
            self.colors  = {}
            self.q_rewriter = None
            return

        # KWS and Query Rewriting failures are not fatal
        try:
            # init query rewriter, if needed
            if self.dasconfig['query_rewrite']['pk_rewrite_on']:
                self.q_rewriter = CMSQueryRewrite(self.repmgr,
                                                  self.templatepage)
        except Exception as exc:
            print_exc(exc)
            self.q_rewriter = None
Example 40
 def init(self):
     """Init DAS web server, connect to DAS Core"""
     try:
         self.dasmgr = DASCore(multitask=False)
         self.dbs_instances = self.dasmgr.mapping.dbs_instances()
         self.dbs_global = self.dasmgr.mapping.dbs_global_instance()
         if KeywordSearchHandler:
             self.kws = KeywordSearchHandler(self.dasmgr)
     except ConnectionFailure:
         tstamp = dastimestamp('')
         mythr = threading.current_thread()
         print("### MongoDB connection failure thread=%s, id=%s, time=%s" \
               % (mythr.name, mythr.ident, tstamp))
     except Exception as exc:
         print_exc(exc)
         self.dasmgr = None
         self.kws = None
Example 41
def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if  entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire  = 600
    dataset = kwds.get('dataset', None)
    block   = kwds.get('block_name', None)
    if  not block:
        # TODO: this should go away when DBS is retired (used in combined srv)
        block = kwds.get('block', None)
    lfn     = kwds.get('file', None)
    runs    = kwds.get('runs', [])
    if  not (dataset or block or lfn):
        return
    url = '%s/%ss' % (url, entity) # DBS3 APIs use plural entity value
    if  dataset:
        params = {'dataset':dataset}
    elif block:
        params = {'block_name': block}
    elif lfn:
        params = {'logical_file_name': lfn}
    if  runs:
        params.update({'run_num': runs})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if  isinstance(rec, basestring):
                    print(dastimestamp('DBS3 ERROR:'), row)
                elif  entity == 'file':
                    yield rec['logical_file_name']
                elif  entity == 'block':
                    yield rec['block_name']
                elif  entity == 'file':
                    yield rec['dataset']
            except Exception as exp:
                msg = 'Fail to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)
Example 42
 def get_new_connection(self, uri):
     "Get new MongoDB connection"
     key = self.genkey(uri)
     for idx in range(0, self.retry):
         try:
             dbinst = MongoClient(host=uri, **self.mongo_opts)
             #                dbinst = MongoConnection(uri, **self.mongo_opts).client()
             gfs = dbinst.gridfs
             fsinst = gridfs.GridFS(gfs)
             self.conndict[key] = (dbinst, fsinst)
             self.timedict[key] = time.time()
             return (dbinst, fsinst)
         except (ConnectionFailure, AutoReconnect) as exc:
             tstamp = dastimestamp('')
             thread = threading.current_thread()
             print("### MongoDB connection failure thread=%s, id=%s, time=%s" \
                     % (thread.name, thread.ident, tstamp))
             print_exc(exc)
         except Exception as exc:
             print_exc(exc)
         time.sleep(idx)
     return self.conndict.get(key, (None, None))
Example 43
    def insert_query_records(self, dasquery):
        """
        Insert DAS query records into DAS cache and return list of services
        which will answer this query
        """
        services = dasquery.services
        self.logger.info('Potential services = %s' % services)
        if not services:
            msg = 'No data-services for query %s' % dasquery
            msg += 'mongo_query: %s' % dasquery.mongo_query
            msg += 'params: %s' % dasquery.params()
            print(dastimestamp('DAS WARNING '), msg)

        # get list of URI which can answer this query
        ack_services = []
        for srv in services:
            gen = [t for t in getattr(getattr(self, srv), 'apimap')(dasquery)]
            for url, api, args, iformat, expire in gen:
                header = dasheader(srv, dasquery, expire, api, url, ctime=0)
                self.rawcache.insert_query_record(dasquery, header)
                if srv not in ack_services:
                    ack_services.append(srv)
        if not ack_services:
            ack_services = services
        if dasquery.query.find('records ') != -1:
            srv_status = True  # skip DAS queries w/ records request
        # create das record with initial expire tstamp
        expire = time.time() + self.init_expire
        header = dasheader("das",
                           dasquery,
                           expire,
                           api='das_core',
                           services=dict(das=ack_services))
        header['lookup_keys'] = []
        self.rawcache.insert_query_record(dasquery, header)
        das_timer('das_record', self.verbose)
        return ack_services
Example 44
    def merge_records(self, dasquery, attempt=0):
        """
        Merge DAS records for provided query. We perform the following
        steps:
        1. get all queries from das.cache by ordering them by primary key
        2. run aggregator function to merge neighbors
        3. insert records into das.merge
        """
        ### TMP for asyncio
        #         time.sleep(attempt+3) # pymongo 3.2 don't yet flush in time

        # remove any entries in merge collection for this query
        self.merge.delete_many({'qhash': dasquery.qhash})
        # proceed
        self.logger.debug(dasquery)
        id_list = []
        expire = 9999999999  # future
        # get all API records for given DAS query
        spec = {
            'qhash': dasquery.qhash,
            'das.expire': {
                '$gt': time.time()
            },
            'das.record': record_codes('query_record')
        }
        records = self.col.find(spec, **PYMONGO_OPTS)
        for row in records:
            # find smallest expire timestamp to be used by aggregator
            rexpire = row.get('das', {}).get('expire', expire)
            if rexpire < expire:
                expire = rexpire
            if row['_id'] not in id_list:
                id_list.append(row['_id'])
        inserted = 0
        lookup_keys = set()
        fields = dasquery.mongo_query.get('fields')
        if not fields:  # Mongo
            fields = []
        for key in fields:
            for pkey in self.mapping.mapkeys(key):
                lookup_keys.add(pkey)
        for pkey in lookup_keys:
            skey = [(pkey, DESCENDING)]
            # lookup all service records
            spec = {'das_id': {'$in': id_list}, 'das.primary_key': pkey}
            if self.verbose:
                nrec = self.col.find(spec, **PYMONGO_OPTS).sort(skey).count()
                msg = "merging %s records, for %s key" % (nrec, pkey)
            else:
                msg = "merging records, for %s key" % pkey
            self.logger.debug(msg)
            # use exhaust=False since we process all records in aggregator
            # and there can be a delay in processing
            records = self.col.find(spec, **PYMONGO_NOEXHAUST).sort(skey)
            # aggregate all records
            agen = aggregator(dasquery, records, expire)
            # diff aggregated records
            gen = das_diff(agen, self.mapping.diff_keys(pkey.split('.')[0]))
            # insert all records into das.merge using bulk insert
            size = self.cache_size
            try:
                res = self.merge.insert_many(gen)
                inserted += len(res.inserted_ids)
            except InvalidDocument as exp:
                print(dastimestamp('DAS WARNING'),
                      'InvalidDocument during merge', str(exp))
                msg = "Caught bson error: " + str(exp)
                self.logger.info(msg)
                records = self.col.find(spec, **PYMONGO_OPTS).sort(skey)
                gen = aggregator(dasquery, records, expire)
                genrows = parse2gridfs(self.gfs, pkey, gen, self.logger)
                das_dict = {
                    'das': {
                        'expire': expire,
                        'das.record': record_codes('gridfs_record'),
                        'primary_key': [k for k in lookup_keys],
                        'system': ['gridfs']
                    },
                    'qhash': dasquery.qhash,
                    'cache_id': [],
                    'das_id': id_list
                }
                for row in genrows:
                    row.update(das_dict)
                    self.merge.insert_one(row)
            except InvalidOperation as exp:
                pass
            except DuplicateKeyError as err:
                print(dastimestamp('DAS WARNING'),
                      'DuplicateKeyError during merge')
                if not isinstance(gen, list):
                    raise err
        status = 'fail'
        if inserted:
            status = 'ok'
        elif not lookup_keys:  # we get query w/o fields
            msg = 'qhash %s, no lookup_keys' % dasquery.qhash
            print(dastimestamp('DAS WARNING'), msg)
            status = 'ok'
        else:  # we didn't merge anything; this is a DB look-up failure
            msg  = 'qhash %s, did not insert into das.merge, attempt %s' \
                    % (dasquery.qhash, attempt)
            print(dastimestamp('DAS WARNING'), msg)
            empty_expire = etstamp()
            lkeys = list(lookup_keys)
            das = dict(expire=empty_expire,
                       primary_key=lkeys[0],
                       condition_keys=lkeys,
                       instance=dasquery.instance,
                       system=['das'],
                       services=dasquery.services,
                       record=record_codes('empty_record'),
                       ts=time.time(),
                       api=[])
            empty_record = {
                'das': das,
                'qhash': dasquery.qhash,
                'cache_id': [],
                'das_id': id_list
            }
            for key in lkeys:
                empty_record.update({key.split('.')[0]: []})
            for key, val in dasquery.mongo_query['spec'].items():
                if key.find('.') == -1:
                    empty_record[key] = []
                else:  # it is compound key, e.g. site.name
                    newkey, newval = convert_dot_notation(key, val)
                    empty_record[newkey] = adjust_mongo_keyvalue(newval)
            self.merge.insert_one(empty_record)
            # update DAS records (both meta and data ones, by using qhash)
            nval = {'$set': {'das.expire': empty_expire}}
            spec = {'qhash': dasquery.qhash}
            self.col.update_many(spec, nval)
        return status
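
The three steps listed in the merge_records docstring above (read the API records ordered by primary key, merge neighbouring records, write the result to das.merge) can be illustrated without any DAS internals. The following is a minimal, self-contained sketch; toy_aggregator, the sample rows and the timestamps are invented for illustration and are not part of the DAS code base.

from itertools import groupby

def toy_aggregator(records, pkey, expire):
    "Merge neighbouring records that share the same primary-key value."
    records = sorted(records, key=lambda rec: rec[pkey])
    for value, group in groupby(records, key=lambda rec: rec[pkey]):
        merged = {pkey: value, 'das': {'expire': expire}}
        for rec in group:
            for key, val in rec.items():
                if key != pkey:
                    merged.setdefault(key, []).append(val)
        yield merged

# the smallest expire timestamp among the API records wins, as in merge_records
rows = [{'dataset': '/a/b/RECO', 'size': 10},
        {'dataset': '/a/b/RECO', 'size': 20},
        {'dataset': '/x/y/AOD', 'size': 5}]
expire = min(9999999999, 1700000000)
for row in toy_aggregator(rows, 'dataset', expire):
    print(row)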
Esempio n. 45
0
    def get_data(self, kwargs):
        """
        Invoke DAS workflow and get data from the cache.
        """
        head   = dict(timestamp=time.time())
        head['args'] = kwargs
        uinput = kwargs.get('input', '')
        inst   = kwargs.get('instance', self.dbs_global)
        idx    = getarg(kwargs, 'idx', 0)
        limit  = getarg(kwargs, 'limit', 0) # do not impose limit
        coll   = kwargs.get('collection', 'merge')
        status = kwargs.get('status')
        error  = kwargs.get('error')
        reason = kwargs.get('reason')
        dasquery = kwargs.get('dasquery', None)
        time0  = time.time()
        if  dasquery:
            dasquery = DASQuery(dasquery, instance=inst)
            if  dasquery.error:
                head.update({'status': 'fail', 'reason': dasquery.error,
                             'ctime': time.time()-time0, 'input': uinput})
                return head, []
        else:
            check, content = \
                    self.generate_dasquery(uinput, inst, html_mode=False)
            if  check:
                head.update({'status': 'fail', 'reason': content,
                             'ctime': time.time()-time0, 'input': uinput})
                data = []
                return head, data
            dasquery = content # returned content is valid DAS query
        try:
            nres = self.dasmgr.nresults(dasquery, coll)
            data = \
                self.dasmgr.get_from_cache(dasquery, idx, limit)
            # check that we got what we expected
            data = [r for r in data]
            if  nres and not len(data):
                for retry in (1, 3, 5):
                    msg = 'retry in %s sec' % retry
                    dasprint(dastimestamp('DAS WARNING '), msg, dasquery)
                    time.sleep(retry) # wait before re-reading the cache
                    data = \
                        self.dasmgr.get_from_cache(dasquery, idx, limit)
                    data = [r for r in data]
                    if  len(data):
                        break
            if  nres and not len(data):
                msg = 'fail to get all data for %s, nres=%s, len(data)=%s' \
                        % (dasquery, nres, len(data))
                dasprint(dastimestamp('DAS WARNING '), msg)
                status = 'fail'
                reason = 'Fail to retrieve data from DAS cache, please retry'

            if  dasquery.aggregators:
                # aggregators split DAS records per sub-system and then
                # apply aggregator functions, therefore nresults must be
                # taken from the materialized list of records.
                nres = len(data)
            if  error: # DAS record contains an error
                status = 'error'
            head.update({'status':status, 'nresults':nres,
                         'ctime': time.time()-time0, 'dasquery': dasquery})
        except Exception as exc:
            status = 'fail'
            reason = str(exc)
            print_exc(exc)
            head.update({'status': status,
                         'ctime': time.time()-time0, 'dasquery': dasquery})
            data = []
        head.update({'incache':self.dasmgr.incache(dasquery, coll='cache'),
                     'apilist':self.dasmgr.apilist(dasquery)})
        if  reason:
            head.update({'reason': reason})
        if  status != 'ok':
            head.update(self.info())

        # check if the query had dataset input and returned no results,
        # then run hint functions to find the dataset in other DBS instances
        mquery = dasquery.mongo_query
        empty = False
        if  'dataset.name' in mquery['spec'] and \
            'dataset' in (mquery.get('fields') or []):
            for item in data:
                if  'result' not in item and not item.get('dataset'):
                    empty = True
                    break
        if  empty: # if no results found add dataset from other DBS instances
            hints = self.hint_datasets(kwargs)
            for item in data:
                item.update({'hints': hints})

        return head, data
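
The cache re-read loop in get_data above follows a common pattern: the result count says data should be there, but the first read returns nothing because the backend has not flushed yet, so the read is repeated after short delays. Below is a self-contained sketch of that pattern; fetch_with_retry, read_cache and the delay schedule are illustrative names, not DAS APIs.

import time

def fetch_with_retry(read_cache, expected, delays=(1, 2, 3)):
    "read_cache is any callable returning an iterable of records."
    data = list(read_cache())
    for delay in delays:
        if data or not expected:
            break                  # got data, or nothing was expected
        time.sleep(delay)          # give the backend time to flush
        data = list(read_cache())
    return data

# usage sketch: data = fetch_with_retry(lambda: cache.find(spec), expected=nres)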
Esempio n. 46
0
 def listview(self, head, data):
     """
     Represent data in list view.
     """
     kwargs   = head.get('args')
     uinput   = kwargs.get('input', '')
     total    = head.get('nresults', 0)
     apilist  = head.get('apilist')
     dasquery = head.get('dasquery', None)
     if  not dasquery:
         inst     = head.get('instance', self.dbs_global)
         dasquery = DASQuery(uinput, instance=inst)
     inst     = dasquery.instance
     filters  = dasquery.filters
     aggrtrs  = dasquery.aggregators
     pager    = self.pagination(head)
     main     = pager
     style    = 'white'
     rowkeys  = []
     fltpage  = self.filter_bar(dasquery)
     page     = ''
     old      = None
     dup      = False
     status   = head.get('status', None)
     if  status == 'fail':
         reason = head.get('reason', '')
         if  reason:
             page += '<br/><span class="box_red">%s</span>' % reason
     for row in data:
         if  not row:
             continue
         if  not dup and old and identical_data_records(old, row):
             dup = True
         error = row.get('error', None)
         try:
             mongo_id = row['_id']
         except Exception as exc:
             msg  = 'Exception: %s\n' % str(exc)
             msg += 'Fail to process row\n%s' % str(row)
             raise Exception(msg)
         page += '<div class="%s"><hr class="line" />' % style
         links = []
         pkey  = None
         pval  = None
         lkey  = None
         if  'das' in row and 'primary_key' in row['das']:
             pkey = row['das']['primary_key']
             if  pkey and not rowkeys and not fltpage:
                 fltpage = self.fltpage(dasquery)
             try:
                 lkey = pkey.split('.')[0]
                 if  pkey == 'summary':
                     pval = row[pkey]
                 else:
                     pval = [i for i in DotDict(row).get_values(pkey)]
                     if  isinstance(pval, list):
                         if  pval and not isinstance(pval[0], list):
                             pval = list(set(pval))
                     else:
                         pval = list(set(pval))
                     if  len(pval) == 1:
                         pval = pval[0]
                     if  pkey == 'run.run_number' or pkey == 'lumi.number':
                         if  isinstance(pval, basestring):
                             pval = int(pval)
             except Exception as exc:
                 msg  = "Fail to extract pval for pkey='%s', lkey='%s'" \
                         % (pkey, lkey)
                 msg += "\npval='%s', type(pval)='%s'" % (pval, type(pval))
                 print(msg)
                 print_exc(exc)
                 pval = 'N/A'
             try:
                 if  not filters:
                     if  pkey == 'summary':
                         page += 'Summary information:'
                     elif  pval and pval != 'N/A':
                         page += '%s: ' % lkey.capitalize()
                         if  lkey == 'parent' or lkey == 'child':
                             if  str(pval).find('.root') != -1:
                                 lkey = 'file'
                             else:
                                 lkey = 'dataset'
                         if  lkey in not_to_link():
                             page += '%s' % pval
                         elif  isinstance(pval, list):
                             page += ', '.join(['<span class="highlight">'+\
                                 '<a href="/das/request?%s">%s</a></span>'\
                                 % (make_args(lkey, i, inst), i) for i in pval])
                         else:
                             args  = make_args(lkey, pval, inst)
                             page += '<span class="highlight">'+\
                                 '<a href="/das/request?%s">%s</a></span>'\
                                 % (args, pval)
                     else:
                         page += '%s: N/A' % lkey.capitalize()
                 plist = self.dasmgr.mapping.presentation(lkey)
                 linkrec = None
                 for item in plist:
                     if  'link' in item:
                         linkrec = item['link']
                         break
                 if  linkrec and pval and pval != 'N/A' and \
                     not isinstance(pval, list) and not error:
                     links += [l for l in make_links(linkrec, pval, inst)]
                 if  pkey and pkey == 'file.name':
                     try:
                         lfn = DotDict(row).get('file.name')
                         val = '<a href="/das/download?lfn=%s">Download</a>'\
                                 % lfn if lfn else ''
                         if  val: links.append(val)
                     except:
                         pass
                 if  pkey and pkey == 'site.name':
                     try:
                         site = DotDict(row).get('site.name')
                         val = self.templatepage(\
                         'sitedb', item=site, api="sites") if site else ''
                         if  val: links.append(val)
                     except:
                         pass
                 if  pkey and pkey == 'user.name':
                     try:
                         user = DotDict(row).get('user.username')
                         val = self.templatepage(\
                         'sitedb', item=user, api="people") if user else ''
                         if  val: links.append(val)
                     except:
                         pass
                 if  pkey and pkey == 'dataset.name':
                     try:
                         path = DotDict(row).get('dataset.name')
                         if  path:
                             links.append(self.templatepage(\
                                 'makepy', path=path, inst=inst))
                             if  inst == self.dbs_global:
                                 links.append(self.templatepage(\
                                     'phedex_subscription', path=path))
                                 links.append(self.templatepage(\
                                     'xsecdb', primds=path.split('/')[1]))
                     except:
                         pass
                 if  pkey and pkey == 'release.name':
                     rel  = '["%s"]' % DotDict(row).get('release.name')
                     url  = 'https://cmstags.cern.ch/tc/py_getReleasesTags?'
                     url += 'diff=false&releases=%s' % urllib.quote(rel)
                     links.append('<a href="%s">Packages</a>' % url)
             except Exception as exc:
                 print_exc(exc)
                 pval = 'N/A'
         gen   = self.convert2ui(row, pkey)
         if  self.dasmgr:
             func  = self.dasmgr.mapping.daskey_from_presentation
             if  filters and not aggrtrs:
                 page += add_filter_values(row, filters)
             else:
                 page += adjust_values(func, gen, links, pkey)
         pad   = ""
         try:
             if  'das' in row and 'system' in row['das']:
                 systems = self.systems(row['das']['system'])
             else:
                 systems = "" # no das record
                 print(dastimestamp('DAS ERROR '), \
                         'record without DAS key', row)
         except KeyError as exc:
             print_exc(exc)
             systems = "" # we don't store systems for aggregated records
         except Exception as exc:
             print_exc(exc)
             systems = "" # we don't store systems for aggregated records
         jsonhtml = das_json(dasquery, row, pad)
         jsonhtml = jsonhtml.replace(\
             'request?', 'request?instance=%s&' % inst)
         if  not links:
             page += '<br />'
         if  'das' in row and 'conflict' in row['das']:
             conflict = ', '.join(row['das']['conflict'])
         else:
             conflict = ''
         hints = ''
         for hint in row.get('hints', {}):
             if  hint:
                 hints += self.templatepage('hint',
                         hint=hint, base=self.base, dbs=self.dbs_global)
         page += self.templatepage('das_row', systems=systems, \
                 sanitized_data=jsonhtml, id=mongo_id, rec_id=mongo_id,
                 conflict=conflict, hints=hints)
         page += '</div>'
         old = row
     main += fltpage
     if  dup and not dasquery.aggregators:
         main += self.templatepage('das_duplicates', uinput=uinput,
                     instance=inst)
     main += page
     if total>10:
         main += '<hr class="line" />'
         main += pager
         main += '<hr class="line" />'
     proc_time = self.processing_time(dasquery)
     if  proc_time:
         msg = 'processing time: %5.3f sec, ' % proc_time
     else:
         msg   = ''
     msg  += 'cache server time: %5.3f sec' % head['ctime']
     main += '<div align="right">%s</div>' % msg
     return main
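
listview flags duplicated output by comparing each record with the previous one (identical_data_records). A minimal stand-alone version of that check is sketched below; it assumes duplicates differ only in book-keeping keys such as _id, and the ignore list is an assumption for illustration, not the DAS definition.

def has_consecutive_duplicates(rows, ignore=('_id', 'das', 'das_id', 'cache_id')):
    "Return True if two neighbouring rows carry identical payloads."
    previous = None
    for row in rows:
        payload = {key: val for key, val in row.items() if key not in ignore}
        if previous is not None and payload == previous:
            return True
        previous = payload
    return False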
Esempio n. 47
0
    def call(self, query, **kwds):
        """
        Top level DAS API which executes a given query using the underlying
        data-services. It performs the following steps:

            - parse the input query
            - identify data-services based on selection keys
              and where-clause conditions
            - construct the DAS workflow and execute data-service
              API calls; at this step the individual data-services
              store their results into the DAS cache.

        Return the query status (e.g. 'ok', 'fail' or 'in cache') depending
        on the success of the calls; it can be used by workers on the cache
        server.

        kwds is provided for compatibility with the web layer, which may,
        for instance, invoke this method with an additional pid parameter.
        """
        def update_das_query(dasquery, status, reason=None):
            "Update DAS query record with given status and reason"
            self.rawcache.update_query_record(dasquery, status, reason=reason)
            self.rawcache.add_to_record(\
                    dasquery, {'das.timer': get_das_timer()}, system='das')

        self.logger.info('input query=%s' % query)
        das_timer('DASCore::call', self.verbose)
        if  isinstance(query, object) and hasattr(query, '__class__')\
            and query.__class__.__name__ == 'DASQuery':
            dasquery = query
        else:
            dasquery = DASQuery(query)
        for col in ['merge', 'cache']:
            self.rawcache.remove_expired(dasquery, col)
        query = dasquery.mongo_query
        spec = query.get('spec')
        fields = query.get('fields')
        if fields == ['records']:
            msg = 'look-up all records in cache'
            self.logger.info(msg)
            return 'in cache'
        if spec == dict(records='*'):
            self.logger.info("look-up everything in cache")
            return 'in cache'
        for record in self.rawcache.find_specs(dasquery):
            status = record['das']['status']
            msg = 'found query %s in cache, status=%s\n' \
                        % (record['query'], status)
            self.logger.info(msg)
            print(dastimestamp('DAS INFO'), msg)
            return status

        self.logger.info(dasquery)
        das_timer('das_record', self.verbose)
        services = self.insert_query_records(dasquery)
        if not services:
            msg = 'unable to locate data-services to fulfill this request'
            msg += ', will iterate over all registered services'
            print(dastimestamp('DAS WARNING '), dasquery, msg)
            services = dasquery.services if dasquery.services else self.systems
        try:
            if self.multitask:
                jobs = []
                for srv in sorted(services):
                    jobs.append(self.taskmgr.spawn(self.worker, srv, dasquery))
                self.taskmgr.joinall(jobs)
            else:
                for srv in services:
                    self.worker(srv, dasquery)
        except Exception as exc:
            print_exc(exc)
            return 'fail'
        self.logger.info('\n##### merging ######\n')
        update_das_query(dasquery, 'merging')
        das_timer('merge', self.verbose)

        # check that all query record statuses are ok, i.e. that records were
        # inserted; this status is set by self.rawcache.update_cache
        for idx in range(self.collect_wait_time):
            records = self.rawcache.find_query_record(dasquery)
            statuses = []
            for row in records:
                system = row['das']['system']
                status = row['das']['status']
                self.logger.info("### query record status %s %s %s" %
                                 (dasquery.qhash, system, status))
                statuses.append(status)
            all_statuses = sorted(list(set(statuses)))
            # at this point every service should report 'ok' while the das
            # meta record is still in 'merging' state
            if all_statuses == ['merging', 'ok']:
                break
            time.sleep(1)

        # now we can merge records
        status = self.rawcache.merge_records(dasquery)
        das_timer('merge', self.verbose)
        # check if we have service records and properly setup status
        self.logger.info('\n##### check services ######\n')
        das_services = self.rawcache.check_services(dasquery)
        reason = ''
        status = 'ok'
        if not das_services:
            if 'records' in dasquery.query:
                status = 'ok'  # keep status ok for 'records' queries
            else:
                reason = 'no data records found in DAS cache'
                status = 'fail'
                print(dastimestamp('DAS ERROR '), dasquery, reason)
        update_das_query(dasquery, status, reason)
        das_timer('DASCore::call', self.verbose)
        return status
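
Stripped of logging and error handling, the control flow of call() above is: dispatch one worker per data-service, poll the per-service query records until they all report completion, then merge. The sketch below captures that loop under stated assumptions; run_workflow, worker and statuses are placeholder callables, not DAS methods.

import time

def run_workflow(services, worker, statuses, wait_time=30):
    "Run workers for all services, then poll their statuses before merging."
    for srv in sorted(services):
        worker(srv)                      # each worker fills the raw cache
    for _ in range(wait_time):
        seen = set(statuses())           # e.g. {'requested', 'ok'}
        if seen <= {'ok', 'merging'}:    # every service finished its part
            return 'ok'
        time.sleep(1)                    # give slow services another second
    return 'fail'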
Esempio n. 48
0
 def helper(self, api, args, expire):
     """
     Class helper function which yields results for a given
     set of input parameters. It yields data records whose
     combined attributes correspond to the systems used to
     produce the record content.
     """
     dbs_url = self.map[api]['services'][self.dbs]
     phedex_url = self.map[api]['services']['phedex']
     # make phedex_api from url, but use xml version for processing
     phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
     if  api == 'dataset4site_release' or \
         api == 'dataset4site_release_parent' or \
         api == 'child4site_release_dataset':
         # DBS part
         datasets = set()
         release = args['release']
         parent = args.get('parent', None)
         for row in dbs_dataset4release_parent(dbs_url, release, parent):
             datasets.add(row)
         # Phedex part
         if args['site'].find('.') != -1:  # it is SE
             phedex_args = {
                 'dataset': list(datasets),
                 'se': '%s' % args['site']
             }
         else:
             phedex_args = {
                 'dataset': list(datasets),
                 'node': '%s*' % args['site']
             }
         headers = {'Accept': 'text/xml'}
         source, expire = \
             getdata(phedex_api, phedex_args, headers, expire, system='phedex')
         prim_key = 'block'
         tags = 'block.replica.node'
         found = {}
         for rec in xml_parser(source, prim_key, tags):
             ddict = DotDict(rec)
             block = ddict.get('block.name')
             bbytes = ddict.get('block.bytes')
             files = ddict.get('block.files')
             found_dataset = block.split('#')[0]
             if found_dataset in found:
                 val = found[found_dataset]
                 found[found_dataset] = {
                     'bytes': val['bytes'] + bbytes,
                     'files': val['files'] + files
                 }
             else:
                 found[found_dataset] = {'bytes': bbytes, 'files': files}
         for name, val in found.items():
             record = dict(name=name, size=val['bytes'], files=val['files'])
             if api == 'child4site_release_dataset':
                 yield {'child': record}
             else:
                 yield {'dataset': record}
         del datasets
         del found
     if api == 'site4dataset':
         try:
             gen = site4dataset(dbs_url, phedex_api, args, expire)
             for row in gen:
                 sname = row.get('site', {}).get('name', '')
                 skind = self.site_info(phedex_url, sname)
                 row['site'].update({'kind': skind})
                 yield row
         except Exception as err:
             print_exc(err)
             tstamp = dastimestamp('')
             msg = tstamp + ' Exception while processing DBS/Phedex info:'
             msg += str(err)
             row = {
                 'site': {
                     'name': 'Fail to look-up site info',
                     'error': msg,
                     'dataset_fraction': 'N/A',
                     'block_fraction': 'N/A',
                     'block_completion': 'N/A'
                 },
                 'error': msg
             }
             yield row
     if  api == 'files4dataset_runs_site' or \
         api == 'files4block_runs_site':
         run_value = args.get('run', [])
         if isinstance(run_value, dict) and '$in' in run_value:
             runs = run_value['$in']
         elif isinstance(run_value, list):
             runs = run_value
         else:
             if int_number_pattern.match(str(run_value)):
                 runs = [run_value]
             else:
                 runs = []
         args.update({'runs': runs})
         files = dbs_find('file', dbs_url, args)
         site = args.get('site')
         phedex_api = phedex_url.replace('/json/',
                                         '/xml/') + '/fileReplicas'
         for fname in files4site(phedex_api, files, site):
             yield {'file': {'name': fname}}
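
The DBS/PhEDEx branch of helper above rolls block-level replica information up to datasets: a PhEDEx block name is the dataset path plus a '#<uuid>' suffix, so stripping the suffix and summing bytes and files per dataset gives the per-site dataset totals. A small stand-alone illustration follows; the sample block names and numbers are invented.

def aggregate_blocks(block_rows):
    "Sum bytes and files of block replicas per parent dataset."
    found = {}
    for row in block_rows:
        dataset = row['name'].split('#')[0]   # '/prim/proc/TIER#uuid' -> dataset
        entry = found.setdefault(dataset, {'bytes': 0, 'files': 0})
        entry['bytes'] += row['bytes']
        entry['files'] += row['files']
    return found

blocks = [{'name': '/a/b/RAW#111', 'bytes': 10, 'files': 2},
          {'name': '/a/b/RAW#222', 'bytes': 5,  'files': 1}]
print(aggregate_blocks(blocks))  # {'/a/b/RAW': {'bytes': 15, 'files': 3}}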