コード例 #1
0
 def datasets_dbs(self):
     """
     Yield one record per VALID DBS dataset.

     Each record is {'dataset': name}; when ``self.write_hash`` is set
     a ``qhash`` of the corresponding DAS storage query is added.
     """
     query = urllib.urlencode({'dataset_access_type':'VALID'}, doseq=True)
     request = urllib2.Request(self.dbs_url + '/datasets?' + query)
     # grid-proxy credentials for HTTPS client authentication
     ckey, cert = get_key_cert()
     auth_opener = urllib2.build_opener(HTTPSClientAuthHandler(ckey, cert))
     urllib2.install_opener(auth_opener)
     stream = urllib2.urlopen(request)
     for row in json.load(stream):
         name = row['dataset']
         record = {'dataset': name}
         if self.write_hash:
             # hash of the storage query lets consumers find cached results
             squery = {"fields": ["dataset"],
                       "spec": [{"key": "dataset.name",
                                 "value": "\"%s\"" % name}],
                       "instance": self.dbcoll}
             record['qhash'] = genkey(squery)
         yield record
     stream.close()
コード例 #2
0
    def fetch_values(self):
        """
        Fetch data from the configured provider and select the final
        values via the configured jsonpath rule.

        Returns a generator of {'value': ...} dicts when a jsonpath
        selector is configured, otherwise an empty list.
        """
        # grid-proxy credentials for HTTPS client authentication
        ckey, cert = get_key_cert()
        auth_handler = HTTPSClientAuthHandler(ckey, cert)
        urllib2.install_opener(urllib2.build_opener(auth_handler))

        # request list of possible values (no extra parameters)
        service = self.cfg
        url = service['url'] + urllib.urlencode({}, doseq=True)
        print(str(url))
        request = urllib2.Request(url)

        selector = service['jsonpath_selector']
        if selector:
            # sitedb is messed up and randomly returns xml; insist on json
            request.add_header('Accept', 'application/json')

        stream = urllib2.urlopen(request)

        if not selector:
            return []

        payload = json.load(stream)
        matches = parse(selector).find(payload)
        stream.close()
        return ({'value': match.value} for match in matches)
コード例 #3
0
ファイル: abstract_service.py プロジェクト: zdenekmaxa/DAS
    def __init__(self, name, config):
        """
        Initialize an abstract DAS service: logging, mappings, analytics,
        grid credentials, an optional task manager and the raw-cache handle.

        :param name: data-service name used in logging and worker thread names
        :param config: DAS configuration dict (verbose, dasmapping,
            dasanalytics, das, mongodb, rawcache, ...); raises Exception
            when mandatory keys are missing
        """
        self.name = name
        try:
            self.verbose      = config['verbose']
            title             = 'DASAbstactService_%s' % self.name
            self.logger       = PrintManager(title, self.verbose)
            self.dasmapping   = config['dasmapping']
            self.analytics    = config['dasanalytics']
            self.write2cache  = config.get('write_cache', True)
            self.multitask    = config['das'].get('multitask', True)
            self.error_expire = config['das'].get('error_expire', 300)
            # dict.has_key() is Python 2 only; `in` works on both 2 and 3
            # (and matches the rawcache check below)
            if  'dbs' in config:
                self.dbs_global = config['dbs'].get('dbs_global_instance', None)
            else:
                self.dbs_global = None
            dburi             = config['mongodb']['dburi']
            engine            = config.get('engine', None)
            self.gfs          = db_gridfs(dburi)
        except Exception as exc:
            print_exc(exc)
            raise Exception('fail to parse DAS config')

        # read key/cert info; missing credentials are not fatal
        try:
            self.ckey, self.cert = get_key_cert()
        except Exception as exc:
            print_exc(exc)
            self.ckey = None
            self.cert = None

        if  self.multitask:
            nworkers = config['das'].get('api_workers', 3)
            thr_weights = config['das'].get('thread_weights', [])
            # thread weights are "system:weight" strings; scale the worker
            # pool for the matching data-service
            for system_weight in thr_weights:
                system, weight = system_weight.split(':')
                if  system == self.name:
                    nworkers *= int(weight)
            if  engine:
                thr_name = 'DASAbstractService:%s:PluginTaskManager' % self.name
                self.taskmgr = PluginTaskManager(\
                        engine, nworkers=nworkers, name=thr_name)
                self.taskmgr.subscribe()
            else:
                thr_name = 'DASAbstractService:%s:TaskManager' % self.name
                self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
        else:
            self.taskmgr = None

        self.map        = {}   # to be defined by data-service implementation
        self._keys      = None # to be defined at run-time in self.keys
        self._params    = None # to be defined at run-time in self.parameters
        self._notations = {}   # to be defined at run-time in self.notations

        self.logger.info('initialized')
        # define internal cache manager to put 'raw' results into cache
        if  'rawcache' in config and config['rawcache']:
            self.localcache   = config['rawcache']
        else:
            msg = 'Undefined rawcache, please check your configuration'
            raise Exception(msg)
コード例 #4
0
ファイル: dbs_daemon.py プロジェクト: ktf/DAS
 def datasets_dbs3(self):
     """
     Retrieve a list of DBS datasets (DBS3).

     Yields one record per VALID dataset; when ``self.write_hash`` is
     set, each record also carries a ``qhash`` of the DAS storage query.
     """
     params = {"dataset_access_type": "VALID"}
     encoded_data = urllib.urlencode(params, doseq=True)
     url = self.dbs_url + "/datasets?" + encoded_data
     req = urllib2.Request(url)
     # grid-proxy key/cert for HTTPS client authentication
     ckey, cert = get_key_cert()
     handler = HTTPSClientAuthHandler(ckey, cert)
     opener = urllib2.build_opener(handler)
     # NOTE: install_opener changes the process-wide default opener
     urllib2.install_opener(opener)
     stream = urllib2.urlopen(req)
     gen = json.load(stream)
     for row in gen:
         dataset = row["dataset"]
         rec = {"dataset": dataset}
         if self.write_hash:
             # storage query mirrors how DAS caches dataset look-ups;
             # its hash lets consumers find cached results
             storage_query = {
                 "fields": ["dataset"],
                 "spec": [{"key": "dataset.name", "value": '"%s"' % dataset}],
                 "instance": self.dbcoll,
             }
             rec.update({"qhash": genkey(storage_query)})
         yield rec
     stream.close()
コード例 #5
0
ファイル: input_values_tracker.py プロジェクト: perrozzi/DAS
    def fetch_values(self):
        """
        Fetch the data from providers and select the final values
        with jsonpath rules.

        Returns a generator of {'value': ...} dicts when a jsonpath
        selector is configured, otherwise an empty list.
        """
        # use grid-proxy for authentication
        ckey, cert = get_key_cert()

        handler = HTTPSClientAuthHandler(ckey, cert)
        opener = urllib2.build_opener(handler)
        # NOTE: install_opener changes the process-wide default opener
        urllib2.install_opener(opener)

        # request list of possible values
        params = {}
        encoded_data = urllib.urlencode(params, doseq=True)

        service = self.cfg
        url = service['url'] + encoded_data
        print(str(url))
        req = urllib2.Request(url)

        # ensure we get json (sitedb is messed up and randomly returns xml)
        if service['jsonpath_selector']:
            req.add_header('Accept', 'application/json')

        stream = urllib2.urlopen(req)

        if service['jsonpath_selector']:
            response = json.load(stream)
            # select the final values with the configured jsonpath expression
            jsonpath_expr = parse(service['jsonpath_selector'])
            results = jsonpath_expr.find(response)
            stream.close()

            return ({'value': v.value} for v in results)

        return []
コード例 #6
0
ファイル: abstract_service.py プロジェクト: perrozzi/DAS
    def __init__(self, name, config):
        """
        Initialize an abstract DAS service: logging, mappings, grid
        credentials, an optional task manager and the raw-cache handle.

        :param name: data-service name used in logging and worker thread names
        :param config: DAS configuration dict (verbose, dasmapping, das,
            mongodb, rawcache, ...); raises Exception when mandatory keys
            are missing
        """
        self.name = name
        try:
            self.verbose = config['verbose']
            title = 'DASAbstactService_%s' % self.name
            self.logger = PrintManager(title, self.verbose)
            self.dasmapping = config['dasmapping']
            self.write2cache = config.get('write_cache', True)
            self.multitask = config['das'].get('multitask', True)
            self.error_expire = config['das'].get('error_expire', 300)
            self.dbs_global = None  # to be configured at run time
            self.dburi = config['mongodb']['dburi']
            self.gfs = db_gridfs(self.dburi)
        except Exception as exc:
            print_exc(exc)
            raise Exception('fail to parse DAS config')

        # read key/cert info; missing credentials are not fatal
        try:
            self.ckey, self.cert = get_key_cert()
        except Exception as exc:
            print_exc(exc)
            self.ckey = None
            self.cert = None

        if self.multitask:
            nworkers = config['das'].get('api_workers', 3)
            thr_weights = config['das'].get('thread_weights', [])
            # thread weights are "system:weight" strings; scale the worker
            # pool for the matching data-service
            for system_weight in thr_weights:
                system, weight = system_weight.split(':')
                if system == self.name:
                    nworkers *= int(weight)
            # the PluginTaskManager path was retired; always use the
            # plain TaskManager here
            thr_name = 'DASAbstractService:%s:TaskManager' % self.name
            self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
        else:
            self.taskmgr = None

        self.map = {}  # to be defined by data-service implementation
        self._keys = None  # to be defined at run-time in self.keys
        self._params = None  # to be defined at run-time in self.parameters
        self._notations = {}  # to be defined at run-time in self.notations

        self.logger.info('initialized')
        # define internal cache manager to put 'raw' results into cache
        if 'rawcache' in config and config['rawcache']:
            self.localcache = config['rawcache']
        else:
            msg = 'Undefined rawcache, please check your configuration'
            raise Exception(msg)
コード例 #7
0
ファイル: cern_sso_auth_t.py プロジェクト: perrozzi/DAS
def runsummary(run, debug):
    """Test RunSummary for given run number.

    Scans the RunSummary XML response line by line and returns the
    matching ``<runNumber>`` pattern when present (``None`` otherwise).
    """
    pattern = '<runNumber>%s</runNumber>' % run
    # grid-proxy key/cert for authenticated access
    key, cert = get_key_cert()
    query = 'RUN=%s&DB=cms_omds_lb&FORMAT=XML' % run
    url = 'https://cmswbm.web.cern.ch/cmswbm/cmsdb/servlet/RunSummary?' + query
    data = get_data(url, key, cert, debug)
    for line in data.read().split('\n'):
        if line == pattern:
            return pattern
コード例 #8
0
ファイル: cern_sso_auth_t.py プロジェクト: zdenekmaxa/DAS
def runsummary(run, debug):
    """Test RunSummary for given run number.

    Returns the matching ``<runNumber>`` pattern line when the
    RunSummary XML response contains it, otherwise returns None.
    """
    pat = "<runNumber>%s</runNumber>" % run
    # grid-proxy key/cert for authenticated access
    key, cert = get_key_cert()
    url = "https://cmswbm.web.cern.ch/cmswbm/cmsdb/servlet/RunSummary?"
    url += "RUN=%s&DB=cms_omds_lb&FORMAT=XML" % run
    data = get_data(url, key, cert, debug)
    for line in data.read().split("\n"):
        if line == pat:
            return pat
コード例 #9
0
ファイル: abstract_service.py プロジェクト: ktf/DAS
    def __init__(self, name, config):
        """Wire up logging, credentials, task manager and raw cache.

        Raises Exception when mandatory config keys are missing or when
        no rawcache manager is configured.
        """
        self.name = name
        try:
            self.verbose = config["verbose"]
            self.logger = PrintManager("DASAbstactService_%s" % self.name,
                                       self.verbose)
            self.dasmapping = config["dasmapping"]
            self.write2cache = config.get("write_cache", True)
            self.multitask = config["das"].get("multitask", True)
            self.error_expire = config["das"].get("error_expire", 300)
            self.dbs_global = None  # configured at run time
            self.dburi = config["mongodb"]["dburi"]
            engine = config.get("engine", None)
            self.gfs = db_gridfs(self.dburi)
        except Exception as exc:
            print_exc(exc)
            raise Exception("fail to parse DAS config")

        # grid credentials are optional; fall back to None on failure
        try:
            self.ckey, self.cert = get_key_cert()
        except Exception as exc:
            print_exc(exc)
            self.ckey = None
            self.cert = None

        if not self.multitask:
            self.taskmgr = None
        else:
            workers = config["das"].get("api_workers", 3)
            # "system:weight" entries scale the worker pool for this service
            for entry in config["das"].get("thread_weights", []):
                sysname, factor = entry.split(":")
                if sysname == self.name:
                    workers *= int(factor)
            if engine:
                self.taskmgr = PluginTaskManager(
                    engine, nworkers=workers,
                    name="DASAbstractService:%s:PluginTaskManager" % self.name)
                self.taskmgr.subscribe()
            else:
                self.taskmgr = TaskManager(
                    nworkers=workers,
                    name="DASAbstractService:%s:TaskManager" % self.name)

        self.map = {}  # filled in by the concrete data-service
        self._keys = None  # computed lazily in self.keys
        self._params = None  # computed lazily in self.parameters
        self._notations = {}  # computed lazily in self.notations

        self.logger.info("initialized")
        # internal cache manager for 'raw' results; it is mandatory
        if config.get("rawcache"):
            self.localcache = config["rawcache"]
        else:
            msg = "Undefined rawcache, please check your configuration"
            raise Exception(msg)
コード例 #10
0
ファイル: dbs_rr.py プロジェクト: dmwm/DAS
def test():
    """Test main function: fetch and print lumi records for one dataset."""
    # the DBS3 reader URL was dead code (immediately overwritten);
    # kept here for reference only
    # dbs_url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
    dbs_url = 'http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet'
    rr_url = 'http://localhost:8081/runregistry'
    ckey, cert = get_key_cert()
    dataset = '/DoubleElectron/Run2012A-13Jul2012-v1/AOD'
    kwds = dict(dbs_url=dbs_url, rr_url=rr_url, ckey=ckey, cert=cert,
            dataset=dataset)
    for row in lumis4dataset(kwds):
        print(row)
コード例 #11
0
ファイル: lumi_service.py プロジェクト: perrozzi/DAS
 def __init__(self, config=None):
     """Set up the Lumi service; urls/expire are filled in by self.init()."""
     super(LumiService, self).__init__()
     config = config or {}
     self.dasconfig = das_readconfig()
     self.service_name = config.get('name', 'combined')
     self.service_api = config.get('api', 'combined_lumi4dataset')
     self.uri = self.dasconfig['mongodb']['dburi']
     # populated at run-time by self.init()
     self.urls = None
     self.expire = None
     self.ckey, self.cert = get_key_cert()
     self.init()
コード例 #12
0
def test():
    """Test main function"""
    # NOTE(review): the DBS3 URL below is immediately overwritten by the
    # DBS2 servlet URL — the first assignment is dead code; confirm intent
    dbs_url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
    dbs_url = 'http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet'
    rr_url = 'http://localhost:8081/runregistry'
    ckey, cert = get_key_cert()
    dataset = '/DoubleElectron/Run2012A-13Jul2012-v1/AOD'
    # lumis4dataset takes all connection settings as one kwargs dict
    kwds = dict(dbs_url=dbs_url,
                rr_url=rr_url,
                ckey=ckey,
                cert=cert,
                dataset=dataset)
    for row in lumis4dataset(kwds):
        print(row)
コード例 #13
0
ファイル: dbs_daemon.py プロジェクト: zdenekmaxa/DAS
 def datasets_dbs3(self):
     """
     Yield raw rows for PRODUCTION datasets from DBS3.
     """
     query = urllib.urlencode({'dataset_access_type':'PRODUCTION'},
                              doseq=True)
     req = urllib2.Request(self.dbs_url + '/datasets?' + query)
     # grid-proxy credentials for HTTPS client authentication
     ckey, cert = get_key_cert()
     auth_opener = urllib2.build_opener(HTTPSClientAuthHandler(ckey, cert))
     urllib2.install_opener(auth_opener)
     stream = urllib2.urlopen(req)
     for row in json.load(stream):
         yield row
     stream.close()
コード例 #14
0
ファイル: dbs_phedex.py プロジェクト: zdenekmaxa/DAS
def datasets_dbs3(urls, verbose=0):
    """DBS3 implementation of datasets function.

    Fetches detailed PRODUCTION dataset records from DBS and yields
    merged dataset info (era/tier) in batches via ``dataset_info``.

    :param urls: dict of service URLs; the 'dbs' entry is queried
    :param verbose: verbosity level passed through to ``getdata``
    """
    headers = {'Accept':'application/json;text/json'}
    url     = urls.get('dbs')
    params  = {'detail':'True', 'dataset_access_type':'PRODUCTION'}
    ckey, cert = get_key_cert()
    data, _ = getdata(url, params, headers, verbose=verbose,
                ckey=ckey, cert=cert, doseq=False)
    records = json.load(data)
    data.close()
    data = {}
    size = 10 # batch size for POST request to Phedex
    for row in records:
        # dict.has_key() is Python 2 only; use the `in` operator instead
        if  row['dataset'] not in data:
            data[row['dataset']] = \
            dict(era=row['acquisition_era_name'], tier=row['data_tier_name'])
        # len(data.keys()) built a throwaway list; len(data) is equivalent
        if  len(data) > size:
            for rec in dataset_info(urls, data):
                yield rec
            data = {}
    # flush the final, partially-filled batch
    if  data:
        for rec in dataset_info(urls, data):
            yield rec
コード例 #15
0
ファイル: reqmgr_service.py プロジェクト: ktf/DAS
ReqMgr service
"""
__author__ = "Valentin Kuznetsov"

# system modules
import time

# DAS modules
from DAS.services.abstract_service import DASAbstractService
from DAS.utils.utils import map_validator, get_key_cert, json_parser
from DAS.utils.url_utils import getdata
from DAS.utils.urlfetch_pycurl import getdata as urlfetch_getdata

import DAS.utils.jsonwrapper as json

CKEY, CERT = get_key_cert()

def findReqMgrIds(dataset, base='https://cmsweb.cern.ch', verbose=False):
    """
    Find ReqMgrIds for a given dataset. This is quite complex procedure in CMS.
    We need to query ReqMgr data-service cache and find workflow ids by
    outputdataset name. The ReqMgr returns either document with ids used by MCM
    (i.e. ProcConfigCacheID, ConfigCacheID, SkimConfigCacheID) or we can take
    id of the request which bypass MCM. For refences see these discussions:
    https://github.com/dmwm/DAS/issues/4045
    https://hypernews.cern.ch/HyperNews/CMS/get/dmDevelopment/1501/1/1/1/1.html
    """
    params = {'key': '"%s"' % dataset, 'include_docs':'true'}
    url = "%s/couchdb/reqmgr_workload_cache/_design/ReqMgr/_view/byoutputdataset" \
        % base
    headers = {'Accept': 'application/json;text/json'}
コード例 #16
0
ファイル: reqmgr2_service.py プロジェクト: perrozzi/DAS
"""
ReqMgr2 service
"""
__author__ = "Valentin Kuznetsov"

# system modules
import json
import time

# DAS modules
from DAS.services.abstract_service import DASAbstractService
from DAS.utils.utils import map_validator, get_key_cert, json_parser
from DAS.utils.url_utils import getdata
from DAS.utils.urlfetch_pycurl import getdata as urlfetch_getdata

CKEY, CERT = get_key_cert()


def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr2 or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600  # dummy number, we don't need it here
    ids = []
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            found = 0
            if not doc:
コード例 #17
0
ファイル: runsum_service.py プロジェクト: zdenekmaxa/DAS
 def apicall(self, dasquery, url, api, args, dformat, expire):
     """
     Invoke DBS API to execute given query.
     Return results as a list of dict, e.g.
     [{'run':1,'dataset':/a/b/c'}, ...]

     Translates the DAS mongo query spec into RunSummary API arguments
     (RUN_BEGIN/RUN_END, TIME_BEGIN/TIME_END), fetches the data and
     writes the parsed rows into the DAS cache.
     """
     # translate selection keys into ones data-service APIs provides
     cond = dasquery.mongo_query['spec']
     args = dict(self.params)
     # dict.iteritems()/has_key() are Python 2 only; .items() and `in`
     # behave the same here and also work on Python 3
     for key, value in cond.items():
         if  isinstance(value, dict): # we got equal condition
             if  key == 'date':
                 if  isinstance(value, list) and len(value) != 2:
                     msg  = 'RunSummary service requires 2 time stamps.'
                     msg += 'Please use either date last XXh format or'
                     msg += 'date in YYYYMMDD-YYYYMMDD'
                     raise Exception(msg)
                 args['TIME_BEGIN'] = convert_datetime(value[0])
                 args['TIME_END']   = convert_datetime(value[1])
             else:
                 for param in self.dasmapping.das2api(self.name, key):
                     args[param] = value
         elif key == 'run.number' or key == 'run.run_number':
             # translate mongo range operators into a RUN_BEGIN/RUN_END pair
             minrun = None
             maxrun = None
             for oper, val in value.items():
                 if  oper == '$in':
                     minrun = int(val[0])
                     maxrun = int(val[-1])
                 elif oper == '$lt':
                     maxrun = int(val) - 1
                 elif oper == '$lte':
                     maxrun = int(val)
                 elif oper == '$gt':
                     minrun = int(val) + 1
                 elif oper == '$gte':
                     minrun = int(val)
                 else:
                     msg = 'RunSummary does not support operator %s' % oper
                     raise Exception(msg)
             args['RUN_BEGIN'] = minrun
             args['RUN_END']   = maxrun
         elif key == 'date' and '$in' in value and \
             len(value['$in']) == 2:
             date1, date2 = value['$in']
             args['TIME_BEGIN'] = convert_datetime(date1)
             args['TIME_END']   = convert_datetime(date2)
         else: # we got some operator, e.g. key :{'$in' : [1,2,3]}
             # TODO: not sure how to deal with them right now, will throw
             msg = 'RunSummary does not support value %s for key=%s' \
             % (value, key)
             raise Exception(msg)
     if  args == self.params: # no parameter is provided
         # default to the last 24 hours
         args['TIME_END'] = convert_datetime(time.time())
         args['TIME_BEGIN'] = convert_datetime(time.time() - 24*60*60)
     key, cert = get_key_cert()
     debug   = 0
     if  self.verbose > 1:
         debug   = 1
     try:
         time0   = time.time()
         # keys() is a view on Python 3; wrap in list() for portability
         api     = list(self.map.keys())[0] # we only register 1 API
         url     = self.map[api]['url']
         expire  = self.map[api]['expire']
         msg     = 'DASAbstractService::%s::getdata(%s, %s)' \
                 % (self.name, url, args)
         self.logger.info(msg)
         data    = get_data(run_summary_url(url, args), key, cert, debug)
         genrows = self.parser(data, api)
         ctime   = time.time()-time0
         self.write_to_cache(\
             dasquery, expire, url, api, args, genrows, ctime)
     except Exception:
         # bare except would also swallow SystemExit/KeyboardInterrupt
         traceback.print_exc()
         msg = 'Fail to process: url=%s, api=%s, args=%s' \
                 % (url, api, args)
         self.logger.warning(msg)
コード例 #18
0
ファイル: runsum_service.py プロジェクト: perrozzi/DAS
 def apicall(self, dasquery, url, api, args, dformat, expire):
     """
     Invoke DBS API to execute given query.
     Return results as a list of dict, e.g.
     [{'run':1,'dataset':/a/b/c'}, ...]

     Translates the DAS mongo query spec into RunSummary API arguments
     (RUN_BEGIN/RUN_END, TIME_BEGIN/TIME_END), fetches the data and
     writes the parsed rows into the DAS cache.
     """
     # translate selection keys into ones data-service APIs provides
     cond = dasquery.mongo_query['spec']
     args = dict(self.params)
     for key, value in cond.items():
         if  isinstance(value, dict): # we got equal condition
             if  key == 'date':
                 if  isinstance(value, list) and len(value) != 2:
                     msg  = 'RunSummary service requires 2 time stamps.'
                     msg += 'Please use either date last XXh format or'
                     msg += 'date in YYYYMMDD-YYYYMMDD'
                     raise Exception(msg)
                 args['TIME_BEGIN'] = convert_datetime(value[0])
                 args['TIME_END']   = convert_datetime(value[1])
             else:
                 for param in self.dasmapping.das2api(self.name, api, key):
                     args[param] = value
         elif key == 'run.number' or key == 'run.run_number':
             # translate mongo range operators into a RUN_BEGIN/RUN_END pair
             minrun = None
             maxrun = None
             for oper, val in value.items():
                 if  oper == '$in':
                     minrun = int(val[0])
                     maxrun = int(val[-1])
                 elif oper == '$lt':
                     maxrun = int(val) - 1
                 elif oper == '$lte':
                     maxrun = int(val)
                 elif oper == '$gt':
                     minrun = int(val) + 1
                 elif oper == '$gte':
                     minrun = int(val)
                 else:
                     msg = 'RunSummary does not support operator %s' % oper
                     raise Exception(msg)
             args['RUN_BEGIN'] = minrun
             args['RUN_END']   = maxrun
         elif key == 'date' and '$in' in value and \
             len(value['$in']) == 2:
             date1, date2 = value['$in']
             args['TIME_BEGIN'] = convert_datetime(date1)
             args['TIME_END']   = convert_datetime(date2)
         else: # we got some operator, e.g. key :{'$in' : [1,2,3]}
             # TODO: not sure how to deal with them right now, will throw
             msg = 'RunSummary does not support value %s for key=%s' \
             % (value, key)
             raise Exception(msg)
     if  args == self.params: # no parameter is provided
         # default to the last 24 hours
         args['TIME_END'] = convert_datetime(time.time())
         args['TIME_BEGIN'] = convert_datetime(time.time() - 24*60*60)
     key, cert = get_key_cert()
     debug   = 0
     if  self.verbose > 1:
         debug   = 1
     try:
         time0   = time.time()
         api     = list(self.map.keys())[0] # we only register 1 API
         url     = self.map[api]['url']
         expire  = self.map[api]['expire']
         msg     = 'DASAbstractService::%s::getdata(%s, %s)' \
                 % (self.name, url, args)
         self.logger.info(msg)
         data    = get_data(run_summary_url(url, args), key, cert, debug)
         genrows = self.parser(data, api)
         ctime   = time.time()-time0
         self.write_to_cache(\
             dasquery, expire, url, api, args, genrows, ctime)
     except Exception:
         # bare except would also swallow SystemExit/KeyboardInterrupt
         traceback.print_exc()
         msg = 'Fail to process: url=%s, api=%s, args=%s' \
                 % (url, api, args)
         self.logger.warning(msg)