Beispiel #1
0
    def init(self):
        """
        Set up the DAS web server state and attach it to DAS Core.

        A MongoDB connection failure is only reported on stdout. Any other
        error raised during the set-up is printed, the DAS related
        attributes are reset to empty values and the method returns.
        The query rewriter is created last; its failure is printed and
        leaves ``self.q_rewriter`` set to None.
        """
        try:
            self.reqmgr = RequestManager(lifetime=self.lifetime)
            core = DASCore(engine=self.engine)
            self.dasmgr = core
            self.repmgr = CMSRepresentation(self.dasconfig, core)
            self.daskeys = core.das_keys()
            self.gfs = db_gridfs(self.dburi)
            self.daskeys.sort()
            mapping = core.mapping
            self.dasmapping = mapping
            self.dbs_url = mapping.dbs_url()
            self.dbs_global = mapping.dbs_global_instance()
            self.dbs_instances = mapping.dbs_instances()
            mapping.init_presentationcache()
            # one color for DAS itself plus one per data-service
            palette = {"das": gen_color("das")}
            self.colors = palette
            for name in core.systems:
                palette[name] = gen_color(name)
            # get SiteDB from global scope
            self.sitedbmgr = SERVICES.get("sitedb2", None)
            # Start DBS daemon
            if self.dataset_daemon:
                self.dbs_daemon(self.dasconfig["web_server"])
            # build the list of presentation keys only once
            if not self.daskeyslist:
                self.daskeyslist = sorted(
                    mapping.das_presentation_map(),
                    key=lambda entry: entry["das"])

        except ConnectionFailure as _err:
            tstamp = dastimestamp("")
            thread = threading.current_thread()
            report = \
                "### MongoDB connection failure thread=%s, id=%s, time=%s" \
                % (thread.name, thread.ident, tstamp)
            print(report)
        except Exception as exc:
            print_exc(exc)
            # leave the object in a well defined "not initialized" state
            self.dasmgr = None
            self.reqmgr = None
            self.dbs_url = None
            self.dbs_global = None
            self.dbs_instances = []
            self.daskeys = []
            self.colors = {}
            self.q_rewriter = None
            return

        # KWS and Query Rewriting failures are not fatal
        try:
            # init query rewriter, if needed
            rewrite_on = self.dasconfig["query_rewrite"]["pk_rewrite_on"]
            if rewrite_on:
                self.q_rewriter = CMSQueryRewrite(self.repmgr,
                                                  self.templatepage)
        except Exception as exc:
            print_exc(exc)
            self.q_rewriter = None
Beispiel #2
0
 def parser(self, dasquery, dformat, source, api):
     """
     DBS data-service parser.

     Generator which feeds ``source`` to the XML parser and yields one
     row (dictionary) per record, after renaming or removing a number
     of DBS specific attributes.

     :param dasquery: query object, only its ``mongo_query`` is consulted
     :param dformat: data format, not used by this parser
     :param source: data stream handed over to the XML parser
     :param api: name of the DBS API which produced ``source``
     :raises Exception: when ``api`` is not a supported DBS API; since
         this is a generator the error shows up on the first iteration
     """
     sitedb = SERVICES.get('sitedb2', None) # look-up SiteDB from global scope
     query = dasquery.mongo_query
     # DBS API name -> primary key of the records it returns
     prim_keys = {
         'listBlocks': 'block',
         'listBlocks4path': 'block',
         'listBlockProvenance': 'block',
         'listBlockProvenance4child': 'block',
         'listFiles': 'file',
         'listLFNs': 'file_lfn',
         'listFileLumis': 'file_lumi_section',
         'listFileProcQuality': 'file_proc_quality',
         'listFileParents': 'file_parent',
         'listTiers': 'data_tier',
         'listDatasetParents': 'processed_dataset_parent',
         'listPrimaryDatasets': 'primary_dataset',
         'listProcessedDatasets': 'processed_dataset',
         'listRuns': 'run',
         'fakeReleases': 'release',
         'fakeRelease4File': 'release',
         'fakeRelease4Dataset': 'release',
         'fakeGroup4Dataset': 'group',
         'fakeConfig': 'config',
         'fakeListDataset4Block': 'dataset',
         'fakeListDataset4File': 'dataset',
         'fakeListDatasetbyDate': 'dataset',
         'fakeDatasetSummary': 'dataset',
         'fakeDataset4Run': 'dataset',
         'fakeRun4File': 'run',
         'fakeRun4Run': 'run',
         'fakeRun4Block': 'run',
         'fakeChild4File': 'child',
         'fakeChild4Dataset': 'child',
         'fakeSite4Dataset': 'site',
         'fakeStatus': 'status',
         'fakeFiles4DatasetRunLumis': 'file',
         'fakeBlock4DatasetRun': 'block',
     }
     if api not in prim_keys:
         msg = 'DBSService::parser, unsupported %s API %s' \
             % (self.name, api)
         raise Exception(msg)
     prim_key = prim_keys[api]
     if api == 'listBlocks4path':
         # from here on it is treated as the plain listBlocks API
         api = 'listBlocks'
     if 'fake' in api:
         gen = qlxml_parser(source, prim_key)
     else:
         gen = xml_parser(source, prim_key)
     useless_run_atts = ['number_of_events', 'number_of_lumi_sections',
                         'id', 'total_luminosity', 'store_number',
                         'end_of_run', 'start_of_run']
     config_attrs = ['config.name', 'config.content', 'config.version',
                     'config.type', 'config.annotation',
                     'config.createdate', 'config.createby',
                     'config.moddate', 'config.modby']
     for row in gen:
         if not row:
             continue
         if 'status' in row and 'dataset.status' in row['status']:
             row['status']['name'] = row['status'].pop('dataset.status')
         if 'file_lumi_section' in row:
             row['lumi'] = row.pop('file_lumi_section')
         if 'algorithm' in row:
             # tolerate records which come without ps_content
             row['algorithm'].pop('ps_content', None)
         if 'processed_dataset' in row and \
                 'path' in row['processed_dataset']:
             path = row['processed_dataset']['path']
             if isinstance(path, dict) and 'dataset_path' in path:
                 del row['processed_dataset']['path']
                 row['processed_dataset']['name'] = path['dataset_path']
         # case for fake apis
         # remove useless attribute from results
         if 'dataset' in row:
             dataset = row['dataset']
             dataset.pop('count_file.size', None)
             if 'dataset' in dataset:
                 dataset['name'] = dataset.pop('dataset')
         if 'child' in row and 'dataset.child' in row['child']:
             row['child']['name'] = row['child'].pop('dataset.child')
         if 'child' in row and 'file.child' in row['child']:
             row['child']['name'] = row['child'].pop('file.child')
         # block look-ups issued for parent/child are reported under the
         # requested key; once renamed the second test can not match
         if 'block' in row and query.get('fields') == ['parent']:
             row['parent'] = row.pop('block')
         if 'block' in row and query.get('fields') == ['child']:
             row['child'] = row.pop('block')
         if 'run' in row and 'run' in row['run']:
             row['run']['run_number'] = row['run'].pop('run')
         if 'release' in row and 'release' in row['release']:
             row['release']['name'] = row['release'].pop('release')
         if 'site' in row and 'site' in row['site']:
             row['site']['se'] = row['site'].pop('site')
         convert_dot(row, 'config', config_attrs)
         convert_dot(row, 'file', ['file.name'])
         convert_dot(row, 'block', ['block.name'])
         convert_dot(row, 'dataset', ['dataset.tag', 'dataset.status'])
         # remove DBS2 run attributes (to be consistent with DBS3 output)
         # and let people extract this info from CondDB/LumiDB.
         run = row.get('run')
         if isinstance(run, dict):
             for att in useless_run_atts:
                 run.pop(att, None)
         if api == 'fakeSite4Dataset' and sitedb:
             site = row.get('site', None)
             if site and isinstance(site, dict):
                 sename = site.get('se', None)
                 info = sitedb.site_info(sename)
                 if info:
                     row['site'].update(info)
         yield row
Beispiel #3
0
    def adjust_params(self, api, kwds, inst=None):
        """
        Adjust DBS2 parameters for specific query requests.

        To mimic DBS3 behavior dataset summary information is only
        requested by the fakeDatasetSummary and fakeListDataset4Block
        APIs, which use a full dataset and block name, respectively.

        For the "fake" APIs the arguments found in ``kwds`` are folded into
        a DBS-QL statement stored under ``kwds['query']`` and the consumed
        arguments are removed from ``kwds``. The literal ``'required'`` is
        stored as the query when a mandatory argument still carries that
        placeholder value.

        :param api: name of the DBS API
        :param kwds: dictionary of API arguments, modified in place
        :param inst: DBS instance; fakeSite4Dataset and fakeDataset4Site
            are only adjusted when it is given and differs from
            ``self.prim_instance``
        :return: None
        """
        sitedb = SERVICES.get('sitedb2', None) # SiteDB from global scope

        def run_condition(val):
            """Express a run value (int or Mongo-like dict) in DBS-QL."""
            if isinstance(val, dict):
                min_run = val.get('$gte', 0)
                max_run = val.get('$lte', 0)
                if min_run and max_run:
                    return "run >=%s and run <= %s" % (min_run, max_run)
                if '$in' in val:
                    runs = ' or '.join(['run=%s' % r for r in val['$in']])
                    return '(%s)' % runs
                return val # unsupported spec is passed through untouched
            if isinstance(val, int):
                return "run = %d" % val
            return val

        def set_query(key, template):
            """Fill template with kwds[key], store the query, drop key."""
            val = kwds[key]
            if val != 'required':
                kwds['query'] = template % val
            else:
                kwds['query'] = 'required'
            kwds.pop(key)

        # APIs whose query is a template filled with one mandatory argument
        single_arg_queries = {
            'fakeRun4Block': ('block', 'find run where block=%s'),
            'fakeReleases': ('release', 'find release where release=%s'),
            'fakeRelease4File': ('file', 'find release where file=%s'),
            'fakeRelease4Dataset':
                ('dataset', 'find release where dataset=%s'),
            'fakeListDataset4File':
                ('file',
                 'find dataset, count(block), count(file.size) '
                 '  where file=%s'),
            'fakeListDataset4Block':
                ('block',
                 'find dataset, count(block), '
                 '  sum(block.size), sum(block.numfiles), '
                 'sum(block.numevents) '
                 '  where block=%s'),
            'fakeGroup4Dataset':
                ('dataset', 'find phygrp where dataset = %s'),
            'fakeChild4File': ('file', 'find file.child where file = %s'),
            'fakeChild4Dataset':
                ('dataset', 'find dataset.child where dataset = %s'),
            'fakeRun4File': ('file', 'find run where file = %s'),
        }
        if api in single_arg_queries:
            key, template = single_arg_queries[api]
            set_query(key, template)
        if api == 'fakeStatus':
            val = kwds['status']
            if val:
                kwds['query'] = \
                    'find dataset.status where dataset.status=%s' \
                    % val.upper()
            else:
                kwds['query'] = 'find dataset.status'
            val = kwds['dataset']
            if val:
                if ' where ' in kwds['query']:
                    kwds['query'] += ' and dataset=%s' % val
                else:
                    kwds['query'] += ' where dataset=%s' % val
            kwds.pop('status')
        if api == 'listPrimaryDatasets':
            pat = kwds['pattern']
            # /primary/processed/tier -> primary
            if pat.startswith('/'):
                kwds['pattern'] = pat.split('/')[1]
        if api == 'listProcessedDatasets':
            pat = kwds['processed_datatset_name_pattern']
            if pat.startswith('/'):
                # /primary/processed/tier -> processed; a pattern without
                # the processed part is left as is
                try:
                    kwds['processed_datatset_name_pattern'] = \
                        pat.split('/')[2]
                except IndexError:
                    pass
        if api == 'fakeConfig':
            val = kwds['dataset']
            sel = 'config.name, config.content, config.version, ' \
                  'config.type,  config.annotation, config.createdate, ' \
                  'config.createby, config.moddate,  config.modby'
            if val != 'required':
                kwds['query'] = 'find %s where dataset=%s' % (sel, val)
            else:
                kwds['query'] = 'required'
            kwds.pop('dataset')
        if api == 'fakeSite4Dataset' and inst and inst != self.prim_instance:
            set_query('dataset', "find site where dataset=%s")
        if api == 'fakeDataset4Site' and inst and inst != self.prim_instance:
            val = kwds['site']
            if val != 'required':
                # SiteDB may be absent from the global scope
                sinfo = sitedb.site_info(val) if sitedb else None
                if sinfo and 'resources' in sinfo:
                    # use the first storage element which has a FQDN
                    for row in sinfo['resources']:
                        if row['type'] == 'SE' and 'fqdn' in row:
                            kwds['query'] = \
                                "find dataset,site where site=%s" \
                                % row['fqdn']
                            break
            else:
                kwds['query'] = 'required'
            kwds.pop('site')
        if api == 'fakeRun4Run':
            val = kwds['run']
            if val != 'required':
                kwds['query'] = "find run where %s" % run_condition(val)
            else:
                kwds['query'] = 'required'
            kwds.pop('run')
        if api == 'fakeBlock4file':
            lfn = kwds.get('file', 'required')
            if lfn != 'required':
                kwds['query'] = 'find block.name where file=%s' % lfn
            else:
                kwds['query'] = 'required'
        if api == 'fakeLumis4block':
            block = kwds.get('block', 'required')
            if block != 'required':
                kwds['query'] = \
                    'find lumi.number, run.number, file.name where block=%s' \
                    % block
                kwds.pop('block')
            else:
                kwds['query'] = 'required'
        if api == 'fakeLumis4FileRun':
            query = kwds.get('query', 'required')
            lfn = kwds.get('lfn', 'required')
            if lfn != 'required':
                query = \
                    'find lumi.number, run.number where file=%s' % lfn
                kwds.pop('lfn')
            run = kwds.get('run', 'optional')
            if run != 'optional':
                query += ' and run=%s' % run
                kwds.pop('run')
            kwds['query'] = query
        if api == 'fakeBlock4DatasetRun':
            dataset = kwds.get('dataset', 'required')
            if dataset != 'required':
                kwds['query'] = 'find block.name where dataset=%s' \
                    % dataset
            else:
                kwds['query'] = 'required'
            val = kwds.get('run', 'required')
            if val != 'required':
                val = run_condition(val)
                kwds['query'] += ' and ' + val
                kwds.pop('dataset')
                kwds.pop('run')
            else:
                kwds['query'] = 'required'
        if api == 'fakeDataset4Run':
            val = kwds['run']
            if val != 'required':
                val = run_condition(val)
                if kwds.get('dataset'):
                    val += ' and dataset=%s' % kwds['dataset']
                kwds['query'] = \
                    "find dataset where %s and dataset.status like VALID*" \
                    % val
            else:
                kwds['query'] = 'required'
            kwds.pop('run')
            # dataset is optional for this API, it may not be present
            kwds.pop('dataset', None)
        if api == 'fakeDataset4User':
            user = kwds['user']
            if user == 'required':
                kwds['query'] = 'required'
            else:
                # SiteDB may be absent from the global scope
                val = sitedb.user_dn(user) if sitedb else None
                if val:
                    # DBS-QL does not allow = or spaces, so we'll tweak the DN
                    val = val.replace('=', '*').replace(' ', '*')
                    kwds['query'] = "find dataset, dataset.createby " \
                        "where dataset.createby=%s" % val
                    if kwds.get('dataset'):
                        kwds['query'] += ' and dataset=%s' % kwds['dataset']
                else:
                    kwds['query'] = 'required'
            kwds.pop('user')
            # dataset is optional for this API, it may not be present
            kwds.pop('dataset', None)
        if api == 'fakeFiles4DatasetRunLumis':
            cond = ""
            val = kwds['dataset']
            if val and val != 'required':
                cond = " and dataset=%s" % val
                kwds.pop('dataset')
            val = kwds['run']
            if val and val != 'required':
                cond += " and run=%s" % val
                kwds.pop('run')
            val = kwds['lumi']
            if val and val != 'required':
                cond += " and lumi=%s" % val
                kwds.pop('lumi')
            if cond:
                # cond[4:] strips the leading " and"
                kwds['query'] = "find file.name where %s" % cond[4:]
            else:
                kwds['query'] = 'required'
        if api == 'fakeDatasetSummary' or api == 'fakeDatasetPattern':
            value = ""
            path = False
            for key, val in kwds.items():
                if key == 'dataset' and val:
                    value += ' and dataset=%s' % val
                    # /a/b/c -> ['','a','b','c']
                    if len(val.split('/')) == 4 and '*' not in val:
                        path = True
                if key == 'primary_dataset' and val:
                    value += ' and primds=%s' % val
                if key == 'release' and val:
                    value += ' and release=%s' % val
                if key == 'tier' and val:
                    value += ' and tier=%s' % val
                if key == 'phygrp' and val:
                    value += ' and phygrp=%s' % val
                if key == 'datatype' and val:
                    value += ' and datatype=%s' % val
                if api == 'fakeDatasetPattern' and key == 'status':
                    if val:
                        value += ' and dataset.status=%s' % val.upper()
                    else:
                        value += ' and dataset.status like VALID*'
            keys = ['dataset', 'release', 'primary_dataset', 'tier',
                    'phygrp', 'datatype', 'status']
            for key in keys:
                kwds.pop(key, None)
            if value:
                query = "find dataset, datatype, dataset.status, dataset.tag"
                query += ", procds.createdate, procds.createby, procds.moddate"
                query += ", procds.modby"
                if path: # we have full path, ask for summary information
                    query += ", sum(block.numfiles), sum(block.numevents)"
                    query += ", count(block), sum(block.size)"
                # value[4:] strips the leading " and"
                query += " where %s" % value[4:]
                kwds['query'] = query
            else:
                kwds['query'] = 'required'
        if api == 'fakeListDatasetbyDate':
            if kwds['status']:
                value = ' and dataset.status=%s' % kwds['status'].upper()
            else:
                value = ' and dataset.status like VALID*'
            # date values look like
            # 20110126/{'$lte': 20110126}/{'$lte': 20110126, '$gte': 20110124}
            query_for_single = "find dataset, datatype, dataset.status, " \
                "  dataset.tag, " \
                "  dataset.createdate where dataset.createdate %s %s "
            query_for_double = query_for_single + \
                "  and dataset.createdate %s %s "
            val = kwds['date']
            qlist = []
            query = ""
            if val != "required":
                if isinstance(val, dict):
                    for opt in val:
                        nopt = dbsql_opt_map(opt)
                        if nopt == 'in':
                            self.logger.debug(val[opt])
                            nval = [convert_datetime(x) for x in val[opt]]
                        else:
                            nval = convert_datetime(val[opt])
                        qlist.append(nopt)
                        qlist.append(nval)
                    if len(qlist) == 4:
                        # the status clause is appended after formatting
                        # so that it is never interpreted as a format
                        query = query_for_double % tuple(qlist) + value
                    else:
                        msg = "dbs_services::fakeListDatasetbyDate " \
                            " wrong params get, " \
                            "IN date is not support by DBS2 QL"
                        self.logger.info(msg)
                elif isinstance(val, int):
                    val = convert_datetime(val)
                    query = query_for_single % ('=', val) + value
                kwds['query'] = query
            else:
                kwds['query'] = 'required'
            kwds.pop('date')
        if api == 'listFiles':
            val = kwds.get('run_number', None)
            if isinstance(val, dict):
                # listFiles does not support run range, see
                # fakeFiles4DatasetRun API
                kwds['run_number'] = 'required'
            if not kwds['path'] and not kwds['block_name'] and \
                    not kwds['pattern_lfn']:
                kwds['path'] = 'required'
        if api == 'fakeFiles4DatasetRun' or api == 'fakeFiles4BlockRun':
            cond = ""
            entity = 'dataset'
            if api == 'fakeFiles4BlockRun':
                entity = 'block'
            val = kwds[entity]
            if val and val != 'required':
                cond = " and %s=%s" % (entity, val)
                kwds.pop(entity)
            val = kwds['run']
            if val and val != 'required':
                cond += " and %s" % run_condition(val)
                kwds.pop('run')
            if cond:
                # cond[4:] strips the leading " and"
                kwds['query'] = "find file.name where %s" % cond[4:]
            else:
                kwds['query'] = 'required'