def test_rel_type(self):
    """Check rel_type classification of full, pre and patch release names."""
    cases = [
        ('CMSSW_1_0_1', RFULL),
        ('CMSSW_1_0_1_pre', RPRE),
        ('CMSSW_1_0_1_patch', RPATCH),
    ]
    for release, expected in cases:
        self.assertEqual(expected, rel_type(release))
    def test_rel_type(self):
        """Verify rel_type maps release names to RFULL/RPRE/RPATCH."""
        self.assertEqual(RFULL, rel_type('CMSSW_1_0_1'))
        self.assertEqual(RPRE, rel_type('CMSSW_1_0_1_pre'))
        self.assertEqual(RPATCH, rel_type('CMSSW_1_0_1_patch'))
Example #3
0
 def dataset_info(self, timeframe, dataset, dtypes, stypes, rtypes, tiers, dformat, target=0):
     """
     Yield common dataset info in the specified data format.

     :param timeframe: (start, end) pair; start is also used for the row uid
     :param dataset: dataset path of the form /primary/processed/tier
     :param dtypes: mutable list of known data types (appended to in place)
     :param stypes: dict of site-tier counters, updated in place
     :param rtypes: dict with 'series'/'majors'/'minors'/'rtypes' counter dicts,
                    updated in place
     :param tiers: mutable list of known tiers (appended to in place)
     :param dformat: 'headers' yields the column list, 'csv' a comma-joined
                     row, 'vw' a Vowpal Wabbit formatted row
     :param target: optional dict merged into the record ('rnaccess' key is
                    used as the vw label)
     """
     dbsinst = self.dbs.dataset_dbsinst(dataset)
     if not dbsinst:
         return
     row = self.dbs.dataset_info(dataset, dbsinst)
     if row:
         if self.multitask:
             releases, sites, parents, summary, dashboard = \
                     self.dataset_info_all(dataset, dbsinst, timeframe)
         else:
             releases = list(self.dbs.dataset_release_versions(dataset, dbsinst))
             sites = list(self.phedex.sites(dataset))
             parents = list(self.dbs.dataset_parents(dataset, dbsinst))
             summary = self.dbs.dataset_summary(dataset, dbsinst)
             dashboard = self.dashboard.dataset_info(dataset, timeframe[0], timeframe[1])
         # downstream .get() calls require dict types; guard against None/empty
         if not summary:
             summary = {}
         if not dashboard:
             dashboard = {}
         nrels = len(releases)
         series = rtypes['series']
         majors = rtypes['majors']
         minors = rtypes['minors']
         relclf = rtypes['rtypes']
         for rel in releases:
             rserie, rmajor, rminor = rel_ver(rel)
             if not cmssw_test(rserie, rmajor, rminor):
                 continue
             rtype = rel_type(rel)
             # bump only counters pre-registered in rtypes; unknown keys are
             # silently skipped (was a bare except/pass around each increment)
             key = 'rel1_%s' % rserie
             if key in series:
                 series[key] += 1
             key = 'rel2_%s' % rmajor
             if key in majors:
                 majors[key] += 1
             key = 'rel3_%s' % rminor
             if key in minors:
                 minors[key] += 1
             key = 'relt_%s' % rtype
             if key in relclf:
                 relclf[key] += 1
         nsites = len(sites)
         for site in sites:
             stier = site_tier(site)
             stypes['s_%s' % stier] += 1
         dataset_id = row['rid']
         era = genkey(row['acquisition_era_name'], self.salt, 5)
         create_dn = self.sitedb.dnid(row['create_by'])
         dbsinstid = row['dbs_instance']
         dtype = row['primary_ds_type']
         # number of data types should be small and simple
         # list look-up shouldn't be a problem
         if dtype not in dtypes:
             dtypes.append(dtype)
         dtype = dtypes.index(dtype)
         _, prim, proc, tier = dataset.split('/')
         prim = genkey(prim, self.salt, 5)
         proc = genkey(proc, self.salt, 5)
         if tier not in tiers:
             tiers.append(tier)
         tier = genkey(tier, self.salt, 5)
         parent = parents[0] if len(parents) else 0
         uid = genuid(yyyymmdd(timeframe[0]), dbsinstid, dataset_id)
         size_norm = 2**30  # normalization factor for file size
         rec = dict(id=uid, dataset=dataset_id, primds=prim, procds=proc, tier=tier,
                 dtype=dtype, creator=create_dn, nrel=nrels, nsites=nsites,
                 parent=parent, era=era, dbs=dbsinstid,
                 nfiles=summary.get('num_file', 0),
                 nlumis=summary.get('num_lumi', 0),
                 nblk=summary.get('num_block', 0),
                 nevt=summary.get('num_event', 0),
                 size=summary.get('file_size', 0)/size_norm,
                 cpu=dashboard.get('cpu', 0),
                 wct=dashboard.get('wct', 0),
                 proc_evts=dashboard.get('nevt', 0))
         if isinstance(target, dict):
             rec.update(target)
         for counters in (series, majors, minors, relclf, stypes):
             rec.update(counters)
         # dict.keys() is a view in Python 3 and has no .sort(); sort a copy
         headers = sorted(rec.keys())
         headers.remove('id')
         headers = ['id'] + headers  # let dataset id be the first column
         if dformat == 'headers':
             yield headers
         elif dformat == 'csv':
             yield ','.join(str(rec[h]) for h in headers)
         elif dformat == 'vw':
             target_str = target.get('rnaccess')
             vals = ' '.join(str(rec[h]) for h in headers)
             uid = genkey(vals, self.salt, 5)  # unique row identifier
             yield "%s '%s |f %s" % (target_str, uid, vals)
Example #4
0
 def dataset_info(self,
                  timeframe,
                  dataset,
                  dtypes,
                  stypes,
                  rtypes,
                  tiers,
                  dformat,
                  target=0):
     """
     Yield common dataset info in the specified data format.

     :param timeframe: (start, end) pair; start is also used for the row uid
     :param dataset: dataset path of the form /primary/processed/tier
     :param dtypes: mutable list of known data types (appended to in place)
     :param stypes: dict of site-tier counters, updated in place
     :param rtypes: dict with 'series'/'majors'/'minors'/'rtypes' key sets;
                    release counters are accumulated in local zeroed copies
     :param tiers: mutable list of known tiers (appended to in place)
     :param dformat: 'headers' yields the column list, 'csv' a comma-joined
                     row, 'vw' a Vowpal Wabbit formatted row
     :param target: optional dict merged into the record ('rnaccess' key is
                    used as the vw label)
     """
     dbsinst = self.dbs.dataset_dbsinst(dataset)
     if not dbsinst:
         return
     row = self.dbs.dataset_info(dataset, dbsinst)
     if row:
         if self.multitask:
             releases, sites, parents, summary, dashboard = \
                     self.dataset_info_all(dataset, dbsinst, timeframe)
         else:
             releases = list(
                 self.dbs.dataset_release_versions(dataset, dbsinst))
             sites = list(self.phedex.sites(dataset))
             parents = list(self.dbs.dataset_parents(dataset, dbsinst))
             summary = self.dbs.dataset_summary(dataset, dbsinst)
             dashboard = self.dashboard.dataset_info(
                 dataset, timeframe[0], timeframe[1])
         nrels = len(releases)
         # local zero-initialized counter copies; rtypes only supplies keys
         series = dict.fromkeys(rtypes['series'], 0)
         majors = dict.fromkeys(rtypes['majors'], 0)
         minors = dict.fromkeys(rtypes['minors'], 0)
         relclf = dict.fromkeys(rtypes['rtypes'], 0)
         for rel in releases:
             rserie, rmajor, rminor = rel_ver(rel)
             if not cmssw_test(rserie, rmajor, rminor):
                 continue
             rtype = rel_type(rel)
             # bump only counters pre-registered in rtypes; unknown keys are
             # silently skipped (was a bare except/pass around each increment)
             key = 'rel1_%s' % rserie
             if key in series:
                 series[key] += 1
             key = 'rel2_%s' % rmajor
             if key in majors:
                 majors[key] += 1
             key = 'rel3_%s' % rminor
             if key in minors:
                 minors[key] += 1
             key = 'relt_%s' % rtype
             if key in relclf:
                 relclf[key] += 1
         nsites = len(sites)
         for site in sites:
             stier = site_tier(site)
             stypes['s_%s' % stier] += 1
         dataset_id = row['rid']
         era = genkey(row['acquisition_era_name'], self.salt, 5)
         create_dn = self.sitedb.dnid(row['create_by'])
         dbsinstid = row['dbs_instance']
         dtype = row['primary_ds_type']
         # number of data types should be small and simple
         # list look-up shouldn't be a problem
         if dtype not in dtypes:
             dtypes.append(dtype)
         dtype = dtypes.index(dtype)
         _, prim, proc, tier = dataset.split('/')
         prim = genkey(prim, self.salt, 5)
         proc = genkey(proc, self.salt, 5)
         if tier not in tiers:
             tiers.append(tier)
         tier = genkey(tier, self.salt, 5)
         parent = parents[0] if len(parents) else 0
         uid = genuid(yyyymmdd(timeframe[0]), dbsinstid, dataset_id)
         size_norm = 2**30  # normalization factor for file size
         # downstream .get() calls require dict types; guard against None
         if not summary:
             summary = {}
         if not dashboard:
             dashboard = {}
         rec = dict(id=uid,
                    dataset=dataset_id,
                    primds=prim,
                    procds=proc,
                    tier=tier,
                    dtype=dtype,
                    creator=create_dn,
                    nrel=nrels,
                    nsites=nsites,
                    parent=parent,
                    era=era,
                    dbs=dbsinstid,
                    nfiles=summary.get('num_file', 0),
                    nlumis=summary.get('num_lumi', 0),
                    nblk=summary.get('num_block', 0),
                    nevt=summary.get('num_event', 0),
                    size=summary.get('file_size', 0) / size_norm,
                    cpu=dashboard.get('cpu', 0),
                    wct=dashboard.get('wct', 0),
                    proc_evts=dashboard.get('nevt', 0))
         if isinstance(target, dict):
             rec.update(target)
         for counters in (series, majors, minors, relclf, stypes):
             rec.update(counters)
         # dict.keys() is a view in Python 3 and has no .sort(); sort a copy
         headers = sorted(rec.keys())
         headers.remove('id')
         headers = ['id'] + headers  # let dataset id be the first column
         if dformat == 'headers':
             yield headers
         elif dformat == 'csv':
             yield ','.join(str(rec[h]) for h in headers)
         elif dformat == 'vw':
             target_str = target.get('rnaccess')
             vals = ' '.join(str(rec[h]) for h in headers)
             uid = genkey(vals, self.salt, 5)  # unique row identifier
             yield "%s '%s |f %s" % (target_str, uid, vals)