def dbs_dataset4site_release(dbs_url, release):
    "Get dataset for given site and release"
    expire = 600 # set some expire since we're not going to use it
    if which_dbs(dbs_url) == 'dbs':
        # in DBS3 I'll use datasets API and pass release over there
        query = 'find dataset where release=%s' % release
        dbs_args = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
                    'query': query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, headers, expire,
                                 ckey=CKEY, cert=CERT, system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if 'dataset' in row:
                dataset = row['dataset']['dataset']
                yield dataset
            elif 'error' in row:
                err = row.get('reason', None)
                err = err if err else row['error']
                yield 'DBS error: %s' % err
    else:
        # we call datasets?release=release to get list of datasets
        dbs_url += '/datasets'
        dbs_args = {'release_version': release,
                    'dataset_access_type': 'VALID'}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, headers, expire,
                                 ckey=CKEY, cert=CERT, system='dbs3')
        for rec in json_parser(source, None):
            for row in rec:
                yield row['dataset']
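# Usage sketch for dbs_dataset4site_release (hedged): it is a generator and
# streams dataset names as they are parsed; on the DBS2 path, errors arrive
# in-band as 'DBS error: ...' strings. The URL and release below are
# placeholders, not verified endpoints.
#
#   for dset in dbs_dataset4site_release(
#           'https://cmsweb.cern.ch/dbs/prod/global/DBSReader',
#           'CMSSW_4_2_8'):
#       if isinstance(dset, str) and dset.startswith('DBS error'):
#           break # in-band error record, stop consuming
#       print(dset)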
def datasets_dbs2(urls, verbose=0):
    """DBS2 implementation of datasets function"""
    headers = {'Accept': 'application/xml;text/xml'}
    url = urls.get('dbs')
    query = 'find dataset,dataset.tier,dataset.era where dataset.status like VALID*'
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', 'query': query}
    stream, _ = getdata(url, params, headers, verbose=verbose)
    records = [r for r in qlxml_parser(stream, 'dataset')]
    stream.close()
    data = {}
    size = 10 # size for POST request to Phedex
    for row in records:
        dataset = row['dataset']
        if not data.has_key(dataset['dataset']):
            data[dataset['dataset']] = \
                dict(era=dataset['dataset.era'], tier=dataset['dataset.tier'])
        if len(data.keys()) > size:
            for rec in dataset_info(urls, data):
                yield rec
            data = {}
    if data:
        for rec in dataset_info(urls, data):
            yield rec
    del records
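# A minimal, self-contained sketch of the accumulate-and-flush pattern used
# above: buffer records into a dict and flush once it exceeds a chunk size,
# so downstream (PhEDEx) calls receive bounded POST payloads. The names
# `chunked` and the input pairs are illustrative, not part of the DAS code.
def chunked(records, size=10):
    "Yield dicts of at most size+1 accumulated records"
    data = {}
    for key, val in records:
        if key not in data:
            data[key] = val
        if len(data) > size:
            yield data
            data = {}
    if data:
        yield data # flush the remainder

# Example: list(chunked([(i, i) for i in range(25)], size=10))
# yields three dicts of 11, 11 and 3 entries, mirroring the `> size` test.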
def dataset_summary(dbs_url, getdata, dataset):
    """
    Invoke DBS2/DBS3 call to get information about total number of
    files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    if which_dbs(dbs_url) == 'dbs2': # DBS2 call
        query = 'find count(file.name), count(block.name) where dataset=%s' \
                % dataset
        dbs_args = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
                    'query': query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            totfiles = row['dataset']['count_file.name']
            totblocks = row['dataset']['count_block.name']
            return totblocks, totfiles
    else:
        # we call filesummaries?dataset=dataset to get number of files/blks
        dbs_args = {'dataset': dataset}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        for row in json_parser(source, None):
            totfiles = row[0]['num_file']
            totblocks = row[0]['num_block']
            return totblocks, totfiles
def test_xml_parser_4(self):
    """Test functionality of qlxml_parser"""
    xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
<results>
<row>
<name>/c1.root</name>
<size>1</size>
</row>
<row>
<name>/c2.root</name>
<size>2</size>
</row>
</results>
"""
    fdesc = tempfile.NamedTemporaryFile()
    fname = fdesc.name
    stream = file(fname, 'w')
    stream.write(xmldata)
    stream.close()
    stream = file(fname, 'r')
    gen = qlxml_parser(stream, "file")
    result = [r for r in gen]
    expect = [{'file': {'name': '/c1.root', 'size': 1}},
              {'file': {'name': '/c2.root', 'size': 2}}]
    self.assertEqual(expect, result)
def dbs_find(entity, url, kwds):
    "Find given entity (run, file or block) in DBS for given set of parameters"
    if entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire = 600
    dataset = kwds.get('dataset', None)
    block = kwds.get('block', None)
    lfn = kwds.get('lfn', None)
    runs = kwds.get('runs', [])
    if not (dataset or block or lfn):
        return
    query = 'find %s' % entity
    if dataset:
        query += ' where dataset=%s' % dataset
    elif block:
        query += ' where block=%s' % block
    elif lfn:
        query += ' where file=%s' % lfn
    if runs:
        rcond = ' or '.join(['run=%s' % r for r in runs])
        query += ' and (%s)' % rcond
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', 'query': query}
    headers = {'Accept': 'text/xml'}
    source, expire = getdata(url, params, headers, expire,
                             ckey=CKEY, cert=CERT)
    pkey = entity
    for row in qlxml_parser(source, pkey):
        val = row[entity][entity]
        yield val
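# A small, standalone sketch of how the DBS-QL query string above is
# assembled; `build_dbsql` is a hypothetical helper, shown only to make the
# precedence (dataset over block over lfn) and the run condition explicit.
def build_dbsql(entity, dataset=None, block=None, lfn=None, runs=None):
    "Build a DBS-QL 'find' query for the given selection"
    query = 'find %s' % entity
    if dataset:
        query += ' where dataset=%s' % dataset
    elif block:
        query += ' where block=%s' % block
    elif lfn:
        query += ' where file=%s' % lfn
    if runs:
        query += ' and (%s)' % ' or '.join(['run=%s' % r for r in runs])
    return query

# build_dbsql('file', dataset='/a/b/c', runs=[1, 2]) ->
# 'find file where dataset=/a/b/c and (run=1 or run=2)'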
def datasets_dbs(self):
    """
    Retrieve a list of DBS datasets (DBS2)
    """
    query = "find dataset,dataset.status"
    params = {"api": "executeQuery", "apiversion": "DBS_2_0_9", "query": query}
    encoded_data = urllib.urlencode(params, doseq=True)
    url = self.dbs_url + "?" + encoded_data
    req = urllib2.Request(url)
    try:
        stream = urllib2.urlopen(req)
    except urllib2.HTTPError:
        msg = "Failed to contact %s" % url
        print dastimestamp("DAS ERROR"), msg
        raise Exception(msg)
    except Exception as exc:
        print_exc(exc)
        msg = "Failed to contact %s" % url
        print dastimestamp("DAS ERROR"), msg
        raise Exception(msg)
    gen = qlxml_parser(stream, "dataset")
    for row in gen:
        dataset = row["dataset"]["dataset"]
        rec = {"dataset": dataset}
        if self.write_hash:
            storage_query = {
                "fields": ["dataset"],
                "spec": [{"key": "dataset.name", "value": '"%s"' % dataset}],
                "instance": self.dbcoll,
            }
            rec.update({"qhash": genkey(storage_query)})
        if row["dataset"]["dataset.status"] == "VALID":
            yield rec
    stream.close()
def runs_dbs2(url, dataset, ckey, cert):
    "Retrieve list of runs from DBS2 for a given dataset"
    query = "find run where dataset=%s" % dataset
    params = dict(api='executeQuery', apiversion='DBS_2_0_9', query=query)
    data, _ = getdata(url, params, ckey=ckey, cert=cert, system='combined')
    prim_key = 'run'
    for row in qlxml_parser(data, prim_key):
        run = row['run']['run']
        yield run
def run_lumis_dbs2(url, dataset, ckey, cert):
    "Retrieve list of run/lumis from DBS2 for a given dataset"
    query = "find run, lumi where dataset=%s" % dataset
    params = dict(api='executeQuery', apiversion='DBS_2_0_9', query=query)
    data, _ = getdata(url, params, ckey=ckey, cert=cert, system='combined')
    prim_key = 'run'
    res = {} # output result: {run: [lumi, ...]}
    for row in qlxml_parser(data, prim_key):
        run = row['run']['run']
        lumi = row['run']['lumi']
        res.setdefault(run, []).append(lumi)
    return res
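# The grouping idiom above, isolated for clarity: collect (run, lumi) pairs
# into a {run: [lumi, ...]} map. Pure function, no DBS access; the input
# pairs in the example are made up for illustration.
def group_lumis_by_run(pairs):
    "Group (run, lumi) pairs into a run -> lumi-list dict"
    res = {}
    for run, lumi in pairs:
        res.setdefault(run, []).append(lumi)
    return res

# group_lumis_by_run([(160957, 1), (160957, 2), (160958, 5)])
# -> {160957: [1, 2], 160958: [5]}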
def datasets_dbs2(urls, verbose=0):
    """DBS2 implementation of datasets function"""
    headers = {'Accept': 'application/xml;text/xml'}
    url = urls.get('dbs')
    query = \
        'find dataset,dataset.tier,dataset.era where dataset.status like VALID*'
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', 'query': query}
    stream, _ = getdata(url, params, headers, post=False,
                        ckey=CKEY, cert=CERT, verbose=verbose, system='dbs')
    records = [r for r in qlxml_parser(stream, 'dataset')]
    stream.close()
    dbsdata = {}
    for row in records:
        dataset = row['dataset']
        if dataset['dataset'] not in dbsdata:
            dbsdata[dataset['dataset']] = \
                dict(era=dataset['dataset.era'],
                     tier=dataset['dataset.tier'], status='VALID')
    for row in phedex_info(urls, dbsdata):
        yield row
def datasets_dbs(self):
    """
    Retrieve a list of DBS datasets (DBS2)
    """
    query = 'find dataset,dataset.status'
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', 'query': query}
    encoded_data = urllib.urlencode(params, doseq=True)
    url = self.dbs_url + '?' + encoded_data
    req = urllib2.Request(url)
    try:
        stream = urllib2.urlopen(req)
    except Exception as exc:
        print_exc(exc)
        msg = 'Failed to contact %s' % url
        raise Exception(msg)
    gen = qlxml_parser(stream, 'dataset')
    for row in gen:
        if row['dataset']['dataset.status'] == 'VALID':
            yield dict(dataset=row['dataset']['dataset'])
    stream.close()
def block_run_lumis(url, blocks, runs=None):
    """
    Find block, run, lumi tuple for given set of blocks and (optional) runs.
    """
    headers = {'Accept': 'text/xml'}
    urls = []
    for blk in blocks:
        if not blk:
            continue
        query = 'find block,run,lumi where block=%s' % blk
        if runs and isinstance(runs, list):
            val = ' or '.join(['run=%s' % r for r in runs])
            query += ' and (%s)' % val
        params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
                  'query': query}
        dbs_url = url + '?' + urllib.urlencode(params)
        urls.append(dbs_url)
    if not urls:
        return
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    prim_key = 'row'
    odict = {} # output dict: {(block, run): [lumi, ...]}
    for rec in gen:
        if 'error' in rec:
            error = rec.get('error')
            reason = rec.get('reason', '')
            print dastimestamp('DAS ERROR'), error, reason
            yield {'error': error, 'reason': reason}
        else:
            source = StringIO.StringIO(rec['data'])
            for row in qlxml_parser(source, prim_key):
                run = row['row']['run']
                blk = row['row']['block']
                lumi = row['row']['lumi']
                key = (blk, run)
                odict.setdefault(key, []).append(lumi)
    for key, lumis in odict.iteritems():
        blk, run = key
        yield blk, run, lumis
def test_xml_parser_3(self):
    """Test functionality of qlxml_parser"""
    xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
<results>
<row>
<dataset>/a/b/c</dataset>
<nblocks>25</nblocks>
</row>
</results>
"""
    fdesc = tempfile.NamedTemporaryFile()
    fname = fdesc.name
    stream = file(fname, 'w')
    stream.write(xmldata)
    stream.close()
    stream = file(fname, 'r')
    gen = qlxml_parser(stream, "dataset")
    result = gen.next()
    expect = {'dataset': {'dataset': '/a/b/c', 'nblocks': 25}}
    self.assertEqual(expect, result)
def test_xml_parser_3(self):
    """Test functionality of qlxml_parser"""
    xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
<results>
<row>
<dataset>/a/b/c</dataset>
<nblocks>25</nblocks>
</row>
</results>
"""
    fdesc = tempfile.NamedTemporaryFile()
    fname = fdesc.name
    stream = open(fname, 'w')
    stream.write(xmldata)
    stream.close()
    stream = open(fname, 'r')
    gen = qlxml_parser(stream, "dataset")
    result = next(gen)
    expect = {'dataset': {'dataset': '/a/b/c', 'nblocks': 25}}
    self.assertEqual(expect, result)
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS2/DBS3 call to get information about total number of
    files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    if which_dbs(dbs_url) == 'dbs': # DBS2 call
        query = 'find count(file.name), count(block.name)'
        query += ' where dataset=%s and dataset.status=*' % dataset
        dbs_args = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
                    'query': query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, headers, expire,
                                 ckey=CKEY, cert=CERT, system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if 'dataset' in row:
                totfiles = row['dataset']['count_file.name']
                totblocks = row['dataset']['count_block.name']
                return totblocks, totfiles
            elif 'error' in row:
                raise Exception(row.get('reason', row['error']))
        # if we're here we didn't find a dataset, throw the error
        msg = 'empty set'
        raise Exception(msg)
    else:
        # we call filesummaries?dataset=dataset to get number of files/blks
        dbs_url += '/filesummaries'
        dbs_args = {'dataset': dataset}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, headers, expire,
                                 ckey=CKEY, cert=CERT, system='dbs3')
        for row in json_parser(source, None):
            totfiles = row[0]['num_file']
            totblocks = row[0]['num_block']
            return totblocks, totfiles
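# Hedged usage sketch for dataset_summary: the return value is a
# (totblocks, totfiles) tuple regardless of the DBS2/DBS3 branch taken.
# The URL and dataset below are placeholders, not tested endpoints.
#
#   totblocks, totfiles = dataset_summary(
#       'https://cmsweb.cern.ch/dbs/prod/global/DBSReader',
#       '/ZMM/Summer11-DESIGN42_V11_428_SLHC1-v1/GEN-SIM')
#   assert isinstance(totblocks, int) and isinstance(totfiles, int)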
def dbs_dataset4site_release(dbs_url, getdata, release):
    "Get dataset for given site and release"
    expire = 600 # set some expire since we're not going to use it
    if which_dbs(dbs_url) == 'dbs2':
        # in DBS3 I'll use datasets API and pass release over there
        query = 'find dataset where release=%s' % release
        dbs_args = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
                    'query': query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            dataset = row['dataset']['dataset']
            yield dataset
    else:
        # we call datasets?release=release to get list of datasets
        dbs_args = {'release_version': release,
                    'dataset_access_type': 'PRODUCTION'}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        for rec in json_parser(source, None):
            for row in rec:
                yield row['dataset']
def test_xml_parser_4(self):
    """Test functionality of qlxml_parser"""
    xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
<results>
<row>
<name>/c1.root</name>
<size>1</size>
</row>
<row>
<name>/c2.root</name>
<size>2</size>
</row>
</results>
"""
    fdesc = tempfile.NamedTemporaryFile()
    fname = fdesc.name
    stream = open(fname, 'w')
    stream.write(xmldata)
    stream.close()
    stream = open(fname, 'r')
    gen = qlxml_parser(stream, "file")
    result = [r for r in gen]
    expect = [{'file': {'name': '/c1.root', 'size': 1}},
              {'file': {'name': '/c2.root', 'size': 2}}]
    self.assertEqual(expect, result)
def test_xml_parser_5(self):
    """Test functionality of qlxml_parser"""
    xmldata = """<?xml version='1.0' standalone='yes'?>
<!-- DBS Version 1 -->
<dbs>
<userinput>
<input>find dataset where tier=*GEN* and primds=ZJetToEE_Pt* </input>
<timeStamp>Mon Feb 07 19:51:59 CET 2011</timeStamp>
</userinput>
<java_query>
<sql>GROUP BY PATH</sql>
<bp>%GEN%</bp>
<bp>ZJetToEE_Pt%</bp>
</java_query>
<python_query>
<sql>SELECT PATH AS PATH,</sql>
<bindparams><p0>%GEN%</p0>
<p1>ZJetToEE_Pt%</p1>
</bindparams>
</python_query>
<count_query>
<sql> SELECT COUNT(*) AS CNT FROM </sql>
<bindparams>
<p0>%GEN%</p0>
<p1>ZJetToEE_Pt%</p1>
</bindparams>
</count_query>
<results>"""
    suffix = """
</results>
<SUCCESS/>
</dbs>
"""
    row = """<row>
<dataset>/ZJetToEE_Pt_80to120_TuneZ2_7TeV_pythia6/</dataset>
<sum_block.numfiles>%d</sum_block.numfiles>
<sum_block.numevents>110000</sum_block.numevents>
<count_block>1</count_block>
<sum_block.size>61942523513</sum_block.size>
</row>"""
    for i in range(200):
        xmldata = xmldata + row % i
    xmldata = xmldata + suffix
    fdesc = tempfile.NamedTemporaryFile()
    fname = fdesc.name
    stream = file(fname, 'w')
    stream.write(xmldata)
    stream.close()
    stream = file(fname, 'r')
    gen = qlxml_parser(stream, "dataset")
    expect = {'dataset': {'sum_block.numfiles': 12,
                          'count_block': 1,
                          'sum_block.numevents': 110000,
                          'sum_block.size': 61942523513,
                          'dataset': '/ZJetToEE_Pt_80to120_TuneZ2_7TeV_pythia6/'}}
    count = 0
    for r in gen:
        expect['dataset']['sum_block.numfiles'] = count
        self.assertEqual(expect, r)
        count = count + 1
    self.assertEqual(200, count)
def test_xml_parser_5(self):
    """Test functionality of qlxml_parser"""
    xmldata = """<?xml version='1.0' standalone='yes'?>
<!-- DBS Version 1 -->
<dbs>
<userinput>
<input>find dataset where tier=*GEN* and primds=ZJetToEE_Pt* </input>
<timeStamp>Mon Feb 07 19:51:59 CET 2011</timeStamp>
</userinput>
<java_query>
<sql>GROUP BY PATH</sql>
<bp>%GEN%</bp>
<bp>ZJetToEE_Pt%</bp>
</java_query>
<python_query>
<sql>SELECT PATH AS PATH,</sql>
<bindparams><p0>%GEN%</p0>
<p1>ZJetToEE_Pt%</p1>
</bindparams>
</python_query>
<count_query>
<sql> SELECT COUNT(*) AS CNT FROM </sql>
<bindparams>
<p0>%GEN%</p0>
<p1>ZJetToEE_Pt%</p1>
</bindparams>
</count_query>
<results>"""
    suffix = """
</results>
<SUCCESS/>
</dbs>
"""
    row = """<row>
<dataset>/ZJetToEE_Pt_80to120_TuneZ2_7TeV_pythia6/</dataset>
<sum_block.numfiles>%d</sum_block.numfiles>
<sum_block.numevents>110000</sum_block.numevents>
<count_block>1</count_block>
<sum_block.size>61942523513</sum_block.size>
</row>"""
    for i in range(200):
        xmldata = xmldata + row % i
    xmldata = xmldata + suffix
    fdesc = tempfile.NamedTemporaryFile()
    fname = fdesc.name
    stream = open(fname, 'w')
    stream.write(xmldata)
    stream.close()
    stream = open(fname, 'r')
    gen = qlxml_parser(stream, "dataset")
    expect = {'dataset': {'sum_block.numfiles': 12,
                          'count_block': 1,
                          'sum_block.numevents': 110000,
                          'sum_block.size': 61942523513,
                          'dataset': '/ZJetToEE_Pt_80to120_TuneZ2_7TeV_pythia6/'}}
    count = 0
    for r in gen:
        expect['dataset']['sum_block.numfiles'] = count
        self.assertEqual(expect, r)
        count = count + 1
    self.assertEqual(200, count)
def parser(self, dasquery, dformat, source, api):
    """
    DBS data-service parser.
    """
    sitedb = SERVICES.get('sitedb2', None) # look-up SiteDB from global scope
    query = dasquery.mongo_query
    if api == 'listBlocks':
        prim_key = 'block'
    elif api == 'listBlocks4path':
        api = 'listBlocks'
        prim_key = 'block'
    elif api == 'listBlockProvenance':
        prim_key = 'block'
    elif api == 'listBlockProvenance4child':
        prim_key = 'block'
    elif api == 'listFiles':
        prim_key = 'file'
    elif api == 'listLFNs':
        prim_key = 'file_lfn'
    elif api == 'listFileLumis':
        prim_key = 'file_lumi_section'
    elif api == 'listFileProcQuality':
        prim_key = 'file_proc_quality'
    elif api == 'listFileParents':
        prim_key = 'file_parent'
    elif api == 'listTiers':
        prim_key = 'data_tier'
    elif api == 'listDatasetParents':
        prim_key = 'processed_dataset_parent'
    elif api == 'listPrimaryDatasets':
        prim_key = 'primary_dataset'
    elif api == 'listProcessedDatasets':
        prim_key = 'processed_dataset'
    elif api == 'fakeReleases':
        prim_key = 'release'
    elif api == 'listRuns':
        prim_key = 'run'
    elif api == 'fakeRelease4File':
        prim_key = 'release'
    elif api == 'fakeRelease4Dataset':
        prim_key = 'release'
    elif api == 'fakeGroup4Dataset':
        prim_key = 'group'
    elif api == 'fakeConfig':
        prim_key = 'config'
    elif api == 'fakeListDataset4Block':
        prim_key = 'dataset'
    elif api == 'fakeListDataset4File':
        prim_key = 'dataset'
    elif api == 'fakeListDatasetbyDate':
        prim_key = 'dataset'
    elif api == 'fakeDatasetSummary':
        prim_key = 'dataset'
    elif api == 'fakeDataset4Run':
        prim_key = 'dataset'
    elif api == 'fakeRun4File':
        prim_key = 'run'
    elif api == 'fakeRun4Run':
        prim_key = 'run'
    elif api == 'fakeChild4File':
        prim_key = 'child'
    elif api == 'fakeChild4Dataset':
        prim_key = 'child'
    elif api == 'fakeSite4Dataset':
        prim_key = 'site'
    elif api == 'fakeStatus':
        prim_key = 'status'
    elif api == 'fakeFiles4DatasetRunLumis':
        prim_key = 'file'
    elif api == 'fakeRun4Block':
        prim_key = 'run'
    elif api == 'fakeBlock4DatasetRun':
        prim_key = 'block'
    else:
        msg = 'DBSService::parser, unsupported %s API %s' \
            % (self.name, api)
        raise Exception(msg)
    if api.find('fake') != -1:
        gen = qlxml_parser(source, prim_key)
    else:
        gen = xml_parser(source, prim_key)
    useless_run_atts = ['number_of_events', 'number_of_lumi_sections',
        'id', 'total_luminosity', 'store_number', 'end_of_run',
        'start_of_run']
    config_attrs = ['config.name', 'config.content', 'config.version',
        'config.type', 'config.annotation', 'config.createdate',
        'config.createby', 'config.moddate', 'config.modby']
    for row in gen:
        if not row:
            continue
        if row.has_key('status') and \
            row['status'].has_key('dataset.status'):
            row['status']['name'] = row['status']['dataset.status']
            del row['status']['dataset.status']
        if row.has_key('file_lumi_section'):
            row['lumi'] = row['file_lumi_section']
            del row['file_lumi_section']
        if row.has_key('algorithm'):
            del row['algorithm']['ps_content']
        if row.has_key('processed_dataset') and \
            row['processed_dataset'].has_key('path'):
            if isinstance(row['processed_dataset']['path'], dict) \
                and row['processed_dataset']['path'].has_key('dataset_path'):
                path = row['processed_dataset']['path']['dataset_path']
                del row['processed_dataset']['path']
                row['processed_dataset']['name'] = path
        # case for fake apis
        # remove useless attribute from results
        if row.has_key('dataset'):
            if row['dataset'].has_key('count_file.size'):
                del row['dataset']['count_file.size']
            if row['dataset'].has_key('dataset'):
                name = row['dataset']['dataset']
                del row['dataset']['dataset']
                row['dataset']['name'] = name
        if row.has_key('child') and row['child'].has_key('dataset.child'):
            row['child']['name'] = row['child']['dataset.child']
            del row['child']['dataset.child']
        if row.has_key('child') and row['child'].has_key('file.child'):
            row['child']['name'] = row['child']['file.child']
            del row['child']['file.child']
        if row.has_key('block') and query.get('fields') == ['parent']:
            row['parent'] = row['block']
            del row['block']
        if row.has_key('block') and query.get('fields') == ['child']:
            row['child'] = row['block']
            del row['block']
        if row.has_key('run') and row['run'].has_key('run'):
            row['run']['run_number'] = row['run']['run']
            del row['run']['run']
        if row.has_key('release') and row['release'].has_key('release'):
            row['release']['name'] = row['release']['release']
            del row['release']['release']
        if row.has_key('site'):
            row['site']['se'] = row['site']['site']
            del row['site']['site']
        convert_dot(row, 'config', config_attrs)
        convert_dot(row, 'file', ['file.name'])
        convert_dot(row, 'block', ['block.name'])
        convert_dot(row, 'dataset', ['dataset.tag', 'dataset.status'])
        # remove DBS2 run attributes (to be consistent with DBS3 output)
        # and let people extract this info from CondDB/LumiDB.
        if row.has_key('run'):
            for att in useless_run_atts:
                try:
                    del row['run'][att]
                except:
                    pass
        if api == 'fakeSite4Dataset' and sitedb:
            site = row.get('site', None)
            if site and isinstance(site, dict):
                sename = site.get('se', None)
                info = sitedb.site_info(sename)
                if info:
                    row['site'].update(info)
        yield row
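# The long elif ladder above is equivalent to a table lookup. A minimal
# sketch of that alternative (illustrative only; the real parser also
# rewrites api names, e.g. listBlocks4path -> listBlocks, so a plain dict
# needs that special case handled separately):
API2PRIMKEY = {
    'listBlocks': 'block', 'listBlockProvenance': 'block',
    'listBlockProvenance4child': 'block', 'listFiles': 'file',
    'listLFNs': 'file_lfn', 'listFileLumis': 'file_lumi_section',
    'fakeSite4Dataset': 'site', 'fakeStatus': 'status',
    # ... remaining entries elided ...
}

def primary_key(api):
    "Resolve an API name to its record primary key, or raise"
    try:
        return API2PRIMKEY[api]
    except KeyError:
        raise Exception('unsupported API %s' % api)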
def summary4dataset_run(url, kwds):
    "Helper function to deal with summary dataset=/a/b/c requests"
    urls = []
    cond = ''
    val = kwds.get('run', 'optional')
    if val != 'optional':
        if isinstance(val, dict):
            min_run = 0
            max_run = 0
            if '$lte' in val:
                max_run = val['$lte']
            if '$gte' in val:
                min_run = val['$gte']
            if min_run and max_run:
                val = "run >=%s and run <= %s" % (min_run, max_run)
            elif '$in' in val:
                val = ' or '.join(['run=%s' % r for r in val['$in']])
                val = '(%s)' % val
        elif isinstance(val, int):
            val = "run=%d" % val
        cond += ' and %s' % val
    val = kwds.get('dataset', None)
    if val and val != 'optional':
        cond += ' and dataset=%s' % val
    val = kwds.get('block', None)
    if val and val != 'optional':
        cond += ' and block=%s' % val
    query = "find file, file.size, file.numevents where " + cond[4:]
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', 'query': query}
    url1 = url + '?' + urllib.urlencode(params)
    urls.append(url1)
    query = "find run, count(lumi) where " + cond[4:]
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', 'query': query}
    url2 = url + '?' + urllib.urlencode(params)
    urls.append(url2)
    headers = {'Accept': 'text/xml'}
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    tot_size = 0
    tot_evts = 0
    tot_lumis = 0
    tot_files = 0
    for rec in gen:
        if 'error' in rec:
            error = rec.get('error')
            reason = rec.get('reason', '')
            srec = {'summary': '', 'error': error, 'reason': reason}
            yield srec
            continue # error records carry no payload to parse
        url = rec['url']
        data = rec['data']
        stream = StringIO.StringIO(data)
        if url.find('file') != -1:
            prim_key = 'file'
        else:
            prim_key = 'run'
        for row in qlxml_parser(stream, prim_key):
            if prim_key == 'file':
                fdata = row['file']
                tot_size += fdata['file.size']
                tot_evts += fdata['file.numevents']
                tot_files += 1
            else:
                fdata = row['run']
                tot_lumis += fdata['count_lumi']
    srec = {'summary': {'file_size': tot_size, 'nevents': tot_evts,
                        'nlumis': tot_lumis, 'nfiles': tot_files}}
    yield srec
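# Standalone sketch of the run-condition translation performed above: a
# Mongo-style spec ({'$gte'/'$lte'} range, {'$in'} list, or plain int) is
# turned into a DBS-QL fragment. Extracted here as a pure helper
# (`run_condition` is a hypothetical name) so the three cases are testable.
def run_condition(val):
    "Translate a Mongo-style run spec into a DBS-QL condition string"
    if isinstance(val, dict):
        min_run = val.get('$gte', 0)
        max_run = val.get('$lte', 0)
        if min_run and max_run:
            return 'run >=%s and run <= %s' % (min_run, max_run)
        if '$in' in val:
            return '(%s)' % ' or '.join(['run=%s' % r for r in val['$in']])
    elif isinstance(val, int):
        return 'run=%d' % val
    return ''

# run_condition({'$gte': 1, '$lte': 9}) -> 'run >=1 and run <= 9'
# run_condition({'$in': [1, 5]})        -> '(run=1 or run=5)'
# run_condition(7)                      -> 'run=7'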