def test_DotDict(self):
    """Test DotDict class"""
    res = {u'zip': {u'code': u'14850'}}
    mdict = DotDict(res)
    mdict['zip.code'] = 14850
    expect = {u'zip': {u'code': 14850}}
    self.assertEqual(expect, mdict)

    res = {'a': {'b': {'c': 10}, 'd': 10}}
    mdict = DotDict(res)
    mdict['x.y.z'] = 10
    expect = {'a': {'b': {'c': 10}, 'd': 10}, 'x': {'y': {'z': 10}}}
    self.assertEqual(expect, mdict)

    mdict['a.b.k.m'] = 10
    expect = {'a': {'b': {'c': 10, 'k': {'m': 10}}, 'd': 10},
              'x': {'y': {'z': 10}}}
    self.assertEqual(expect, mdict)

    expect = 10
    result = mdict.get('a.b.k.m')
    self.assertEqual(expect, result)

    res = {'a': {'b': {'c': 10}, 'd': [{'x': 1}, {'x': 2}]}}
    mdict = DotDict(res)
    expect = 1
    result = mdict.get('a.d.x')
    self.assertEqual(expect, result)

    expect = None
    result = mdict.get('a.M.Z')
    self.assertEqual(expect, result)

    res = {'a': {'b': {'c': 1, 'd': 2}}}
    mdict = DotDict(res)
    expect = {'a': {'b': {'c': 1}}}
    mdict.delete('a.b.d')
    self.assertEqual(expect, mdict)

def test_DotDict_list(self):
    """Test DotDict class"""
    res = {'a': [{'b': 1, 'c': 1}, {'c': 1}]}
    mdict = DotDict(res)
    expect = 1
    result = mdict.get('a.b')
    self.assertEqual(expect, result)

    res = {'a': [{'c': 1}, {'b': 1, 'c': 1}]}
    mdict = DotDict(res)
    expect = 1
    result = mdict.get('a.b')
    self.assertEqual(expect, result)

def test_get(self):
    """test get method"""
    rec = DotDict(self.rec1)
    expect = [1, 2]
    result = rec.get('a.c')
    self.assertEqual(expect, result)
    self.assertEqual(expect, rec['a.c'])

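# The tests above exercise dotted-key access on nested dictionaries: dotted
# set/get, scanning lists of dicts on get, and delete of a dotted key. The
# real DotDict class ships with the DAS utilities; the class below is only a
# minimal sketch reconstructed from the behaviour those tests rely on, named
# DotDictSketch to avoid clashing with the real class. It is illustrative,
# not the actual DAS implementation.
class DotDictSketch(dict):
    "Minimal dict with dotted-key access, e.g. ddict['a.b.c'] = 1"

    def __setitem__(self, key, value):
        keys = key.split('.')
        item = self
        for name in keys[:-1]:
            if not isinstance(item.get(name), dict):
                # create intermediate dictionaries on demand
                if item is self:
                    dict.__setitem__(item, name, {})
                else:
                    item[name] = {}
            item = item[name]
        if item is self:
            dict.__setitem__(item, keys[-1], value)
        else:
            item[keys[-1]] = value

    def __getitem__(self, key):
        return self.get(key)

    def get(self, key, default=None):
        item = self
        for name in key.split('.'):
            if isinstance(item, list):
                # pick the first list element which carries the requested key
                item = next((elem[name] for elem in item
                             if isinstance(elem, dict) and name in elem), None)
            elif isinstance(item, dict) and name in item:
                # plain-dict lookup, bypassing our own __getitem__
                item = dict.__getitem__(item, name)
            else:
                return default
            if item is None:
                return default
        return item

    def delete(self, key):
        "Remove a dotted key, e.g. ddict.delete('a.b.d')"
        keys = key.split('.')
        item = self
        for name in keys[:-1]:
            item = item[name]
        del item[keys[-1]]
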
def phedex_files(phedex_url, kwds):
    "Get file information from Phedex"
    params = dict(kwds)  # parameters to be sent to Phedex
    site = kwds.get('site', None)
    if site and phedex_node_pattern.match(site):
        if not site.endswith('*'):
            # account for site names given w/o the _Buffer or _MSS suffix
            site += '*'
        params.update({'node': site})
        params.pop('site')
    elif site and se_pattern.match(site):
        params.update({'se': site})
        params.pop('site')
    else:
        return
    expire = 600  # set some expire since we're not going to use it
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(phedex_url, params, headers, expire,
                ckey=CKEY, cert=CERT, system='phedex')
    tags = 'block.file.name'
    prim_key = 'block'
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        files = ddict.get('block.file')
        if not isinstance(files, list):
            files = [files]
        for row in files:
            yield row['name']

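# Hypothetical usage of phedex_files: the URL, site and dataset below are
# made up, and a real call needs network access plus a valid grid proxy
# (CKEY/CERT):
#
#     url = 'https://cmsweb.cern.ch/phedex/datasvc/xml/prod/fileReplicas'
#     for lfn in phedex_files(url, {'site': 'T2_US_Nebraska',
#                                   'dataset': '/A/B/RAW'}):
#         print(lfn)
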
def filter_with_filters(rows, filters):
    """
    Filter given rows with provided set of filters.
    """
    for row in rows:
        ddict = DotDict(row)
        flist = [(f, ddict.get(f)) for f in filters]
        for idx in flist:
            yield idx

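# Illustrative use of filter_with_filters; the rows and filter names below are
# invented for the example and assume the function is importable:
#
#     rows = [{'site': {'name': 'T1_US_FNAL'}, 'block': {'size': 123}}]
#     for pair in filter_with_filters(rows, ['site.name', 'block.size']):
#         print(pair)   # ('site.name', 'T1_US_FNAL'), ('block.size', 123)
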
def site4dataset(dbs_url, phedex_api, args, expire):
    "Yield site information about given dataset"
    # DBS part
    dataset = args['dataset']
    try:
        totblocks, totfiles = dataset_summary(dbs_url, dataset)
    except Exception as err:
        error = str(err)
        reason = "Can't find #block, #files info in DBS for dataset=%s" \
                % dataset
        yield {'site': {'error': error, 'reason': reason}}
        return
    # Phedex part
    phedex_args = {'dataset': args['dataset']}
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(phedex_api, phedex_args, headers, expire, post=True,
                system='phedex')
    prim_key = 'block'
    tags = 'block.replica.node'
    site_info = {}
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        replicas = ddict.get('block.replica')
        if not isinstance(replicas, list):
            replicas = [replicas]
        for row in replicas:
            if not row or 'node' not in row:
                continue
            node = row['node']
            files = int(row['files'])
            complete = 1 if row['complete'] == 'y' else 0
            if node in site_info:
                files = site_info[node]['files'] + files
                nblks = site_info[node]['blocks'] + 1
                bc_val = site_info[node]['blocks_complete']
                b_complete = bc_val + 1 if complete else bc_val
            else:
                b_complete = 1 if complete else 0
                nblks = 1
            site_info[node] = {'files': files, 'blocks': nblks,
                               'blocks_complete': b_complete}
    row = {}
    for key, val in site_info.iteritems():
        if totfiles:
            nfiles = '%5.2f%%' % (100 * float(val['files']) / totfiles)
        else:
            nfiles = 'N/A'
        if totblocks:
            nblks = '%5.2f%%' % (100 * float(val['blocks']) / totblocks)
        else:
            nblks = 'N/A'
        ratio = float(val['blocks_complete']) / val['blocks']
        b_completion = '%5.2f%%' % (100 * ratio)
        row = {'site': {'name': key,
                        'dataset_fraction': nfiles,
                        'block_fraction': nblks,
                        'block_completion': b_completion}}
        yield row

def set_misses(self, dasquery, api, genrows):
    """
    Check and adjust DAS records wrt input query. If some of the DAS keys
    are missing, add them with their values to the DAS record.
    """
    # look-up primary key
    prim_key = self.dasmapping.primary_key(self.name, api)
    # Scan all docs and store those whose size is above the MongoDB limit
    # into GridFS
    map_key = self.dasmapping.primary_mapkey(self.name, api)
    genrows = parse2gridfs(self.gfs, map_key, genrows, self.logger)

    spec = dasquery.mongo_query['spec']
    row = next(genrows)
    ddict = DotDict(row)
    keys2adjust = []
    for key in spec.keys():
        val = ddict.get(key)
        if spec[key] != val and key not in keys2adjust:
            keys2adjust.append(key)
    msg = "adjust keys %s" % keys2adjust
    self.logger.debug(msg)
    count = 0
    if keys2adjust:
        # adjust the rows
        for row in yield_rows(row, genrows):
            ddict = DotDict(row)
            pval = ddict.get(map_key)
            if isinstance(pval, dict) and 'error' in pval:
                ddict[map_key] = ''
                ddict.update({prim_key: pval})
            for key in keys2adjust:
                value = spec[key]
                existing_value = ddict.get(key)
                # the way to deal with proximity/pattern/condition results
                if (isinstance(value, str) or isinstance(value, unicode)) \
                        and value.find('*') != -1:
                    # we got a pattern
                    if existing_value:
                        value = existing_value
                elif isinstance(value, dict) or \
                        isinstance(value, list):
                    # we got a condition
                    if existing_value:
                        value = existing_value
                    elif isinstance(value, dict) and \
                            '$in' in value:
                        # we got a range {'$in': []}
                        value = value['$in']
                    elif isinstance(value, dict) and \
                            '$lte' in value and '$gte' in value:
                        # we got a between range
                        value = [value['$gte'], value['$lte']]
                    else:
                        value = json.dumps(value)
                elif existing_value and value != existing_value:
                    # we got proximity results
                    if 'proximity' in ddict:
                        proximity = DotDict({key: existing_value})
                        ddict['proximity'].update(proximity)
                    else:
                        proximity = DotDict({})
                        proximity[key] = existing_value
                        ddict['proximity'] = proximity
                else:
                    if existing_value:
                        value = existing_value
                ddict[key] = value
            yield ddict
            count += 1
    else:
        yield row
        for row in genrows:
            yield row
            count += 1
    msg = "yield %s rows" % count
    self.logger.debug(msg)

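# A sketch of the adjustment performed by set_misses, with made-up spec keys
# and values (the actual behaviour is defined by the method above):
#
#     spec   = {'dataset.name': '/ZMM*/*/*'}   # wildcard asked in the query
#     record = {'dataset': {'nevents': 10}}    # record lacks dataset.name
#     # the yielded record gets the spec value attached:
#     #     {'dataset': {'name': '/ZMM*/*/*', 'nevents': 10}}
#     # whereas a plain scalar spec value which differs from the record's own
#     # value replaces it, and the record's original value is preserved under
#     # the 'proximity' key.
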
def helper(self, api, args, expire):
    """
    Class helper function which yields results for a given set of input
    parameters. It yields data records which must contain a combined
    attribute corresponding to the systems used to produce the record
    content.
    """
    dbs_url = self.map[api]['services'][self.dbs]
    phedex_url = self.map[api]['services']['phedex']
    # make phedex_api from url, but use xml version for processing
    phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
    if api == 'dataset4site_release' or \
            api == 'dataset4site_release_parent' or \
            api == 'child4site_release_dataset':
        # DBS part
        datasets = set()
        release = args['release']
        parent = args.get('parent', None)
        for row in dbs_dataset4release_parent(dbs_url, release, parent):
            datasets.add(row)
        # Phedex part
        if args['site'].find('.') != -1:  # it is SE
            phedex_args = {'dataset': list(datasets),
                           'se': '%s' % args['site']}
        else:
            phedex_args = {'dataset': list(datasets),
                           'node': '%s*' % args['site']}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(phedex_api, phedex_args, headers, expire,
                    system='phedex')
        prim_key = 'block'
        tags = 'block.replica.node'
        found = {}
        for rec in xml_parser(source, prim_key, tags):
            ddict = DotDict(rec)
            block = ddict.get('block.name')
            bbytes = ddict.get('block.bytes')
            files = ddict.get('block.files')
            found_dataset = block.split('#')[0]
            if found_dataset in found:
                val = found[found_dataset]
                found[found_dataset] = {'bytes': val['bytes'] + bbytes,
                                        'files': val['files'] + files}
            else:
                found[found_dataset] = {'bytes': bbytes, 'files': files}
        for name, val in found.items():
            record = dict(name=name, size=val['bytes'], files=val['files'])
            if api == 'child4site_release_dataset':
                yield {'child': record}
            else:
                yield {'dataset': record}
        del datasets
        del found
    if api == 'site4block':
        pass
    if api == 'site4dataset':
        try:
            gen = site4dataset(dbs_url, phedex_api, args, expire)
            for row in gen:
                sname = row.get('site', {}).get('name', '')
                skind = self.site_info(phedex_url, sname)
                row['site'].update({'kind': skind})
                yield row
        except Exception as err:
            print_exc(err)
            tstamp = dastimestamp('')
            msg = tstamp + ' Exception while processing DBS/Phedex info:'
            msg += str(err)
            row = {'site': {'name': 'Fail to look-up site info',
                            'error': msg,
                            'dataset_fraction': 'N/A',
                            'block_fraction': 'N/A',
                            'block_completion': 'N/A'},
                   'error': msg}
            yield row
    if api == 'files4dataset_runs_site' or \
            api == 'files4block_runs_site':
        run_value = args.get('run', [])
        if isinstance(run_value, dict) and '$in' in run_value:
            runs = run_value['$in']
        elif isinstance(run_value, list):
            runs = run_value
        else:
            if int_number_pattern.match(str(run_value)):
                runs = [run_value]
            else:
                runs = []
        args.update({'runs': runs})
        files = dbs_find('file', dbs_url, args)
        site = args.get('site')
        phedex_api = phedex_url.replace('/json/', '/xml/') + '/fileReplicas'
        for fname in files4site(phedex_api, files, site):
            yield {'file': {'name': fname}}

def site4dataset(dbs_url, phedex_api, args, expire):
    "Yield site information about given dataset"
    # DBS part
    dataset = args['dataset']
    try:
        totblocks, totfiles = dataset_summary(dbs_url, dataset)
    except Exception as err:
        error = 'combined service unable to process your request'
        reason = "Fail to parse #block, #files info, %s" % str(err)
        yield {'site': {'name': 'N/A',
                        'se': 'N/A',
                        'error': error,
                        'reason': reason}}
        return
    # Phedex part
    phedex_args = {'dataset': args['dataset']}
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(phedex_api, phedex_args, headers, expire, system='phedex')
    prim_key = 'block'
    tags = 'block.replica.node'
    site_info = {}
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        replicas = ddict.get('block.replica')
        if not isinstance(replicas, list):
            replicas = [replicas]
        for row in replicas:
            if not row or 'node' not in row:
                continue
            node = row['node']
            files = int(row['files'])
            complete = 1 if row['complete'] == 'y' else 0
            if node in site_info:
                files = site_info[node]['files'] + files
                nblks = site_info[node]['blocks'] + 1
                bc_val = site_info[node]['blocks_complete']
                b_complete = bc_val + 1 if complete else bc_val
            else:
                b_complete = 1 if complete else 0
                nblks = 1
            site_info[node] = {'files': files, 'blocks': nblks,
                               'blocks_complete': b_complete}
    row = {}
    for key, val in site_info.items():
        if totfiles:
            nfiles = '%5.2f%%' % (100 * float(val['files']) / totfiles)
        else:
            nfiles = 'N/A'
        if totblocks:
            nblks = '%5.2f%%' % (100 * float(val['blocks']) / totblocks)
        else:
            nblks = 'N/A'
        ratio = float(val['blocks_complete']) / val['blocks']
        b_completion = '%5.2f%%' % (100 * ratio)
        row = {'site': {'name': key,
                        'dataset_fraction': nfiles,
                        'block_fraction': nblks,
                        'block_completion': b_completion}}
        yield row

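# For reference, each record yielded by site4dataset has the shape below
# (site name and numbers are invented): dataset_fraction is the site's file
# count over the dataset's total files, block_fraction the site's block count
# over total blocks, and block_completion the fraction of those blocks which
# are complete at the site.
#
#     {'site': {'name': 'T2_DE_DESY', 'dataset_fraction': ' 95.00%',
#               'block_fraction': ' 90.00%', 'block_completion': ' 98.50%'}}
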
def helper(self, url, api, args, expire):
    """
    Class helper function which yields results for a given set of input
    parameters. It yields data records which must contain a combined
    attribute corresponding to the systems used to produce the record
    content.
    """
    dbs_url = url['dbs']
    phedex_url = url['phedex']
    if api == 'combined_dataset4site_release':
        # DBS part
        datasets = set()
        for row in dbs_dataset4site_release(dbs_url, self.getdata,
                                            args['release']):
            datasets.add(row)
        # Phedex part
        if args['site'].find('.') != -1:  # it is SE
            phedex_args = {'dataset': list(datasets),
                           'se': '%s' % args['site']}
        else:
            phedex_args = {'dataset': list(datasets),
                           'node': '%s*' % args['site']}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            self.getdata(phedex_url, phedex_args, expire, headers,
                         post=True)
        prim_key = 'block'
        tags = 'block.replica.node'
        found = {}
        for rec in xml_parser(source, prim_key, tags):
            ddict = DotDict(rec)
            block = ddict.get('block.name')
            bbytes = ddict.get('block.bytes')
            files = ddict.get('block.files')
            found_dataset = block.split('#')[0]
            if found_dataset in found:
                val = found[found_dataset]
                found[found_dataset] = {'bytes': val['bytes'] + bbytes,
                                        'files': val['files'] + files}
            else:
                found[found_dataset] = {'bytes': bbytes, 'files': files}
        for name, val in found.iteritems():
            record = dict(name=name, size=val['bytes'],
                          files=val['files'], combined=['dbs', 'phedex'])
            yield {'dataset': record}
        del datasets
        del found
    if api == 'combined_site4dataset':
        # DBS part
        dataset = args['dataset']
        totblocks, totfiles = \
            dataset_summary(dbs_url, self.getdata, dataset)
        # Phedex part
        phedex_args = {'dataset': args['dataset']}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            self.getdata(phedex_url, phedex_args, expire, headers,
                         post=True)
        prim_key = 'block'
        tags = 'block.replica.node'
        found = {}
        site_info = {}
        for rec in xml_parser(source, prim_key, tags):
            ddict = DotDict(rec)
            replicas = ddict.get('block.replica')
            if not isinstance(replicas, list):
                replicas = [replicas]
            for row in replicas:
                if not row or 'node' not in row:
                    continue
                node = row['node']
                files = int(row['files'])
                complete = 1 if row['complete'] == 'y' else 0
                if node in site_info:
                    files = site_info[node]['files'] + files
                    nblks = site_info[node]['blocks'] + 1
                    bc_val = site_info[node]['blocks_complete']
                    b_complete = bc_val + 1 if complete else bc_val
                else:
                    b_complete = 1 if complete else 0
                    nblks = 1
                site_info[node] = {'files': files, 'blocks': nblks,
                                   'blocks_complete': b_complete}
        row = {}
        for key, val in site_info.iteritems():
            if totfiles:
                nfiles = '%5.2f%%' % (100 * float(val['files']) / totfiles)
            else:
                nfiles = 'N/A'
            if totblocks:
                nblks = '%5.2f%%' % (100 * float(val['blocks']) / totblocks)
            else:
                nblks = 'N/A'
            ratio = float(val['blocks_complete']) / val['blocks']
            b_completion = '%5.2f%%' % (100 * ratio)
            row = {'site': {'name': key,
                            'dataset_fraction': nfiles,
                            'block_fraction': nblks,
                            'block_completion': b_completion}}
            yield row