def configs(url, args, verbose=False):
    """Find config info in ReqMgr"""
    headers = {'Accept': 'application/json;text/json'}
    dataset = args.get('dataset', None)
    if not dataset:
        return
    base = 'https://%s' % url.split('/')[2]
    ids = findReqMgrIds(dataset, base, verbose)
    # probe to find configs in showWorkload
    urls = ['%s/reqmgr/view/showWorkload?requestName=%s' % (base, i) \
            for i in ids]
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    config_urls = []
    for row in gen:
        if 'error' not in row:
            for line in row['data'].split('\n'):
                if line.rfind("/configFile") != -1:
                    cfg = line.split('=')[-1].strip()
                    cfg = cfg.replace('<br/>', '').replace("'", '')
                    config_urls.append(cfg)
    if config_urls:
        urls = config_urls
    else:
        urls = ['%s/%s/configFile' % (url, i) for i in ids]
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    for row in gen:
        if 'error' in row:
            error = row.get('error')
            reason = row.get('reason', '')
            yield {'error': error, 'reason': reason}
        else:
            config = {'data': row['data'], 'dataset': dataset,
                      'name': 'ReqMgr', 'ids': ids, 'urls': urls}
            yield {'config': config}
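# A minimal usage sketch for the ReqMgr flavor of configs() above; the
# service URL and dataset name are illustrative, and CKEY/CERT plus
# findReqMgrIds are assumed to be defined elsewhere in this module:
#
#     url = 'https://cmsweb.cern.ch/reqmgr/reqMgr'
#     for row in configs(url, {'dataset': '/Prim/Proc-v1/GEN-SIM-RECO'}):
#         print(row)  # either {'config': {...}} or {'error': ..., 'reason': ...}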
def files4site(phedex_url, files, site):
    "Find site for given files"
    params = {}
    if site and phedex_node_pattern.match(site):
        if not site.endswith('*'):
            # append wildcard to match site names w/ or w/o _Buffer/_MSS suffix
            site += '*'
        params.update({'node': site})
    elif site and se_pattern.match(site):
        params.update({'se': site})
    else:
        return
    sname = urllib.urlencode(params)
    urls = []
    for fname in files:
        url = '%s?lfn=%s&%s' % (phedex_url, fname, sname)
        urls.append(url)
    tags = 'block.replica.node'
    prim_key = 'block'
    gen = urlfetch_getdata(urls, CKEY, CERT)
    for rec in gen:
        if 'error' in rec:
            yield rec
        else:
            # convert record string into StringIO for xml_parser
            source = StringIO.StringIO(rec['data'])
            for row in xml_parser(source, prim_key, tags):
                fobj = row['block']['file']
                fname = fobj['name']
                replica = fobj['replica']
                for item in replica:
                    # yield the LFN once per replica at the requested site
                    yield fname
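# Hypothetical usage of files4site(); the PhEDEx datasvc URL pattern is the
# standard one, but the LFN and site name below are made up, and
# phedex_node_pattern/se_pattern are assumed module-level compiled regexps:
#
#     phedex_url = 'https://cmsweb.cern.ch/phedex/datasvc/xml/prod/fileReplicas'
#     lfns = ['/store/data/Run2012A/Prim/AOD/v1/0000/file.root']
#     for lfn in files4site(phedex_url, lfns, 'T1_US_FNAL'):
#         print(lfn)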
def configs(url, args, verbose=False):
    """Find config info in ReqMgr2"""
    headers = {'Accept': 'application/json;text/json'}
    dataset = args.get('dataset', None)
    if not dataset:
        return
    base = 'https://%s' % url.split('/')[2]
    idict, source = findReqMgrIds(dataset, base, verbose)
    ids = []
    ids_types = {} # keep track of ids/types relationship
    for key, ilist in idict.items():
        rtype = 'output' if key.lower().find('output') != -1 else 'input'
        for item in ilist:
            ids.append(item)
            ids_types[item] = rtype
    # for hash ids find configs via ReqMgr2 REST API
    urls = [rurl(base, i) for i in ids if i and len(i) == 32]
    # for non-hash ids probe to find configs in showWorkload
    req_urls = ['%s/couchdb/reqmgr_workload_cache/%s' % (base, i) \
            for i in ids if i and len(i) != 32]
    if req_urls:
        gen = urlfetch_getdata(req_urls, CKEY, CERT, headers)
        config_urls = []
        for row in gen:
            if 'error' not in row:
                url = row['url']
                # find the request type of the id embedded in this url;
                # rtype keeps the matched value after the break
                for key, rtype in ids_types.items():
                    if key in url:
                        break
                rdict = json.loads(row['data'])
                for key in rdict.keys():
                    val = rdict[key]
                    if key.endswith('ConfigCacheID'):
                        if isinstance(val, basestring):
                            config_urls.append(rurl(base, val))
                            ids_types[val] = rtype
                    elif isinstance(val, dict):
                        for kkk in val.keys():
                            if kkk.endswith('ConfigCacheID'):
                                vvv = val[kkk]
                                if isinstance(vvv, basestring):
                                    config_urls.append(rurl(base, vvv))
                                    ids_types[vvv] = rtype
        if config_urls:
            urls += config_urls
    # group config urls by request type
    udict = {}
    for rid, rtype in ids_types.items():
        for url in set(urls):
            if str(rid) in str(url):
                udict.setdefault(rtype, []).append(url)
    config = {'dataset': dataset, 'name': source, 'urls': udict,
              'ids': ids, 'idict': idict}
    yield {'config': config}
def configs(url, args, verbose=False):
    """Find config info in ReqMgr"""
    headers = {'Accept': 'application/json;text/json'}
    dataset = args.get('dataset', None)
    if not dataset:
        return
    base = 'https://%s' % url.split('/')[2]
    idict, source = findReqMgrIds(dataset, base, verbose)
    ids = []
    ids_types = {} # keep track of ids/types relationship
    for key, ilist in idict.items():
        rtype = 'output' if key.lower().find('output') != -1 else 'input'
        for item in ilist:
            ids.append(item)
            ids_types[item] = rtype
    # for hash ids find configs via ReqMgr REST API
    urls = [rurl(base, i) for i in ids if len(i) == 32]
    # for non-hash ids probe to find configs in showWorkload
    req_urls = ['%s/couchdb/reqmgr_workload_cache/%s' % (base, i) \
            for i in ids if len(i) != 32]
    if req_urls:
        gen = urlfetch_getdata(req_urls, CKEY, CERT, headers)
        config_urls = []
        for row in gen:
            if 'error' not in row:
                url = row['url']
                for key, rtype in ids_types.items():
                    if key in url:
                        break
                rdict = json.loads(row['data'])
                for key in rdict.keys():
                    val = rdict[key]
                    if key.endswith('ConfigCacheID'):
                        if isinstance(val, basestring):
                            config_urls.append(rurl(base, val))
                            ids_types[val] = rtype
                    elif isinstance(val, dict):
                        for kkk in val.keys():
                            if kkk.endswith('ConfigCacheID'):
                                vvv = val[kkk]
                                if isinstance(vvv, basestring):
                                    config_urls.append(rurl(base, vvv))
                                    ids_types[vvv] = rtype
        if config_urls:
            urls += config_urls
    udict = {}
    for rid, rtype in ids_types.items():
        for url in set(urls):
            if rid in url:
                udict.setdefault(rtype, []).append(url)
    config = {'dataset': dataset, 'name': source, 'urls': udict,
              'ids': ids, 'idict': idict}
    yield {'config': config}
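# Usage sketch shared by the two ReqMgr2/ReqMgr flavors of configs() directly
# above: each yields a single record whose 'urls' field groups config URLs by
# request type ('input'/'output'). Service URL and dataset are illustrative:
#
#     url = 'https://cmsweb.cern.ch/reqmgr2'
#     for row in configs(url, {'dataset': '/Prim/Proc-v1/AODSIM'}):
#         print(row['config']['urls'])  # e.g. {'input': [...], 'output': [...]}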
def phedex_info(urls, dbsdata):
    "Get phedex info for given set of dbs data"
    # create list of URLs for urlfetch
    url = urls.get('phedex') + '/blockReplicas'
    urls = ('%s?dataset=%s' % (url, d) for d in dbsdata.keys())
    headers = {'Accept': 'application/json;text/json'}
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    for ddict in gen:
        try:
            jsondict = json.loads(ddict['data'])
        except Exception as _exc:
            continue
        rec = {}
        for blk in jsondict['phedex']['block']:
            dataset = blk['name'].split('#')[0]
            if 'nfiles' not in rec:
                nfiles = blk['files']
                size = blk['bytes']
            else:
                nfiles = rec['nfiles'] + blk['files']
                size = rec['size'] + blk['bytes']
            rec.update({'nfiles': nfiles, 'size': size})
            for rep in blk['replica']:
                if 'site' not in rec:
                    rec = dict(dataset=dataset, nfiles=nfiles, size=size,
                               site=[rep['node']], se=[rep['se']],
                               custodial=[rep['custodial']])
                    rec.update(dbsdata[dataset])
                else:
                    sites = rec['site']
                    ses = rec['se']
                    custodial = rec['custodial']
                    if rep['node'] not in sites:
                        sites.append(rep['node'])
                        ses.append(rep['se'])
                        custodial.append(rep['custodial'])
                    rec.update({'site': sites, 'se': ses,
                                'custodial': custodial})
        if rec.get('site'):
            # unwrap the site/se/custodial lists and yield records w/ their
            # individual values; skip records w/o any replica information
            for idx in range(0, len(rec['site'])):
                sename = rec['se'][idx]
                site = rec['site'][idx]
                custodial = rec['custodial'][idx]
                newrec = dict(rec)
                newrec['se'] = sename
                newrec['site'] = site
                newrec['custodial'] = custodial
                yield newrec
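# Sketch of phedex_info() usage: dbsdata maps dataset names to DBS records
# whose fields get merged into every yielded replica record, and one record
# is yielded per (site, se, custodial) triple. Values are hypothetical:
#
#     urls = {'phedex': 'https://cmsweb.cern.ch/phedex/datasvc/json/prod'}
#     dbsdata = {'/Prim/Proc-v1/AOD': {'nevents': 1000, 'status': 'VALID'}}
#     for row in phedex_info(urls, dbsdata):
#         print(row)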
def file_run_lumis(url, blocks, runs=None, valid=None, verbose=0):
    """
    Find file, run, lumi tuple for given set of files and (optional) runs.
    """
    headers = {'Accept': 'application/json;text/json'}
    urls = []
    for blk in blocks:
        if not blk:
            continue
        dbs_url = '%s/filelumis/?block_name=%s' % (url, urllib.quote(blk))
        if valid:
            dbs_url += '&validFileOnly=1'
        if runs:
            dbs_url += "&run_num=%s" % urllib.quote(str(runs))
        urls.append(dbs_url)
    if not urls:
        return
    if verbose > 1:
        print("\nDEBUG: file_run_lumis")
        print(urls)
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    odict = {} # output dict
    for rec in gen:
        if 'error' in rec:
            error = rec.get('error')
            reason = rec.get('reason', '')
            yield {'error': error, 'reason': reason}
        else:
            for row in json.loads(rec['data']):
                run = row['run_num']
                lfn = row['logical_file_name']
                lumilist = row['lumi_section_num']
                eventlist = row.get('event_count', [])
                key = (lfn, run)
                for idx, lumi in enumerate(lumilist):
                    if len(eventlist) > 0:
                        evts = eventlist[idx]
                    else:
                        evts = None
                    odict.setdefault(key, []).append((lumi, evts))
    for key, values in odict.items():
        lfn, run = key
        lumis = []
        evts = []
        for lumi, evt in values:
            lumis.append(lumi)
            evts.append(evt)
        yield lfn, run, lumis, evts
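# Usage sketch for file_run_lumis(); error records are yielded as dicts, so
# callers should test for them before unpacking the (lfn, run, lumis, evts)
# tuples. The DBS instance URL is the standard one; the block name is made up:
#
#     dbs = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     for row in file_run_lumis(dbs, ['/Prim/Proc-v1/AOD#abcd-1234']):
#         if isinstance(row, dict):
#             print(row)  # error record
#         else:
#             lfn, run, lumis, evts = row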
def block_summary(dbs, blocks):
    "Get block summary information for given set of blocks"
    headers = {"Accept": "text/json;application/json"}
    url = dbs + "/blocksummaries"
    urls = ["%s/?block_name=%s" % (url, urllib.quote(b)) for b in blocks]
    res = urlfetch_getdata(urls, CKEY, CERT, headers)
    for row in res:
        if "error" in row:
            error = row.get("error")
            reason = row.get("reason", "")
            yield {"error": error, "reason": reason}
            continue
        url = row["url"]
        blk = urllib.unquote(url.split("=")[-1])
        for rec in json.loads(row["data"]):
            data = {"name": blk, "size": rec["file_size"],
                    "nfiles": rec["num_file"], "nevents": rec["num_event"]}
            yield dict(block=data)
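# Usage sketch for block_summary(), assuming a DBS3 reader instance; the
# block name is illustrative:
#
#     dbs = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     for row in block_summary(dbs, ['/Prim/Proc-v1/AOD#abcd-1234']):
#         print(row)  # {'block': {'name': ..., 'size': ..., 'nfiles': ..., 'nevents': ...}}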
def block_run_lumis(url, blocks, runs=None):
    """
    Find block, run, lumi tuple for given set of files and (optional) runs.
    """
    headers = {'Accept': 'text/xml'}
    urls = []
    for blk in blocks:
        if not blk:
            continue
        query = 'find block,run,lumi where block=%s' % blk
        if runs and isinstance(runs, list):
            val = ' or '.join(['run=%s' % r for r in runs])
            query += ' and (%s)' % val
        params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
                  'query': query}
        dbs_url = url + '?' + urllib.urlencode(params)
        urls.append(dbs_url)
    if not urls:
        return
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    prim_key = 'row'
    odict = {} # output dict
    for rec in gen:
        if 'error' in rec:
            error = rec.get('error')
            reason = rec.get('reason', '')
            print dastimestamp('DAS ERROR'), error, reason
            yield {'error': error, 'reason': reason}
        else:
            source = StringIO.StringIO(rec['data'])
            for row in qlxml_parser(source, prim_key):
                run = row['row']['run']
                blk = row['row']['block']
                lumi = row['row']['lumi']
                key = (blk, run)
                odict.setdefault(key, []).append(lumi)
    for key, lumis in odict.iteritems():
        blk, run = key
        yield blk, run, lumis
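# Sketch for the DBS2 flavor of block_run_lumis() above: url must point to a
# DBS2 QL (executeQuery) endpoint, which returns XML that qlxml_parser
# aggregates per (block, run). The endpoint and block name are hypothetical:
#
#     url = 'http://some-dbs2-host/servlet/DBSServlet'  # hypothetical endpoint
#     for row in block_run_lumis(url, ['/Prim/Proc-v1/RAW#abcd'], runs=[180000]):
#         print(row)  # (block, run, [lumi, ...]) or an error dict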
def block_run_lumis(url, blocks, runs=None, verbose=0):
    """
    Find block, run, lumi tuple for given set of files and (optional) runs.
    """
    headers = {'Accept': 'application/json;text/json'}
    urls = []
    for blk in blocks:
        if not blk:
            continue
        dbs_url = '%s/filelumis/?block_name=%s' % (url, urllib.quote(blk))
        if runs and isinstance(runs, list):
            # constrain the look-up to the given runs
            dbs_url += '&run_num=%s' % urllib.quote(str(runs))
        urls.append(dbs_url)
    if not urls:
        return
    if verbose > 1:
        print("\nDEBUG: block_run_lumis")
        print(urls)
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    odict = {} # output dict
    for rec in gen:
        blk = urllib.unquote(url_args(rec['url'])['block_name'])
        if 'error' in rec:
            error = rec.get('error')
            reason = rec.get('reason', '')
            yield {'error': error, 'reason': reason}
        else:
            for row in json.loads(rec['data']):
                run = row['run_num']
                lumilist = row['lumi_section_num']
                key = (blk, run)
                for lumi in lumilist:
                    odict.setdefault(key, []).append(lumi)
    for key, lumis in odict.items():
        blk, run = key
        yield blk, run, lumis
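# Usage sketch for the DBS3 flavor of block_run_lumis(); same call shape as
# the DBS2 version but against the /filelumis REST API. The block name and
# run number are illustrative:
#
#     dbs = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     for row in block_run_lumis(dbs, ['/Prim/Proc-v1/AOD#abcd-1234'], runs=[190456]):
#         print(row)  # (block, run, [lumi, ...]) or an error dict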
def blocks4tier_date(dbs, tier, min_cdate, max_cdate, verbose=0):
    "Get list of blocks for given parameters"
    headers = {'Accept': 'text/json;application/json'}
    url = dbs + "/blocks"
    params = {'data_tier_name': tier,
              'min_cdate': min_cdate, 'max_cdate': max_cdate}
    urls = ['%s?%s' % (url, urllib.urlencode(params))]
    if verbose > 1:
        print("\nblocks4tier_date")
        print(urls)
    res = process(urlfetch_getdata(urls, CKEY, CERT, headers))
    err = 'Unable to get blocks for tier=%s, mindate=%s, maxdate=%s' \
            % (tier, min_cdate, max_cdate)
    for blist in res:
        if 'error' in blist:
            yield blist
            continue
        if isinstance(blist, dict):
            if 'block_name' not in blist:
                msg = err + ', reason=%s' % json.dumps(blist)
                raise Exception(msg)
        for row in blist:
            yield row['block_name']
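# Usage sketch for blocks4tier_date(); min_cdate/max_cdate are creation-time
# bounds passed straight to the DBS3 blocks API (UNIX timestamps, if memory
# serves). The values below are illustrative:
#
#     dbs = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     for blk in blocks4tier_date(dbs, 'GEN-SIM', 1356998400, 1359676800):
#         print(blk)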
def summary4dataset_run(url, kwds):
    "Helper function to deal with summary dataset=/a/b/c requests"
    urls = []
    cond = ''
    val = kwds.get('run', 'optional')
    if val != 'optional':
        if isinstance(val, dict):
            min_run = 0
            max_run = 0
            if '$lte' in val:
                max_run = val['$lte']
            if '$gte' in val:
                min_run = val['$gte']
            if min_run and max_run:
                val = "run >= %s and run <= %s" % (min_run, max_run)
            elif '$in' in val:
                val = ' or '.join(['run=%s' % r for r in val['$in']])
                val = '(%s)' % val
        elif isinstance(val, int):
            val = "run=%d" % val
        cond += ' and %s' % val
    val = kwds.get('dataset', None)
    if val and val != 'optional':
        cond += ' and dataset=%s' % val
    val = kwds.get('block', None)
    if val and val != 'optional':
        cond += ' and block=%s' % val
    # strip the leading ' and' from the accumulated condition
    query = "find file, file.size, file.numevents where " + cond[4:]
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
              'query': query}
    url1 = url + '?' + urllib.urlencode(params)
    urls.append(url1)
    query = "find run, count(lumi) where " + cond[4:]
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
              'query': query}
    url2 = url + '?' + urllib.urlencode(params)
    urls.append(url2)
    headers = {'Accept': 'text/xml'}
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    tot_size = 0
    tot_evts = 0
    tot_lumis = 0
    tot_files = 0
    for rec in gen:
        if 'error' in rec:
            error = rec.get('error')
            reason = rec.get('reason', '')
            srec = {'summary': '', 'error': error, 'reason': reason}
            yield srec
            continue # error records carry no data payload
        url = rec['url']
        data = rec['data']
        stream = StringIO.StringIO(data)
        if url.find('file') != -1:
            prim_key = 'file'
        else:
            prim_key = 'run'
        for row in qlxml_parser(stream, prim_key):
            if prim_key == 'file':
                fdata = row['file']
                tot_size += fdata['file.size']
                tot_evts += fdata['file.numevents']
                tot_files += 1
            else:
                fdata = row['run']
                tot_lumis += fdata['count_lumi']
    srec = {'summary': {'file_size': tot_size, 'nevents': tot_evts,
                        'nlumis': tot_lumis, 'nfiles': tot_files}}
    yield srec
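# Usage sketch for summary4dataset_run(); kwds mirrors DAS query conditions,
# so run may be an int, a {'$gte': ..., '$lte': ...} range, or an
# {'$in': [...]} list. The endpoint name and values are hypothetical:
#
#     kwds = {'dataset': '/Prim/Proc-v1/AOD', 'run': {'$in': [190456, 190457]}}
#     for row in summary4dataset_run(dbs2_url, kwds):  # dbs2_url: a DBS2 QL endpoint
#         print(row['summary'])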