Example #1
def get_mc_lumi_list(inputDataset="/QCD_Pt_300to470_TuneCP5_13TeV_pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic*/MINIAODSIM"):
    """Get the LumiList object(s) for dataset(s) matching `inputDataset`

    inputDataset:
        if a str, will ask DAS to autocomplete it (can contain wildcards)
        if a list/tuple/set[str], will iterate over each entry without asking DAS to autocomplete.
        This is because the user might have cached the dataset names before calling this function,
        and we don't want to call DAS more often than necessary.

    returns: a dict with an entry per matching dataset, keyed by DAS string, with a LumiList as value

    raises RuntimeError if no valid voms proxy
    raises TypeError if inputDataset incorrect type
    """
    if not check_voms():
        raise RuntimeError("Missing voms proxy")

    if isinstance(inputDataset, str):
        inputDatasets = autocomplete_Datasets([inputDataset])
    elif isinstance(inputDataset, (list, set, tuple)):
        # names were already resolved by the caller, so skip the DAS autocomplete
        inputDatasets = inputDataset
    else:
        raise TypeError('get_mc_lumi_list: `inputDataset` expects str or list/tuple/set[str]')

    result = {}
    for dataset in inputDatasets:
        print(dataset)
        json_dict = get_data(host='https://cmsweb.cern.ch', query="run lumi file dataset="+dataset, idx=0, limit=0, threshold=300)
        lumi_list = LumiList.LumiList()
        try:
            n_files = len(json_dict['data'])
            printout = max(1, round(n_files / 10))  # avoid modulo by zero for datasets with fewer than 10 files
            for i, file_info in enumerate(json_dict['data']):
                if i % printout == 0:
                    print("{}% done...".format(100 * i / n_files))
                ls = file_info['lumi'][0]['number']
                run = file_info['run'][0]['run_number']
                lumi_list += LumiList.LumiList(runsAndLumis={run: ls})
        except Exception as e:
            print('Did not find lumis for', dataset)
            print(e)
        result[dataset] = lumi_list
    return result
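
# A minimal usage sketch, assuming a valid voms proxy and the module-level
# imports (LumiList, get_data, etc.) that this snippet omits; the wildcard
# pattern is just an illustration. Passing a str triggers DAS autocompletion,
# while a list of already-resolved names skips it.
if __name__ == "__main__":
    lumi_lists = get_mc_lumi_list(
        "/QCD_Pt_300to470_TuneCP5_13TeV_pythia8/*/MINIAODSIM")
    for das_name, lumi_list in lumi_lists.items():
        # getRuns() lists the runs the LumiList covers
        print(das_name, "->", len(lumi_list.getRuns()), "runs")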
Example #2
def write_lumi_list(inputDataset="/QCD_Pt_1000to1400_TuneCP5_13TeV_pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14*/MINIAODSIM", filename="test.json"):
    """Get lumilist for datasets matching `inputDataset`, and write result to `filename`

    If a second dataset matches, it is written to `filename` with an extra '_ext' inserted before the file extension.

    raises ValueError if inputDataset matches more than 2 samples (we allow 2 in case we have a nominal+ext sample),
    or matches 0 samples
    """
    inputDatasets = autocomplete_Datasets([inputDataset])
    if len(inputDatasets) > 2:
        for d in inputDatasets:
            print(d)
        raise ValueError("The given inputDataset DAS string corresponds to more than two samples. This is a bit unusual. "
                         "Since this script can only handle up to 2 datasets, please use a more specific dataset pattern.")
    elif len(inputDatasets) == 0:
        raise ValueError("No matching datasets for the dataset pattern")

    results = get_mc_lumi_list(inputDatasets)  # pass the resolved list to avoid a second DAS autocomplete

    results_keys = list(results.keys())
    if len(results) == 1:
        results[results_keys[0]].writeJSON(fileName=filename)
    elif len(results) > 1:
        # if there are two results, assume it's a nominal+ext pair;
        # but the 'ext' sample might sort first, so we should
        # check which is which.
        def _print_save(key, fname):
            print("Saved", key, "to", fname)
            results[key].writeJSON(fileName=fname)

        stem, ext = os.path.splitext(filename)
        ext_filename = stem + "_ext" + ext
        if '_ext' in results_keys[0]:
            _print_save(results_keys[0], ext_filename)
            _print_save(results_keys[1], filename)
        else:
            _print_save(results_keys[0], filename)
            _print_save(results_keys[1], ext_filename)
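
# A minimal usage sketch; the pattern and output name below are placeholders.
# With a nominal+ext pair this writes test.json and test_ext.json.
if __name__ == "__main__":
    write_lumi_list(
        inputDataset="/QCD_Pt_1000to1400_TuneCP5_13TeV_pythia8/*/MINIAODSIM",
        filename="test.json")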
Example #3
# For this to work you need inputDatasets & requestNames in addition to the classical part
#
# Make sure your job output is saved to a unique directory, otherwise the script gets confused (and so will you!)
#
# Usage: ./CrabConfig ConfigFile [options]
#
# Take care here to make the request names *nice*
#
# autocomplete_Datasets(ListOfDatasets) also works for several entries containing *
#
from DasQuery import autocomplete_Datasets

inputDatasets = [
    '/TprimeTprime_M-*_TuneCUETP8M1_13TeV-madgraph-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v*/MINIAODSIM'
]
inputDatasets = autocomplete_Datasets(inputDatasets)
requestNames = []
for x in inputDatasets:
    name = x.split('/')[1]
    modified_name = name.replace('TuneCUETP8M1_13TeV-madgraph-pythia8', '25ns')
    requestNames.append(modified_name)
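
# For illustration: a resolved name such as
# '/TprimeTprime_M-800_TuneCUETP8M1_13TeV-madgraph-pythia8/RunIIFall15MiniAODv2-.../MINIAODSIM'
# yields the request name 'TprimeTprime_M-800_25ns' (the M-800 mass point is hypothetical).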

# ===============================================================================
# Classical part of CRAB; after resolving the *, the example below uses just the first entry
#

from CRABClient.UserUtilities import config, getUsernameFromSiteDB

config = config()
config.General.workArea = 'crab_Tp'
config.General.transferOutputs = True
Example #4
# This is a small example of how the CRAB API can easily be used to create something like multi-crab.
# It has some additional features, like also creating the XML files for you.
# For this to work you need inputDatasets & requestNames in addition to the classical part
#
# Usage: ./CrabConfig ConfigFile [options]
#
# Take care here to make the request names *nice*
#
# autocomplete_Datasets(ListOfDatasets) also works for several entries containing *
#
from DasQuery import autocomplete_Datasets

inputDatasets = ['/TprimeBToTH_M-*_*H_TuneCUETP8M1_13TeV-madgraph-pythia8/RunIISpring15MiniAODv2-74X_mcRun2_asymptotic_v2-v1/MINIAODSIM']
inputDatasets = autocomplete_Datasets(inputDatasets)
requestNames = []
for x in inputDatasets:
    name = x.split('/')[1]
    modified_name = name.replace('TuneCUETP8M1_13TeV-madgraph-pythia8', '25ns')
    requestNames.append(modified_name)

# ===============================================================================
# Classical part of CRAB; after resolving the *, the example below uses just the first entry
#

from CRABClient.UserUtilities import config, getUsernameFromSiteDB


config = config()
config.General.workArea = 'tpb_crab_projects'
config.General.transferOutputs = True
config.General.transferLogs = True
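
# A sketch of the multi-crab loop the comments above describe, assuming the
# standard CRABClient API (crabCommand from CRABAPI.RawCommand); the Data
# settings are placeholders, not part of the original config.
from CRABAPI.RawCommand import crabCommand

for dataset, request_name in zip(inputDatasets, requestNames):
    config.General.requestName = request_name
    config.Data.inputDataset = dataset
    crabCommand('submit', config=config)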
Example #5
#!/usr/bin/env python
"""Simple test to make sure DasQuery moduel works"""

from DasQuery import autocomplete_Datasets

inputDatasets = [
    '/DYJetsToLL_M-50_HT-*to*_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_*/MINIAODSIM'
]
result = autocomplete_Datasets(inputDatasets)
print(result)
assert len(result) == 22  # number of matches in DAS when the test was written
Example #6
#!/usr/bin/env python

"""Simple test to make sure DasQuery moduel works"""

from DasQuery import autocomplete_Datasets

inputDatasets = ['/DYJetsToLL_M-50_HT-*to*_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_*/MINIAODSIM']
result = autocomplete_Datasets(inputDatasets)
print(result)
assert len(result) == 11

inputDatasets = ['/QCD_Pt-15to20_MuEnrichedPt5_TuneCUETP8M1_13TeV_pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6-*/MINIAODSIM']
result = autocomplete_Datasets(inputDatasets)
print(result)
assert len(result) == 1