예제 #1
0
파일: crab.py 프로젝트: hqucms/crab
def calcLumiForRecovery(config, status_dict, work_area_rsb):
    """Write the lumi mask that a recovery task still has to process.

    The lumis found in the first output dataset of the task are subtracted
    from the input lumis.  The input lumis come from ``config.Data.lumiMask``
    when that attribute is set and non-empty (loaded as a URL if it starts
    with ``http``, otherwise passed to ``LumiList`` positionally); otherwise
    they are queried for ``config.Data.inputDataset`` on
    ``config.Data.inputDBS``.

    Two JSON files are written into ``<work_area_rsb>/configs`` (created if
    it does not exist):

    * ``<requestName>_lumi_processed.json`` -- lumis of the output dataset
    * ``<requestName>_lumiMask.json`` -- input lumis minus processed lumis

    Args:
        config: CRAB configuration object; ``Data`` and ``General`` sections
            are read.
        status_dict: mapping whose ``'outdatasets'`` value is a string
            holding a Python literal sequence of dataset names; only the
            first entry is used.
        work_area_rsb: directory under which the ``configs`` folder lives.

    Returns:
        Absolute path of the written ``<requestName>_lumiMask.json`` file.
    """
    import ast
    from CRABClient.UserUtilities import getLumiListInValidFiles
    from WMCore.DataStructs.LumiList import LumiList

    config_dir = os.path.join(work_area_rsb, 'configs')
    if not os.path.exists(config_dir):
        os.makedirs(config_dir)

    # Lumis of the input: either the user's lumi mask or the whole dataset.
    mask_source = getattr(config.Data, 'lumiMask', '')
    if not mask_source:
        logger.info(
            'No lumi mask for the original dataset, will use the full lumi from input dataset %s'
            % config.Data.inputDataset)
        input_lumis = getLumiListInValidFiles(config.Data.inputDataset,
                                              dbsurl=config.Data.inputDBS)
    else:
        logger.info('Lumi mask for the original dataset: %s' % mask_source)
        if mask_source.startswith('http'):
            input_lumis = LumiList(url=mask_source)
        else:
            input_lumis = LumiList(mask_source)

    # Lumis already present in the (first) published output dataset.
    output_datasets = ast.literal_eval(status_dict['outdatasets'])
    first_output = output_datasets[0]
    logger.info('Getting lumis in the output dataset %s' % first_output)
    processed_lumis = getLumiListInValidFiles(first_output, dbsurl='phys03')
    processed_path = os.path.join(
        config_dir, config.General.requestName + '_lumi_processed.json')
    processed_lumis.writeJSON(processed_path)

    # What is left to do: input lumis minus the processed ones.
    mask_path = os.path.abspath(
        os.path.join(config_dir,
                     config.General.requestName + '_lumiMask.json'))
    remaining_lumis = input_lumis - processed_lumis
    remaining_lumis.writeJSON(mask_path)
    return mask_path
예제 #2
0
# Command-line tool: fetch the lumi list returned by getLumiListInValidFiles
# for a dataset and write it to "<part1>_<part2>_<part3>.txt".
#
# Exit codes: 2 if the CRAB modules cannot be imported, 1 if no dataset is
# given, 3 if the lumi query raises ClientException.
try:
    from CRABClient.UserUtilities import getLumiListInValidFiles
    from CRABClient.ClientExceptions import ClientException
    from WMCore.DataStructs.LumiList import LumiList
except ImportError:
    print("This tool relies on CRAB. Please set up the environment for CRAB before using.")
    sys.exit(2)

# NOTE(review): the usage text advertises OUTPUT_FILE, but the second
# argument is used below as the DBS instance -- confirm which is intended.
if len(sys.argv) < 2:
    print("Usage: " + os.path.basename(sys.argv[0]) + " DATASET [OUTPUT_FILE]")
    sys.exit(1)

dataset = sys.argv[1]

# Split "/a/b/c" into its three components.  When the name does not match,
# the substitution leaves it unchanged, so every part equals the full name.
_DATASET_PATTERN = re.compile(r"^\/([^/]*)\/([^/]*)\/([^/]*)$")
dataset0 = _DATASET_PATTERN.sub(r"\1", dataset)
dataset1 = _DATASET_PATTERN.sub(r"\2", dataset)
dataset2 = _DATASET_PATTERN.sub(r"\3", dataset)

# An explicit second argument wins; otherwise datasets whose third component
# is "USER" are looked up in phys03 and everything else in global.
if len(sys.argv) > 2:
    instance = sys.argv[2]
elif dataset2 == "USER":
    instance = "phys03"
else:
    instance = "global"

print("Getting JSON from DBS instance " + instance + "...")
try:
    taskALumis = getLumiListInValidFiles(dataset=dataset, dbsurl=instance)
except ClientException:
    print("Error accessing DAS. Please initialize your grid proxy before using.")
    sys.exit(3)

output_name = dataset0 + "_" + dataset1 + "_" + dataset2 + ".txt"
taskALumis.writeJSON(output_name)
print("Wrote JSON to \"" + output_name + "\".")
             logger.exception(
                 "CalledProcessError for query {0}, skipping".format(query))
 nInDS = len(datasets)
 datasets = list(set(datasets))
 if len(datasets) != nInDS:
     logger.info(
         "Removed {0:d} duplicates in input datasets".format(nInDS -
                                                             len(datasets)))
 logger.info("Checking {0:d} datasets".format(len(datasets)))
 ds_complete, ds_incomplete = [], []
 topDatasetsYaml = dict()
 from CRABClient.UserUtilities import getLumiListInValidFiles
 for ds in datasets:
     parents = getParents(ds, opts=qryOpts)
     logger.debug("P: {0} -> {1}".format(ds, ", ".join(parents)))
     ll_nano = getLumiListInValidFiles(ds)
     ll_mini_items = {
         p: getLumiListInValidFiles(p, "global")
         for p in parents
     }
     if len(ll_mini_items) == 1:
         ll_mini = ll_mini_items.values()[0]
     elif len(ll_mini_items) > 1:
         items = list(ll_mini_items.values())
         ll_mini = items[0]
         for itm in items[1:]:
             ll_mini += itm
     else:
         raise RuntimeError("No parents for dataset {0}".format(ds))
     ll_remain = ll_mini - ll_nano
     comment = None
    except IOError:
        print("You need write access for crab report. Exiting ...")
        exit()
    if not os.path.isfile(crab_project_dir + "/results/processedLumis.json"):
        print(
            "processedLumis json was not found in crab project folder, please check! Exiting ..."
        )
        exit()

# DBS instance of the original input dataset: phys03 when the dataset name
# contains "USER", global otherwise.
if "USER" in original_input_dataset:
    dbs_instance = "phys03"
else:
    dbs_instance = "global"

# Lumis processed by the original crab job: read from the project's
# processedLumis.json unless the output was published, in which case the
# output dataset is queried in phys03.
if not original_publication:
    original_task_lumis = LumiList(crab_project_dir +
                                   "/results/processedLumis.json")
else:
    original_task_lumis = getLumiListInValidFiles(dataset=output_dataset,
                                                  dbsurl='phys03')

# Lumis contained in the original input dataset.
officialLumiMask = getLumiListInValidFiles(dataset=original_input_dataset,
                                           dbsurl=dbs_instance)

# Lumi mask used by the previous configuration, if it declares one.
previousLumiMask = None
data_section = config.section_("Data")
if "lumiMask" in data_section.listSections_():
    previousLumiMask = LumiList(data_section.section_("lumiMask"))
    print("Previous lumi mask was found to be: ",
          data_section.section_("lumiMask"))
# Command-line tool: query the lumi list of a dataset with
# getLumiListInValidFiles and dump it as JSON to
# "<part1>_<part2>_<part3>.txt" in the current directory.
#
# Exit codes: 2 when CRAB cannot be imported, 1 when the dataset argument is
# missing, 3 when the query raises ClientException.
try:
    from CRABClient.UserUtilities import getLumiListInValidFiles
    from CRABClient.ClientExceptions import ClientException
    from WMCore.DataStructs.LumiList import LumiList
except ImportError:
    print("This tool relies on CRAB. Please set up the environment for CRAB before using.")
    sys.exit(2)

# NOTE(review): the usage line names the optional argument OUTPUT_FILE, yet
# the code below treats it as the DBS instance -- confirm which is intended.
if len(sys.argv) < 2:
    print("Usage: " + os.path.basename(sys.argv[0]) + " DATASET [OUTPUT_FILE]")
    sys.exit(1)

dataset = sys.argv[1]

# Components of a "/a/b/c" dataset name; a non-matching name is returned
# unchanged by re.sub, so all three parts then equal the whole name.
_dataset_regex = r"^\/([^/]*)\/([^/]*)\/([^/]*)$"
dataset0, dataset1, dataset2 = (re.sub(_dataset_regex, group_ref, dataset)
                                for group_ref in (r"\1", r"\2", r"\3"))

# Default instance depends on the third component; a second command-line
# argument overrides it.
instance = "phys03" if dataset2 == "USER" else "global"
if len(sys.argv) > 2:
    instance = sys.argv[2]

print("Getting JSON from DBS instance " + instance + "...")
try:
    taskALumis = getLumiListInValidFiles(dataset=dataset, dbsurl=instance)
except ClientException:
    print("Error accessing DAS. Please initialize your grid proxy before using.")
    sys.exit(3)

json_name = "_".join((dataset0, dataset1, dataset2)) + ".txt"
taskALumis.writeJSON(json_name)
print("Wrote JSON to \"" + json_name + "\".")
예제 #6
0
from __future__ import print_function
from CRABClient.UserUtilities import getLumiListInValidFiles
from WMCore.DataStructs.LumiList import LumiList
import sys
import csv

# For every CSV row that has both a 'dataset' and a 'boosted_dataset' entry,
# print the pair followed by the lumis of 'dataset' (queried in global) that
# are not in 'boosted_dataset' (queried in phys03).
csv_path = sys.argv[1]
with open(csv_path) as handle:
    for entry in csv.DictReader(handle):
        # Rows with an empty value in either column are skipped.
        if entry['dataset'] != '' and entry['boosted_dataset'] != '':
            print(entry['dataset'], entry['boosted_dataset'])
            processed_lumis = getLumiListInValidFiles(
                dataset=entry['boosted_dataset'], dbsurl='phys03')
            official_lumis = getLumiListInValidFiles(
                dataset=entry['dataset'], dbsurl='global')
            print(official_lumis - processed_lumis)
예제 #7
0
from CRABClient.UserUtilities import config, getLumiListInValidFiles
from WMCore.DataStructs.LumiList import LumiList

# Inputs: the dataset whose lumis have already been produced, and the lumi
# mask JSON file they are compared against.
PROCESSED_DATASET = '/HIMinimumBias6/qwang-V0Skim_v3-5f932986cf38f9e8dbd6c3aea7f6c2b4/USER'
OFFICIAL_MASK_FILE = '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions18/HI/PromptReco/Cert_326381-327564_HI_PromptReco_Collisions18_JSON.txt'

processed_lumis = getLumiListInValidFiles(dataset=PROCESSED_DATASET,
                                          dbsurl='phys03')
official_lumis = LumiList(filename=OFFICIAL_MASK_FILE)

# Lumis in the mask file that are absent from the processed dataset are
# written to t.json.
remaining_lumis = official_lumis - processed_lumis
remaining_lumis.writeJSON('t.json')