Example #1
 def outDS(self):
     return ",".join(
         sorted(
             ClearFromDuplicates([
                 x['containername'] for x in self._fulldict['datasets']
                 if x['type'] != 'input'
             ])))
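Every example on this page feeds a list through ClearFromDuplicates from ClusterSubmission.Utils. Its implementation is not part of this listing; a minimal sketch of the assumed behaviour (return a new list containing each element once, preserving the first occurrence) could look like this:

def ClearFromDuplicates(in_list):
    ### Hypothetical re-implementation for illustration only.
    out_list = []
    for entry in in_list:
        if entry not in out_list:
            out_list.append(entry)
    return out_list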
Example #2
def updateBlackList(black_list):
    current_black = getPRWblackList()
    if IsListIn(black_list, current_black): return
    current_black = ClearFromDuplicates(current_black + black_list)
    current_dir = os.getcwd()
    FileName = os.path.realpath(
        ResolvePath("XAMPPbase/BlackListedPRWdatasets.txt"))
    Pkg_Dir = os.path.realpath(ResolvePath("XAMPPbase"))
    ###############################################################################
    #      Find out the current branch to propagate only                          #
    #      the updated List to the main repository. Other changes regarding       #
    #      side developments of the package should not be propagated yet          #
    ###############################################################################
    upstream = setupGITupstream()
    current_branch = getBranch()
    os.chdir(Pkg_Dir)
    new_branch = "PRW_%s_%s" % (time.strftime("%Y%m%d"), USERNAME)
    if current_branch:
        os.system(
            "git commit -am \"Commit changes of all files in order to push the 'BlackListedPRWdatasets.txt'\""
        )
    print "INFO: Create new branch %s to update the BlackListedPRWdatasets " % (
        new_branch)
    os.system("git checkout -b %s %s/master" % (new_branch, upstream))
    WriteList(sorted(current_black), FileName)
    os.system("git add BlackListedPRWdatasets.txt")
    os.system(
        "git commit BlackListedPRWdatasets.txt -m \"Updated the list of black prw files\""
    )
    os.system("git push %s %s" % (upstream, new_branch))
    if current_branch: os.system("git checkout %s" % (current_branch))
    os.chdir(current_dir)
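The commit/checkout/push sequence above is driven through os.system, which ignores failures of the individual git commands. A hedged alternative sketch (push_blacklist_update is a hypothetical helper, not part of XAMPPbase) using subprocess would abort on the first failing command:

import subprocess

def push_blacklist_update(pkg_dir, upstream, new_branch, list_file="BlackListedPRWdatasets.txt"):
    ### Sketch of the same git workflow as updateBlackList above; each call raises
    ### subprocess.CalledProcessError if the corresponding git command fails.
    subprocess.check_call(["git", "checkout", "-b", new_branch, "%s/master" % upstream], cwd=pkg_dir)
    subprocess.check_call(["git", "add", list_file], cwd=pkg_dir)
    subprocess.check_call(["git", "commit", list_file, "-m", "Updated the list of black prw files"], cwd=pkg_dir)
    subprocess.check_call(["git", "push", upstream, new_branch], cwd=pkg_dir)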
Example #3
 def __init__(self,
              campaign,
              stype,
              rtag,
              datasets,
              temp_dir,
              outdir,
              check_consistency=False,
              notDownloadAgain=True):
     threading.Thread.__init__(self)
     self.__campaign = campaign
     self.__stype = stype
     self.__rtag = rtag
     self.__datasets = datasets
     self.__dsids = ClearFromDuplicates(
         [GetPRW_datasetID(ds) for ds in self.__datasets])
     self.__purged = []
     self.__tempdir = temp_dir
     self.__outdir = outdir
     self.__check_consistency = check_consistency
     self.__to_black_list = []
     self.__ds_to_submit = []
     self.__inconsistent_log = []
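     ### Datasets recorded in Finished.txt by a previous run are remembered here so that,
     ### with notDownloadAgain=True, they are not downloaded a second time.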
     self.__already_on_disk = [] if not notDownloadAgain or not os.path.exists(
         "%s/Finished.txt" %
         (self.download_dir())) else ReadListFromFile("%s/Finished.txt" %
                                                      (self.download_dir()))
     if check_consistency:
         getAMIDataBase().getMCDataSets(channels=self.dsids(),
                                        campaign="%s" % (self.campaign()),
                                        derivations=[])
Example #4
    def standaloneCheck(self):
        if not self.__check_consistency: return True
        prw_helper = self.__setup_prw_helper(config_files=[self.final_file()])
        prw_period = prw_helper.getPRWperiods_fullsim()[0]
        missing_dsids = [
            ds for ds in self.dsids()
            if prw_helper.nEventsPerPRWperiod_full(ds, prw_period) <= 0
        ]
        ### Submit another prw job on the purged prw files
        if len(self.purged()) > 0 or len(missing_dsids) > 0:
            print "WARNING: %d datasets were purged from the prw file %s" % (
                len(self.purged()) + len(missing_dsids), self.final_file())
            purged_AODs = []
            for ds in ClearFromDuplicates(self.purged() + missing_dsids):
                for A in getAODsFromRucio(self.campaign(), ds, self.isAFII()):
                    all_tags = GetAMITagsMC(A,
                                            SkimETag=False,
                                            SkimSTag=False,
                                            SkimPTag=True)
                    ### Reject second e-tag
                    if all_tags.find("_e") != -1: continue
                    ### Reject second r-tag
                    if all_tags.find("_r") != all_tags.rfind("_r"): continue
                    ### Reject second s-tag
                    if all_tags.find("_s") != all_tags.rfind("_s"): continue
                    ### Reject second a-tag
                    if all_tags.find("_a") != all_tags.rfind("_a"): continue
                    rtag = all_tags.split("_")[-1]
                    if rtag != self.rtag() or A in self.__ds_to_submit:
                        continue
                    self.__ds_to_submit.append(A)

            os.system("rm %s" % (self.final_file()))
            return False
        return True
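The r-, s- and a-tag vetoes in standaloneCheck share one pattern: a dataset is skipped when the tag marker occurs more than once in the AMI tag string. A hypothetical helper expressing that test (not part of the original code) would be:

def appears_at_most_once(all_tags, marker):
    ### Mirrors the find()/rfind() comparison above, e.g. appears_at_most_once(all_tags, "_r").
    return all_tags.find(marker) == all_tags.rfind(marker)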
Example #5
 def GetSubPeriods(self, Y, project=None):
     if Y > 2000: Y -= 2000
     self.__LoadPeriodsFromAmi(Y)
     return sorted(
         ClearFromDuplicates([
             P.period() for P in self.__Periods[Y]
             if project == None or P.project().split("_")[-1] == project
         ]))
Example #6
def GetScopes(select_user=False, select_group=False, select_official=False):
    logging.info("Reading in the scopes:")
    Scopes = getGmdOutput("rucio list-scopes")
    ScopeList = ClearFromDuplicates([
        Entry for Entry in Scopes
        if (select_user == True and Entry.find("user") != -1) or (select_group == True and Entry.find("group") != -1) or (
            select_official == True and Entry.find("user") == -1 and Entry.find("group") == -1)
    ])
    logging.info("Done found %d scopes" % (len(ScopeList)))
    return ScopeList
Example #7
def main():
    """Request datasets to RSE location."""
    CheckRucioSetup()
    CheckRemainingProxyTime()

    RunOptions = getArgumentParser().parse_args()
    List = ClearFromDuplicates(ReadListFromFile(RunOptions.list))

    ### Start replication of the datasets
    initiateReplication(ListOfDataSets=List,
                        Rucio=RunOptions.rucio,
                        RSE=RunOptions.RSE,
                        lifeTime=RunOptions.lifetime,
                        approve=RunOptions.askapproval,
                        comment=RunOptions.comment)
Example #8
    def __init__(
        self,
        cluster_engine=None,
        run_numbers=[],
        hold_jobs=[],
        nJobs=10,
        eventsPerJob=10000,
        evgenCache="",
        evgenRelease="AthGeneration",
        cores_to_use=1,
        memory=1200,
        run_time="12:00:00",
        keep_output=False,
        joboptions_dir="",
        models_dir="",
        preExec="",
        preInclude="",
        postExec="",
        postInclude="",
    ):
        self.__cluster_engine = cluster_engine
        self.__nJobs = nJobs
        self.__events_per_job = eventsPerJob
        self.__ev_gen_cores = cores_to_use

        self.__evgenCache = evgenCache
        self.__evgenRelease = evgenRelease

        self.__preExec = preExec.replace('"', "'")
        self.__preInclude = preInclude.replace('"', "'")

        self.__postExec = postExec.replace('"', "'")
        self.__postInclude = postInclude.replace('"', "'")

        self.__n_scheduled = 0
        self.__run_time = run_time
        self.__mem = memory
        self.__hold_jobs = [h for h in hold_jobs]
        self.__keep_out = keep_output
        self.__joboptions_dir = joboptions_dir
        self.__models_dir = models_dir
        self.__get_job_options(sorted(ClearFromDuplicates(run_numbers)))
Example #9
def main():
    parser = setupSubmitParser()
    options = parser.parse_args()
    cluster_engine = setup_engine(options)

    Spared_Files = []
    #### The previous round on the cluster screwed up but still produced some results.
    #### There is no reason to reprocess them, so successfully processed files are not submitted twice.
    if len(options.SpareWhatsProcessedIn) > 0:
        print "INFO: Cluster did not perform so well last time? This little.. buttefingered.."
        for dirToSpare in options.SpareWhatsProcessedIn:
            if not os.path.isdir(dirToSpare):
                print "ERROR: I need a directory to look up %s" % (dirToSpare)
                exit(1)
            for finished in os.listdir(dirToSpare):
                if not IsROOTFile(finished): continue
                print "INFO: Yeah... %s has already beeen processed. Let's skip it.." % (
                    finished)
                Spared_Files.append(finished[:finished.rfind(".root")])

    Submit_Class = NtupleMakerSubmit(
        cluster_engine=cluster_engine,
        jobOptions=options.jobOptions.replace("share/", ""),
        input_ds=ClearFromDuplicates([
            ds for ds in options.inputDS
            if ds[:ds.rfind(".")] not in Spared_Files
            #  or ds not in  Spared_Files
        ]),
        run_time=options.RunTime,
        dcache_dir="%s/GroupDiskLists/%s" % (options.BaseProject, options.RSE),
        alg_opt=AssembleRemoteRunCmd(
            options,
            parser),  ### Extra options of the algorithm like noSyst... etc
        vmem=options.vmem,
        events_per_job=options.EventsPerJob,
        hold_jobs=options.HoldJob,
        files_per_merge=options.FilesPerMergeJob,
        final_split=options.FinalSplit,
    )
    Submit_Class.submit_job()
Example #10
    def loadRuns(self, Y, derivations=[], project="13TeV"):
        ### import AMI
        getAmiClient()
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI

        periods = GetPeriodRunConverter().GetSubPeriods(Y, project=project)

        ### I'm not happy about this pattern line. If we change the project to cos or hi
        ### then the pattern might differ from what AMI needs
        Pattern = "data%i_%s.%%physics_Main.%%" % (Y, project)

        DSIDS = AtlasAPI.list_datasets(
            getAmiClient(),
            patterns=[Pattern],
            fields=[
                'run_number',
                "period",
                'type',
                'events',
                'ami_status',
            ],
            period=",".join(GetPeriodRunConverter().GetSubPeriods(
                Y, project=project)),
            type=ClearFromDuplicates(["AOD"] + derivations))

        ### Read out the AMI query
        for entry in DSIDS:
            R = int(entry["run_number"])
            if not self.getRunElement(R): self.__runs += [AMIdataEntry(R)]
            runElement = self.getRunElement(R)
            flavour = entry["type"]
            tag = self.__getDSTag(entry['ldn'], flavour)
            nevents = int(entry['events'])
            runElement.addDataset(data_type=flavour,
                                  tag=tag,
                                  events=nevents,
                                  status="")
Example #11
def getGRL(year=[15, 16, 17, 18], flavour='GRL', config='ClusterSubmission/GRL.json'):
    """Get from json file either 
    - default Good Run Lists (flavour='GRL') or 
    - default lumi calc files (flavour='lumiCalc') or 
    - default actual mu pile-up reweighting files (flavour='actualMu')
    as a list of strings. Can be called without arguments to give just GRLs 
    for all years or with a specific (list of) year(s).
    Default input is config='ClusterSubmission/GRL.json'
    """
    if isinstance(year, list): myYears = ClearFromDuplicates([str(y) for y in year if y < 100] + [str(y - 2000) for y in year if y > 2000])
    elif isinstance(year, int) or isinstance(year, str): myYears = [str(int(year))] if int(year) < 100 else [str(int(year) - 2000)]
    global m_GRLdict
    if not m_GRLdict: m_GRLdict = json.load(open(ResolvePath(config), 'r'))
    try:
        if flavour == 'actualMu' and ('15' in myYears or '16' in myYears):
            logging.warning("actual mu PRW is only avaliable for data17 and data18.")
            if not ('17' in myYears or '18' in myYears):
                logging.error("The request is ill-defined and does not make sense.")
                raise NameError('actual mu PRW is only available for data17 and data18, not for data15 or data16')
        return [str(value) for key, value in m_GRLdict[flavour].items() if (value and key in ['data' + y for y in myYears])]
    except Exception as e:
        logging.error("Error when accessing GRL/lumiCalc/actualMu information!")
        raise (e)
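A short usage sketch of getGRL, based only on the docstring above (the actual keys and file names live in ClusterSubmission/GRL.json, which is not reproduced here):

### Illustrative calls; each one just selects entries from GRL.json.
all_grls = getGRL()                                   # GRLs for data15-18
lumi_2017 = getGRL(17, flavour='lumiCalc')            # lumi calc file for data17
actual_mu = getGRL([2017, 2018], flavour='actualMu')  # actual-mu PRW files for data17+18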
Example #12
 def GetPeriods(self, Y):
     if Y > 2000: Y -= 2000
     self.__LoadPeriodsFromAmi(Y)
     return sorted(ClearFromDuplicates([P.period()[0] for P in self.__Periods[Y]]))
Example #13
        for inputFile in RunOptions.inputFile:
            datasetsInList.extend(ReadListFromFile(inputFile))
        Datasets.extend(datasetsInList)

    else:
        print 'INFO: Looking for NTUP_PILEUP datasets in rucio...'
        for c in RunOptions.campaign:
            Datasets += GetDatasets(campaign=c,
                                    requested_ds=Required_DS,
                                    restrictToRequestedDatasets=RunOptions.restrictToRequestedDatasets)

    ##### Find datasets which are required by the user but for which no prw file is available yet
    MissingDataSets = ClearFromDuplicates([
        ds for ds in Required_DS if IsMissingDataSet(prw_files=Datasets, DS=ds)
    ])

    if len(MissingDataSets) > 0:
        print "INFO: You've requested a list of datasets, where for the following datasets no associated NTUP_PILEUP could be found:"
        for M in MissingDataSets:
            print "        *** %s" % (M)
        print "INFO: Will submit foreach dataset a job to create prw files"
        submitPRWFiles(DataSets=MissingDataSets,
                       RUCIO=RunOptions.rucio,
                       RSE=RunOptions.destRSE,
                       Official=RunOptions.official)
        print "#################################################################################################################"
        print "INFO: All prwFiles have been submitted. Will omitt to create a new prw file                                     #"
        print "INFO: Please execute the script if the jobs are processed on the grid or remove the samples from the list.      #"
        print "#################################################################################################################"
    Sample_Dir = ResolvePath(RunOptions.ListDir)
    No_AOD = []
    TO_REQUEST = []

    if not Sample_Dir:
        logging.error("ERROR: Please give a valid  directory")
        exit(1)

    for File in os.listdir(Sample_Dir):
        if os.path.isdir("%s/%s" % (Sample_Dir, File)): continue
        logging.info("Update file list %s" % (File))

        DataSets = sorted(
            ClearFromDuplicates([
                GetPRW_datasetID(DS)
                for DS in ReadListFromFile("%s/%s" % (Sample_Dir, File))
                if DS.find("data") == -1
            ]))
        if len(DataSets) == 0: continue
        logging.info("Call the AMI database")

        DERIVATIONS = []
        NO_DERIVARTION = []
        AODs = []
        getAMIDataBase().getMCDataSets(
            channels=DataSets,
            derivations=["DAOD_%s" % (RunOptions.derivation)])
        #### Find the AODs for each DSID first
        for DSID in DataSets:
            Found_MC16a = False
            Found_MC16d = False
Example #15
def performConsistencyCheck(RunOptions, mc16a_file, mc16d_file, mc16e_file):
    if not mc16a_file or not mc16d_file: return True
    mc16a_file.print_datasets()
    mc16d_file.print_datasets()
    prwTool = setupPRWTool(
        mc_config_files=[mc16a_file.final_file(),
                         mc16d_file.final_file()],
        isAF2=mc16a_file.isAFII())
    dsids = ClearFromDuplicates(mc16a_file.dsids() + mc16d_file.dsids())
    missing_dsids = []

    for ds in sorted(dsids):
        if not prwTool.isDSIDvalid(ds): missing_dsids.append(ds)
    if len(missing_dsids) == 0:
        print "########################################################################"
        print "INFO: Consistency check of the two prw files is successful"
        print "INFO: In none of the mc16a(%s) and mc16d(%s) files a dsid has been kicked by the tool" % (
            mc16a_file.final_file(), mc16d_file.final_file())
        print "########################################################################"
        return True

    print "#############################################################################################"
    print "INFO: %d out of the %d DSIDs are kicked by the prwTool " % (
        len(missing_dsids), len(dsids))
    prw_files = [
        convertToAOD(f) for f in mc16a_file.datasets() + mc16d_file.datasets()
        if GetPRW_datasetID(f) in missing_dsids and IsOfficialPRWdataset(f)
    ]
    #prw_files = sorted(prw_files, cmp=lambda x, y: PRWdatasetSorter(x, y))
    for p in prw_files:
        print "        --- %s" % (p)
    ### Look in AMI for the AODs
    AODs = []
    print "INFO: Start to search for the corresponding AODs in both campaigns mc16a (%s) and mc16d (%s)" % (
        mc16a_file.rtag(), mc16d_file.rtag())
    for i, ds in enumerate(missing_dsids):
        for A in getAODsFromRucio(mc16a_file.campaign(), ds,
                                  mc16a_file.isAFII()):
            rtag = GetAMITagsMC(A)
            if rtag != mc16a_file.rtag() and rtag != mc16d_file.rtag():
                continue
            if convertToAOD(A) in prw_files: continue
            AODs.append(A)
        if i % max(int(min([len(missing_dsids) / 10, 100])), 1) == 0:
            print "INFO: Searched %d / %d datasets" % (i, len(missing_dsids))

    if len(AODs) == 0: return True

    ### Delete the prw files since they are inconsistent
    os.system("rm %s" % (mc16a_file.final_file()))
    os.system("rm %s" % (mc16d_file.final_file()))

    print "INFO: Found the following %d AODs to submit prw jobs:" % (len(AODs))
    for A in AODs:
        print "       *** %s" % (A)
    print "#############################################################################################"
    submitPRWFiles(DataSets=AODs,
                   RUCIO=RunOptions.rucio,
                   RSE=RunOptions.destRSE,
                   Official=RunOptions.official)
    return False
Example #16
    def getMCDataSets(self,
                      channels=[],
                      campaign="mc16_13TeV",
                      derivations=[]):
        getAmiClient()
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI

        data_type = ClearFromDuplicates(["AOD"] + derivations)
        channels_to_use = []
        # Check only the dsids which are non-existent or not complete
        for mc in channels:
            ami_channel = self.getMCchannel(dsid=mc, campaign=campaign)
            if not ami_channel:
                channels_to_use.append(mc)
                continue
            # Check if the dsid is already complete w.r.t all data-formats
            to_append = False
            for data in data_type:
                if not ami_channel.hasDataType(data):
                    to_append = True
                if to_append:
                    break
            if to_append:
                channels_to_use.append(mc)

        Blocks = []
        # Try to block the queries in DSIDS of thousands
        for mc in channels_to_use:
            FirstDigits = int(str(mc)[0:3])
            if FirstDigits not in Blocks:
                Blocks.append(FirstDigits)
        # Summarizing into blocks leads to a huge reduction of queries
        if len(Blocks) < len(channels_to_use):
            channels_to_use = Blocks
        print "<AMIDataBase> INFO: going to ask AMI about %d different things" % (
            len(channels_to_use))
        prompt = max(int(len(channels_to_use) / 10), 2)
        for i, mc in enumerate(channels_to_use):
            if i % prompt == 0:
                print "<AMIDataBase> INFO: %d/%d stubbed AMI :-P" % (
                    i, len(channels_to_use))
            # AMI query
            DSIDS = AtlasAPI.list_datasets(
                getAmiClient(),
                patterns=["%s.%i%%.%%" % (campaign, mc)],
                fields=[
                    'type',
                    'events',
                    'ami_status',
                    "physics_short",
                    "dataset_number",
                    "cross_section",
                    "prodsys_status",
                ],
                ### Limit the query to at most 1000 * 50 datasets
                limit=[1, 1000 * 50],
                type=data_type)
            for amiDS in DSIDS:
                DS = int(amiDS["dataset_number"])
                ami_entry = self.getMCchannel(dsid=DS, campaign=campaign)
                # a fresh AMImcEntry needs to be created
                if not ami_entry:
                    physics_name = amiDS["physics_short"]
                    try:
                        xS = float(amiDS["cross_section"])
                    except Exception:
                        print "<AMIDataBase> WARNING: No x-section found for %s (%i) in AMI" % (
                            physics_name, DS)
                        xS = 1.
                    ami_entry = AMImcEntry(dsid=DS,
                                           xsec=xS,
                                           physics_name=physics_name,
                                           campaign=campaign)
                    self.__mc_channels.append(ami_entry)
                ds_type = amiDS["type"]
                tag = self.__getDSTag(amiDS['ldn'])
                nevents = int(amiDS['events'])
                ami_entry.addDataset(data_type=ds_type,
                                     tag=tag,
                                     events=nevents,
                                     status="")
        return True
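The grouping of the requested channels into blocks of the leading three DSID digits can be illustrated with made-up DSIDs:

### Illustration only; the DSIDs are hypothetical.
channels_to_use = [410470, 410471, 364100]
Blocks = []
for mc in channels_to_use:
    FirstDigits = int(str(mc)[0:3])
    if FirstDigits not in Blocks:
        Blocks.append(FirstDigits)
### Blocks is now [410, 364], i.e. two AMI queries with the patterns
### "mc16_13TeV.410%.%" and "mc16_13TeV.364%.%" instead of three.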
Example #17
    def clearFromDuplicates(self, directory=""):
        print "INFO: Clear input of %s from duplicates and empty datasets" % (
            self.final_file())
        samples = []
        #### After downloading everything we need to clean the input from duplicates and remove
        #### all dataset containers which are empty.
        #### The fastest way to spot the empty datasets is to check whether the rucio download
        #### directory contains any ROOT files.
        for i in range(len(self.datasets())):
            ### We now pursue a new approach. It seems that extensions of a dataset are assigned
            ### to different NTUP_PILEUP p-tags, so we need to download them all?
            ds_list = []
            dsid_to_check = GetPRW_datasetID(self.__datasets[i])
            tag = GetAMITagsMC(self.__datasets[i],
                               SkimPTag=True,
                               SkimSTag=False,
                               SkimETag=False)
            while i < len(self.datasets()) and (
                    GetPRW_datasetID(self.__datasets[i]) == dsid_to_check
                    and tag == GetAMITagsMC(self.__datasets[i],
                                            SkimPTag=True,
                                            SkimSTag=False,
                                            SkimETag=False)):
                ds = self.__datasets[i]
                smp_dir = "%s/%s" % (directory, ds)
                if os.path.isdir(smp_dir) and len(
                    [f for f in os.listdir(smp_dir) if IsROOTFile(f)]) > 0:
                    ds_list += [ds]
                i += 1

            if len(ds_list) == 0: continue
            if len(ds_list) > 1:
                ds_pairs = [(x, self.events_in_prwFile(directory, x))
                            for x in ds_list]
                ds_list = [
                    d[0]
                    for d in sorted(ds_pairs, key=lambda x: x[1], reverse=True)
                ]
            if self.__check_consistency:
                ### Look up the dataset in AMI to cross-check the event counts in the prw config files
                ami_lookup = getAMIDataBase().getMCchannel(
                    dsid_to_check, "%s" % (self.campaign()))
                if not ami_lookup:
                    print "WARNING: The dataset %s does not exist in AMI at all. Interesting that we made prw files out of it" % (
                        ds)
                    continue

                config_file_tag = GetAMITagsMC(DS=ds_list[0],
                                               SkimPTag=True,
                                               SkimETag=False,
                                               SkimSTag=False)
                ev_in_ami = ami_lookup.getEvents(tag=config_file_tag)

                if ev_in_ami == -1:
                    print "WARNING: no AMI tag could be found for dataset %s " % (
                        ds)
                    for T in ami_lookup.getTags():
                        print "        --- %s: %d" % (
                            T, ami_lookup.getEvents(tag=T))
                    continue

                ds_to_add = []
                ev_in_prw = 0
                for ds in ds_list:
                    ev_in_ds = self.events_in_prwFile(directory, ds)
                    ev_in_prw += ev_in_ds
                    if ev_in_ds == ev_in_ami:
                        ds_to_add = [ds]
                        break
                    ### We still can add datasets
                    if ev_in_ami >= ev_in_prw:
                        ds_to_add += [ds]
                        if ev_in_ami == ev_in_prw: break

                if ev_in_prw != ev_in_ami:
                    print "WARNING: %s has different number of events in AMI (%d) vs. NTUP_PILEUP (%d)" % (
                        ds, ev_in_ami, ev_in_prw)
                    self.__inconsistent_log += [
                        "%s    %d  %d" % (ds, ev_in_ami, ev_in_prw)
                        for ds in ds_list
                    ]
                    ds_to_add = []
                ### Somehow we have more events in the config file than in AMI... Definitely a candidate to blacklist
                if ev_in_ami < ev_in_prw: self.__to_black_list += ds_to_add

            samples += ds_list

        if self.__check_consistency:
            WriteList(samples, "%s/Finished.txt" % (self.download_dir()))
        new_dsids = ClearFromDuplicates(
            [GetPRW_datasetID(ds) for ds in samples])
        if len(self.dsids()) != len(new_dsids):
            self.__dsids = sorted(new_dsids)
            self.__purged = sorted([
                GetPRW_datasetID(ds) for ds in self.__datasets
                if GetPRW_datasetID(ds) not in self.dsids()
            ])
            print "INFO: %d dsids have been eliminated since all input files are invalid." % (
                len(self.purged()))

        #### for the removal the sorting is important
        #### 1) official vs. privately produced
        #### 2) Newer ptag vs old
        ##samples = sorted(samples, cmp=lambda x,y: PRWdatasetSorter(x,y))
        AOD_Samples = []
        for s in samples:
            AOD = "%d.%s" % (
                GetPRW_datasetID(s),
                GetAMITagsMC(s, SkimPTag=True, SkimETag=False, SkimSTag=False))
            if not AOD in AOD_Samples:
                self.__datasets += [s]
                AOD_Samples.append(AOD)

        print "INFO: Will merge %d files to %s" % (len(
            self.datasets()), self.final_file())
Example #18
def configurePRWtool(offset=0):
    from AthenaCommon.AppMgr import ServiceMgr
    from PyUtils import AthFile
    from ClusterSubmission.Utils import ResolvePath, ClearFromDuplicates
    recoLog = logging.getLogger('XAMPP getPrwConfig')

    use1516Data = isData()
    use17Data = isData()
    use18Data = isData()

    ### The actual mu config file is needed to activate the actual mu reweighting recommended for mc16d & mc16e
    ### https://indico.cern.ch/event/712774/contributions/2928042/attachments/1614637/2565496/prw_mc16d.pdf
    prwConfig_mc16a = []
    prwConfig_mc16d = getGRL(17, flavour='actualMu')
    prwConfig_mc16e = getGRL(18, flavour='actualMu')
    run_channel = [] if isData() else [(getRunNumbersMC(), getMCChannelNumber() + offset)]
    athArgs = getAthenaArgs()
    if not isData() and (len(ServiceMgr.EventSelector.InputCollections) > 1 and athArgs.parseFilesForPRW):
        recoLog.info("Run a local job. Try to find foreach job the prw-config file")
        for i, in_file in enumerate(ServiceMgr.EventSelector.InputCollections):
            recoLog.info("Look up the channel number for %s" % (in_file))
            ### The first file is already used to read the meta-data; we do not need to open it twice
            if i == 0: continue
            af = AthFile.fopen(in_file)
            afII = not isData() and 'tag_info' in af.fileinfos and len(
                [key for key in af.fileinfos['tag_info'].iterkeys() if 'AtlfastII' in key or 'Fast' in key]) > 0
            mc_runNumber = af.fileinfos["run_number"][0] if len(af.fileinfos["run_number"]) > 0 else -1
            mc_channel = af.fileinfos["mc_channel_number"][0] if not isData() and len(af.fileinfos["mc_channel_number"]) > 0 else -1
            ## If the user mixes AFII with fullsim calibration
            ## the results are likely to mismatch. We must prevent this and kill
            ## the job
            if afII != isAF2():
                recoLog.error("You are mixing AFII with Fullsim files. Scale-factors and jet calibration are largely affected. Please fix")
                exit(1)
            run_channel += [(mc_runNumber, mc_channel + offset)]
    ## Find the central repo
    for period_num, mc_channel in run_channel:
        if period_num == 284500:
            config_file = ResolvePath("dev/PileupReweighting/share/DSID{dsid_short}xxx/pileup_mc16a_dsid{dsid}_{sim}.root".format(
                dsid_short=str(mc_channel)[0:3], dsid=mc_channel, sim="AFII" if isAF2() else "FS"))
            use1516Data = True
            if not config_file: continue
            prwConfig_mc16a += [config_file]
        elif period_num == 300000:
            config_file = ResolvePath("dev/PileupReweighting/share/DSID{dsid_short}xxx/pileup_mc16d_dsid{dsid}_{sim}.root".format(
                dsid_short=str(mc_channel)[0:3], dsid=mc_channel, sim="AFII" if isAF2() else "FS"))
            use17Data = True
            if not config_file: continue
            prwConfig_mc16d += [config_file]
        elif period_num == 310000:
            config_file = ResolvePath("dev/PileupReweighting/share/DSID{dsid_short}xxx/pileup_mc16e_dsid{dsid}_{sim}.root".format(
                dsid_short=str(mc_channel)[0:3], dsid=mc_channel, sim="AFII" if isAF2() else "FS"))
            use18Data = True
            if not config_file: continue
            prwConfig_mc16e += [config_file]
        else:
            recoLog.warning("Nothing has been found for the sample %d in prw period %d" % (mc_channel, period_num))
            continue

    ConfigFiles = []
    if use1516Data: ConfigFiles += prwConfig_mc16a
    if use17Data: ConfigFiles += prwConfig_mc16d
    if use18Data: ConfigFiles += prwConfig_mc16e
    return sorted(ClearFromDuplicates(ConfigFiles)), getLumiCalcConfig(use1516Data=use1516Data, use17Data=use17Data, use18Data=use18Data)
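The hard-coded run numbers in configurePRWtool identify the three MC16 pile-up campaigns; spelled out as a lookup (values taken directly from the branches above):

### Equivalent to the if/elif chain above (illustration only).
PRW_PERIODS = {
    284500: "mc16a",  # combined with the data15+16 lumi calc files
    300000: "mc16d",  # combined with the data17 lumi calc files
    310000: "mc16e",  # combined with the data18 lumi calc files
}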
Example #19
def getUnPreScaledTrigger():
    triggers = []
    for trigs in getLowestUnPrescaled().itervalues():
        triggers += trigs
    return ClearFromDuplicates(constrainToPeriods(triggers))