d = "/".join(jec_path.split("/")[:-1])
	prefix = jec_path.split("/")[-1]
	if not os.path.exists(d):
		return False
	txts = [f for f in os.listdir(d) if ".txt" in f]
	for flavor in flavors:
		if not "{}_{}_{}_{}PFchs.txt".format(prefix, ("MC", "DATA")[data == True], flavor, algorithm.upper()) in txts:
			return False
	return True
# /FUNCTIONS:


# SET UP:
## Very basic variables:
out_dir_default = "~/temp"  # Where output is written when CRAB is not putting it into EOS.
cmssw_version = cmssw.get_version()  # The CMSSW version this configuration file is using.

## Build the process object:
process = cms.Process("fatjets")

## Command-line variables and options:
options = VarParsing("analysis")
### General options:
# "crab": boolean singleton flag, off by default.
options.register(
	"crab",
	False,
	VarParsing.multiplicity.singleton,
	VarParsing.varType.bool,
	"Turn this on from inside crab configuration files.",
)
options.register ('data',
	False,
def main():
    """Write condor job files for every MiniAOD dataset matching the query.

    For each entry returned by ``dataset.fetch_entries("miniaod", a.query)``
    the input files are packed into groups (see ``_group_files``), and the
    following files are written under
    ``condor_jobs/tuplizer/<time>/<subprocess>_<generation>_<suffix>``:

    * ``job_N.sh``  -- untars the CMSSW area, runs ``tuplizer_cfg.py`` on the
      group's files, copies the output to EOS and removes the local copy.
    * ``job_N.jdl`` -- the condor submit description for ``job_N.sh``.
    * ``run.sh``    -- checks the grid proxy, builds the CMSSW tarball,
      creates the EOS output directory and submits every job.

    Relies on the module-level names ``cut_pt_filter`` and ``n_per``.

    Returns:
        True, always.
    """
    # Arguments:
    a = variables.arguments()
    miniaods = dataset.fetch_entries("miniaod", a.query)
    tstring = utilities.time_string()[:-4]
    suffix = "cutpt{}".format(cut_pt_filter)
    cmssw_version = cmssw.get_version(parsed=False)

    for miniaod in miniaods:
        print("Making condor setup for {} ...".format(miniaod.Name))
        sample = miniaod.get_sample()

        # Create groups of input files:
        groups = _group_files(miniaod.files, miniaod.ns, n_per)
        print("\tCreating {} jobs ...".format(len(groups)))

        # Prepare directories:
        path = "condor_jobs/tuplizer/{}/{}_{}_{}".format(tstring, miniaod.subprocess, miniaod.generation, suffix)
        log_path = path + "/logs"
        for directory in (path, log_path):
            if not os.path.exists(directory):
                os.makedirs(directory)
        eos_path = "/store/user/tote/{}/tuple_{}_{}_{}/{}".format(
            sample.name, miniaod.subprocess, miniaod.generation, suffix, tstring
        )  # Output path.

        # Create one job script and one condor config per group (numbered from 1):
        for number, group in enumerate(groups, 1):
            out_file = "tuple_{}_{}_{}_{}.root".format(miniaod.subprocess, miniaod.generation, suffix, number)
            in_files = ",".join(['"{}"'.format(g) for g in group])
            run_cmd = 'cmsRun tuplizer_cfg.py subprocess="{}" generation="{}" cutPtFilter={} outDir="." outFile="{}" inFile={}'.format(
                miniaod.subprocess, miniaod.generation, cut_pt_filter, out_file, in_files
            )
            if sample.data:
                run_cmd += " data=True"
            if sample.mask:
                run_cmd += ' mask="{}"'.format(sample.mask)

            _write_script("{}/job_{}.sh".format(path, number), [
                "#!/bin/bash",
                "",
                "# Untar CMSSW area:",
                "tar -xzf {}.tar.gz".format(cmssw_version),
                "cd {}/src/Analyzers/FatjetAnalyzer/test".format(cmssw_version),
                "",
                "# Setup CMSSW:",
                "source /cvmfs/cms.cern.ch/cmsset_default.sh",
                "eval `scramv1 runtime -sh`\t\t#cmsenv",
                "",
                "# Run CMSSW:",
                # The "&&" chain means the copy only happens if cmsRun succeeded,
                # and the local file is only removed if the copy succeeded.
                run_cmd + " &&",
                "xrdcp -f {} root://cmseos.fnal.gov/{} &&".format(out_file, eos_path),
                "rm {}".format(out_file),
            ])

            # These lines are deliberately not passed through str.format():
            # "${LOGNAME}" must reach the file literally.
            _write_script("{}/job_{}.jdl".format(path, number), [
                "universe = vanilla",
                "Executable = job_{}.sh".format(number),
                "Should_Transfer_Files = YES",
                "WhenToTransferOutput = ON_EXIT",
                "Transfer_Input_Files = {}.tar.gz".format(cmssw_version),
                'Transfer_Output_Files = ""',
                "Output = logs/job_{}.stdout".format(number),
                "Error = logs/job_{}.stderr".format(number),
                "Log = logs/job_{}.log".format(number),
                "notify_user = ${LOGNAME}@FNAL.GOV",
                "x509userproxy = $ENV(X509_USER_PROXY)",
                "Queue 1",
            ])

        # Create run script:
        run_lines = [
            # The proxy check below uses bash-only "[[ ... ]]", so say so explicitly.
            "#!/bin/bash",
            "",
            "# Update cache info:",
            "bash $HOME/condor/cache.sh",
            "",
            "# Grid proxy existence & expiration check:",
            "PCHECK=`voms-proxy-info -timeleft`",
            'if [[ ($? -ne 0) || ("$PCHECK" -eq 0) ]]; then',
            "\tvoms-proxy-init -voms cms --valid 168:00",
            "fi",
            "",
            "# Copy python packages to CMSSW area:",
            "cp -r $HOME/decortication/decortication $CMSSW_BASE/python",
            "cp -r $HOME/decortication/resources $CMSSW_BASE/python",
            "cp -r $HOME/truculence/truculence $CMSSW_BASE/python",
            "",
            "# Make tarball:",
            "echo 'Making a tarball of the CMSSW area ...'",
            "tar --exclude-caches-all -zcf ${CMSSW_VERSION}.tar.gz -C ${CMSSW_BASE}/.. ${CMSSW_VERSION}",
            "",
            "# Prepare EOS:",
            "eos root://cmseos.fnal.gov mkdir -p {}".format(eos_path),
            "",
            "# Submit condor jobs:",
        ]
        run_lines.extend("condor_submit job_{}.jdl".format(number) for number in range(1, len(groups) + 1))
        run_lines.extend([
            "",
            "# Remove tarball:",
            # Left commented out in the generated script: if the tarball is
            # removed here, the jobs might complain.
            "#rm ${CMSSW_VERSION}.tar.gz",
            "",
            "# Remove python packages:",
            "#rm -rf $CMSSW_BASE/python/decortication",
            "#rm -rf $CMSSW_BASE/python/resources",
            "#rm -rf $CMSSW_BASE/python/truculence",
        ])
        _write_script("{}/run.sh".format(path), run_lines)

        print("\tThe jobs are in {}".format(path))
    return True


def _group_files(files, ns, n_min):
    """Pack ``files`` into consecutive groups whose ``ns`` sum reaches ``n_min``.

    ``ns[i]`` is the weight of ``files[i]`` (presumably its number of events
    -- confirm against the dataset module). A group is closed as soon as its
    summed weight is at least ``n_min``; the last group takes whatever is left.
    """
    groups = []
    current = []
    n_current = 0
    last = len(ns) - 1
    for i, n in enumerate(ns):
        n_current += n
        current.append(files[i])
        if n_current >= n_min or i == last:
            groups.append(current)
            current = []
            n_current = 0
    return groups


def _write_script(file_name, lines):
    """Write ``lines`` to ``file_name``, each terminated by a newline."""
    with open(file_name, "w") as out:
        out.write("\n".join(lines) + "\n")
Example #3
0
    def create_jobs(self, cmd="", memory=2000, input_files=None):
        """Write condor job files for each input file of this analyzer.

        One ``analyzer_jobN_<time>.sh`` / ``.jdl`` pair is written per entry of
        ``self.get_files(info=True)``, plus a ``run.sh`` that checks the grid
        proxy, tars up the CMSSW area, creates the output directory and
        submits every job. Everything goes into
        ``condor_jobs/<self.name>/<time>``.

        Args:
            cmd: Command run by each job. The placeholders ``%%FILE%%``,
                ``%%PROCESS%%`` and ``%%N%%`` are replaced by the input file,
                the entry's ``"process"`` value and the 1-based job number.
                Defaults to
                ``python <self.name>.py -f %%FILE%% -o job_%%N%%.root``.
                The job script always moves/copies ``job_N.root`` afterwards,
                so a custom command must produce a file of that name.
            memory: Value written to ``request_memory`` in each condor config.
            input_files: Extra file name, or iterable of file names, to ship
                with each job. Only names without a "/" are used; they are
                resolved against the current working directory. ``None``
                (the default) means no extra files.

        Returns:
            The directory the job files were written to.
        """
        # Define variables:
        Site = dataset.Site
        data_dir = Site.get_dir("data")
        cmssw_version = cmssw.get_version(parsed=False)
        if not cmd:
            cmd = "python {}.py -f %%FILE%% -o job_%%N%%.root".format(self.name)
        tstring = utilities.time_string()[:-4]
        path = "condor_jobs/{}/{}".format(self.name, tstring)
        log_path = path + "/logs"
        out_path = os.path.join(data_dir.path, "analyzer_jobs", tstring)  # Output path.
        test_dir = "{}/src/Analyzers/FatjetAnalyzer/test".format(cmssw_version)
        cwd = os.getcwd()

        # Normalize input_files to a list so the loops below are safe when it
        # is None (previously the default value raised a TypeError as soon as
        # there was a file to process).
        if isinstance(input_files, str):
            input_files = [input_files]
        # NOTE(review): names containing "/" are dropped here rather than kept
        # as-is -- confirm that this is intended.
        input_files = [cwd + "/" + name for name in (input_files or []) if "/" not in name]

        files_for_condor = [
            "{}/{}.py".format(cwd, self.name),
            "{}.tar.gz".format(cmssw_version),
        ]
        files_for_condor.extend(input_files)

        # Make directories
        for p in [path, log_path]:
            if not os.path.exists(p):
                os.makedirs(p)

        # Make job files:
        files = self.get_files(info=True)
        job_names = ["analyzer_job{}_{}".format(n, tstring) for n in range(1, len(files) + 1)]

        ## Make job scripts:
        for n, (job_name, f_dict) in enumerate(zip(job_names, files), 1):
            f = f_dict["file"]
            if f.startswith("/store/user/"):
                f = "root://cmseos.fnal.gov/" + f
            lines = [
                "#!/bin/bash",
                "",
                "# Untar CMSSW area:",
                "tar -xzf {}.tar.gz &&".format(cmssw_version),
            ]
            # Copy the shipped input files next to the analyzer.
            # NOTE(review): this tests the data file `f`, not the input file;
            # it looks like the input file's path was meant -- confirm.
            if "CMSSW_" not in f:
                lines.extend(
                    "cp {} {}".format(input_file.split("/")[-1], test_dir)
                    for input_file in input_files
                )
            lines.extend([
                "cd {}".format(test_dir),
                "",
                "# Setup CMSSW:",
                "source /cvmfs/cms.cern.ch/cmsset_default.sh",
                "scramv1 b ProjectRename",
                "eval `scramv1 runtime -sh`\t\t#cmsenv",
                "",
                cmd.replace("%%FILE%%", f).replace("%%PROCESS%%", f_dict["process"]).replace("%%N%%", str(n)),
            ])
            if data_dir.eos:
                lines.append("xrdcp -f job_{}.root root://{}/{}".format(n, Site.url_eos, out_path))
            else:
                lines.append("mv -f job_{}.root {}".format(n, out_path))
            with open("{}/{}.sh".format(path, job_name), "w") as out:
                out.write("\n".join(lines) + "\n")

        ## Make condor configs:
        # The proxy lines are not passed through str.format(): "${LOGNAME}"
        # must reach the file literally.
        if Site.name == "hexcms":
            proxy_lines = [
                "notify_user = ${LOGNAME}@FNAL.GOV",
                "x509userproxy = $ENV(HOME)/myproxy",
            ]
        else:
            proxy_lines = ["x509userproxy = $ENV(X509_USER_PROXY)"]
        for job_name in job_names:
            lines = [
                "universe = vanilla",
                "Executable = {}.sh".format(job_name),
                "Should_Transfer_Files = YES",
                "WhenToTransferOutput = ON_EXIT",
                "Transfer_Input_Files = {}".format(",".join(files_for_condor)),
                # Nothing is transferred back: the job script moves/copies its own output.
                "Transfer_Output_Files = \"\"",
                "Output = logs/{}.stdout".format(job_name),
                "Error = logs/{}.stderr".format(job_name),
                "Log = logs/{}.log".format(job_name),
            ]
            lines.extend(proxy_lines)
            lines.extend([
                "request_memory = {}".format(memory),
                "Queue 1",
            ])
            with open("{}/{}.jdl".format(path, job_name), "w") as out:
                out.write("\n".join(lines) + "\n")

        ## Make run script:
        lines = [
            "#!/bin/bash",
            "",
            "# Grid proxy existence & expiration check:",
            "PCHECK=`voms-proxy-info -timeleft`",
            "if [[ ($? -ne 0) || (\"$PCHECK\" -eq 0) ]]; then",
            "\tvoms-proxy-init -voms cms --valid 168:00",
            "fi",
            "",
            "# Make tarball:",
            "echo 'Making a tarball of the CMSSW area ...'",
            "tar --exclude-caches-all -zcf ${CMSSW_VERSION}.tar.gz -C ${CMSSW_BASE}/.. ${CMSSW_VERSION}",
            "",
            "# Prepare output directory:",
        ]
        if data_dir.eos:
            lines.append("eos root://{} mkdir -p {}".format(Site.url_eos, out_path))
        else:
            lines.append("mkdir -p {}".format(out_path))
        lines.extend([
            "",
            "# Submit condor jobs:",
        ])
        lines.extend("condor_submit {}.jdl".format(job_name) for job_name in job_names)

        with open("{}/run.sh".format(path), "w") as out:
            out.write("\n".join(lines) + "\n")

        print("The jobs are in {}".format(path))

        return path