コード例 #1
0
	def __init__(self,
		tuples,                         # A file path, a list of file paths or datasets, or a {sample: list} dictionary.
		name=None,
		out_dir=None,                   # The directory where the output should go.
		out_file=None,                  # The name of the output file (including ".root")
		save=True,
		v=False,
		tt_names=["analyzer/events"],   # The names of the input TTrees
		count=True,
	):
		"""Set up the input TChains, the ROOT output and one event loop per chain.

		Arguments:
			tuples:   a single file path (str), a list of file paths, a list of
			          dataset.dataset instances, or a dictionary mapping a sample
			          name to such a list. Strings and lists are filed under the
			          analyzer's own name.
			name:     the analyzer name; defaults to the caller's file name with
			          its last three characters (presumably ".py") removed.
			out_dir:  output directory; defaults to "results/<name>_<time>".
			out_file: output file name; defaults to "<name>_<time>.root".
			save:     if true, create the output directory, the output TFile and
			          one output TTree per input chain.
			v:        if true, print progress messages.
			tt_names: names of the TTrees to chain from every input file. The
			          list is only read here, so the shared default is harmless.
			count:    stored as self.count; not used any further in this method.

		The process exits if "tuples" has an unsupported type or "tt_names" is empty.
		"""
		# Batch mode has to be on before the first TCanvas is created, otherwise a window still opens:
		gROOT.SetBatch()

		# Arguments and variables:
		self.name = inspect.stack()[1][1][:-3] if name is None else name		# Default to the calling script's name.
		self.time_string = utilities.time_string()[:-4]		# A time string indicating when the analyzer was created
		self.save = save
		self.count = count
		self.tuples_in = tuples		# The raw input, kept for potential debugging.

		# Organize input into a {sample: list} dictionary:
		if isinstance(tuples, str):
			tuples = [tuples]
		if isinstance(tuples, list):
			tuples = {self.name: tuples}
		elif not isinstance(tuples, dict):
			print("ERROR (analyzer): \"tuples\" should be a string, list, or dictionary.")
			sys.exit()
		if not tt_names:
			# Without a tree name no chain could be built and the analyzer would be silently empty.
			print("ERROR (analyzer.__init__): \"tt_names\" should contain at least one TTree name.")
			sys.exit()

		# Make the TChains:
		if v: print("Making TChain(s) ...")
		self.tt_in = {}
		self.tt_info = {}
		canvas_name = "tc_{}".format(self.name)		# Use the resolved name, so the default is not "tc_None".
		self.tc = TCanvas(canvas_name, canvas_name, 500, 500)
		SetOwnership(self.tc, 0)
		several_samples = len(tuples) > 1
		several_trees = len(tt_names) > 1
		xrootd = "root://cmsxrootd.fnal.gov/"
		for sample, tups in tuples.items():
			ns = []

			# Handle different input schemes (either list of datasets or list of file names):
			if tups and isinstance(tups[0], dataset.dataset):
				# A bit KLUDGY: replace the datasets by their files, adding the xrootd prefix where it's missing.
				fs = []
				for tup in tups:
					fs += [f if xrootd in f else xrootd + f for f in tup.files]
					ns += tup.ns
				tups = fs
			if v: print("\tMaking TChain(s) for {} ...".format(sample))
			for tt_name in tt_names:
				if v: print("\t\tTChain named {}:".format(tt_name))
				tt = TChain(tt_name)
				for tup in tups:
					tt.Add(tup)
					if v: print("\t\t+ {}".format(tup))
				SetOwnership(tt, 0)

				# Use the shortest key that is still unique:
				if not several_trees:
					key = sample
				elif several_samples:
					key = "_".join([sample, tt_name])
				else:
					key = tt_name
				self.tt_in[key] = tt
				self.tt_info[key] = {"ns": ns}		# The same "ns" list is shared by every tree of this sample.

		# ROOT setup:
		gROOT.SetStyle("Plain")
		gStyle.SetTitleBorderSize(0)
		gStyle.SetPalette(1)

		# Organize output:
		if save:
			# Fall back to the defaults for whatever wasn't given:
			self.out_dir = out_dir if out_dir else "results/{}_{}".format(self.name, self.time_string)
			if not os.path.exists(self.out_dir):
				os.makedirs(self.out_dir)
			self.out_file = out_file if out_file else "{}_{}.root".format(self.name, self.time_string)

			# Define new attributes:
			self.out_path = self.out_dir + "/" + self.out_file
			## ROOT output:
			self.out = TFile(self.out_path, "RECREATE")
			SetOwnership(self.out, 0)
			### Tuple output (one TTree per input chain):
			self.tt_out = {}
			for key in self.tt_in:
				self.tt_out[key] = TTree(key, 'anatuple')
				SetOwnership(self.tt_out[key], 0)
			### Histograms:
			self.plots = []

		# Event loops (one per input chain):
		self.loops = {}
		for key in self.tt_in:
			self.loops[key] = event_loop(self, key)
コード例 #2
0
    def __init__(
        self,
        tuples,  # A file path, a list of file paths or datasets, or a {sample: list} dictionary.
        name=None,
        out_dir=None,  # The directory where the output should go.
        out_file=None,  # The name of the output file (including ".root")
        save=True,
        v=False,
        tt_names=["tuplizer/events"],  # The names of the input TTrees
        count=None,
        use_condor=False,
    ):
        """Set up the input TChains, the ROOT output and one event loop per chain.

        Arguments:
            tuples:     a single file path (str), a list of file paths, a list
                        of dataset.dataset instances, or a dictionary mapping a
                        sample name to such a list. File paths are filed under
                        the analyzer's name, datasets under their "process".
            name:       the analyzer name; defaults to the caller's file name
                        with its last three characters (presumably ".py")
                        removed.
            out_dir:    output directory; defaults to "results/<name>_<time>".
            out_file:   output file name; defaults to "<name>_<time>.root".
            save:       if true, create the output directory, the output TFile
                        and one output TTree per input chain.
            v:          if true, print progress messages.
            tt_names:   names of the TTrees to chain from every input file; if
                        empty, each sample's own name is used as the tree name.
            count:      stored as self.count. If None, it becomes True when any
                        input is a plain file path and False otherwise.
            use_condor: if truthy, the output goes to "./job_<use_condor>.root"
                        and out_dir/out_file are ignored (so this presumably
                        carries the job number -- confirm against the caller).

        The process exits if "tuples" is not a string, list or dictionary, or is
        a list mixing types.
        """
        # Basic configuration:
        gROOT.SetBatch()  # Prevent ROOT canvas windows from opening

        # Arguments and variables:
        self.name = inspect.stack()[1][1][:-3] if name is None else name  # Default to the calling script's name.
        self.time_string = utilities.time_string()[:-4]  # A time string indicating when the analyzer was created
        self.save = save
        self.count = count
        self.condor = use_condor
        self.out_file = out_file
        self.out_dir = out_dir
        self.tuples_in = tuples  # The raw input, kept for debugging.
        self.tuples = {}
        self.tt_names = tt_names

        # Organize input into a {sample: list} dictionary:
        if isinstance(tuples, str):
            tuples = [tuples]
        if isinstance(tuples, list):
            if all(isinstance(tup, str) for tup in tuples):
                self.tuples = {self.name: tuples}
            elif all(isinstance(tup, dataset.dataset) for tup in tuples):
                # Group the datasets by process:
                for tup in tuples:
                    self.tuples.setdefault(tup.process, []).append(tup)
            else:
                print("ERROR (analyzer): unrecognized input:")
                print(self.tuples_in)
                sys.exit()
        elif isinstance(tuples, dict):
            self.tuples = tuples
        else:
            print("ERROR (analyzer): \"tuples\" should be a string, list, or dictionary.")
            print("tuples = {}".format(tuples))
            sys.exit()

        # Decide whether to count events, if the caller didn't:
        if self.count is None:
            inputs = utilities.flatten_list(list(self.tuples.values()))
            self.count = any(isinstance(tup, str) for tup in inputs)

        ## Create TChains:
        if v: print("Making TChain(s) ...")
        self.tt_in = {}
        self.tt_info = {}
        canvas_name = "tc_{}".format(self.name)  # Use the resolved name, so the default is not "tc_None".
        self.tc = TCanvas(canvas_name, canvas_name, 500, 500)
        SetOwnership(self.tc, 0)
        several_samples = len(self.tuples) > 1
        for sample, tups in self.tuples.items():
            ns = []

            # Handle different input schemes (either list of datasets or list of file names):
            if tups and isinstance(tups[0], dataset.dataset):
                # A bit KLUDGY: replace the datasets by their files; files on EOS get the xrootd prefix.
                fs = []
                for tup in tups:
                    fs += [
                        "root://cmsxrootd.fnal.gov/" + f if tup.dir.eos else f
                        for f in tup.files
                    ]
                    ns += tup.ns
                tups = fs
            if v: print("\tMaking TChain(s) for {} ...".format(sample))

            # Without explicit tree names, the tree is assumed to be named after the sample:
            tree_names = self.tt_names or [sample]
            several_trees = len(tree_names) > 1
            for tt_name in tree_names:
                if v: print("\t\tTChain named {}:".format(tt_name))
                tt = TChain(tt_name)
                for tup in tups:
                    tt.Add(tup)
                    if v: print("\t\t+ {}".format(tup))
                SetOwnership(tt, 0)

                # Use the shortest key that is still unique:
                if not several_trees:
                    key = sample
                elif several_samples:
                    key = "_".join([sample, tt_name])
                else:
                    key = tt_name
                self.tt_in[key] = tt
                self.tt_info[key] = {"ns": ns}  # The same "ns" list is shared by every tree of this sample.

        # ROOT setup:
        gROOT.SetStyle("Plain")
        gStyle.SetTitleBorderSize(0)
        gStyle.SetPalette(1)

        # Organize output:
        if save:
            if self.condor:
                # Condor jobs always write to a fixed place in the working directory:
                self.out_dir = "."
                self.out_file = "job_{}.root".format(self.condor)
            else:
                # Fall back to the defaults for whatever wasn't given:
                if not out_dir:
                    self.out_dir = "results/{}_{}".format(self.name, self.time_string)
                if not out_file:
                    self.out_file = "{}_{}.root".format(self.name, self.time_string)
            if not os.path.exists(self.out_dir):
                os.makedirs(self.out_dir)

            # Define new attributes:
            self.out_path = self.out_dir + "/" + self.out_file
            ## ROOT output:
            self.out = TFile(self.out_path, "RECREATE")
            SetOwnership(self.out, 0)
            ### Tuple output (one TTree per input chain):
            self.tt_out = {}
            for key in self.tt_in:
                self.tt_out[key] = TTree(key, 'anatuple')
                SetOwnership(self.tt_out[key], 0)
            ### Histograms:
            self.plots = []

        # Event loops (one per input chain):
        self.loops = {}
        for key in self.tt_in:
            self.loops[key] = event_loop(self, key)
コード例 #3
0
def _group_files_by_event_count(files, ns, n_per_group):
    """Split files into consecutive groups of at least n_per_group events each.

    ns[i] is added to a running total when files[i] joins the current group
    (presumably it is the number of events in that file). A group is closed as
    soon as its total reaches n_per_group; the last group is closed regardless
    of its total. Returns the list of groups (lists of files).
    """
    groups = []
    group = []
    n_group = 0
    last = len(ns) - 1
    for i, n in enumerate(ns):
        n_group += n
        group.append(files[i])
        if n_group >= n_per_group or i == last:
            groups.append(group)
            group = []
            n_group = 0
    return groups


def main():
    """Write the condor job scripts, submit files and run script for each queried miniAOD dataset.

    For every dataset entry, the input files are grouped into jobs and the
    following files are written under
    "condor_jobs/tuplizer/<time>/<subprocess>_<generation>_<suffix>":
    "job_<N>.sh" and "job_<N>.jdl" for each job, plus a "run.sh" that prepares
    the tarball and the EOS output directory and submits all jobs.

    Relies on the module-level names "cut_pt_filter" and "n_per", which are
    defined outside this function.

    Returns:
        True, always.
    """
    # Arguments:
    a = variables.arguments()
    miniaods = dataset.fetch_entries("miniaod", a.query)
    tstring = utilities.time_string()[:-4]
    suffix = "cutpt{}".format(cut_pt_filter)
    cmssw_version = cmssw.get_version(parsed=False)

    for miniaod in miniaods:
        print("Making condor setup for {} ...".format(miniaod.Name))
        sample = miniaod.get_sample()

        # Create groups of input files (one group per job):
        groups = _group_files_by_event_count(miniaod.files, miniaod.ns, n_per)
        print("\tCreating {} jobs ...".format(len(groups)))

        # Prepare directories:
        path = "condor_jobs/tuplizer/{}/{}_{}_{}".format(tstring, miniaod.subprocess, miniaod.generation, suffix)
        log_path = path + "/logs"
        for directory in (path, log_path):
            if not os.path.exists(directory):
                os.makedirs(directory)
        eos_path = "/store/user/tote/{}/tuple_{}_{}_{}/{}".format(
            sample.name, miniaod.subprocess, miniaod.generation, suffix, tstring
        )  # Output path.

        # Create job scripts:
        for i, group in enumerate(groups):
            number = i + 1
            out_file = "tuple_{}_{}_{}_{}.root".format(miniaod.subprocess, miniaod.generation, suffix, number)
            in_files = ",".join(['"{}"'.format(g) for g in group])
            run_cmd = 'cmsRun tuplizer_cfg.py subprocess="{}" generation="{}" cutPtFilter={} outDir="." outFile="{}" inFile={}'.format(
                miniaod.subprocess, miniaod.generation, cut_pt_filter, out_file, in_files
            )
            if sample.data:
                run_cmd += " data=True"
            if sample.mask:
                run_cmd += ' mask="{}"'.format(sample.mask)
            job_script = "".join([
                "#!/bin/bash\n",
                "\n",
                "# Untar CMSSW area:\n",
                "tar -xzf {}.tar.gz\n".format(cmssw_version),
                "cd {}/src/Analyzers/FatjetAnalyzer/test\n".format(cmssw_version),
                "\n",
                "# Setup CMSSW:\n",
                "source /cvmfs/cms.cern.ch/cmsset_default.sh\n",
                "eval `scramv1 runtime -sh`\t\t#cmsenv\n",
                "\n",
                "# Run CMSSW:\n",
                # The copy and the cleanup only happen if the previous step succeeded:
                run_cmd + " &&\n",
                "xrdcp -f {} root://cmseos.fnal.gov/{} &&\n".format(out_file, eos_path),
                "rm {}\n".format(out_file),
            ])
            with open("{}/job_{}.sh".format(path, number), "w") as out:
                out.write(job_script)

        # Create condor configs:
        for number in range(1, len(groups) + 1):
            job_config = "".join([
                "universe = vanilla\n",
                "Executable = job_{}.sh\n".format(number),
                "Should_Transfer_Files = YES\n",
                "WhenToTransferOutput = ON_EXIT\n",
                "Transfer_Input_Files = {}.tar.gz\n".format(cmssw_version),
                # Nothing is transferred back; the job script copies its output to EOS itself.
                'Transfer_Output_Files = ""\n',
                "Output = logs/job_{}.stdout\n".format(number),
                "Error = logs/job_{}.stderr\n".format(number),
                "Log = logs/job_{}.log\n".format(number),
                "notify_user = ${LOGNAME}@FNAL.GOV\n",
                "x509userproxy = $ENV(X509_USER_PROXY)\n",
                "Queue 1\n",
            ])
            with open("{}/job_{}.jdl".format(path, number), "w") as out:
                out.write(job_config)

        # Create run script:
        run_lines = [
            # The script uses the bash-only "[[ ]]" test, so ask for bash explicitly:
            "#!/bin/bash\n",
            "\n",
            "# Update cache info:\n",
            "bash $HOME/condor/cache.sh\n",
            "\n",
            "# Grid proxy existence & expiration check:\n",
            "PCHECK=`voms-proxy-info -timeleft`\n",
            'if [[ ($? -ne 0) || ("$PCHECK" -eq 0) ]]; then\n',
            "\tvoms-proxy-init -voms cms --valid 168:00\n",
            "fi\n",
            "\n",
            "# Copy python packages to CMSSW area:\n",
            "cp -r $HOME/decortication/decortication $CMSSW_BASE/python\n",
            "cp -r $HOME/decortication/resources $CMSSW_BASE/python\n",
            "cp -r $HOME/truculence/truculence $CMSSW_BASE/python\n",
            "\n",
            "# Make tarball:\n",
            "echo 'Making a tarball of the CMSSW area ...'\n",
            "tar --exclude-caches-all -zcf ${CMSSW_VERSION}.tar.gz -C ${CMSSW_BASE}/.. ${CMSSW_VERSION}\n",
            "\n",
            "# Prepare EOS:\n",
            "eos root://cmseos.fnal.gov mkdir -p {}\n".format(eos_path),
            "\n",
            "# Submit condor jobs:\n",
        ]
        run_lines += ["condor_submit job_{}.jdl\n".format(number) for number in range(1, len(groups) + 1)]
        run_lines += [
            "\n",
            "# Remove tarball:\n",
            "#rm ${CMSSW_VERSION}.tar.gz\n",  # If I remove this, the jobs might complain.
            "\n",
            "# Remove python packages:\n",
            "#rm -rf $CMSSW_BASE/python/decortication\n",
            "#rm -rf $CMSSW_BASE/python/resources\n",
            "#rm -rf $CMSSW_BASE/python/truculence\n",
        ]
        with open("{}/run.sh".format(path), "w") as out:
            out.write("".join(run_lines))

        print("\tThe jobs are in {}".format(path))
    return True
コード例 #4
0
    def create_jobs(self, cmd="", memory=2000, input_files=None):
        """Write one condor job per input file, plus a run script that submits them all.

        The files are written to "condor_jobs/<name>/<time>": a job script
        ("<job>.sh") and a condor config ("<job>.jdl") for every entry of
        self.get_files(info=True), and a "run.sh" that checks the grid proxy,
        makes the CMSSW tarball, prepares the output directory and submits the
        jobs. Each entry is expected to be a dictionary with "file" and
        "process" keys.

        Arguments:
            cmd:         the command each job runs. "%%FILE%%", "%%PROCESS%%"
                         and "%%N%%" are replaced by the job's input file, its
                         process and the job number. Defaults to
                         "python <name>.py -f %%FILE%% -o job_%%N%%.root".
            memory:      the value written as "request_memory" in each condor
                         config.
            input_files: an extra file, or list of files, to transfer with each
                         job and copy into the test directory. Names without a
                         "/" are taken relative to the current directory; paths
                         are used as given.

        Returns:
            The directory the job files were written to.
        """
        # Define variables:
        Site = dataset.Site
        data_dir = Site.get_dir("data")
        cmssw_version = cmssw.get_version(parsed=False)
        if not cmd:
            cmd = "python {}.py -f %%FILE%% -o job_%%N%%.root".format(self.name)
        tstring = utilities.time_string()[:-4]
        path = "condor_jobs/{}/{}".format(self.name, tstring)
        log_path = path + "/logs"
        out_path = os.path.join(data_dir.path, "analyzer_jobs", tstring)  # Output path.

        # Normalize the extra input files to a (possibly empty) list of paths:
        cwd = os.getcwd()
        if isinstance(input_files, str):
            input_files = [input_files]
        input_files = [f if "/" in f else cwd + "/" + f for f in (input_files or [])]
        files_for_condor = [
            "{}/{}.py".format(cwd, self.name),
            "{}.tar.gz".format(cmssw_version),
        ] + input_files

        # Make directories
        for p in [path, log_path]:
            if not os.path.exists(p):
                os.makedirs(p)

        # Make job files:
        files = self.get_files(info=True)
        job_names = ["analyzer_job{}_{}".format(i + 1, tstring) for i in range(len(files))]

        ## Make job scripts:
        for i, f_dict in enumerate(files):
            number = i + 1
            f = f_dict["file"]
            if f[:12] == "/store/user/":
                f = "root://cmseos.fnal.gov/" + f
            job_script = "#!/bin/bash\n"
            job_script += "\n"
            job_script += "# Untar CMSSW area:\n"
            job_script += "tar -xzf {}.tar.gz &&\n".format(cmssw_version)
            for input_file in input_files:
                # Condor puts transferred files in the job's working directory under their base name.
                input_file = input_file.split("/")[-1]
                # NOTE(review): this tests the data file "f", not "input_file" -- confirm that's intended.
                if "CMSSW_" not in f:
                    job_script += "cp {} {}/src/Analyzers/FatjetAnalyzer/test\n".format(input_file, cmssw_version)
            job_script += "cd {}/src/Analyzers/FatjetAnalyzer/test\n".format(cmssw_version)
            job_script += "\n"
            job_script += "# Setup CMSSW:\n"
            job_script += "source /cvmfs/cms.cern.ch/cmsset_default.sh\n"
            job_script += "scramv1 b ProjectRename\n"
            job_script += "eval `scramv1 runtime -sh`\t\t#cmsenv\n"
            job_script += "\n"
            job_cmd = cmd.replace("%%FILE%%", f)
            job_cmd = job_cmd.replace("%%PROCESS%%", f_dict["process"])
            job_cmd = job_cmd.replace("%%N%%", str(number))
            job_script += job_cmd + "\n"
            if data_dir.eos:
                job_script += "xrdcp -f job_{}.root root://{}/{}\n".format(number, Site.url_eos, out_path)
            else:
                job_script += "mv -f job_{}.root {}\n".format(number, out_path)
            with open("{}/{}.sh".format(path, job_names[i]), "w") as out:
                out.write(job_script)

        ## Make condor configs:
        for job_name in job_names:
            job_config = "universe = vanilla\n"
            job_config += "Executable = {}.sh\n".format(job_name)
            job_config += "Should_Transfer_Files = YES\n"
            job_config += "WhenToTransferOutput = ON_EXIT\n"
            job_config += "Transfer_Input_Files = {}\n".format(",".join(files_for_condor))
            # Nothing is transferred back; the job script moves its output itself.
            job_config += "Transfer_Output_Files = \"\"\n"
            job_config += "Output = logs/{}.stdout\n".format(job_name)
            job_config += "Error = logs/{}.stderr\n".format(job_name)
            job_config += "Log = logs/{}.log\n".format(job_name)
            if Site.name == "hexcms":
                job_config += "notify_user = ${LOGNAME}@FNAL.GOV\n"
                job_config += "x509userproxy = $ENV(HOME)/myproxy\n"
            else:
                job_config += "x509userproxy = $ENV(X509_USER_PROXY)\n"
            job_config += "request_memory = {}\n".format(memory)
            job_config += "Queue 1\n"

            with open("{}/{}.jdl".format(path, job_name), "w") as out:
                out.write(job_config)

        ## Make run script:
        run_script = "#!/bin/bash\n"
        run_script += "\n"
        run_script += "# Grid proxy existence & expiration check:\n"
        run_script += "PCHECK=`voms-proxy-info -timeleft`\n"
        run_script += "if [[ ($? -ne 0) || (\"$PCHECK\" -eq 0) ]]; then\n"
        run_script += "\tvoms-proxy-init -voms cms --valid 168:00\n"
        run_script += "fi\n"
        run_script += "\n"
        run_script += "# Make tarball:\n"
        run_script += "echo 'Making a tarball of the CMSSW area ...'\n"
        run_script += "tar --exclude-caches-all -zcf ${CMSSW_VERSION}.tar.gz -C ${CMSSW_BASE}/.. ${CMSSW_VERSION}\n"
        run_script += "\n"
        run_script += "# Prepare output directory:\n"
        if data_dir.eos:
            run_script += "eos root://{} mkdir -p {}\n".format(Site.url_eos, out_path)
        else:
            run_script += "mkdir -p {}\n".format(out_path)
        run_script += "\n"
        run_script += "# Submit condor jobs:\n"
        for job_name in job_names:
            run_script += "condor_submit {}.jdl\n".format(job_name)

        with open("{}/run.sh".format(path), "w") as out:
            out.write(run_script)

        print("The jobs are in {}".format(path))

        return path