Example #1
# Assumed imports for this excerpt (the module header is not shown on the
# original page; "analyzer" is assumed to come from the same package):
import sys
from ROOT import gROOT
from decortication import variables, dataset, analyzer

def main():
	# Setup:
	## ROOT:
	gROOT.SetBatch()
	
	## Arguments:
	a = variables.arguments()
	args = a.args
	
	tuples = a.input
	if not tuples:
		tuples = dataset.fetch_entries("tuple", a.query)
		tuples = dataset.sort_datasets(tuples, collapse=True)		# Among other things, this combines "extension" datasets.
	else:
		tuples = {args.process: tuples}
	if not tuples:
		print "[!!] ERROR: The arguments provided don't define any input."
		sys.exit()
	
	## Combine jetht15 and jetht16:
	for name in ["jetht15", "jetht16"]:
		if name in tuples:
			if "jetht" not in tuples: tuples["jetht"] = []
			tuples["jetht"].extend(tuples[name])
			tuples.pop(name, None)
	
	
	## Print an introduction:
	print "The analyzer will run over the following tuples:"
	if isinstance(tuples, dict):
		for key, tups in tuples.items():
			print "\t* {}:".format(key)
			for tup in tups:
				print "\t\t* {}".format(tup)
	else:
		print "\t{}".format(tuples)
	out_dir = None
	out_file = None
	if args.output:
		out_file = args.output.split("/")[-1]
		out_dir = "/".join(args.output.split("/")[:-1])
	
	ana = analyzer.analyzer(tuples, save=True, v=args.verbose, out_file=out_file, use_condor=args.condor)
	vs_out = get_variables()
	ana.define_branches(vs_out)
	
	# Event loop:
	for key, loop in ana.loops.iteritems():
		loop.treatment = treat_event
		loop.progress = not ana.condor		# Show a progress bar only when not running on condor.
		loop.run(n=args.n, rand=False, arguments={"alg": args.algorithm})
	
	# Output:
	ana.write()
	print ana.out_path
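
Example #1 assumes two helpers defined elsewhere in the module: get_variables(), whose return value is passed to define_branches(), and treat_event, which is installed as each loop's per-event treatment. A minimal sketch of their assumed shapes (the branch names and types below are illustrative, not taken from the original module):

def get_variables():
	# Hypothetical output-branch definitions: a name -> type mapping of
	# whatever ana.define_branches expects (illustrative values).
	return {
		"pt": "vector<double>",
		"eta": "vector<double>",
	}

def treat_event(event, arguments=None):
	# Per-event hook; "alg" arrives via loop.run(arguments={"alg": ...}).
	alg = (arguments or {}).get("alg")
	# ... compute and fill the output branches for this event here ...
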
Example #2
def main():
	# Setup:
	## ROOT:
	gROOT.SetBatch()
	
	## Arguments:
	a = variables.arguments()
	args = a.args
	tuples = dataset.fetch_entries("tuple", a.query)
	tuples = dataset.sort_datasets(tuples)
	
	## Print an introduction:
	print "The analyzer will run over the following tuples:"
	for key, tups in tuples.items():
		print "\t* {}:".format(key)
		for tup in tups:
			print "\t\t* {}".format(tup)
	
	## Analyzer object:
	ana = analyzer.analyzer(tuples, save=True, v=args.verbose, count=False)		# TODO: add "out_dir=" and "out_file=" arguments.
	vs_out = get_variables()
	ana.define_branches(vs_out)
	
	# Event loop:
	for key, loop in ana.loops.iteritems():
		loop.treatment = treat_event
		loop.progress = False
		loop.run(n=args.n, rand=False, arguments={"alg": args.algorithm})
	
	# Output:
	ana.write()
	print ana.out_path
Example #3
def main():
	a = variables.arguments()
	miniaods = dataset.fetch_entries("miniaod", a.query)
	suffix = a.suffix
	if not suffix:
		suffix = "cutpt400"
	indate = a.args.dir
	if not indate:
		indate = "161118_062506"
	
	for miniaod in miniaods:
		indir = "/uscms/home/tote/8_0_20/Analyzers/FatjetAnalyzer/test/condor_jobs/tuplizer/{}/{}_{}_{}".format(indate, miniaod.subprocess, miniaod.generation, suffix)
		print "\nStatusing {}".format(miniaod.Name)
		print "[..] Scanning the condor directory."
		jdls = [f for f in os.listdir(indir) if f.endswith(".jdl")]
		njobs = len(jdls)
	
		log_dict = list_logs(indir)
		logs_log = log_dict["log"]
		logs_stdout = log_dict["stdout"]
		logs_stderr = log_dict["stderr"]
	
		print "[OK] Total jobs: {}".format(njobs)
		good = True
		if len(logs_log) != njobs:
			print "[!!] Expected {} .log files but found {}.".format(njobs, len(logs_log))
			good = False
		if len(logs_stdout) != njobs:
			print "[!!] Expected {} .stdout files but found {}.".format(njobs, len(logs_stdout))
			good = False
		if len(logs_stderr) != njobs:
			print "[!!] Expected {} .stderr files but found {}.".format(njobs, len(logs_stderr))
			good = False
		if good: print "[OK] All logs accounted for."
		else: sys.exit()
	
		print "[..] Checking jobs."
		jobs_bad, jobs_error, jobs_unsubmitted = check_stderr_logs(indir, logs_stderr)
		if jobs_bad:
			print "[!!] There were problems with {} jobs:".format(len(jobs_bad))
			print jobs_bad
			good = False
		if jobs_error:
			print "[!!] There were errors in {} of these jobs:".format(len(jobs_error))
			print jobs_error
			good = False
		if jobs_unsubmitted:
			print "[!!] There are {} unsubmitted jobs:".format(len(jobs_unsubmitted))
			print jobs_unsubmitted
			good = False
		if good:
			print "[OK] All jobs completed successfully."
Example #5
def main():
    # Arguments
    a = variables.arguments()
    if not a.kinds:
        a.kinds = variables.all_kinds

    # Fetch datasets:
    results = {}
    for kind in a.kinds:
        entries = dataset.fetch_entries(kind, a.query)
        for entry in entries:
            if entry.subprocess not in results:
                results[entry.subprocess] = {}
            if entry.kind not in results[entry.subprocess]:
                results[entry.subprocess][entry.kind] = []
            results[entry.subprocess][entry.kind].append(entry)

    # Print the results, grouped by subprocess:
    for subprocess, kinds in results.items():
        for kind, entries in kinds.items():
            for entry in entries:
                entry.Print()
        print "=========================================================="
    return True
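
For reference, the results dictionary built above nests entries first by subprocess and then by kind; with illustrative names it looks like this:

# results = {
#     "qcd_pt300": {
#         "miniaod": [<entry>, <entry>, ...],
#         "tuple": [<entry>, ...],
#     },
#     ...
# }
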
Example #6
def main():
    # Arguments:
    a = variables.arguments()
    args = a.args
    if not a.kinds:
        a.kinds = variables.all_kinds
    j = not args.json

    # Step 0: check if anything needs to be added to the DB:
    print "Step 0: Checking for any new entries that need to be added to the DB ..."
    datasets = dataset.parse_db_yaml(completed=True)

    print "\t[..] Checking samples.yaml against the DB."
    n_added = 0
    for kind, dss in datasets.items():
        for ds in dss:
            check_result = dataset.check_yaml_against_db(ds)
            # Deal with entries that aren't in the DB:
            if not check_result:
                n_added += 1
                print "\t[..] Adding {} to the DB.".format(ds.Name)
                ds.write()

    ## Print a summary:
    print "Step 0 summary:"
    if n_added:
        print "\t{} entries added.".format(n_added)
    else:
        print "\tNothing needed to be added."

    # Step 1: check if anything needs to be updated in the DB:
    print "Step 1: Checking if any specified entries in the DB need to be updated ..."
    dss = []
    for kind in a.kinds:
        dss += dataset.fetch_entries(kind, a.query)

    print "\t[..] Checking samples.yaml against the DB (excluding tuples)."
    n_updated = 0
    for ds in dss:
        if ds.kind != "tuple":
            print "\t{} ({})".format(ds.Name, ds.kind)
            check_result = dataset.check_db_against_yaml(ds)
            if not check_result: continue
            # Update entries in the DB that need updating (e.g., if you recently edited "samples.yaml"):
            keys_update = [
                key for key, value in check_result.items()
                if value["change"] and key != "time"
            ]
            if keys_update:
                n_updated += 1
                info = {key: check_result[key]["new"] for key in keys_update}
                print "\tUpdating the following values for {} ...".format(
                    ds.Name)
                print "\t{}".format(info)
                ds.update(info)

    ## Print a summary:
    print "Step 1 summary:"
    if n_updated:
        print "\t{} entries updated.".format(n_updated)
    else:
        print "\tNothing needed to be updated."

    # Step 2: search (but don't scan) for new tuples:
    print "Step 2: Searching for new tuples ..."
    tuples_new = discover_tuples()
    n_added = 0
    for tup in tuples_new:
        print "Adding {} to the DB ...".format(tup.Name)
        tup.write()
        n_added += 1

    ## Print a summary:
    print "Step 2 summary:"
    if n_added:
        print "\t{} tuples added.".format(n_added)
    else:
        print "\tNo tuples needed to be added."

    # Step 3: Fetch entries to scan:
    print "Step 3: Scanning entries ..."
    entries_dict = {}
    for kind in a.kinds:
        entries_dict[kind] = dataset.fetch_entries(kind, a.query)
    n_entries = sum([len(l) for l in entries_dict.values()])
    if n_entries:
        print "\tThere are {} entries to scan.".format(n_entries)
    else:
        print "\tThere were no entries to scan. Your query was the following:\n{}\nkinds = {}".format(
            a.query, a.kinds)
    for kind, entries in entries_dict.items():
        for entry in entries:
            print "\tFixing {} ...".format(entry.Name)
            try:
                entry.fix()
            except Exception as ex:
                print "[!!] Fixing failed:"
                print ex
            print "\tScanning {} ...".format(entry.Name)
            try:
                entry.scan(j=j)
            except Exception as ex:
                print "[!!] Scanning failed:"
                print ex
    return True
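
Step 1 above reads two fields from each value returned by dataset.check_db_against_yaml: a "change" flag and the "new" value to write back. Inferred from that usage alone (the field names other than "time" are illustrative), the assumed shape is:

# check_result = {
#     "path": {"change": True, "new": "/store/user/..."},    # Would be updated.
#     "xs": {"change": False, "new": None},                  # Unchanged.
#     "time": {"change": True, "new": 1479450306},           # Always skipped by the loop.
# }
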
Example #7
import os, sys
from decortication import variables, dataset, production

# VARIABLES:
cut_pt_filter = 400		# pT filter threshold in GeV; the eta 2.5 cut is the default.
# /VARIABLES

if __name__ == "__main__":
	# Prepare:
	a = variables.arguments()
	path = "crab_configs"
	if not os.path.exists(path):
		os.makedirs(path)
	
	# Write configs:
	miniaods = dataset.fetch_entries("miniaod", a.query)
	for miniaod in miniaods:
		print "Making a configuration file for {} ...".format(miniaod.Name)
		config = production.get_crab_config(
			kind="tuple",
			miniaod=miniaod,
			cmssw_config="tuplizer_cfg.py",
			cut_pt_filter=cut_pt_filter
		)
		with open(path + "/{}.py".format(miniaod.Name), "w") as out:
			out.write(config)
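
Each run of this script leaves one CRAB configuration per matching MiniAOD in crab_configs/. Assuming a standard CRAB3 environment, each file can then be submitted with "crab submit crab_configs/<Name>.py".
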
Example #8
def main():
    # Arguments:
    a = variables.arguments()
    miniaods = dataset.fetch_entries("miniaod", a.query)
    tstring = utilities.time_string()[:-4]
    suffix = "cutpt{}".format(cut_pt_filter)
    cmssw_version = cmssw.get_version(parsed=False)

    for miniaod in miniaods:
        print "Making condor setup for {} ...".format(miniaod.Name)
        sample = miniaod.get_sample()

        # Create groups of input files:
        groups = []
        group = []
        n_group = 0
        for i, n in enumerate(miniaod.ns):
            n_group += n
            group.append(miniaod.files[i])
            if (n_group >= n_per) or (i == len(miniaod.ns) - 1):
                groups.append(group)
                group = []
                n_group = 0
        print "\tCreating {} jobs ...".format(len(groups))

        # Prepare directories:
        path = "condor_jobs/tuplizer/{}/{}_{}_{}".format(tstring, miniaod.subprocess, miniaod.generation, suffix)
        log_path = path + "/logs"
        if not os.path.exists(path):
            os.makedirs(path)
        if not os.path.exists(log_path):
            os.makedirs(log_path)
        eos_path = "/store/user/tote/{}/tuple_{}_{}_{}/{}".format(
            sample.name, miniaod.subprocess, miniaod.generation, suffix, tstring
        )  # Output path.

        # Create job scripts:
        for i, group in enumerate(groups):
            job_script = "#!/bin/bash\n"
            job_script += "\n"
            job_script += "# Untar CMSSW area:\n"
            job_script += "tar -xzf {}.tar.gz\n".format(cmssw_version)
            job_script += "cd {}/src/Analyzers/FatjetAnalyzer/test\n".format(cmssw_version)
            job_script += "\n"
            job_script += "# Setup CMSSW:\n"
            job_script += "source /cvmfs/cms.cern.ch/cmsset_default.sh\n"
            job_script += "eval `scramv1 runtime -sh`		#cmsenv\n"
            job_script += "\n"
            job_script += "# Run CMSSW:\n"
            list_str = ",".join(['"{}"'.format(g) for g in group])
            out_file = "tuple_{}_{}_{}_{}.root".format(miniaod.subprocess, miniaod.generation, suffix, i + 1)
            job_script += 'cmsRun tuplizer_cfg.py subprocess="{}" generation="{}" cutPtFilter={} outDir="." outFile="{}" inFile={}'.format(
                miniaod.subprocess, miniaod.generation, cut_pt_filter, out_file, list_str
            )
            if sample.data:
                job_script += " data={}".format(sample.data)
            if sample.mask:
                job_script += ' mask="{}"'.format(sample.mask)
            job_script += " &&\n"
            job_script += "xrdcp -f {} root://cmseos.fnal.gov/{} &&\n".format(out_file, eos_path)
            job_script += "rm {}\n".format(out_file)
            with open("{}/job_{}.sh".format(path, i + 1), "w") as out:
                out.write(job_script)

                # Create condor configs:
        for i, group in enumerate(groups):
            job_config = "universe = vanilla\n"
            job_config += "Executable = job_{}.sh\n".format(i + 1)
            job_config += "Should_Transfer_Files = YES\n"
            job_config += "WhenToTransferOutput = ON_EXIT\n"
            job_config += "Transfer_Input_Files = {}.tar.gz\n".format(cmssw_version)
            job_config += 'Transfer_Output_Files = ""\n'
            job_config += "Output = logs/job_{}.stdout\n".format(i + 1)
            job_config += "Error = logs/job_{}.stderr\n".format(i + 1)
            job_config += "Log = logs/job_{}.log\n".format(i + 1)
            job_config += "notify_user = ${LOGNAME}@FNAL.GOV\n"
            job_config += "x509userproxy = $ENV(X509_USER_PROXY)\n"
            job_config += "Queue 1\n"

            with open("{}/job_{}.jdl".format(path, i + 1), "w") as out:
                out.write(job_config)

                # Create run script:
        run_script = "# Update cache info:\n"
        run_script += "bash $HOME/condor/cache.sh\n"
        run_script += "\n"
        run_script += "# Grid proxy existence & expiration check:\n"
        run_script += "PCHECK=`voms-proxy-info -timeleft`\n"
        run_script += 'if [[ ($? -ne 0) || ("$PCHECK" -eq 0) ]]; then\n'
        run_script += "\tvoms-proxy-init -voms cms --valid 168:00\n"
        run_script += "fi\n"
        run_script += "\n"
        run_script += "# Copy python packages to CMSSW area:\n"
        run_script += "cp -r $HOME/decortication/decortication $CMSSW_BASE/python\n"
        run_script += "cp -r $HOME/decortication/resources $CMSSW_BASE/python\n"
        run_script += "cp -r $HOME/truculence/truculence $CMSSW_BASE/python\n"
        run_script += "\n"
        run_script += "# Make tarball:\n"
        run_script += "echo 'Making a tarball of the CMSSW area ...'\n"
        run_script += "tar --exclude-caches-all -zcf ${CMSSW_VERSION}.tar.gz -C ${CMSSW_BASE}/.. ${CMSSW_VERSION}\n"
        run_script += "\n"
        run_script += "# Prepare EOS:\n"
        run_script += "eos root://cmseos.fnal.gov mkdir -p {}\n".format(eos_path)
        run_script += "\n"
        run_script += "# Submit condor jobs:\n"
        for i, group in enumerate(groups):
            run_script += "condor_submit job_{}.jdl\n".format(i + 1)
        run_script += "\n"
        run_script += "# Remove tarball:\n"
        run_script += "#rm ${CMSSW_VERSION}.tar.gz\n"  # I if remove this, the jobs might complain.
        run_script += "\n"
        run_script += "# Remove python packages:\n"
        run_script += "#rm -rf $CMSSW_BASE/python/decortication\n"
        run_script += "#rm -rf $CMSSW_BASE/python/resources\n"
        run_script += "#rm -rf $CMSSW_BASE/python/truculence\n"

        with open("{}/run.sh".format(path), "w") as out:
            out.write(run_script)

        print "\tThe jobs are in {}".format(path)
    return True
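
The job-splitting loop near the top of Example #8 is the heart of the script: it walks the per-file event counts in miniaod.ns in step with miniaod.files and closes out a group whenever the running event total reaches n_per (a module-level constant not shown in the excerpt). The same logic as a standalone sketch, with a worked example:

def group_files(files, ns, n_per):
    # Chunk "files" so that each chunk covers roughly n_per events,
    # according to the per-file event counts in "ns".
    groups, group, n_group = [], [], 0
    for i, n in enumerate(ns):
        n_group += n
        group.append(files[i])
        if n_group >= n_per or i == len(ns) - 1:
            groups.append(group)
            group, n_group = [], 0
    return groups

# group_files(["a", "b", "c", "d", "e"], [60, 50, 90, 10, 30], 100)
# returns [["a", "b"], ["c", "d"], ["e"]]
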
Example #9
def main():
	# Arguments:
	a = variables.arguments()
	args = a.args
	if not a.kinds:
		a.kinds = variables.all_kinds
	j = not args.json
	
	# Step 1: check if anything needs to be added to or updated in the DB:
	print "Step 1: Checking if anything needs to be added to or updated in the DB ..."
	datasets = dataset.parse_db_yaml(completed=False)
	
	print "\tChecking samples.yaml against the DB ..."
	n_added = 0
	n_updated = 0
	for kind, dss in datasets.items():
		for ds in dss:
			check_result = ds.check()
			# Deal with entries that aren't in the DB:
			if not check_result:
				n_added += 1
				print "\tAdding {} to the DB ...".format(ds.Name)
				ds.write()
			
			# Update entries in the DB that need updating (e.g., if you recently edited "samples.yaml"):
			else:
				keys_update = [key for key, value in check_result[1].items() if value and key != "time"]		# "check_result[1]" contains information about what should be updated in the DB.
				if keys_update:
					n_updated += 1
					info = {key: getattr(ds, key) for key in keys_update}
					print "\tUpdating the following values for {} ...".format(ds.Name)
					print "\t{}".format(info)
					ds.update(info)
	
	## Print a summary:
	print "Step 1 summary:"
	if n_added:
		print "\t{} entries added.".format(n_added)
	else:
		print "\tNothing needed to be added."
	if n_updated:
		print "\t{} entries updated.".format(n_updated)
	else:
		print "\tNothing needed to be updated."
	
	
	# Step 2: search (but don't scan) for new tuples:
	print "Step 2: Searching for new tuples ..."
	tuples_new = discover_tuples()
	n_added = 0
	for tup in tuples_new:
		print "Adding {} to the DB ...".format(tup.Name)
		tup.write()
		n_added += 1
	
	## Print a summary:
	print "Step 2 summary:"
	if n_added:
		print "\t{} tuples added.".format(n_added)
	else:
		print "\tNo tuples needed to be added."
	
	# Step 3: Fetch entries to scan:
	print "Step 3: Scanning entries ..."
	entries_dict = {}
	for kind in a.kinds:
		entries_dict[kind] = dataset.fetch_entries(kind, a.query)
	n_entries = sum([len(l) for l in entries_dict.values()])
	if n_entries:
		print "\tThere are {} entries to scan.".format(n_entries)
	else:
		print "\tThere were no entries to scan. Your query was the following:\n{}\nkinds = {}".format(a.query, a.kinds)
	for kind, entries in entries_dict.items():
		for entry in entries:
			print "\tFixing {} ...".format(entry.Name)
			entry.fix()
			print "\tScanning {} ...".format(entry.Name)
			entry.scan(j=j)
	return True