def get_running_condor_jobs(self, extra_columns=[]):
    """
    Get a list of dictionaries for the condor jobs satisfying the
    classad given by the unique_name, requesting an extra column for
    the second classad that we submitted the job with (the job number).
    I.e., each task has the same taskname, and each job within a task
    has a unique jobnum corresponding to the output file index.
    """
    return Utils.condor_q(
            selection_pairs=[["taskname", self.unique_name]],
            extra_columns=["jobnum"] + extra_columns,
            use_python_bindings=True,
            )
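# A minimal usage sketch (not from the original source): assuming `task` is an
# instance of the class defining get_running_condor_jobs, index the running
# jobs by their "jobnum" classad to recover which output file index each job
# corresponds to. The helper name below is hypothetical.
def _running_jobnums(task):
    jobs_by_num = {int(job["jobnum"]): job for job in task.get_running_condor_jobs()}
    return sorted(jobs_by_num)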
def test_condor_submission_and_status(self):
    basedir = "/tmp/{0}/metis/condor_test/".format(os.getenv("USER"))
    Utils.do_cmd("mkdir -p {0}".format(basedir))
    with open("{0}/temp_test.sh".format(basedir), "w") as fhout:
        fhout.write("""#!/usr/bin/env bash
echo "--- begin header output ---"
echo "hostname: $(hostname)"
echo "uname -a: $(uname -a)"
echo "time: $(date +%s)"
echo "args: $@"
echo "ls -l output"
ls -l
# logging every 45 seconds gives ~100kb log file/3 hours
dstat -cdngytlmrs --float --nocolor -T --output dsout.csv 45 >& /dev/null &
echo "--- end header output ---"

# run main job stuff
sleep 60s

echo "--- begin dstat output ---"
cat dsout.csv
echo "--- end dstat output ---"
kill %1  # kill dstat

echo "ls -l output"
ls -l
""")
    Utils.do_cmd("chmod a+x {0}/temp_test.sh".format(basedir))

    success, cluster_id = Utils.condor_submit(
            executable=basedir + "temp_test.sh",
            arguments=["cat", 10, "foo"],
            inputfiles=[],
            logdir=basedir,
            selection_pairs=[["MyVar1", "METIS_TEST"], ["MyVar2", "METIS_TEST2"]],
            )
    jobs = Utils.condor_q(selection_pairs=[["MyVar1", "METIS_TEST"], ["MyVar2", "METIS_TEST2"]])
    found_job = len(jobs) >= 1
    Utils.condor_rm([cluster_id])

    self.assertEqual(success, True)
    self.assertEqual(found_job, True)
# cardnames = [
#         "../runs/out_2hdm_scan_v1/proc_card_2hdm_taq_350_0p2.dat",
#         "../runs/out_2hdm_scan_v1/proc_card_2hdm_taq_350_0p5.dat",
#         "../runs/out_2hdm_scan_v1/proc_card_2hdm_taq_350_0p8.dat",
#         "../runs/out_2hdm_scan_v1/proc_card_2hdm_taq_350_1p0.dat",
#         ]
cardnames = glob.glob("../runs/out_phi*v1/*.dat") + glob.glob("../runs/out_zprime*v1/*.dat")
baseoutputdir = "/hadoop/cms/store/user/namin/batch_madgraph/"
arguments = []
inputfiles = []

# Jobs already in the queue: select anything that shipped a proc_card via TransferInput
jobs = Utils.condor_q(extra_constraint='regexp("proc_card",TransferInput)', use_python_bindings=True)

def get_dirproc_condor(x):
    # ARGS is "<output dir> <process tag> ...", so take the first two tokens
    parts = x["ARGS"].split()
    return os.path.normpath(parts[0]), parts[1]

running_dps = set(map(get_dirproc_condor, jobs))

def get_dirproc_hadoop(x):
    # Turn ".../<procdir>/<proctag>.txt" into (<procdir path>, <proctag>)
    parts = x.rsplit("/", 1)
    parts[1] = parts[1].rsplit(".", 1)[0]
    return os.path.normpath(parts[0]), parts[1]

done_dps = set(map(get_dirproc_hadoop, glob.glob(baseoutputdir + "/*/*.txt")))

for icard, card in enumerate(cardnames):
    cardbasename = card.split("/")[-1]
    procdir = card.split("/")[-2]
    # rsplit(".", 1) rather than rsplit(".") so a dotted tag survives intact
    proctag = cardbasename.rsplit(".", 1)[0].replace("proc_card_", "")
    outdir = os.path.normpath("{}/{}/".format(baseoutputdir, procdir))
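    # Hedged sketch of the likely loop continuation (not in the original
    # snippet): skip any (outdir, proctag) pair that is already queued or
    # already has an output .txt on hadoop, then submit the rest. The
    # executable name and log directory are illustrative assumptions; the
    # argument order matches what get_dirproc_condor parses out of ARGS.
    if (outdir, proctag) in running_dps or (outdir, proctag) in done_dps:
        continue
    success, cluster_id = Utils.condor_submit(
            executable="condor_exe.sh",   # hypothetical wrapper script
            arguments=[outdir, proctag],  # parsed back out by get_dirproc_condor
            inputfiles=[card],            # proc card travels via TransferInput
            logdir="logs/",               # hypothetical log directory
            )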