def __init__(self, **kwargs):
    # Handle whatever kwargs we want here
    self.input_files = kwargs.get("input_files", [])
    self.auxiliary_files = kwargs.get("auxiliary_files", [])
    self.dataset = kwargs.get("dataset", None)
    self.work_area = kwargs.get("work_area", "crab/")
    self.request_name = kwargs.get("request_name", None)
    self.plugin_name = kwargs.get("plugin_name", "Analysis")
    self.pset_location = kwargs.get("pset_location", None)
    self.job_splitting = kwargs.get("job_splitting", "FileBased")
    self.units_per_job = kwargs.get("units_per_job", 1)
    if os.getenv("USER") in ["namin"]:
        # this saves about a second for the lookup
        self.out_lfn_dir_base = kwargs.get("out_lfn_dir_base", "/store/user/{0}/ProjectMetis/".format(os.getenv("USER")))
    else:
        self.out_lfn_dir_base = kwargs.get("out_lfn_dir_base", "/store/user/{0}/ProjectMetis/".format(getUsernameFromSiteDB()))
    self.output_primary_dataset = kwargs.get("output_primary_dataset", "ProjectMetisTest")
    self.input_DBS_instance = kwargs.get("input_DBS_instance", "global")
    self.storage_site = kwargs.get("storage_site", "T2_US_UCSD")
    self.whitelist = kwargs.get("whitelist", ["T2_*"])
    self.min_completion_fraction = kwargs.get("min_completion_fraction", 1.0)
    self.check_needed_params()
    self.crab_config = None
    self.unique_request_name = None
    self.task_dir = os.path.join(self.work_area, "crab_{0}".format(self.request_name))
    self.status_output = {}
    self.logger = logging.getLogger(setup_logger())
    setConsoleLogLevel(LOGLEVEL_MUTE)
def __init__(self, **kwargs):
    # Handle whatever kwargs we want here
    self.input_files = kwargs.get("input_files", [])
    self.auxiliary_files = kwargs.get("auxiliary_files", [])
    self.dataset = kwargs.get("dataset", None)
    self.work_area = kwargs.get("work_area", "./")
    self.request_name = kwargs.get("request_name", None)
    self.plugin_name = kwargs.get("plugin_name", "Analysis")
    self.pset_location = kwargs.get("pset_location", None)
    self.job_splitting = kwargs.get("job_splitting", "FileBased")
    self.units_per_job = kwargs.get("units_per_job", 1)
    # NOTE: GRIDUSER might be different from USER for some weird folks.
    # A foolproof but slow way of finding the username is getUsernameFromSiteDB();
    # the environment lookup below is probably equivalent (see setup.sh).
    hadoop_user = os.environ.get("GRIDUSER", os.environ.get("USER"))
    self.out_lfn_dir_base = kwargs.get("out_lfn_dir_base", "/store/user/{0}/ProjectMetis/".format(hadoop_user))
    self.output_primary_dataset = kwargs.get("output_primary_dataset", "ProjectMetisTest")
    self.input_DBS_instance = kwargs.get("input_DBS_instance", "global")
    self.storage_site = kwargs.get("storage_site", "T2_US_UCSD")
    self.whitelist = kwargs.get("whitelist", ["T2_*"])
    self.min_completion_fraction = kwargs.get("min_completion_fraction", 1.0)
    self.check_needed_params()
    self.crab_config = None
    self.unique_request_name = None
    self.task_dir = os.path.join(self.work_area, "{0}".format(self.request_name))
    self.status_output = {}
    self.logger = logging.getLogger(setup_logger())
    setConsoleLogLevel(LOGLEVEL_MUTE)
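# --- Usage sketch (illustrative, not from the original sources) ---
# Assuming the kwargs-based constructor above belongs to a CRAB task-manager
# class; the name "CrabManager" and all argument values below are hypothetical.
manager = CrabManager(
    dataset="/SingleMuon/Run2016B-PromptReco-v2/MINIAOD",  # example dataset name
    request_name="SingleMuon_Run2016B",
    pset_location="psets/pset_test.py",
    job_splitting="FileBased",
    units_per_job=5,
)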
def main():
    '''
    Do all steps here
    '''
    # Options
    bDebug = False

    # If you want crabCommand to be quiet:
    if not bDebug:
        setConsoleLogLevel(LOGLEVEL_MUTE)

    # Retrieve the current crabCommand console log level:
    crabConsoleLogLevel = getConsoleLogLevel()
    if bDebug:
        print "=== multicrabGet.py:\n\t The current \"crabCommand\" console log level is set to \"%s\"" % (crabConsoleLogLevel)

    # Ensure script is called with at least one argument (apart from script name)
    if len(sys.argv) == 1:
        scriptName = sys.argv[0]
        usage()

    # Get the multiCRAB dir(s) name (passed as argument)
    dirs = sys.argv[1:]

    # Initialise Variables
    reports = []
    datasetdirs = GetMulticrabAbsolutePaths(dirs)
    datasets = GetDatasetAbsolutePaths(datasetdirs)
    if bDebug:
        print "=== multicrabGet.py:\n\t Found \"%s\" CRAB task directories:" % (len(datasets))
        for d in datasets:
            print "\t\t \"%s\"" % (os.path.basename(d))

    # For-loop: All dataset directories (absolute paths)
    for index, d in enumerate(datasets):
        #print "=== multicrabGet.py:\n\t %s (%s/%s)" % ( os.path.basename(d), index+1, len(datasets) )
        lastTwoDirs = d.split("/")[-2] + "/" + d.split("/")[-1]
        print "=== multicrabGet.py:\n\t %s (%s/%s)" % (lastTwoDirs, index+1, len(datasets))

        # Check if task is in "DONE" state
        if GetTaskStatusBool(d, True):
            continue

        # Get task dashboard URL
        taskDashboard = GetTaskDashboardURL(d)

        # Get task status
        taskStatus = GetTaskStatus(d)

        # Get the reports
        reports += GetTaskReports(d, taskStatus, taskDashboard)

    # For-loop: All CRAB reports
    if bDebug:
        for r in reports:
            r.Print()
    return
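# --- Example invocation of the script above (hypothetical directory name) ---
# Each positional argument is a multiCRAB directory containing CRAB task dirs:
#   python multicrabGet.py multicrab_Analysis_v1_20160107T1200/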
def GetTaskReports(datasetPath, status, dashboardURL, verbose=False):
    '''
    '''
    # Variable Declaration
    reports = []

    # Get all files under <dataset_dir>/results/
    files = execute("ls %s" % os.path.join(datasetPath, "results"))

    try:
        if verbose:
            print "\t Executing \"crab status\" command"

        # Execute "crab status --dir=d"
        result = crabCommand('status', dir=datasetPath)

        # Assess JOB success/failure for task
        finished, failed, retrievedLog, retrievedOut = retrievedFiles(datasetPath, result, False)

        # Proceed according to the job status
        if retrievedLog < finished:
            touch(datasetPath)
            dummy = crabCommand('getlog', dir=datasetPath)  # xenios

        if retrievedOut < finished:
            dummy = crabCommand('getoutput', dir=datasetPath)  # xenios
            touch(datasetPath)

        if failed > 0:
            print "\t Found \"Failed\" jobs for task \"%s\". Executing command \"crab resubmit --dir=%s\"" % (os.path.basename(datasetPath), datasetPath)
            dummy = crabCommand('resubmit', dir=datasetPath)

        # Assess JOB success/failure for task (again)
        finished, failed, retrievedLog, retrievedOut = retrievedFiles(datasetPath, result, True)
        retrieved = min(finished, retrievedLog, retrievedOut)
        alljobs = len(result['jobList'])

        # Append the report
        reports.append(Report(datasetPath, alljobs, retrieved, status, dashboardURL))

        # Mark the task as DONE if all jobs have been retrieved
        if retrieved == alljobs and retrieved > 0:
            absolutePath = os.path.join(datasetPath, "crab.log")
            os.system("sed -i -e '$a\DONE! (Written by multicrabGet.py)' %s" % absolutePath)

    # Catch exceptions (errors detected during execution which may not be "fatal")
    except:
        msg = sys.exc_info()[1]
        reports.append(Report(datasetPath, "?", "?", "?", dashboardURL))
        print "\t The \"crab status\" command failed with exception \"%s\"" % (msg)
        if verbose:
            print "\t Re-executing \"crab status\" command, this time with full verbosity"
            setConsoleLogLevel(1)
            res = crabCommand('status', dir=datasetPath)

    return reports
def querystatus(submissiondir, toquerylist):
    setConsoleLogLevel(LOGLEVEL_MUTE)
    res = {}
    print("Making queries for {} jobs..".format(len(toquerylist)))
    for d in toquerylist:
        if not d.startswith(submissiondir):
            d = join(submissiondir, d)
        statusdict = crabCommand("status", dir=d)
        _status = statusdict.get("status", "")
        _jobsPerStatus = statusdict.get("jobsPerStatus", {})
        res[d] = {'status': _status, 'jobsPerStatus': _jobsPerStatus}
    return res
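# --- Usage sketch (illustrative, not from the original sources) ---
# querystatus() accepts task directory names relative to the submission
# directory or as absolute paths; the paths below are hypothetical.
res = querystatus("/home/user/crab_submissions", ["crab_TaskA", "crab_TaskB"])
for taskdir, info in res.items():
    print("{}: {} {}".format(taskdir, info["status"], info["jobsPerStatus"]))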
def __init__(self, debug=0, logger=None, workingArea=None, voGroup=None, username=None):
    setConsoleLogLevel(LOGLEVEL_MUTE)
    self.debug = debug
    if workingArea is not None:
        self.workingArea = workingArea
    else:
        self.workingArea = os.getcwd()
    self.dry_run = False
    if voGroup is not None:
        self.voGroup = voGroup
    else:
        self.voGroup = "dcms"
    if username is not None:
        self.username = username
    else:
        self.username = None
    if logger is not None:
        self.logger = logger.getChild("CrabController")
    else:
        # add instance logger as logger to root
        self.logger = logging.getLogger("CrabController")
        # check if handlers are present for root logger;
        # we assume that the default logging is not configured if a handler is present
        if len(logging.getLogger().handlers) < 1:
            ch = logging.FileHandler('crabController.log', mode='a', encoding=None, delay=False)
            ch.setLevel(logging.DEBUG)
            # create formatter
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            # add formatter to ch
            ch.setFormatter(formatter)
            self.logger.addHandler(ch)
    self.crab_q = Queue()
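# --- Usage sketch (illustrative, not from the original sources) ---
# Default construction uses the current directory, voGroup "dcms", and file
# logging to crabController.log; passing a parent logger routes messages
# through its "CrabController" child instead. Values below are hypothetical.
controller = CrabController()
controller = CrabController(workingArea="/tmp/crab", voGroup="cms", username="jdoe")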
## Run with python mergeCRABOutput.py, instead of ./mergeCRABOutput.py
## This guarantees the right python from the CMSSW release is called

from CRABAPI.RawCommand import crabCommand
from CRABClient.UserUtilities import setConsoleLogLevel
from CRABClient.ClientUtilities import LOGLEVEL_MUTE
setConsoleLogLevel(LOGLEVEL_MUTE)

import sys
# import os
# cwd = os.getcwd()
# print "Current directory: ", cwd

if len(sys.argv) > 1:
    res = crabCommand('status', dir=sys.argv[1])
# else:
#     res = crabCommand('status')

# if res['status'] == 'COMPLETED':
userWebDirURL = res['userWebDirURL']
datestamp = userWebDirURL.split(':')[-2].split('/')[-1]
print datestamp
# exit(0)
# else:
#     exit(1)
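# --- Illustration of the datestamp extraction above (hypothetical URL) ---
# A userWebDirURL of the shape below yields the "160220_151313"-style stamp:
url = "https://cmsweb.cern.ch/scheddmon/059/user/160220_151313:user_crab_MyTask"
part = url.split(':')[-2]    # "//cmsweb.cern.ch/scheddmon/059/user/160220_151313"
print part.split('/')[-1]    # -> "160220_151313"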
def __init__(self, dataset=None, gtag=None, kfact=None, efact=None, xsec=None, sparms=[],
             debug=False, specialdir_test=False, do_skip_tail=True, logger_callback=None):
    setConsoleLogLevel(LOGLEVEL_MUTE)

    # debug bools
    if debug:
        self.fake_submission = False
        self.fake_status = True
        self.fake_crab_done = True
        self.fake_legit_sweeproot = True
        self.fake_miniaod_map = True
        self.fake_merge_lists = True
        self.fake_check = True
        self.fake_copy = True
    else:
        self.fake_submission = False
        self.fake_status = False
        self.fake_crab_done = False
        self.fake_legit_sweeproot = False
        self.fake_miniaod_map = False
        self.fake_merge_lists = False
        self.fake_check = False
        self.fake_copy = False

    self.specialdir_test = specialdir_test
    self.do_skip_tail = do_skip_tail

    # dirs are wrt the base directory where this script is located
    self.misc = {}
    self.misc["pfx_pset"] = 'pset'  # where to hold the psets
    self.misc["pfx_crab"] = 'crab'  # where to keep all crab tasks
    self.misc["crab_config"] = None
    self.misc["handled_more_than_1k"] = False
    self.misc["rootfiles"] = []
    self.misc["logfiles"] = []
    self.misc["last_saved"] = None  # when was the last time we backed up this sample data
    self.misc["can_skip_tail"] = False
    # self.misc["handled_prechecks"] = False
    # self.misc["passed_prechecks"] = True

    self.sample = {
        "basedir": "",
        "dataset": dataset,
        "shortname": dataset.split("/")[1]+"_"+dataset.split("/")[2],
        "user": u.get_hadoop_name(),
        "cms3tag": params.cms3tag,
        "cmsswver": params.cmssw_ver,
        "gtag": gtag,
        "kfact": kfact,
        "efact": efact,
        "xsec": xsec,
        "sparms": sparms,  # always keep as list. e.g., ["mlsp","mstop"]
        "isdata": False,  # by default, MC
        "pset": "",  # *_cfg.py pset location
        "specialdir": "",  # /hadoop/cms/store/group/snt/{specialdir}/ (e.g., run2_25ns, run2_fastsim)
        "finaldir": "",  # where final files will live
        "status": "new",  # general sample status
        "crab": {},  # crab task information here
        "postprocessing": {},  # postprocessing counts for monitor
        "checks": {},  # checkCMS3 info for monitor
        "ijob_to_miniaod": {},  # map from ijob to list of miniaod
        "imerged_to_ijob": {},  # map from imerged to iunmerged
        "ijob_to_nevents": {},  # map from ijob to (nevents, nevents_eff)
        "nevents_DAS": 0,
        "nevents_unmerged": 0,
        "nevents_merged": 0,
    }
    self.sample["crab"]["requestname"] = self.sample["shortname"][:99]  # damn crab has size limit for name
    self.sample["crab"]["outputdir"] = None
    self.sample["crab"]["taskdir"] = self.misc["pfx_crab"]+"/crab_"+self.sample["crab"]["requestname"]
    self.sample["crab"]["datetime"] = None  # "160220_151313" from crab request name
    self.sample["crab"]["resubmissions"] = 0  # number of times we've "successfully" resubmitted a crab job
    self.sample["crab"]["jobs_left"] = []  # keep track of job ids that are not done
    self.sample["crab"]["jobs_left_tail"] = []  # keep track of job ids that are taking forever (in the tail)

    self.logger_callback = None
    self.crab_status_res = {}

    self.set_sample_specifics()
    self.load()  # load backup of this sample when we instantiate it
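# --- Usage sketch (illustrative, not from the original sources) ---
# "Samples" is a placeholder for the class owning this constructor; the
# dataset, gtag, and xsec values are examples only. With debug=True most
# pipeline steps (status, merging, checks, copying) are faked, but note
# that fake_submission stays False in both branches above.
s = Samples(
    dataset="/TTJets_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISpring15MiniAODv2-74X_mcRun2_asymptotic_v2-v1/MINIAODSIM",
    gtag="74X_mcRun2_asymptotic_v2",
    kfact=1.0,
    efact=1.0,
    xsec=831.76,
    debug=True,
)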
def main():
    sql_table_creation = """CREATE TABLE IF NOT EXISTS crabJobStatuses (
        directory TEXT PRIMARY KEY,
        status TEXT,
        dataset TEXT
    );"""
    conn = sqlite3.connect(JOB_STATUS_DB)
    crabTaskListByPass = []
    with conn:
        c = conn.cursor()
        c.execute(sql_table_creation)
        for row in c.execute("SELECT * FROM crabJobStatuses WHERE status='completed'"):
            crabTaskListByPass.append(row[0])
        for row in c.execute("SELECT * FROM crabJobStatuses WHERE status='failed-emptycache'"):
            crabTaskListByPass.append(row[0])

    crabTaskList = [
        os.path.join(CRAB_WORK_DIR, d)
        for d in os.listdir(CRAB_WORK_DIR)
        if os.path.isdir("%s/%s" % (CRAB_WORK_DIR, d))
        and (datetime.now() - datetime.strptime(d.rsplit("_", 1)[-1], "%y%m%d-%H%M%S")).days < MOST_RECENT_DAYS
    ]
    crabTaskListToCheck = [t for t in crabTaskList if t not in crabTaskListByPass]
    print("Checking tasks submitted for most recent {} day(s), total tasks to check: {}. Checking...".format(
        MOST_RECENT_DAYS, len(crabTaskListToCheck)))

    # crabLoggers = getLoggers()
    setConsoleLogLevel(LOGLEVEL_MUTE)

    crabTaskStatuses = []
    if ASYNC_CHECK:
        p = ThreadPool()
        r = p.map_async(checkSingleTask, crabTaskListToCheck,
                        callback=crabTaskStatuses.extend)
        r.wait()
        p.close()
    else:
        crabTaskStatuses = [checkSingleTask(d) for d in crabTaskListToCheck]

    task_completed = []
    task_failed = []
    task_submitfailed = []
    task_tapecall = []
    task_others = []
    task_exception = []
    for d in crabTaskStatuses:
        if d.get("exception", False):
            task_exception.append(d)
        else:
            _status = d.get("status", None)
            if _status == "completed":
                task_completed.append(d)
            elif _status == "failed" or d['jobsperstatus'].get('failed', None):
                task_failed.append(d)
            elif _status == "submitfailed":
                task_submitfailed.append(d)
            elif _status == "tapecall":
                task_tapecall.append(d)
            else:
                task_others.append(d)

    # updating local db
    conn = sqlite3.connect(JOB_STATUS_DB)
    with conn:
        c = conn.cursor()
        set_complete = [(t["directory"], "completed", t["outdatasets"])
                        for t in task_completed]
        c.executemany("INSERT OR REPLACE INTO crabJobStatuses VALUES (?,?,?)", set_complete)
        set_noncomplete = [(t["directory"], "failed", "")
                           for t in task_failed + task_tapecall + task_others]
        exceptedTasks = [(t["directory"], "failed-emptycache", "")
                         for t in task_exception if ".requestcache" in t["msg"]]
        if exceptedTasks:
            print("Number of tasks excepted when querying: ", len(exceptedTasks))
            print(*[t[0] for t in exceptedTasks], sep="\n")
            set_noncomplete.extend(exceptedTasks)
        c.executemany("INSERT OR REPLACE INTO crabJobStatuses VALUES (?,?,?)", set_noncomplete)

    print("Trying to resubmit {} task(s) ...".format(len(task_failed)))
    crabResubmitResult = []
    p = ThreadPool()
    r = p.map_async(resubmitSingleTask, task_failed,
                    callback=crabResubmitResult.extend)
    r.wait()
    p.close()
    resubmittedTasks = [t for t in crabResubmitResult if t]
    resubTaskSuccess, resubTaskFail = [], []
    for t in resubmittedTasks:
        if t.get("success", False):
            resubTaskSuccess.append(t)
        else:
            resubTaskFail.append(t)
    print("Successfully resubmitted {} task(s), !Yay!".format(len(resubTaskSuccess)))

    print("Writing Check&Resub result to:\n\t", LOGSHEET)
    with open(LOGSHEET, "w") as of:
        of.write("Generated at " + time.asctime() + "\n")
        of.write("=" * 79 + "\n\n")
        if task_completed:
            of.write("Completed tasks: [{}]\n".format(len(task_completed)))
            of.write("===========================\n")
            for t in task_completed:
                toprint = "directory: {0}\ntask: {1}\ndataset: {2}\n\n".format(
                    t["directory"], t["task"], t["outdatasets"])
                of.write(toprint)
            of.write("-" * 79 + "\n\n")
        if task_others:
            of.write("Other tasks: [{}]\n".format(len(task_others)))
            of.write("===========================\n")
            for t in task_others:
                toprint = "directory: {0}\ntask: {1}\nstatus: {2}\njobsPerStatus: {3}\npublication: {4}\n\n".format(
                    t["directory"], t["task"], t["status"],
                    str(t["jobsperstatus"]), str(t["publication"]))
                of.write(toprint)
            of.write("-" * 79 + "\n\n")
        if task_failed:
            of.write("Failed tasks: [{}]\n".format(len(task_failed)))
            of.write("===========================\n")
            for t in task_failed:
                toprint = "directory: {0}\ntask: {1}\njobsPerStatus: {2}\npublication: {3}\n\n".format(
                    t["directory"], t["task"],
                    str(t["jobsperstatus"]), str(t["publication"]))
                of.write(toprint)
            of.write("-" * 79 + "\n\n")
        if task_submitfailed:
            of.write("Submitfailed tasks: [{}]\n".format(len(task_submitfailed)))
            of.write("===========================\n")
            print("Following tasks failed to submit:\n")
            for t in task_submitfailed:
                towrite = "directory: {}\n".format(t["directory"])
                of.write(towrite)
                print("crab resubmit -d {}".format(t["directory"]))
            of.write("-" * 79 + "\n\n")
        if task_tapecall:
            of.write("Tasks in tapecall state: [{}]\n".format(len(task_tapecall)))
            of.write("===============================\n")
            print("Following tasks are in tapecall state:\n")
            for t in task_tapecall:
                towrite = "directory: {}\n".format(t["directory"])
                of.write(towrite)
            of.write("-" * 79 + "\n\n")
        if task_exception:
            of.write("Exception tasks [{}]:\n".format(len(task_exception)))
            of.write("===========================\n")
            print("Resubmit manually for the following:")
            for t in task_exception:
                toprint = "directory: {0}\nmessage: {1}\nqueryResult: {2}\n\n".format(
                    t["directory"], t["msg"], t["queryResult"])
                of.write(toprint)
                print("crab resubmit -d {}".format(t["directory"]))
            of.write("+" * 79 + "\n\n")
        if resubTaskSuccess:
            of.write("Successfully resubmitted tasks: [{}]\n".format(len(resubTaskSuccess)))
            of.write("====================================\n")
            for d in resubTaskSuccess:
                of.write(d["directory"] + "\n")
            of.write("-" * 79 + "\n\n")
        if resubTaskFail:
            of.write("Failed resubmitted tasks: [{}]\n".format(len(resubTaskFail)))
            of.write("==============================\n")
            print("Following tasks failed to resubmit:\n")
            for d in resubTaskFail:
                of.write(d["directory"] + "\n")
                of.write(d["exceptionMsg"] + "\n\n")
                print("crab resubmit -d {}".format(d["directory"]))
            of.write("-" * 79 + "\n\n")
import tarfile
import xml.etree.ElementTree as ET
import imp
import optparse
import subprocess
import logging
import datetime
import uuid
from httplib import HTTPException
from multiprocessing import Process, Queue

from CRABAPI.RawCommand import crabCommand
from CRABClient.UserUtilities import getConsoleLogLevel, setConsoleLogLevel
from CRABClient.ClientUtilities import LOGLEVEL_MUTE
from CRABClient.ClientExceptions import CachefileNotFoundException
setConsoleLogLevel(LOGLEVEL_MUTE)

import gridFunctions
import dbutilscms
import aix3adb
from aix3adb import Aix3adbException


## The CrabController class
#
# This class can be used to manage Analyses using crab3
class CrabController:

    ## The constructor.