Example #1
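Status lookup for DIRAC jobs selected from an ID file, a job group (limited to the last 30 days), or positional JobID arguments. DIRAC API calls return result dictionaries carrying "OK" plus "Value" or "Message", which every snippet below checks before using the payload.

The snippets on this page are excerpts from full command-line scripts, so the module-level prologue is cut off. A minimal sketch of the assumed imports follows; the exact set varies per script and per DIRAC release, so treat it as an assumption rather than the original header:

import os        # the File= switch handlers test os.path.exists
import shutil    # sandbox cleanup in the output-retrieval examples
import datetime  # date arithmetic in the selection examples
import DIRAC     # provides DIRAC.exit and DIRAC.gLogger
from DIRAC import gLogger
from DIRAC.Core.Base.Script import Script   # older releases: from DIRAC.Core.Base import Script
from shutil import rmtree                   # Example #14 calls rmtree unqualified
from functools import cmp_to_key            # Example #15 sorts worker nodes with cmp_to_key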
def main():
    Script.registerSwitch("f:", "File=",
                          "Get status for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=",
                          "Get status for jobs in the given group")

    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()

    from DIRAC import exit as DIRACExit
    from DIRAC.Core.Utilities.Time import toString, date, day
    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments

    dirac = Dirac()
    exitCode = 0

    jobs = []
    for key, value in Script.getUnprocessedSwitches():
        if key.lower() in ('f', 'file'):
            if os.path.exists(value):
                jFile = open(value)
                jobs += jFile.read().split()
                jFile.close()
        elif key.lower() in ('g', 'jobgroup'):
            jobDate = toString(date() - 30 * day)
            # Choose jobs no more than 30 days old
            result = dirac.selectJobs(jobGroup=value, date=jobDate)
            if not result['OK']:
                print("Error:", result['Message'])
                DIRACExit(-1)
            jobs += result['Value']

    if len(args) < 1 and not jobs:
        Script.showHelp(exitCode=1)

    if len(args) > 0:
        jobs += parseArguments(args)

    result = dirac.getJobStatus(jobs)
    if result['OK']:
        for job in result['Value']:
            print('JobID=' + str(job), end=' ')
            for status in result['Value'][job].items():
                print('%s=%s;' % status, end=' ')
            print()
    else:
        exitCode = 2
        print("ERROR: %s" % result['Message'])

    DIRACExit(exitCode)
Example #2
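The same status lookup as Example #1, updated to the newer Script API: registerArgument documents the positional JobIDs in the help menu, and parseCommandLine returns the switches and arguments directly.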
def main():
    Script.registerSwitch("f:", "File=",
                          "Get status for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=",
                          "Get status for jobs in the given group")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(["JobID:    DIRAC Job ID"], mandatory=False)
    sws, args = Script.parseCommandLine(ignoreErrors=True)

    from DIRAC import exit as DIRACExit
    from DIRAC.Core.Utilities.Time import toString, date, day
    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments

    dirac = Dirac()
    exitCode = 0

    jobs = []
    for key, value in sws:
        if key.lower() in ("f", "file"):
            if os.path.exists(value):
                jFile = open(value)
                jobs += jFile.read().split()
                jFile.close()
        elif key.lower() in ("g", "jobgroup"):
            jobDate = toString(date() - 30 * day)
            # Choose jobs no more than 30 days old
            result = dirac.selectJobs(jobGroup=value, date=jobDate)
            if not result["OK"]:
                print("Error:", result["Message"])
                DIRACExit(-1)
            jobs += result["Value"]

    if len(args) < 1 and not jobs:
        Script.showHelp(exitCode=1)

    if len(args) > 0:
        jobs += parseArguments(args)

    result = dirac.getJobStatus(jobs)
    if result["OK"]:
        for job in result["Value"]:
            print("JobID=" + str(job), end=" ")
            for status in result["Value"][job].items():
                print("%s=%s;" % status, end=" ")
            print()
    else:
        exitCode = 2
        print("ERROR: %s" % result["Message"])

    DIRACExit(exitCode)
Example #3
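Deletes the selected jobs with dirac.deleteJob, using the newer TimeUtilities module for the 30-day cutoff and tolerating empty selections.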
def main():
    Script.registerSwitch("f:", "File=",
                          "Get output for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=",
                          "Get output for jobs in the given group")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(["JobID:    DIRAC Job ID"], mandatory=False)
    sws, args = Script.parseCommandLine(ignoreErrors=True)

    import DIRAC
    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments
    from DIRAC.Core.Utilities.TimeUtilities import toString, day

    dirac = Dirac()

    jobs = []
    for sw, value in sws:
        if sw.lower() in ("f", "file"):
            if os.path.exists(value):
                jFile = open(value)
                jobs += jFile.read().split()
                jFile.close()
        elif sw.lower() in ("g", "jobgroup"):
            group = value
            jobDate = toString(datetime.datetime.utcnow().date() - 30 * day)
            result = dirac.selectJobs(jobGroup=value, date=jobDate)
            if not result["OK"]:
                if "No jobs selected" not in result["Message"]:
                    print("Error:", result["Message"])
                    DIRAC.exit(-1)
            else:
                jobs += result["Value"]

    for arg in parseArguments(args):
        jobs.append(arg)

    if not jobs:
        print("Warning: no jobs selected")
        Script.showHelp()
        DIRAC.exit(0)

    result = dirac.deleteJob(jobs)
    if result["OK"]:
        print("Deleted jobs %s" % ",".join([str(j) for j in result["Value"]]))
        exitCode = 0
    else:
        print(result["Message"])
        exitCode = 2

    DIRAC.exit(exitCode)
Example #4
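The same deletion logic as Example #3 in the older style, fetching the switches and positional arguments from Script after parsing.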
def main():
    Script.registerSwitch("f:", "File=",
                          "Get output for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=",
                          "Get output for jobs in the given group")

    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()

    import DIRAC
    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments
    from DIRAC.Core.Utilities.Time import toString, date, day
    dirac = Dirac()

    jobs = []
    for sw, value in Script.getUnprocessedSwitches():
        if sw.lower() in ('f', 'file'):
            if os.path.exists(value):
                jFile = open(value)
                jobs += jFile.read().split()
                jFile.close()
        elif sw.lower() in ('g', 'jobgroup'):
            group = value
            jobDate = toString(date() - 30 * day)
            result = dirac.selectJobs(jobGroup=value, date=jobDate)
            if not result['OK']:
                if "No jobs selected" not in result['Message']:
                    print("Error:", result['Message'])
                    DIRAC.exit(-1)
            else:
                jobs += result['Value']

    for arg in parseArguments(args):
        jobs.append(arg)

    if not jobs:
        print("Warning: no jobs selected")
        Script.showHelp()
        DIRAC.exit(0)

    result = dirac.deleteJob(jobs)
    if result['OK']:
        print('Deleted jobs %s' % ','.join([str(j) for j in result['Value']]))
        exitCode = 0
    else:
        print(result['Message'])
        exitCode = 2

    DIRAC.exit(exitCode)
Example #5
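Helper returning the job IDs for a given owner and job group submitted within the last n_hours hours; it exits if the selection carries no "Value".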
def get_job_list(owner, job_group, n_hours):
    ''' get a list of jobs for a selection
    '''
    from DIRAC.Interfaces.API.Dirac import Dirac
    dirac = Dirac()

    now = datetime.datetime.now()
    onehour = datetime.timedelta(hours=1)
    results = dirac.selectJobs(jobGroup=job_group,
                               owner=owner,
                               date=now - n_hours * onehour)
    if 'Value' not in results:
        DIRAC.gLogger.error(
            "No job found for group \"%s\" and owner \"%s\" in the past %s hours"
            % (job_group, owner, n_hours))
        DIRAC.exit(-1)

    # Found some jobs, return the list of IDs
    jobs_list = results['Value']
    return jobs_list
Example #6
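Downloads the output sandboxes of jobs in a final state (Done or Failed), skipping jobs whose output directory already exists (older two-space style).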
def main():
  Script.registerSwitch("D:", "Dir=", "Store the output in this directory")
  Script.registerSwitch("f:", "File=", "Get output for jobs with IDs from the file")
  Script.registerSwitch("g:", "JobGroup=", "Get output for jobs in the given group")

  Script.parseCommandLine(ignoreErrors=True)
  args = Script.getPositionalArgs()

  from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments
  from DIRAC.Core.Utilities.Time import toString, date, day
  from DIRAC.Core.Utilities.File import mkDir

  dirac = Dirac()
  exitCode = 0
  errorList = []

  outputDir = None
  group = None
  jobs = []
  for sw, value in Script.getUnprocessedSwitches():
    if sw in ('D', 'Dir'):
      outputDir = value
    elif sw.lower() in ('f', 'file'):
      if os.path.exists(value):
        jFile = open(value)
        jobs += jFile.read().split()
        jFile.close()
    elif sw.lower() in ('g', 'jobgroup'):
      group = value
      jobDate = toString(date() - 30 * day)

      # Choose jobs in final state, no more than 30 days old
      result = dirac.selectJobs(jobGroup=value, date=jobDate, status='Done')
      if not result['OK']:
        if "No jobs selected" not in result['Message']:
          print("Error:", result['Message'])
          DIRAC.exit(-1)
      else:
        jobs += result['Value']
      result = dirac.selectJobs(jobGroup=value, date=jobDate, status='Failed')
      if not result['OK']:
        if "No jobs selected" not in result['Message']:
          print("Error:", result['Message'])
          DIRAC.exit(-1)
      else:
        jobs += result['Value']

  for arg in parseArguments(args):
    if os.path.isdir(arg):
      print("Output for job %s already retrieved, remove the output directory to redownload" % arg)
    else:
      jobs.append(arg)

  if not jobs:
    print("No jobs selected")
    DIRAC.exit(0)

  if group:
    if outputDir:
      outputDir = os.path.join(outputDir, group)
    else:
      outputDir = group

  if outputDir:
    mkDir(outputDir)
  else:
    outputDir = os.getcwd()

  jobs = [str(job) for job in jobs]
  doneJobs = os.listdir(outputDir)
  todoJobs = [job for job in jobs if job not in doneJobs]

  for job in todoJobs:

    result = dirac.getOutputSandbox(job, outputDir=outputDir)

    jobDir = str(job)
    if outputDir:
      jobDir = os.path.join(outputDir, job)
    if result['OK']:
      if os.path.exists(jobDir):
        print('Job output sandbox retrieved in %s/' % (jobDir))
    else:
      if os.path.exists('%s' % jobDir):
        shutil.rmtree(jobDir)
      errorList.append((job, result['Message']))
      exitCode = 2

  for error in errorList:
    print("ERROR %s: %s" % error)

  DIRAC.exit(exitCode)
Example #7
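Summarises the jobs at a site (BOINC.World.org by default) per worker node and batch ID, with selection by status and date range.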
def main():
    site = 'BOINC.World.org'
    status = ["Running"]
    minorStatus = None
    workerNodes = None
    since = None
    date = 'today'
    full = False
    until = None
    batchIDs = None
    Script.registerSwitch('', 'Site=', '   Select site (default: %s)' % site)
    Script.registerSwitch('', 'Status=',
                          '   Select status (default: %s)' % status)
    Script.registerSwitch('', 'MinorStatus=', '   Select minor status')
    Script.registerSwitch('', 'WorkerNode=', '  Select WN')
    Script.registerSwitch('', 'BatchID=', '  Select batch jobID')
    Script.registerSwitch(
        '', 'Since=',
        '   Date since when to select jobs, or number of days (default: today)'
    )
    Script.registerSwitch('', 'Date=',
                          '   Specify the date (check for a full day)')
    Script.registerSwitch(
        '', 'Full',
        '   Printout full list of job (default: False except if --WorkerNode)')

    Script.parseCommandLine()
    from DIRAC import gLogger
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient

    switches = Script.getUnprocessedSwitches()
    for switch in switches:
        if switch[0] == 'Site':
            site = switch[1]
        elif switch[0] == 'MinorStatus':
            minorStatus = switch[1]
        elif switch[0] == 'Status':
            if switch[1].lower() == 'all':
                status = [None]
            else:
                status = switch[1].split(',')
        elif switch[0] == 'WorkerNode':
            workerNodes = switch[1].split(',')
        elif switch[0] == 'BatchID':
            try:
                batchIDs = [int(id) for id in switch[1].split(',')]
            except ValueError:
                gLogger.error('Invalid jobID', switch[1])
                DIRAC.exit(1)
        elif switch[0] == 'Full':
            full = True
        elif switch[0] == 'Date':
            since = switch[1].split()[0]
            until = str(
                datetime.datetime.strptime(since, '%Y-%m-%d') +
                datetime.timedelta(days=1)).split()[0]
        elif switch[0] == 'Since':
            date = switch[1].lower()
            if date == 'today':
                since = None
            elif date == 'yesterday':
                since = 1
            elif date == 'ever':
                since = 2 * 365
            elif date.isdigit():
                since = int(date)
                date += ' days'
            else:
                since = date
            if isinstance(since, int):
                since = str(datetime.datetime.now() -
                            datetime.timedelta(days=since)).split()[0]

    if workerNodes or batchIDs:
        # status = [None]
        full = True

    monitoring = JobMonitoringClient()
    dirac = Dirac()

    # Get jobs according to selection
    jobs = set()
    for stat in status:
        res = dirac.selectJobs(site=site,
                               date=since,
                               status=stat,
                               minorStatus=minorStatus)
        if not res['OK']:
            gLogger.error('Error selecting jobs', res['Message'])
            DIRAC.exit(1)
        allJobs = set(int(job) for job in res['Value'])
        if until:
            res = dirac.selectJobs(site=site, date=until, status=stat)
            if not res['OK']:
                gLogger.error('Error selecting jobs', res['Message'])
                DIRAC.exit(1)
            allJobs -= set(int(job) for job in res['Value'])
        jobs.update(allJobs)
    if not jobs:
        gLogger.always('No jobs found...')
        DIRAC.exit(0)
    # res = monitoring.getJobsSummary( jobs )
    # print eval( res['Value'] )[jobs[0]]

    allJobs = set()
    result = {}
    wnJobs = {}
    gLogger.always('%d jobs found' % len(jobs))
    # Get host name
    for job in jobs:
        res = monitoring.getJobParameter(job, 'HostName')
        node = res.get('Value', {}).get('HostName', 'Unknown')
        res = monitoring.getJobParameter(job, 'LocalJobID')
        batchID = res.get('Value', {}).get('LocalJobID', 'Unknown')
        if workerNodes:
            if not [wn for wn in workerNodes if node.startswith(wn)]:
                continue
            allJobs.add(job)
        if batchIDs:
            if batchID not in batchIDs:
                continue
            allJobs.add(job)
        if full or status == [None]:
            allJobs.add(job)
        result.setdefault(job, {})['Status'] = status
        result[job]['Node'] = node
        result[job]['LocalJobID'] = batchID
        wnJobs[node] = wnJobs.setdefault(node, 0) + 1

    # If necessary get jobs' status
    statusCounters = {}
    if allJobs:
        allJobs = sorted(allJobs, reverse=True)
        res = monitoring.getJobsStatus(allJobs)
        if res['OK']:
            jobStatus = res['Value']
            res = monitoring.getJobsMinorStatus(allJobs)
            if res['OK']:
                jobMinorStatus = res['Value']
                res = monitoring.getJobsApplicationStatus(allJobs)
                if res['OK']:
                    jobApplicationStatus = res['Value']
        if not res['OK']:
            gLogger.error('Error getting job parameter', res['Message'])
        else:
            for job in allJobs:
                stat = jobStatus.get(job, {}).get('Status', 'Unknown') + '; ' + \
                    jobMinorStatus.get(job, {}).get('MinorStatus', 'Unknown') + '; ' + \
                    jobApplicationStatus.get(job, {}).get('ApplicationStatus', 'Unknown')
                result[job]['Status'] = stat
                statusCounters[stat] = statusCounters.setdefault(stat, 0) + 1
    elif not workerNodes and not batchIDs:
        allJobs = sorted(jobs, reverse=True)

    # Print out result
    if workerNodes or batchIDs:
        gLogger.always('Found %d jobs at %s, WN %s (since %s):' %
                       (len(allJobs), site, workerNodes, date))
        if allJobs:
            gLogger.always('List of jobs:',
                           ','.join([str(job) for job in allJobs]))
    else:
        if status == [None]:
            gLogger.always('Found %d jobs at %s (since %s):' %
                           (len(allJobs), site, date))
            for stat in sorted(statusCounters):
                gLogger.always('%d jobs %s' % (statusCounters[stat], stat))
        else:
            gLogger.always('Found %d jobs %s at %s (since %s):' %
                           (len(allJobs), status, site, date))
        gLogger.always(
            'List of WNs:', ','.join([
                '%s (%d)' % (node, wnJobs[node])
                for node in sorted(wnJobs, key=wnJobs.get, reverse=True)
            ]))
    if full:
        if workerNodes or batchIDs:
            nodeJobs = {}
            for job in allJobs:
                status = result[job]['Status']
                node = result[job]['Node'].split('.')[0]
                jobID = result[job].get('LocalJobID')
                nodeJobs.setdefault(node, []).append((jobID, job, status))
            if not workerNodes:
                workerNodes = sorted(nodeJobs)
            for node in workerNodes:
                for job in nodeJobs.get(node.split('.')[0], []):
                    gLogger.always('%s ' % node + '(%s): %s - %s' % job)
        else:
            for job in allJobs:
                status = result[job]['Status']
                node = result[job]['Node']
                jobID = result[job].get('LocalJobID')
                gLogger.always('%s (%s): %s - %s' % (node, jobID, job, status))
Example #8
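A Python 2 fragment of the sandbox-download logic from Example #6, covering only the switch-parsing loop; the excerpt is truncated at the argument loop.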
group = None
jobs = []
for sw, value in Script.getUnprocessedSwitches():
  if sw in ( 'D', 'Dir' ):
    outputDir = value
  elif sw.lower() in ( 'f', 'file' ):
    if os.path.exists( value ):
      jFile = open( value )
      jobs += jFile.read().split()
      jFile.close()
  elif sw.lower() in ( 'g', 'jobgroup' ):
    group = value
    jobDate = toString( date() - 30 * day )

    # Choose jobs in final state, no more than 30 days old
    result = dirac.selectJobs( jobGroup = value, date = jobDate, status = 'Done' )
    if not result['OK']:
      if not "No jobs selected" in result['Message']:
        print "Error:", result['Message']
        DIRAC.exit( -1 )
    else:
      jobs += result['Value']
    result = dirac.selectJobs( jobGroup = value, date = jobDate, status = 'Failed' )
    if not result['OK']:
      if not "No jobs selected" in result['Message']:
        print "Error:", result['Message']
        DIRAC.exit( -1 )
    else:
      jobs += result['Value']

for arg in parseArguments( args ):
Example #9
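A Python 2 fragment of the status lookup from Example #1; the excerpt is truncated inside the print loop.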
from DIRAC.Interfaces.API.Dirac  import Dirac, parseArguments
dirac = Dirac()
exitCode = 0

jobs = []
for key, value in Script.getUnprocessedSwitches():
  if key.lower() in ( 'f', 'file' ):
    if os.path.exists( value ):
      jFile = open( value )
      jobs += jFile.read().split()
      jFile.close()
  elif key.lower() in ( 'g', 'jobgroup' ):
    jobDate = toString( date() - 30 * day )
    # Choose jobs no more than 30 days old
    result = dirac.selectJobs( jobGroup = value, date = jobDate )
    if not result['OK']:
      print "Error:", result['Message']
      DIRACExit( -1 )
    jobs += result['Value']

if len( args ) < 1 and not jobs:
  Script.showHelp()

if len( args ) > 0:
  jobs += parseArguments( args )

result = dirac.getJobStatus( jobs )
if result['OK']:
  for job in result['Value']:
    print 'JobID=' + str( job ),
Example #10
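The sandbox-download script from Example #6 in the newer four-space style, with the JobID argument registered in the help menu.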
def main():
    Script.registerSwitch("D:", "Dir=", "Store the output in this directory")
    Script.registerSwitch("f:", "File=", "Get output for jobs with IDs from the file")
    Script.registerSwitch("g:", "JobGroup=", "Get output for jobs in the given group")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(["JobID: DIRAC Job ID or a name of the file with JobID per line"], mandatory=False)
    sws, args = Script.parseCommandLine(ignoreErrors=True)

    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments
    from DIRAC.Core.Utilities.Time import toString, date, day
    from DIRAC.Core.Utilities.File import mkDir

    dirac = Dirac()
    exitCode = 0
    errorList = []

    outputDir = None
    group = None
    jobs = []
    for sw, value in sws:
        if sw in ("D", "Dir"):
            outputDir = value
        elif sw.lower() in ("f", "file"):
            if os.path.exists(value):
                jFile = open(value)
                jobs += jFile.read().split()
                jFile.close()
        elif sw.lower() in ("g", "jobgroup"):
            group = value
            jobDate = toString(date() - 30 * day)

            # Choose jobs in final state, no more than 30 days old
            result = dirac.selectJobs(jobGroup=value, date=jobDate, status="Done")
            if not result["OK"]:
                if "No jobs selected" not in result["Message"]:
                    print("Error:", result["Message"])
                    DIRAC.exit(-1)
            else:
                jobs += result["Value"]
            result = dirac.selectJobs(jobGroup=value, date=jobDate, status="Failed")
            if not result["OK"]:
                if "No jobs selected" not in result["Message"]:
                    print("Error:", result["Message"])
                    DIRAC.exit(-1)
            else:
                jobs += result["Value"]

    for arg in parseArguments(args):
        if os.path.isdir(arg):
            print("Output for job %s already retrieved, remove the output directory to redownload" % arg)
        else:
            jobs.append(arg)

    if not jobs:
        print("No jobs selected")
        DIRAC.exit(0)

    if group:
        if outputDir:
            outputDir = os.path.join(outputDir, group)
        else:
            outputDir = group

    if outputDir:
        mkDir(outputDir)
    else:
        outputDir = os.getcwd()

    jobs = [str(job) for job in jobs]
    doneJobs = os.listdir(outputDir)
    todoJobs = [job for job in jobs if job not in doneJobs]

    for job in todoJobs:

        result = dirac.getOutputSandbox(job, outputDir=outputDir)

        jobDir = str(job)
        if outputDir:
            jobDir = os.path.join(outputDir, job)
        if result["OK"]:
            if os.path.exists(jobDir):
                print("Job output sandbox retrieved in %s/" % (jobDir))
        else:
            if os.path.exists("%s" % jobDir):
                shutil.rmtree(jobDir)
            errorList.append((job, result["Message"]))
            exitCode = 2

    for error in errorList:
        print("ERROR %s: %s" % error)

    DIRAC.exit(exitCode)
Example #11
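A fragment of the selection script from Example #17, truncated at both ends: the tail of the conditions dictionary and the per-group selectJobs calls.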
    'Owner': owner,
    'JobGroup': ','.join(str(jg) for jg in jobGroups),
    'Date': selDate
}

from DIRAC.Interfaces.API.Dirac import Dirac

dirac = Dirac()
jobs = []

if jobGroups:
    for jobGroup in jobGroups:
        res = dirac.selectJobs(status=status,
                               minorStatus=minorStatus,
                               applicationStatus=appStatus,
                               site=site,
                               owner=owner,
                               jobGroup=jobGroup,
                               date=date,
                               printErrors=False)
        if res['OK']:
            jobs.extend(res['Value'])
        else:
            gLogger.error("Can't select jobs: ", res['Message'])
else:
    res = dirac.selectJobs(status=status,
                           minorStatus=minorStatus,
                           applicationStatus=appStatus,
                           site=site,
                           owner=owner,
                           date=date,
                           printErrors=False)
Example #12
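Collects the names of jobs currently Waiting, Running or Checking over the last two days for the given username.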
# ##    ##  ##     ## ##   ### ##   ###  ##  ##   ### ##    ##
# ##     ##  #######  ##    ## ##    ## #### ##    ##  ######

# get jobs from today and yesterday...
days = []
for i in range(2):  # how many days do you want to look back?
    days.append((datetime.date.today() - datetime.timedelta(days=i)).isoformat())

# get list of run_tokens that are currently running / waiting
running_ids = set()
running_names = []
for status in ["Waiting", "Running", "Checking"]:
    for day in days:
        try:
            [running_ids.add(id) for id in dirac.selectJobs(
                status=status, date=day,
                owner=username)['Value']]
        except KeyError:
            pass

n_jobs = len(running_ids)
if n_jobs > 0:
    print("getting names from {} running/waiting jobs... please wait..."
          .format(n_jobs))
    for i, id in enumerate(running_ids):
        if ((100 * i) // n_jobs) % 5 == 0:
            print("\r{} %".format(((20 * i) // n_jobs) * 5), end="")
        jobname = dirac.attributes(id)["Value"]["JobName"]
        running_names.append(jobname)
    else:
        print("\n... done")
Example #13
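The same snippet as Example #12 with the owner hard-coded.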
# ##    ##  ##     ## ##   ### ##   ###  ##  ##   ### ##    ##
# ##     ##  #######  ##    ## ##    ## #### ##    ##  ######

# get jobs from today and yesterday...
days = []
for i in range(2):  # how many days do you want to look back?
    days.append((datetime.date.today() - datetime.timedelta(days=i)).isoformat())

# get list of run_tokens that are currently running / waiting
running_ids = set()
running_names = []
for status in ["Waiting", "Running", "Checking"]:
    for day in days:
        try:
            [running_ids.add(id) for id in dirac.selectJobs(
                status=status, date=day,
                owner="tmichael")['Value']]
        except KeyError:
            pass

n_jobs = len(running_ids)
if n_jobs > 0:
    print("getting names from {} running/waiting jobs... please wait..."
          .format(n_jobs))
    for i, id in enumerate(running_ids):
        if ((100 * i) // n_jobs) % 5 == 0:
            print("\r{} %".format(((20 * i) // n_jobs) * 5), end="")
        jobname = dirac.attributes(id)["Value"]["JobName"]
        running_names.append(jobname)
    else:
        print("\n... done")
Example #14
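Retrieves the output sandbox of each selected job, greps the search string in the chosen output file, and removes the sandbox afterwards.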
def main():
    Script.registerSwitch("", "Status=", "Primary status")
    Script.registerSwitch("", "MinorStatus=", "Secondary status")
    Script.registerSwitch("", "ApplicationStatus=", "Application status")
    Script.registerSwitch("", "Site=", "Execution site")
    Script.registerSwitch("", "Owner=", "Owner (DIRAC nickname)")
    Script.registerSwitch("", "JobGroup=",
                          "Select jobs for specified job group")
    Script.registerSwitch(
        "", "Date=",
        "Date in YYYY-MM-DD format, if not specified default is today")
    Script.registerSwitch("", "File=",
                          "File name,if not specified default is std.out ")
    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()

    # Default values
    status = None
    minorStatus = None
    appStatus = None
    site = None
    owner = None
    jobGroup = None
    date = None
    filename = 'std.out'

    if len(args) != 1:
        Script.showHelp()

    searchstring = str(args[0])

    for switch in Script.getUnprocessedSwitches():
        if switch[0].lower() == "status":
            status = switch[1]
        elif switch[0].lower() == "minorstatus":
            minorStatus = switch[1]
        elif switch[0].lower() == "applicationstatus":
            appStatus = switch[1]
        elif switch[0].lower() == "site":
            site = switch[1]
        elif switch[0].lower() == "owner":
            owner = switch[1]
        elif switch[0].lower() == "jobgroup":
            jobGroup = switch[1]
        elif switch[0].lower() == "date":
            date = switch[1]
        elif switch[0].lower() == "file":
            filename = switch[1]

    selDate = date
    if not date:
        selDate = 'Today'

    from DIRAC.Interfaces.API.Dirac import Dirac

    dirac = Dirac()
    exitCode = 0
    errorList = []
    resultDict = {}

    result = dirac.selectJobs(status=status,
                              minorStatus=minorStatus,
                              applicationStatus=appStatus,
                              site=site,
                              owner=owner,
                              jobGroup=jobGroup,
                              date=date)
    if result['OK']:
        jobs = result['Value']
    else:
        print("Error in selectJob", result['Message'])
        DIRAC.exit(2)

    for job in jobs:

        result = dirac.getOutputSandbox(job)
        if result['OK']:
            if os.path.exists('%s' % job):

                lines = []
                try:
                    lines = open(os.path.join(job, filename)).readlines()
                except Exception as x:
                    errorList.append((job, x))
                for line in lines:
                    if line.count(searchstring):
                        resultDict[job] = line
                rmtree("%s" % (job))
        else:
            errorList.append((job, result['Message']))
            exitCode = 2

    for result in resultDict.items():
        print(result)

    DIRAC.exit(exitCode)
Example #15
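A newer revision of Example #7: it fetches Status, MinorStatus and ApplicationStatus in a single getJobsStates call and sorts worker nodes with functools.cmp_to_key.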
def main():
    site = "BOINC.World.org"
    status = ["Running"]
    minorStatus = None
    workerNodes = None
    since = None
    date = "today"
    full = False
    until = None
    batchIDs = None
    Script.registerSwitch("", "Site=", "   Select site (default: %s)" % site)
    Script.registerSwitch("", "Status=",
                          "   Select status (default: %s)" % status)
    Script.registerSwitch("", "MinorStatus=", "   Select minor status")
    Script.registerSwitch("", "WorkerNode=", "  Select WN")
    Script.registerSwitch("", "BatchID=", "  Select batch jobID")
    Script.registerSwitch(
        "", "Since=",
        "   Date since when to select jobs, or number of days (default: today)"
    )
    Script.registerSwitch("", "Date=",
                          "   Specify the date (check for a full day)")
    Script.registerSwitch(
        "", "Full",
        "   Printout full list of job (default: False except if --WorkerNode)")

    Script.parseCommandLine()
    from DIRAC import gLogger
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient

    switches = Script.getUnprocessedSwitches()
    for switch in switches:
        if switch[0] == "Site":
            site = switch[1]
        elif switch[0] == "MinorStatus":
            minorStatus = switch[1]
        elif switch[0] == "Status":
            if switch[1].lower() == "all":
                status = [None]
            else:
                status = switch[1].split(",")
        elif switch[0] == "WorkerNode":
            workerNodes = switch[1].split(",")
        elif switch[0] == "BatchID":
            try:
                batchIDs = [int(id) for id in switch[1].split(",")]
            except Exception:
                gLogger.error("Invalid jobID", switch[1])
                DIRAC.exit(1)
        elif switch[0] == "Full":
            full = True
        elif switch[0] == "Date":
            since = switch[1].split()[0]
            until = str(
                datetime.datetime.strptime(since, "%Y-%m-%d") +
                datetime.timedelta(days=1)).split()[0]
        elif switch[0] == "Since":
            date = switch[1].lower()
            if date == "today":
                since = None
            elif date == "yesterday":
                since = 1
            elif date == "ever":
                since = 2 * 365
            elif date.isdigit():
                since = int(date)
                date += " days"
            else:
                since = date
            if isinstance(since, int):
                since = str(datetime.datetime.now() -
                            datetime.timedelta(days=since)).split()[0]

    if workerNodes or batchIDs:
        # status = [None]
        full = True

    monitoring = JobMonitoringClient()
    dirac = Dirac()

    # Get jobs according to selection
    jobs = set()
    for stat in status:
        res = dirac.selectJobs(site=site,
                               date=since,
                               status=stat,
                               minorStatus=minorStatus)
        if not res["OK"]:
            gLogger.error("Error selecting jobs", res["Message"])
            DIRAC.exit(1)
        allJobs = set(int(job) for job in res["Value"])
        if until:
            res = dirac.selectJobs(site=site, date=until, status=stat)
            if not res["OK"]:
                gLogger.error("Error selecting jobs", res["Message"])
                DIRAC.exit(1)
            allJobs -= set(int(job) for job in res["Value"])
        jobs.update(allJobs)
    if not jobs:
        gLogger.always("No jobs found...")
        DIRAC.exit(0)
    # res = monitoring.getJobsSummary( jobs )
    # print eval( res['Value'] )[jobs[0]]

    allJobs = set()
    result = {}
    wnJobs = {}
    gLogger.always("%d jobs found" % len(jobs))
    # Get host name
    for job in jobs:
        res = monitoring.getJobParameter(job, "HostName")
        node = res.get("Value", {}).get("HostName", "Unknown")
        res = monitoring.getJobParameter(job, "LocalJobID")
        batchID = res.get("Value", {}).get("LocalJobID", "Unknown")
        if workerNodes:
            if not [wn for wn in workerNodes if node.startswith(wn)]:
                continue
            allJobs.add(job)
        if batchIDs:
            if batchID not in batchIDs:
                continue
            allJobs.add(job)
        if full or status == [None]:
            allJobs.add(job)
        result.setdefault(job, {})["Status"] = status
        result[job]["Node"] = node
        result[job]["LocalJobID"] = batchID
        wnJobs[node] = wnJobs.setdefault(node, 0) + 1

    # If necessary get jobs' status
    statusCounters = {}
    if allJobs:
        allJobs = sorted(allJobs, reverse=True)
        res = monitoring.getJobsStates(allJobs)
        if not res["OK"]:
            gLogger.error("Error getting job parameter", res["Message"])
        else:
            jobStates = res["Value"]
            for job in allJobs:
                stat = (
                    jobStates.get(job, {}).get("Status", "Unknown") + "; " +
                    jobStates.get(job, {}).get("MinorStatus", "Unknown") +
                    "; " +
                    jobStates.get(job, {}).get("ApplicationStatus", "Unknown"))
                result[job]["Status"] = stat
                statusCounters[stat] = statusCounters.setdefault(stat, 0) + 1
    elif not workerNodes and not batchIDs:
        allJobs = sorted(jobs, reverse=True)

    # Print out result
    if workerNodes or batchIDs:
        gLogger.always("Found %d jobs at %s, WN %s (since %s):" %
                       (len(allJobs), site, workerNodes, date))
        if allJobs:
            gLogger.always("List of jobs:",
                           ",".join([str(job) for job in allJobs]))
    else:
        if status == [None]:
            gLogger.always("Found %d jobs at %s (since %s):" %
                           (len(allJobs), site, date))
            for stat in sorted(statusCounters):
                gLogger.always("%d jobs %s" % (statusCounters[stat], stat))
        else:
            gLogger.always("Found %d jobs %s at %s (since %s):" %
                           (len(allJobs), status, site, date))
        gLogger.always(
            "List of WNs:",
            ",".join([
                "%s (%d)" % (node, wnJobs[node]) for node in sorted(
                    wnJobs,
                    key=cmp_to_key(lambda n1, n2: (wnJobs[n2] - wnJobs[n1])))
            ]),
        )
    if full:
        if workerNodes or batchIDs:
            nodeJobs = {}
            for job in allJobs:
                status = result[job]["Status"]
                node = result[job]["Node"].split(".")[0]
                jobID = result[job].get("LocalJobID")
                nodeJobs.setdefault(node, []).append((jobID, job, status))
            if not workerNodes:
                workerNodes = sorted(nodeJobs)
            for node in workerNodes:
                for job in nodeJobs.get(node.split(".")[0], []):
                    gLogger.always("%s " % node + "(%s): %s - %s" % job)
        else:
            for job in allJobs:
                status = result[job]["Status"]
                node = result[job]["Node"]
                jobID = result[job].get("LocalJobID")
                gLogger.always("%s (%s): %s - %s" % (node, jobID, job, status))
Example #16
def main():
    """
    Launch job on the GRID
    """
    # this thing pilots everything related to the GRID
    dirac = Dirac()

    if switches["output_type"] in "TRAINING":
        print("Preparing submission for TRAINING data")
    elif switches["output_type"] in "DL2":
        print("Preparing submission for DL2 data")
    else:
        print("You have to choose either TRAINING or DL2 as output type!")
        sys.exit()

    # Read configuration file
    cfg = load_config(switches["config_file"])

    # Analysis
    config_path = cfg["General"]["config_path"]
    config_file = cfg["General"]["config_file"]
    mode = cfg["General"]["mode"]  # One mode for now
    particle = cfg["General"]["particle"]
    estimate_energy = cfg["General"]["estimate_energy"]
    force_tailcut_for_extended_cleaning = cfg["General"][
        "force_tailcut_for_extended_cleaning"]

    # Take parameters from the analysis configuration file
    ana_cfg = load_config(os.path.join(config_path, config_file))
    config_name = ana_cfg["General"]["config_name"]
    cam_id_list = ana_cfg["General"]["cam_id_list"]

    # Regressor and classifier methods
    regressor_method = ana_cfg["EnergyRegressor"]["method_name"]
    classifier_method = ana_cfg["GammaHadronClassifier"]["method_name"]

    # Someone might want to create DL2 without score or energy estimation
    if regressor_method in ["None", "none", None]:
        use_regressor = False
    else:
        use_regressor = True

    if classifier_method in ["None", "none", None]:
        use_classifier = False
    else:
        use_classifier = True

    # GRID
    outdir = os.path.join(cfg["GRID"]["outdir"], config_name)
    n_file_per_job = cfg["GRID"]["n_file_per_job"]
    n_jobs_max = cfg["GRID"]["n_jobs_max"]
    model_dir = cfg["GRID"]["model_dir"]
    training_dir_energy = cfg["GRID"]["training_dir_energy"]
    training_dir_classification = cfg["GRID"]["training_dir_classification"]
    dl2_dir = cfg["GRID"]["dl2_dir"]
    home_grid = cfg["GRID"]["home_grid"]
    user_name = cfg["GRID"]["user_name"]
    banned_sites = cfg["GRID"]["banned_sites"]

    # HACK
    if force_tailcut_for_extended_cleaning is True:
        print("Force tail cuts for extended cleaning!!!")

    # Prepare command to launch script
    source_ctapipe = "source /cvmfs/cta.in2p3.fr/software/conda/dev/setupConda.sh"
    source_ctapipe += " && conda activate ctapipe_v0.11.0"

    if switches["output_type"] in "TRAINING":
        execute = "data_training.py"
        script_args = [
            "--config_file={}".format(config_file),
            "--estimate_energy={}".format(str(estimate_energy)),
            "--regressor_config={}.yaml".format(regressor_method),
            "--regressor_dir=./",
            "--outfile {outfile}",
            "--indir ./ --infile_list={infile_name}",
            "--max_events={}".format(switches["max_events"]),
            "--{mode}",
            "--cam_ids {}".format(cam_id_list),
        ]
        output_filename_template = "TRAINING"
    elif switches["output_type"] in "DL2":
        execute = "write_dl2.py"
        script_args = [
            "--config_file={}".format(config_file),
            "--regressor_config={}.yaml".format(regressor_method),
            "--regressor_dir=./",
            "--classifier_config={}.yaml".format(classifier_method),
            "--classifier_dir=./",
            "--outfile {outfile}",
            "--indir ./ --infile_list={infile_name}",
            "--max_events={}".format(switches["max_events"]),
            "--{mode}",
            "--force_tailcut_for_extended_cleaning={}".format(
                force_tailcut_for_extended_cleaning),
            "--cam_ids {}".format(cam_id_list),
        ]
        output_filename_template = "DL2"

    # Make the script save also the full calibrated images if required
    if switches["save_images"] is True:
        script_args.append("--save_images")

    # Make the script print debug information if required
    if switches["debug_script"] is True:
        script_args.append("--debug")

    cmd = [source_ctapipe, "&&", "./" + execute]
    cmd += script_args

    pilot_args_write = " ".join(cmd)

    # For table merging if multiple runs
    pilot_args_merge = " ".join([
        source_ctapipe,
        "&&",
        "./merge_tables.py",
        "--template_file_name",
        "{in_name}",
        "--outfile",
        "{out_name}",
    ])

    prod3b_filelist = dict()
    if estimate_energy is False and switches["output_type"] in "TRAINING":
        prod3b_filelist["gamma"] = cfg["EnergyRegressor"]["gamma_list"]
    elif estimate_energy is True and switches["output_type"] in "TRAINING":
        prod3b_filelist["gamma"] = cfg["GammaHadronClassifier"]["gamma_list"]
        prod3b_filelist["proton"] = cfg["GammaHadronClassifier"]["proton_list"]
    elif switches["output_type"] in "DL2":
        prod3b_filelist["gamma"] = cfg["Performance"]["gamma_list"]
        prod3b_filelist["proton"] = cfg["Performance"]["proton_list"]
        prod3b_filelist["electron"] = cfg["Performance"]["electron_list"]

    # from IPython import embed
    # embed()

    # Split list of files according to storage elements
    with open(prod3b_filelist[particle]) as f:
        filelist = f.readlines()

    filelist = ["{}".format(_.replace("\n", "")) for _ in filelist]
    res = dirac.splitInputData(filelist, n_file_per_job)
    list_run_to_loop_on = res["Value"]

    # define a template name for the file that's going to be written out.
    # the placeholder braces are going to get set during the file-loop
    output_filename = output_filename_template
    output_path = outdir
    if estimate_energy is False and switches["output_type"] in "TRAINING":
        output_path += "/{}/".format(training_dir_energy)
        step = "energy"
    if estimate_energy is True and switches["output_type"] in "TRAINING":
        output_path += "/{}/".format(training_dir_classification)
        step = "classification"
    if switches["output_type"] in "DL2":
        if force_tailcut_for_extended_cleaning is False:
            output_path += "/{}/".format(dl2_dir)
        else:
            output_path += "/{}_force_tc_extended_cleaning/".format(dl2_dir)
        step = ""
    output_filename += "_{}.h5"

    # sets all the local files that are going to be uploaded with the job
    # plus the pickled classifier
    # if file name starts with `LFN:`, it will be copied from the GRID
    input_sandbox = [
        # Utility to assign one job to one command...
        os.path.expandvars("$GRID/pilot.sh"),
        os.path.expandvars("$PROTOPIPE/protopipe/"),
        os.path.expandvars("$GRID/merge_tables.py"),
        # python wrapper for the mr_filter wavelet cleaning
        # os.path.expandvars("$PYWI/pywi/"),
        # os.path.expandvars("$PYWICTA/pywicta/"),
        # script that is being run
        os.path.expandvars("$PROTOPIPE/protopipe/scripts/" + execute),
        # Configuration file
        os.path.expandvars(os.path.join(config_path, config_file)),
    ]

    models_to_upload = []
    configs_to_upload = []
    if estimate_energy is True and switches["output_type"] in "TRAINING":
        config_path_template = "LFN:" + os.path.join(home_grid, outdir,
                                                     model_dir, "{}.yaml")
        config_to_upload = config_path_template.format(regressor_method)
        model_path_template = "LFN:" + os.path.join(
            home_grid, outdir, model_dir, "regressor_{}_{}.pkl.gz")
        for cam_id in cam_id_list:

            model_to_upload = model_path_template.format(
                cam_id, regressor_method)  # TBC
            print("The following model(s) will be uploaded to the GRID:")
            print(model_to_upload)
            models_to_upload.append(model_to_upload)

        print(
            "The following configs(s) for such models will be uploaded to the GRID:"
        )
        print(config_to_upload)
        configs_to_upload.append(config_to_upload)
        # input_sandbox.append(model_to_upload)
    elif estimate_energy is False and switches["output_type"] in "TRAINING":
        pass
    else:  # Load also the classifier for DL2
        model_type_list = ["regressor", "classifier"]
        model_method_list = [regressor_method, classifier_method]
        config_path_template = "LFN:" + os.path.join(home_grid, outdir,
                                                     model_dir, "{}.yaml")
        model_path_template = "LFN:" + os.path.join(
            home_grid, outdir, model_dir, "{}_{}_{}.pkl.gz")
        if force_tailcut_for_extended_cleaning is True:
            force_mode = mode.replace("wave", "tail")
            print("################")
            print(force_mode)
        else:
            force_mode = mode

        for idx, model_type in enumerate(model_type_list):

            print(
                "The following configuration file will be uploaded to the GRID:"
            )

            config_to_upload = config_path_template.format(
                model_method_list[idx])
            print(config_to_upload)
            configs_to_upload.append(config_to_upload)  # upload only 1 copy

            print(
                "The following model(s) related to such configuration file will be uploaded to the GRID:"
            )

            for cam_id in cam_id_list:

                if model_type in "regressor" and use_regressor is False:
                    print("Do not upload regressor model on GRID!!!")
                    continue

                if model_type in "classifier" and use_classifier is False:
                    print("Do not upload classifier model on GRID!!!")
                    continue

                model_to_upload = model_path_template.format(
                    model_type_list[idx], cam_id, model_method_list[idx])
                print(model_to_upload)

                models_to_upload.append(model_to_upload)
                # input_sandbox.append(model_to_upload)

    # summary before submitting
    print("\nDEBUG> running as:")
    print(pilot_args_write)
    print("\nDEBUG> with input_sandbox:")
    print(input_sandbox)
    print("\nDEBUG> with output file:")
    print(output_filename.format("{job_name}"))
    print("\nDEBUG> Particles:")
    print(particle)
    print("\nDEBUG> Energy estimation:")
    print(estimate_energy)

    # ########  ##     ## ##    ## ##    ## #### ##    ##  ######
    # ##     ## ##     ## ###   ## ###   ##  ##  ###   ## ##    ##
    # ##     ## ##     ## ####  ## ####  ##  ##  ####  ## ##
    # ########  ##     ## ## ## ## ## ## ##  ##  ## ## ## ##   ####
    # ##   ##   ##     ## ##  #### ##  ####  ##  ##  #### ##    ##
    # ##    ##  ##     ## ##   ### ##   ###  ##  ##   ### ##    ##
    # ##     ##  #######  ##    ## ##    ## #### ##    ##  ######

    # list of files on the GRID SE space
    # not submitting jobs where we already have the output
    batcmd = "dirac-dms-user-lfns --BaseDir {}".format(
        os.path.join(home_grid, output_path))
    result = subprocess.check_output(batcmd, shell=True)
    try:
        grid_filelist = open(result.split()[-1]).read()
    except IOError:
        raise IOError("ERROR> cannot read GRID filelist...")

    # get jobs from today and yesterday...
    days = []
    for i in range(2):  # how many days do you want to look back?
        days.append(
            (datetime.date.today() - datetime.timedelta(days=i)).isoformat())

    # get list of run_tokens that are currently running / waiting
    running_ids = set()
    running_names = []
    for status in ["Waiting", "Running", "Checking"]:
        for day in days:
            try:
                [
                    running_ids.add(id) for id in dirac.selectJobs(
                        status=status, date=day, owner=user_name)["Value"]
                ]
            except KeyError:
                pass

    n_jobs = len(running_ids)
    if n_jobs > 0:
        print("Scanning {} running/waiting jobs... please wait...".format(
            n_jobs))
        for i, id in enumerate(running_ids):
            if ((100 * i) // n_jobs) % 5 == 0:
                print("\r{} %".format(((20 * i) // n_jobs) * 5), end="")
            jobname = dirac.getJobAttributes(id)["Value"]["JobName"]
            running_names.append(jobname)
        else:
            print("\n... done")

    for bunch in list_run_to_loop_on:

        # for bunch in bunches_of_run:

        # from IPython import embed
        # embed()

        # this selects the `runxxx` part of the first and last file in the run
        # list and joins them with a dash so that we get a nice identifier in
        # the output file name.
        # if there is only one file in the list, use only that one
        # run_token = re.split('_', bunch[+0])[3]  # JLK JLK
        run_token = re.split("_", bunch[0])[3]
        if len(bunch) > 1:
            run_token = "-".join([run_token, re.split("_", bunch[-1])[3]])

        print("-" * 50)
        print("-" * 50)

        # setting output name
        output_filenames = dict()
        if switches["output_type"] in "DL2":
            job_name = "protopipe_{}_{}_{}_{}_{}".format(
                config_name, switches["output_type"], particle, run_token,
                mode)
            output_filenames[mode] = output_filename.format("_".join(
                [particle, mode, run_token]))
        else:
            job_name = "protopipe_{}_{}_{}_{}_{}_{}".format(
                config_name, switches["output_type"], step, particle,
                run_token, mode)
            output_filenames[mode] = output_filename.format("_".join(
                [step, particle, mode, run_token]))

        # if job already running / waiting, skip
        if job_name in running_names:
            print("\n WARNING> {} still running\n".format(job_name))
            continue

        print("Output file name: {}".format(output_filenames[mode]))

        # if file already in GRID storage, skip
        # (you cannot overwrite it there, delete it and resubmit)
        # (assumes tail and wave will always be written out together)
        already_exist = False
        file_on_grid = os.path.join(output_path, output_filenames[mode])
        print("DEBUG> check for existing file on GRID...")
        if file_on_grid in grid_filelist:
            print("\n WARNING> {} already on GRID SE\n".format(job_name))
            continue

        if n_jobs_max == 0:
            print("WARNING> maximum number of jobs to submit reached")
            print("WARNING> breaking loop now")
            break
        else:
            n_jobs_max -= 1

        j = Job()

        # runtime in seconds times 8 (CPU normalisation factor)
        j.setCPUTime(6 * 3600 * 8)
        j.setName(job_name)
        j.setInputSandbox(input_sandbox)

        if banned_sites:
            j.setBannedSites(banned_sites)

        # Add simtel files as input data
        j.setInputData(bunch)

        for run_file in bunch:
            file_token = re.split("_", run_file)[3]

            # wait for a random number of seconds (up to five minutes) before
            # starting to add a bit more entropy in the starting times of the
            # dirac queries.
            # if too many jobs try in parallel to access the SEs,
            # the interface crashes
            # #sleep = random.randint(0, 20)  # seconds
            # #j.setExecutable('sleep', str(sleep))

            # JLK: Try to stop doing that
            # consecutively downloads the data files, processes them,
            # deletes the input
            # and goes on to the next input file;
            # afterwards, the output files are merged
            # j.setExecutable('dirac-dms-get-file', "LFN:" + run_file)

            # source the miniconda ctapipe environment and
            # run the python script with all its arguments
            if switches["output_type"] in "DL2":
                output_filename_temp = output_filename.format("_".join(
                    [particle, mode, file_token]))
            if switches["output_type"] in "TRAINING":
                output_filename_temp = output_filename.format("_".join(
                    [step, particle, mode, file_token]))
            j.setExecutable(
                "./pilot.sh",
                pilot_args_write.format(
                    outfile=output_filename_temp,
                    infile_name=os.path.basename(run_file),
                    mode=mode,
                ),
            )

            # remove the current file to clear space
            j.setExecutable("rm", os.path.basename(run_file))

        # simple `ls` for good measure
        j.setExecutable("ls", "-lh")

        # if there is more than one file per job, merge the output tables
        if len(bunch) > 1:
            names = []

            names.append(("*_{}_".format(particle), output_filenames[mode]))

            for in_name, out_name in names:
                print("in_name: {}, out_name: {}".format(in_name, out_name))
                j.setExecutable(
                    "./pilot.sh",
                    pilot_args_merge.format(in_name=in_name,
                                            out_name=out_name),
                )

                print("DEBUG> args append: {}".format(
                    pilot_args_merge.format(in_name=in_name,
                                            out_name=out_name)))

        bunch.extend(models_to_upload)
        bunch.extend(configs_to_upload)
        j.setInputData(bunch)

        print("Input data set to job = {}".format(bunch))

        outputs = []
        outputs.append(output_filenames[mode])
        print("Output file path: {}{}".format(output_path,
                                              output_filenames[mode]))

        j.setOutputData(outputs, outputSE=None, outputPath=output_path)

        # check if we should somehow stop doing what we are doing
        if switches["dry"] is True:
            print("\nThis is a DRY RUN! -- NO job has been submitted!")
            print("Name of the job: {}".format(job_name))
            print("Name of the output file: {}".format(outputs))
            print("Output path from GRID home: {}".format(output_path))
            break

        # this sends the job to the GRID and uploads all the
        # files into the input sandbox in the process
        print("\nSUBMITTING job with the following INPUT SANDBOX:")
        print(input_sandbox)
        print("Submission RESULT: {}\n".format(dirac.submitJob(j)["Value"]))

        # break if this is only a test submission
        if switches["test"] is True:
            print("This is a TEST RUN! -- Only ONE job will be submitted!")
            print("Name of the job: {}".format(job_name))
            print("Name of the output file: {}".format(outputs))
            print("Output path from GRID home: {}".format(output_path))
            break

        # since there are two nested loops, need to break again
        if switches["test"] is True:
            break

    try:
        os.remove("datapipe.tar.gz")
        os.remove("modules.tar.gz")
    except OSError:
        pass

    # Upload analysis configuration file for provenance

    SE_LIST = ['CC-IN2P3-USER', 'DESY-ZN-USER', 'CNAF-USER', 'CEA-USER']
    analysis_config_local = os.path.join(config_path, config_file)
    # the configuration file is uploaded to the data directory because
    # the training samples (as well as their cleaning settings) are independent
    analysis_config_dirac = os.path.join(home_grid, output_path, config_file)
    print("Uploading {} to {}...".format(analysis_config_local,
                                         analysis_config_dirac))

    if switches["dry"] is False:
        # Upload this file to all Dirac Storage Elements in SE_LIST
        for se in SE_LIST:
            # the uploaded config file overwrites any old copy
            ana_cfg_upload_cmd = ["dirac-dms-add-file", "-f",
                                  analysis_config_dirac, analysis_config_local, se]
            ana_cfg_upload_result = subprocess.check_output(ana_cfg_upload_cmd)
            print(ana_cfg_upload_result)
    else:
        print("This is a DRY RUN! -- analysis.yaml has NOT been uploaded.")

    print("\nall done -- exiting now")
    exit()
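Note that subprocess.check_output raises CalledProcessError on the first failed upload, so one bad storage element aborts the whole loop above. A minimal sketch of a more forgiving variant that reports failures per SE and carries on (same command; the helper name is our own invention):

import subprocess

def upload_to_all(local_path, grid_path, se_list):
    """Try to upload one file to every SE; report failures instead of aborting."""
    failed = []
    for se in se_list:
        cmd = ["dirac-dms-add-file", "-f", grid_path, local_path, se]
        try:
            print(subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode())
        except subprocess.CalledProcessError as exc:
            print("upload to {} failed:\n{}".format(se, exc.output.decode()))
            failed.append(se)
    return failed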
Example No. 17
def main():
    maxJobs = 100
    Script.registerSwitch("", "Status=", "Primary status")
    Script.registerSwitch("", "MinorStatus=", "Secondary status")
    Script.registerSwitch("", "ApplicationStatus=", "Application status")
    Script.registerSwitch("", "Site=", "Execution site")
    Script.registerSwitch("", "Owner=", "Owner (DIRAC nickname)")
    Script.registerSwitch("", "JobGroup=", "Select jobs for specified job group")
    Script.registerSwitch("", "Date=", "Date in YYYY-MM-DD format, if not specified default is today")
    Script.registerSwitch("", "Maximum=", "Maximum number of jobs shown (default %d, 0 means all)" % maxJobs)
    switches, args = Script.parseCommandLine(ignoreErrors=True)

    import DIRAC
    from DIRAC import gLogger

    # Default values
    status = None
    minorStatus = None
    appStatus = None
    site = None
    owner = None
    jobGroups = []
    date = None

    if args:
        Script.showHelp()

    exitCode = 0

    for switch in switches:
        if switch[0].lower() == "status":
            status = switch[1]
        elif switch[0].lower() == "minorstatus":
            minorStatus = switch[1]
        elif switch[0].lower() == "applicationstatus":
            appStatus = switch[1]
        elif switch[0].lower() == "site":
            site = switch[1]
        elif switch[0].lower() == "owner":
            owner = switch[1]
        elif switch[0].lower() == "jobgroup":
            for jg in switch[1].split(","):
                if jg.isdigit():
                    jobGroups.append("%08d" % int(jg))
                else:
                    jobGroups.append(jg)
        elif switch[0].lower() == "date":
            date = switch[1]
        elif switch[0] == "Maximum":
            try:
                maxJobs = int(switch[1])
            except TypeError:
                gLogger.fatal("Invalid max number of jobs", switch[1])
                DIRAC.exit(1)

    selDate = date
    if not date:
        selDate = "Today"
    conditions = {
        "Status": status,
        "MinorStatus": minorStatus,
        "ApplicationStatus": appStatus,
        "Owner": owner,
        "JobGroup": ",".join(str(jg) for jg in jobGroups),
        "Date": selDate,
    }

    from DIRAC.Interfaces.API.Dirac import Dirac

    dirac = Dirac()
    jobs = []

    if jobGroups:
        for jobGroup in jobGroups:
            res = dirac.selectJobs(
                status=status,
                minorStatus=minorStatus,
                applicationStatus=appStatus,
                site=site,
                owner=owner,
                jobGroup=jobGroup,
                date=date,
                printErrors=False,
            )
            if res["OK"]:
                jobs.extend(res["Value"])
            else:
                gLogger.error("Can't select jobs: ", res["Message"])
    else:
        res = dirac.selectJobs(
            status=status,
            minorStatus=minorStatus,
            applicationStatus=appStatus,
            site=site,
            owner=owner,
            date=date,
            printErrors=False,
        )
        if res["OK"]:
            jobs.extend(res["Value"])
        else:
            gLogger.error("Can't select jobs: ", res["Message"])

    conds = ["%s = %s" % (n, v) for n, v in conditions.items() if v]
    if maxJobs and len(jobs) > maxJobs:
        jobs = jobs[:maxJobs]
        constrained = " (first %d shown) " % maxJobs
    else:
        constrained = " "

    if jobs:
        gLogger.notice(
            "==> Selected %s jobs%swith conditions: %s\n%s" % (len(jobs), constrained, ", ".join(conds), ",".join(jobs))
        )
    else:
        gLogger.notice("No jobs were selected with conditions:", ", ".join(conds))

    DIRAC.exit(exitCode)
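One detail worth noting above: purely numeric job groups are zero-padded to eight digits before the query, which matches the zero-padded production IDs commonly used as job group names in DIRAC. A quick illustration of that normalization:

for jg in "1234,myAnalysisGroup".split(","):
    print("%08d" % int(jg) if jg.isdigit() else jg)
# prints:
# 00001234
# myAnalysisGroup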
Example No. 18
              'JobGroup': ','.join(str(jg) for jg in jobGroups),
              'Date': selDate}



from DIRAC.Interfaces.API.Dirac import Dirac

dirac = Dirac()
jobs = []

if jobGroups:
  for jobGroup in jobGroups:
    res = dirac.selectJobs(status=status,
                           minorStatus=minorStatus,
                           applicationStatus=appStatus,
                           site=site,
                           owner=owner,
                           jobGroup=jobGroup,
                           date=date,
                           printErrors=False)
    if res['OK']:
      jobs.extend(res['Value'])
    else:
      gLogger.error("Can't select jobs: ", res['Message'])
else:
  res = dirac.selectJobs(status=status,
                         minorStatus=minorStatus,
                         applicationStatus=appStatus,
                         site=site,
                         owner=owner,
                         date=date,
                         printErrors=False)
exitCode = 0
iKey = "FailureReason"
args = Script.getPositionalArgs()
if len(args):
    iKey = args[0]
print('*INFO* looking for key %s' % iKey)
# first, get all jobs which are marked as stalled
status = "Failed"
minor_stat = "Job stalled: pilot not running"
owner = "zimmer"
dirac = Dirac()
jobs = []
conditions = {"Status":status,"MinorStatus":minor_stat,"Owner":owner}
res = dirac.selectJobs( status = status,
                       minorStatus = minor_stat,
                       owner = owner)
if not res['OK']:
    gLogger("ERROR retrieving jobs")
    gLogger(res["Message"])
    exitCode = 2
else:
    conds = []
    for n, v in conditions.items():
        if v:
            conds.append( '%s = %s' % ( n, v ) )
    jobs = res['Value']
pilot_refs = {}
for job in jobs:
    # next get pilot refs
    key = job
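The snippet breaks off at the start of the per-job loop. As a sketch of how the idea could continue (an illustration, not the original author's code), the requested attribute (iKey, FailureReason by default) can be tallied over the stalled jobs with dirac.attributes, the same call used in Example No. 22 below:

from collections import Counter

tally = Counter()
for job in jobs:
    res = dirac.attributes(job)
    if res['OK']:
        tally[res['Value'].get(iKey, 'Unknown')] += 1
for value, count in tally.most_common():
    print('%5d  %s' % (count, value))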
Example No. 20
group = None
jobs = []
for sw, value in Script.getUnprocessedSwitches():
    if sw.lower() in ('d', 'dir'):
        outputDir = value
    elif sw.lower() in ('f', 'file'):
        if os.path.exists(value):
            jFile = open(value)
            jobs += jFile.read().split()
            jFile.close()
    elif sw.lower() in ('g', 'jobgroup'):
        group = value
        jobDate = toString(date() - 30 * day)

        # Choose jobs in final state, no more than 30 days old
        result = dirac.selectJobs(jobGroup=value, date=jobDate, status='Done')
        if not result['OK']:
            if "No jobs selected" not in result['Message']:
                print("Error:", result['Message'])
                DIRAC.exit(-1)
        else:
            jobs += result['Value']
        result = dirac.selectJobs(jobGroup=value,
                                  date=jobDate,
                                  status='Failed')
        if not result['OK']:
            if "No jobs selected" not in result['Message']:
                print("Error:", result['Message'])
                DIRAC.exit(-1)
        else:
            jobs += result['Value']
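Since the two selections differ only in the status value, the same logic reads more compactly as a loop over the final states; a sketch using the same 30-day window:

for final_status in ('Done', 'Failed'):
    result = dirac.selectJobs(jobGroup=group, date=jobDate, status=final_status)
    if result['OK']:
        jobs += result['Value']
    elif "No jobs selected" not in result['Message']:
        print("Error:", result['Message'])
        DIRAC.exit(-1)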
Example No. 21
#print owner, jobGroup, nHours

# Start doing something
# import Dirac here (and not at the top of the file) if you don't want to get into trouble
import datetime

import DIRAC
from DIRAC.Interfaces.API.Dirac import Dirac
dirac = Dirac()


# dirac.selectJobs( status='Failed', owner='paterson', site='LCG.CERN.ch')
# owner=owner, date=jobDate
onehour = datetime.timedelta(hours=1)
now = datetime.datetime.now()
Script.gLogger.notice(now)

results = dirac.selectJobs(jobGroup=jobGroup, owner=owner, date=now - nHours * onehour)
if 'Value' not in results:
    Script.gLogger.notice("No job found for group \"%s\" and owner \"%s\" in the past %s hours" %
                          (jobGroup, owner, nHours))
    DIRAC.exit(0)

# Found some jobs, print information
jobsList = results['Value']
Script.gLogger.notice("%s jobs found for group \"%s\" and owner \"%s\" in the past %s hours\n" %
                      (len(jobsList), jobGroup, owner, nHours))

status = dirac.status(jobsList)

# for details
#print dirac.getJobSummary(3075536)
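dirac.status returns the usual result dict, with 'Value' mapping each job ID to its attributes. Assuming the per-job dict carries a 'Status' key (as the other examples here suggest), a short sketch that turns the result into a per-status count:

from collections import Counter

if status['OK']:
    counts = Counter(attrs.get('Status', 'Unknown')
                     for attrs in status['Value'].values())
    for state, n in sorted(counts.items()):
        Script.gLogger.notice("%6d  %s" % (n, state))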
Example No. 22
# get jobs from today and yesterday...
days = []
for i in range(2):  # how many days do you want to look back?
    days.append(
        (datetime.date.today() - datetime.timedelta(days=i)).isoformat())

# get list of run_tokens that are currently running / waiting
running_ids = set()
running_names = []
for status in ["Waiting", "Running", "Checking"]:
    for day in days:
        try:
            # owner="tmichael"
            running_ids.update(
                dirac.selectJobs(status=status, date=day,
                                 owner="jlefaucheur")['Value'])
        except KeyError:
            pass

n_jobs = len(running_ids)
if n_jobs > 0:
    print(
        "getting names from {} running/waiting jobs... please wait...".format(
            n_jobs))
    for i, id in enumerate(running_ids):
        if (100 * i // n_jobs) % 5 == 0:
            print("\r{} %".format((20 * i // n_jobs) * 5), end="")
        jobname = dirac.attributes(id)["Value"]["JobName"]
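The loop ends right after fetching each job name; presumably the run token is then parsed out of that name. A sketch of such a step, where the naming pattern (a "run" prefix followed by digits) is purely an assumption for illustration:

import re

# hypothetical job-name convention, e.g. "classify_run01234_gamma"
match = re.search(r"run(\d+)", jobname)
if match:
    running_names.append(match.group(1))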
Example No. 23
    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments
    from DIRAC.Core.Utilities.Time import toString, date, day
    dirac = Dirac()

    jobs = []
    for sw, value in Script.getUnprocessedSwitches():
        if sw.lower() in ('f', 'file'):
            if os.path.exists(value):
                jFile = open(value)
                jobs += jFile.read().split()
                jFile.close()
        elif sw.lower() in ('g', 'jobgroup'):
            group = value
            jobDate = toString(date() - 30 * day)
            result = dirac.selectJobs(jobGroup=value, date=jobDate)
            if not result['OK']:
                if "No jobs selected" not in result['Message']:
                    print("Error:", result['Message'])
                    DIRAC.exit(-1)
            else:
                jobs += result['Value']

    for arg in parseArguments(args):
        jobs.append(arg)

    if not jobs:
        print("Warning: no jobs selected")
        Script.showHelp()
        DIRAC.exit(0)
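At this point the jobs list can mix strings read from a file with IDs parsed from the command line; a small sketch that normalizes everything to unique integer IDs before any bulk status call:

unique_jobs = []
for j in jobs:
    jid = int(j)
    if jid not in unique_jobs:
        unique_jobs.append(jid)
jobs = unique_jobs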
Example No. 24
    from DIRAC.Interfaces.API.Dirac import Dirac

    dirac = Dirac()
    exitCode = 0
    errorList = []

    jobs = []
    for sw, value in Script.getUnprocessedSwitches():
        if sw.lower() in ("f", "file"):
            if os.path.exists(value):
                jFile = open(value)
                jobs += jFile.read().split()
                jFile.close()
        elif sw.lower() in ("g", "jobgroup"):
            group = value
            result = dirac.selectJobs(jobGroup=value)
            if not result["OK"]:
                if not "No jobs selected" in result["Message"]:
                    print "Error:", result["Message"]
                    DIRAC.exit(-1)
            else:
                jobs += result["Value"]

    for arg in args:
        jobs.append(arg)

    if not jobs:
        print("Warning: no jobs selected")
        Script.showHelp()
        DIRAC.exit(0)
Example No. 25
    date = switch[1]
  elif switch[0].lower() == "file":
    filename = switch[1]

selDate = date
if not date:
  selDate = 'Today'

from DIRAC.Interfaces.API.Dirac import Dirac

dirac = Dirac()
exitCode = 0
errorList = []
resultDict = {}

result = dirac.selectJobs( status = status, minorStatus = minorStatus, applicationStatus = appStatus,
                           site = site, owner = owner, jobGroup = jobGroup, date = date )
if result['OK']:
  jobs = result['Value']
else:
  print("Error in selectJob", result['Message'])
  DIRAC.exit( 2 )

for job in jobs:

  result = dirac.getOutputSandbox( job )
  if result['OK']:
    if os.path.exists( str( job ) ):

      lines = []
      try:
        lines = open( os.path.join( job, filename ) ).readlines()
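The snippet is cut off inside the try block. A sketch of how the per-job loop might continue, collecting the sandbox lines that match a search string (the pattern variable is a placeholder for illustration), reusing the errorList and resultDict defined above:

  pattern = 'ERROR'  # placeholder search string
  try:
    with open( os.path.join( str( job ), filename ) ) as fd:
      matching = [line.rstrip() for line in fd if pattern in line]
  except IOError as exc:
    errorList.append( ( job, str( exc ) ) )
  else:
    resultDict[job] = matching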
Example No. 26
            if isinstance(since, int):
                since = str(datetime.datetime.now() -
                            datetime.timedelta(days=since)).split()[0]

    if workerNodes or batchIDs:
        # status = [None]
        full = True

    monitoring = JobMonitoringClient()
    dirac = Dirac()

    # Get jobs according to selection
    jobs = set()
    for stat in status:
        res = dirac.selectJobs(site=site,
                               date=since,
                               status=stat,
                               minorStatus=minorStatus)
        if not res['OK']:
            gLogger.error('Error selecting jobs', res['Message'])
            DIRAC.exit(1)
        allJobs = set(int(job) for job in res['Value'])
        if until:
            res = dirac.selectJobs(site=site, date=until, status=stat)
            if not res['OK']:
                gLogger.error('Error selecting jobs', res['Message'])
                DIRAC.exit(1)
            allJobs -= set(int(job) for job in res['Value'])
        jobs.update(allJobs)
    if not jobs:
        gLogger.always('No jobs found...')
        DIRAC.exit(0)
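The since/until window above works by set difference: select everything newer than the lower bound, then subtract everything newer than the upper bound. The same trick as a small helper, shown only for clarity (same selectJobs calls as in the snippet):

def select_window(dirac, site, stat, since, until=None):
    """Job IDs at `site` with status `stat`, submitted in [since, until)."""
    res = dirac.selectJobs(site=site, date=since, status=stat)
    if not res['OK']:
        return set()
    window = set(int(j) for j in res['Value'])
    if until:
        res = dirac.selectJobs(site=site, date=until, status=stat)
        if res['OK']:
            window -= set(int(j) for j in res['Value'])
    return window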
Example No. 27
selDate = date
if not date:
  selDate = 'Today'
conditions = { 'Status':status,
               'MinorStatus':minorStatus,
               'ApplicationStatus':appStatus,
               'Owner':owner,
               'JobGroup':jobGroup,
               'Date':selDate }

from DIRAC.Interfaces.API.Dirac import Dirac
dirac = Dirac()
result = dirac.selectJobs( status = status,
                           minorStatus = minorStatus,
                           applicationStatus = appStatus,
                           site = site,
                           owner = owner,
                           jobGroup = jobGroup,
                           date = date )
if not result['OK']:
  print( 'ERROR %s' % result['Message'] )
  exitCode = 2
else:
  conds = []
  for n, v in conditions.items():
    if v:
      conds.append( '%s = %s' % ( n, v ) )
  jobs = result['Value']
  constrained = ' '
  if len( jobs ) > 100:
    jobs = jobs[:100]
Example No. 28
selDate = date
if not date:
  selDate = 'Today'
conditions = { 'Status':status,
               'MinorStatus':minorStatus,
               'ApplicationStatus':appStatus,
               'Owner':owner,
               'JobGroup':jobGroup,
               'Date':selDate }

from DIRAC.Interfaces.API.Dirac import Dirac
dirac = Dirac()
result = dirac.selectJobs( status = status,
                           minorStatus = minorStatus,
                           applicationStatus = appStatus,
                           site = site,
                           owner = owner,
                           jobGroup = jobGroup,
                           date = date )
if not result['OK']:
  print( 'ERROR %s' % result['Message'] )
  exitCode = 2
else:
  conds = []
  for n, v in conditions.items():
    if v:
      conds.append( '%s = %s' % ( n, v ) )
  jobs = result['Value']
  constrained = ' '
  if len( jobs ) > 100:
    jobs = jobs[:100]
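Both of the last two snippets stop right after truncating the selection to 100 jobs; the usual continuation, mirroring the full script in Example No. 17, marks the truncation and prints the summary:

  constrained = ' '
  if len( jobs ) > 100:
    jobs = jobs[:100]
    constrained = ' (first 100 shown) '
  print( '==> Selected %s jobs%swith conditions: %s\n%s'
         % ( len( jobs ), constrained, ', '.join( conds ), ','.join( jobs ) ) )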