Code example #1
def read_hessjob(args=None):

    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.Interfaces.API.Job import Job
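    # Note: this snippet also relies on `os` and on `Script` (presumably
    # DIRAC.Core.Base.Script) being imported at module level in the original
    # script; both are used below but not imported here.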

    if (len(args) != 1):
        Script.showHelp()

    version = args[0]

    user_script = './read_hess2dst.sh'

    sim_file = 'simtel_file.list'

    infileLFNList = [
        '/vo.cta.in2p3.fr/MC/PROD2/Config_310113/prod-2_21122012_corsika/gamma/prod-2_06052013_simtel_STD/Data/002xxx/gamma_20.0_180.0_alt2662.0_run002997.simtel.gz',
        '/vo.cta.in2p3.fr/MC/PROD2/Config_310113/prod-2_21122012_corsika/gamma/prod-2_06052013_simtel_STD/Data/002xxx/gamma_20.0_180.0_alt2662.0_run002998.simtel.gz'
    ]

    f = open(sim_file, 'w')

    for infileLFN in infileLFNList:
        filein = os.path.basename(infileLFN)
        f.write(filein)
        f.write('\n')

    f.close()

    j = Job()

    j.setInputData(infileLFNList)

    options = [sim_file]

    executablestr = "%s %s %s" % (version, user_script, ' '.join(options))

    j.setExecutable('./cta-read_hess.py', executablestr)

    j.setInputSandbox(['cta-read_hess.py', user_script, sim_file])

    j.setOutputSandbox(['read_hess.log'])

    j.setOutputData(['*dst.gz'])

    j.setName(user_script)

    j.setCPUTime(100000)

    Script.gLogger.info(j._toJDL())

    Dirac().submit(j)
Code example #2
File: dsub.py, Project: yan-tian/dsub
def submitJob(jobPara):
    dirac = Dirac()
    j = Job()
    j.setName(jobPara['jobName'])
    j.setJobGroup(jobPara['jobGroup'])
    j.setExecutable(jobPara['jobScript'], logFile = jobPara['jobScriptLog'])
    j.setInputSandbox(jobPara['inputSandbox'])
    j.setOutputSandbox(jobPara['outputSandbox'])
    j.setOutputData(jobPara['outputData'], jobPara['SE'])
    j.setDestination(jobPara['sites'])
    j.setCPUTime(jobPara['CPUTime'])
    result = dirac.submit(j)
    if result['OK']:
        print 'Job %s submitted successfully. ID = %d' %(jobPara['jobName'],result['Value'])
    else:
        print 'Job %s submission failed' % jobPara['jobName']
    return result
Code example #3
def submitJob(jobPara):
    dirac = Dirac()
    j = Job()
    j.setName(jobPara['jobName'])
    j.setJobGroup(jobPara['jobGroup'])
    j.setExecutable(jobPara['jobScript'], logFile=jobPara['jobScriptLog'])
    j.setInputSandbox(jobPara['inputSandbox'])
    j.setOutputSandbox(jobPara['outputSandbox'])
    j.setOutputData(jobPara['outputData'], jobPara['SE'])
    j.setDestination(jobPara['sites'])
    j.setCPUTime(jobPara['CPUTime'])
    result = dirac.submit(j)
    if result['OK']:
        print 'Job %s submitted successfully. ID = %d' % (jobPara['jobName'],
                                                          result['Value'])
    else:
        print 'Job %s submission failed' % jobPara['jobName']
    return result
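The submitJob helpers in examples #2 and #3 expect the caller to assemble a jobPara dictionary, which neither snippet shows. A minimal sketch of such a call is given below; all values are placeholders chosen for illustration (only the key names come from the helper itself, and the return-dictionary layout follows the result['OK'] / result['Value'] checks above).

jobPara = {
    'jobName': 'hello-world',
    'jobGroup': 'tutorial-jobs',
    'jobScript': 'hello.sh',          # placeholder executable script
    'jobScriptLog': 'hello.log',
    'inputSandbox': ['hello.sh'],
    'outputSandbox': ['hello.log'],
    'outputData': ['result.txt'],
    'SE': 'SOME-USER-SE',             # placeholder storage element
    'sites': ['LCG.CERN.ch'],         # placeholder destination site
    'CPUTime': 86400,
}
result = submitJob(jobPara)           # returns the DIRAC result dictionary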
Code example #4
File: unitTestUserJobs.py, Project: fstagni/TestDIRAC
    def test_submit(self):

        print "**********************************************************************************************************"
        gLogger.info("\n Submitting hello world job")

        helloJ = Job()

        helloJ.setName("helloWorld-test-T2s")
        helloJ.setInputSandbox(
            [find_all('exe-script.py', '.', 'GridTestSubmission')[0]])

        helloJ.setExecutable("exe-script.py", "", "helloWorld.log")

        helloJ.setCPUTime(17800)
        result = self.dirac.submit(helloJ)
        gLogger.info("Hello world job: ", result)

        jobID = int(result['Value'])
        jobsSubmittedList.append(jobID)

        self.assert_(result['OK'])

        print "**********************************************************************************************************"

        gLogger.info("\n Submitting a job that uploads an output")

        helloJ = Job()

        helloJ.setName("upload-Output-test")
        helloJ.setInputSandbox(
            [find_all('testFileUpload.txt', '.', 'GridTestSubmission')[0]])
        helloJ.setExecutable("exe-script.py", "", "helloWorld.log")

        helloJ.setCPUTime(17800)

        helloJ.setOutputData(['testFileUpload.txt'])

        result = self.dirac.submit(helloJ)
        gLogger.info("Hello world with output: ", result)

        jobID = int(result['Value'])
        jobsSubmittedList.append(jobID)

        self.assert_(result['OK'])
Code example #5
def simteljob(args = None ):

  from DIRAC.Interfaces.API.Dirac import Dirac
  from DIRAC.Interfaces.API.Job import Job

  if (len(args)!=1):
    Script.showHelp()

  version = args[0]

  user_script = './run_simtel.sh'
  
  infileLFNList = [
      '/vo.cta.in2p3.fr/MC/PROD2/Config_120213/prod-2_21122012_corsika/proton/Data/044xxx/proton_20.0_180.0_alt2662.0_run044019.corsika.gz',
      '/vo.cta.in2p3.fr/MC/PROD2/Config_120213/prod-2_21122012_corsika/proton/Data/044xxx/proton_20.0_180.0_alt2662.0_run044085.corsika.gz'
  ]


  for infileLFN in infileLFNList:
    filein = os.path.basename(infileLFN)

    j = Job()

    j.setInputSandbox( ['cta-simtel.py', user_script] )  
    j.setInputData(infileLFN)
  
    user_args = [filein]
  
    executablestr = "%s %s %s" % ( version, user_script, ' '.join( user_args ) )

    j.setExecutable('./cta-simtel.py', executablestr)

    sim_out = 'Data/sim_telarray/cta-ultra5/0.0deg/Data/*.simtel.gz'
    log_out = 'Data/sim_telarray/cta-ultra5/0.0deg/Log/*.log.gz'
    hist_out = 'Data/sim_telarray/cta-ultra5/0.0deg/Histograms/*.hdata.gz'
   
    j.setOutputData([sim_out,log_out,hist_out])
    j.setOutputSandbox('simtel.log')
    j.setName(user_script)
    j.setCPUTime(100000)

    Script.gLogger.info( j._toJDL() )

    Dirac().submit( j )
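Example #5 loops over the LFN list and submits one job per input file. When several files should be grouped into a single job, the same Dirac API object also offers splitInputData, which is used further down in example #15; the sketch below is only a minimal illustration (the chunk size of 5 is an arbitrary choice, and infileLFNList is the list defined above).

from DIRAC.Interfaces.API.Dirac import Dirac

dirac = Dirac()
# Group the LFNs into chunks of at most 5 files; each chunk becomes one job's input data.
res = dirac.splitInputData(infileLFNList, 5)
if res['OK']:
    for chunk in res['Value']:
        # build and submit one Job per chunk here, e.g. j.setInputData(chunk)
        pass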
Code example #6
File: localJobRun.py, Project: zhangxiaomei/DIRAC
 def basicTest(self):
     j = Job()
     j.setCPUTime(50000)
     j.setExecutable(
         '/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/myPythonScript.py'
     )
     # j.setExecutable('/bin/echo hello')
     j.setOwner('paterson')
     j.setType('test')
     j.setName('MyJobName')
     #j.setAncestorDepth(1)
     j.setInputSandbox([
         '/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/DV.opts',
         '/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/DV2.opts'
     ])
     j.setOutputSandbox(['firstfile.txt', 'anotherfile.root'])
     j.setInputData([
         '/lhcb/production/DC04/v2/DST/00000742_00003493_11.dst',
         '/lhcb/production/DC04/v2/DST/00000742_00003493_10.dst'
     ])
     j.setOutputData(['my.dst', 'myfile.log'])
     j.setDestination('LCG.CERN.ch')
     j.setPlatform('LCG')
     j.setSystemConfig('x86_64-slc5-gcc43-opt')
     j.setSoftwareTags(['VO-lhcb-Brunel-v30r17', 'VO-lhcb-Boole-v12r10'])
     #print j._toJDL()
     #print j.printObj()
     xml = j._toXML()
     testFile = 'jobDescription.xml'
     if os.path.exists(testFile):
         os.remove(testFile)
     xmlfile = open(testFile, 'w')
     xmlfile.write(xml)
     xmlfile.close()
     print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Creating code for the workflow'
     print j.createCode()
     print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Executing the workflow'
     j.execute()
     print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Trying to run the same workflow from generated XML file'
     workflow = fromXMLFile(testFile)
     code = workflow.createCode()
     print code
     workflow.execute()
Code example #7
def submitWMS(args):

    first_line = args[0]

    job = Job()
    dirac = Dirac()

    job.setName('mandelbrot')

    job.setExecutable('git clone https://github.com/bregeon/mandel4ts.git')

    job.setExecutable('./mandel4ts/mandelbrot.py',
                      arguments="-P 0.0005 -M 1000 -L %s -N 200" % first_line)

    job.setOutputData(['data_*.bmp', 'data*.txt'])

    res = dirac.submitJob(job)

    return res
Code example #8
File: Dirac.py, Project: suosdu/jsub
    def submit(self, param):        
        j = Job()
        j.setName(param['jobName'])
        j.setExecutable(param['jobScript'],logFile = param['jobScriptLog'])
        if self.site:
            j.setDestination(self.site)
        if self.jobGroup:
            j.setJobGroup(self.jobGroup)            
        j.setInputSandbox(param['inputSandbox'])
        j.setOutputSandbox(param['outputSandbox'])
        j.setOutputData(param['outputData'], outputSE = self.outputSE, outputPath = self.outputPath)

        dirac = GridDirac()
        result = dirac.submit(j)

        status = {}
        status['submit'] = result['OK']
        if status['submit']:
            status['job_id'] = result['Value']

        return status
Code example #9
File: dirac-daemon.py, Project: rob-tay/LZProduction
    def submit_job(self,
                   request_id,
                   executable,
                   macro,
                   starting_seed=8000000,
                   njobs=10,
                   platform='ANY',
                   output_data_site='UKI-LT2-IC-HEP-disk',
                   output_log='lzproduction_output.log'):
        """
        Submit LZProduction job to DIRAC.

        Args:
            request_id (int): The id number of the associated request
            executable (str): The full path to the executable job script
            macro (str): The full path to the macro for this job
            starting_seed (int): The random seed for the first of the parametric jobs
            njobs (int): The number of parametric jobs to create
            platform (str): The required platform
            output_data_site (str): The name of the grid site to store the output data at
            output_log (str): The file name for the output log file

        Returns:
           list: The list of created parametric job DIRAC ids
        """
        j = Job()
        j.setName(os.path.splitext(os.path.basename(macro))[0] + '%(args)s')
        j.setExecutable(os.path.basename(executable),
                        os.path.basename(macro) + ' %(args)s', output_log)
        j.setInputSandbox([executable, macro])
        j.setOutputData('*.root', output_data_site, str(request_id))
        j.setParameterSequence(
            "args",
            [str(i) for i in xrange(starting_seed, starting_seed + njobs)],
            addToWorkflow=True)
        j.setPlatform(platform)

        return self.status(self._dirac_api.submit(j).get("Value", []))
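The docstring above describes a parametric submission: setParameterSequence registers an "args" sequence, and the '%(args)s' placeholders in the job name and executable arguments are expanded once per seed value. A sketch of how the method might be called is shown below; the daemon instance, paths and numbers are all hypothetical, and only the keyword names come from the signature above.

# `daemon` is assumed to be an instance of the class this method belongs to.
ids = daemon.submit_job(
    request_id=42,                              # illustrative request id
    executable='/path/to/run_simulation.sh',    # hypothetical job script
    macro='/path/to/electron_events.mac',       # hypothetical macro file
    starting_seed=8000000,
    njobs=10,
)
print(ids)  # per the docstring: the list of created parametric job DIRAC ids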
Code example #10
    def submit(self, param):
        j = Job()
        j.setName(param['jobName'])
        j.setExecutable(param['jobScript'], logFile=param['jobScriptLog'])
        if self.site:
            j.setDestination(self.site)
        if self.jobGroup:
            j.setJobGroup(self.jobGroup)
        j.setInputSandbox(param['inputSandbox'])
        j.setOutputSandbox(param['outputSandbox'])
        j.setOutputData(param['outputData'],
                        outputSE=self.outputSE,
                        outputPath=self.outputPath)

        dirac = GridDirac()
        result = dirac.submit(j)

        status = {}
        status['submit'] = result['OK']
        if status['submit']:
            status['job_id'] = result['Value']

        return status
Code example #11
File: localJobRun.py, Project: KrzysztofCiba/DIRAC
 def basicTest(self):
   j = Job()
   j.setCPUTime(50000)
   j.setExecutable('/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/myPythonScript.py')
  # j.setExecutable('/bin/echo hello')
   j.setOwner('paterson')
   j.setType('test')
   j.setName('MyJobName')
   #j.setAncestorDepth(1)
   j.setInputSandbox(['/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/DV.opts','/Users/stuart/dirac/workspace/DIRAC3/DIRAC/Interfaces/API/test/DV2.opts'])
   j.setOutputSandbox(['firstfile.txt','anotherfile.root'])
   j.setInputData(['/lhcb/production/DC04/v2/DST/00000742_00003493_11.dst',
                   '/lhcb/production/DC04/v2/DST/00000742_00003493_10.dst'])
   j.setOutputData(['my.dst','myfile.log'])
   j.setDestination('LCG.CERN.ch')
   j.setPlatform('LCG')
   j.setSystemConfig('x86_64-slc5-gcc43-opt')
   j.setSoftwareTags(['VO-lhcb-Brunel-v30r17','VO-lhcb-Boole-v12r10'])
   #print j._toJDL()
   #print j.printObj()
   xml = j._toXML()
   testFile = 'jobDescription.xml'
   if os.path.exists(testFile):
     os.remove(testFile)
   xmlfile = open(testFile,'w')
   xmlfile.write(xml)
   xmlfile.close()
   print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Creating code for the workflow'
   print j.createCode()
   print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Executing the workflow'
   j.execute()
   print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Trying to run the same workflow from generated XML file'
   workflow = fromXMLFile(testFile)
   code = workflow.createCode()
   print code
   workflow.execute()
Code example #12
File: submit.py, Project: thomasarmstrong/tino_cta
        # simple `ls` for good measure
        j.setExecutable('ls', '-lh')

        # if there is more than one file per job, merge the output tables
        if window_sizes[i] > 1:
            for in_name, out_name in [('classified_events_wave', output_filename_wave),
                                      ('classified_events_tail', output_filename_tail)]:
                j.setExecutable('./pilot.sh',
                                pilot_args_append.format(
                                    in_name=in_name,
                                    out_name=out_name))

        print()
        print("OutputData: {}{}".format(output_path, output_filename_wave))
        print("OutputData: {}{}".format(output_path, output_filename_tail))
        j.setOutputData([output_filename_wave, output_filename_tail],
                        outputSE=None, outputPath=output_path)

        # check if we should somehow stop doing what we are doing
        if "dry" in sys.argv:
            print("\nrunning dry -- not submitting")
            exit()

        # this sends the job to the GRID and uploads all the
        # files into the input sandbox in the process
        print("\nsubmitting job")
        print('Submission Result: {}\n'.format(dirac.submit(j)['Value']))

        # break if this is only a test submission
        if "test" in sys.argv:
            print("test run -- only submitting one job")
            exit()
Code example #13
def main(dataset, chunksize, test):
    '''
    The DATASET argument is a list of paths to MC files on the grid. Like the output of
    cta-prod3-dump-dataset for example. See also
    https://forge.in2p3.fr/projects/cta_dirac/wiki/CTA-DIRAC_MC_PROD3_Status

    Keep in mind that this needs to be executed within the special 'dirac'
    environment, which comes with its own glibc, python and pip.

    '''
    dirac = Dirac()

    with open(dataset) as f:
        simtel_files = f.readlines()
        print('Analysing {} files'.format(len(simtel_files)))

    server_list = [
        "TORINO-USER", "CYF-STORM-USER", "CYF-STORM-Disk", "M3PEC-Disk",
        "OBSPM-Disk", "POLGRID-Disk", "FRASCATI-USER", "LAL-Disk",
        "CIEMAT-Disk", "CIEMAT-USER", "CPPM-Disk", "LAL-USER", "CYFRONET-Disk",
        "DESY-ZN-USER", "M3PEC-USER", "LPNHE-Disk", "LPNHE-USER", "LAPP-USER",
        "LAPP-Disk"
    ]
    desy_server = 'DESY-ZN-USER'

    servers_with_miniconda = [
        'LCG.IN2P3-CC.fr', 'LCG.DESY-ZEUTHEN.de', 'LCG.CNAF.it', 'LCG.GRIF.fr',
        'LCG.CYFRONET.pl', 'LCG.Prague.cz', 'LCG.CIEMAT.es'
    ]

    chunks = np.array_split(sorted(simtel_files),
                            int(len(simtel_files) / chunksize))

    print('Got a total of {} chunks'.format(len(chunks)))
    for c, simtel_filenames in tqdm(enumerate(
            chunks[0:2])):  # send just 2 jobs for now.
        # convert chunk to a list of strings, because this dirac interface can't take numpy arrays
        simtel_filenames = [
            str(s).strip() for s in simtel_filenames if 'SCT' not in s
        ]
        print('Starting processing for chunk {}'.format(c))
        print(simtel_filenames)
        j = Job()
        # set runtime to 0.5h
        j.setCPUTime(30 * 60)
        j.setName('cta_preprocessing_{}'.format(c))
        j.setInputData(simtel_filenames)
        j.setOutputData(['./processing_output/*.hdf5'],
                        outputSE=None,
                        outputPath='cta_preprocessing/')

        j.setInputSandbox(
            ['../process_simtel.py', './install_dependencies.py'])
        j.setOutputSandbox(['cta_preprocessing.log'])
        j.setExecutable('./job_script.sh')
        # These servers seem to have miniconda installed
        # destination = np.random.choice(servers_with_miniconda)
        j.setDestination(servers_with_miniconda)

        value = dirac.submit(j)
        print('Number {} Submission Result: {}'.format(c, value))
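As the docstring explains, the dataset argument is a plain text file listing MC LFNs (for example the output of cta-prod3-dump-dataset). The snippet does not show how main() is invoked, so the call below is only an assumed illustration; the file name is hypothetical.

# Hypothetical direct call; the original script presumably wraps main() in a CLI parser.
main(dataset='prod3b_gamma_simtel.list', chunksize=10, test=True)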
Code example #14
    j.setOutputSandbox([
        'StdOut', 'StdErr',
        'outputtxt_' + str(id_start) + '_' + str(id_end - 1) + '.txt',
        'prmon' + str(id_start) + '_' + str(id_end - 1) + '.txt'
    ])
    o_data_file = lfn + 'second/results_experiment_' + str(
        expmnt) + '/' + 'LOS_' + str(id_start) + '_to_' + str(id_end -
                                                              1) + '.npy'
    try:
        output_process = subprocess.check_output('dirac-dms-remove-files ' +
                                                 o_data_file,
                                                 stderr=subprocess.STDOUT,
                                                 shell=True)
    except subprocess.CalledProcessError as e:
        print 'Failed: ' + str(e.returncode) + ' ' + e.output
    else:
        print "Output: ", output_process
    j.setOutputData(
        ['LOS_' + str(id_start) + '_to_' + str(id_end - 1) + '.npy'],
        outputSE=SEList,
        outputPath='/second/results_experiment_' + str(expmnt))
    try:
        diracUsername = getProxyInfo()['Value']['username']
    except:
        print 'Failed to get DIRAC username. No proxy set up?'
        sys.exit(1)
    j.setJobGroup('rmsynthesis_by_' + expmnt + '_' + timestamp)
    jobID = dirac.submitJob(j)
    print 'Submission Result: ', j._toJDL()
    print '\n'
Code example #15
def main():
    """
    Launch job on the GRID
    """
    # this thing pilots everything related to the GRID
    dirac = Dirac()

    if switches["output_type"] in "TRAINING":
        print("Preparing submission for TRAINING data")
    elif switches["output_type"] in "DL2":
        print("Preparing submission for DL2 data")
    else:
        print("You have to choose either TRAINING or DL2 as output type!")
        sys.exit()

    # Read configuration file
    cfg = load_config(switches["config_file"])

    # Analysis
    config_path = cfg["General"]["config_path"]
    config_file = cfg["General"]["config_file"]
    mode = cfg["General"]["mode"]  # One mode for now
    particle = cfg["General"]["particle"]
    estimate_energy = cfg["General"]["estimate_energy"]
    force_tailcut_for_extended_cleaning = cfg["General"][
        "force_tailcut_for_extended_cleaning"]

    # Take parameters from the analysis configuration file
    ana_cfg = load_config(os.path.join(config_path, config_file))
    config_name = ana_cfg["General"]["config_name"]
    cam_id_list = ana_cfg["General"]["cam_id_list"]

    # Regressor and classifier methods
    regressor_method = ana_cfg["EnergyRegressor"]["method_name"]
    classifier_method = ana_cfg["GammaHadronClassifier"]["method_name"]

    # Someone might want to create DL2 without score or energy estimation
    if regressor_method in ["None", "none", None]:
        use_regressor = False
    else:
        use_regressor = True

    if classifier_method in ["None", "none", None]:
        use_classifier = False
    else:
        use_classifier = True

    # GRID
    outdir = os.path.join(cfg["GRID"]["outdir"], config_name)
    n_file_per_job = cfg["GRID"]["n_file_per_job"]
    n_jobs_max = cfg["GRID"]["n_jobs_max"]
    model_dir = cfg["GRID"]["model_dir"]
    training_dir_energy = cfg["GRID"]["training_dir_energy"]
    training_dir_classification = cfg["GRID"]["training_dir_classification"]
    dl2_dir = cfg["GRID"]["dl2_dir"]
    home_grid = cfg["GRID"]["home_grid"]
    user_name = cfg["GRID"]["user_name"]
    banned_sites = cfg["GRID"]["banned_sites"]

    # HACK
    if force_tailcut_for_extended_cleaning is True:
        print("Force tail cuts for extended cleaning!!!")

    # Prepare command to launch script
    source_ctapipe = "source /cvmfs/cta.in2p3.fr/software/conda/dev/setupConda.sh"
    source_ctapipe += " && conda activate ctapipe_v0.11.0"

    if switches["output_type"] in "TRAINING":
        execute = "data_training.py"
        script_args = [
            "--config_file={}".format(config_file),
            "--estimate_energy={}".format(str(estimate_energy)),
            "--regressor_config={}.yaml".format(regressor_method),
            "--regressor_dir=./",
            "--outfile {outfile}",
            "--indir ./ --infile_list={infile_name}",
            "--max_events={}".format(switches["max_events"]),
            "--{mode}",
            "--cam_ids {}".format(cam_id_list),
        ]
        output_filename_template = "TRAINING"
    elif switches["output_type"] in "DL2":
        execute = "write_dl2.py"
        script_args = [
            "--config_file={}".format(config_file),
            "--regressor_config={}.yaml".format(regressor_method),
            "--regressor_dir=./",
            "--classifier_config={}.yaml".format(classifier_method),
            "--classifier_dir=./",
            "--outfile {outfile}",
            "--indir ./ --infile_list={infile_name}",
            "--max_events={}".format(switches["max_events"]),
            "--{mode}",
            "--force_tailcut_for_extended_cleaning={}".format(
                force_tailcut_for_extended_cleaning),
            "--cam_ids {}".format(cam_id_list),
        ]
        output_filename_template = "DL2"

    # Make the script save also the full calibrated images if required
    if switches["save_images"] is True:
        script_args.append("--save_images")

    # Make the script print debug information if required
    if switches["debug_script"] is True:
        script_args.append("--debug")

    cmd = [source_ctapipe, "&&", "./" + execute]
    cmd += script_args

    pilot_args_write = " ".join(cmd)

    # For table merging if multiple runs
    pilot_args_merge = " ".join([
        source_ctapipe,
        "&&",
        "./merge_tables.py",
        "--template_file_name",
        "{in_name}",
        "--outfile",
        "{out_name}",
    ])

    prod3b_filelist = dict()
    if estimate_energy is False and switches["output_type"] in "TRAINING":
        prod3b_filelist["gamma"] = cfg["EnergyRegressor"]["gamma_list"]
    elif estimate_energy is True and switches["output_type"] in "TRAINING":
        prod3b_filelist["gamma"] = cfg["GammaHadronClassifier"]["gamma_list"]
        prod3b_filelist["proton"] = cfg["GammaHadronClassifier"]["proton_list"]
    elif switches["output_type"] in "DL2":
        prod3b_filelist["gamma"] = cfg["Performance"]["gamma_list"]
        prod3b_filelist["proton"] = cfg["Performance"]["proton_list"]
        prod3b_filelist["electron"] = cfg["Performance"]["electron_list"]

    # from IPython import embed
    # embed()

    # Split list of files according to storage elements
    with open(prod3b_filelist[particle]) as f:
        filelist = f.readlines()

    filelist = ["{}".format(_.replace("\n", "")) for _ in filelist]
    res = dirac.splitInputData(filelist, n_file_per_job)
    list_run_to_loop_on = res["Value"]

    # define a template name for the file that's going to be written out.
    # the placeholder braces are going to get set during the file-loop
    output_filename = output_filename_template
    output_path = outdir
    if estimate_energy is False and switches["output_type"] in "TRAINING":
        output_path += "/{}/".format(training_dir_energy)
        step = "energy"
    if estimate_energy is True and switches["output_type"] in "TRAINING":
        output_path += "/{}/".format(training_dir_classification)
        step = "classification"
    if switches["output_type"] in "DL2":
        if force_tailcut_for_extended_cleaning is False:
            output_path += "/{}/".format(dl2_dir)
        else:
            output_path += "/{}_force_tc_extended_cleaning/".format(dl2_dir)
        step = ""
    output_filename += "_{}.h5"

    # sets all the local files that are going to be uploaded with the job
    # plus the pickled classifier
    # if file name starts with `LFN:`, it will be copied from the GRID
    input_sandbox = [
        # Utility to assign one job to one command...
        os.path.expandvars("$GRID/pilot.sh"),
        os.path.expandvars("$PROTOPIPE/protopipe/"),
        os.path.expandvars("$GRID/merge_tables.py"),
        # python wrapper for the mr_filter wavelet cleaning
        # os.path.expandvars("$PYWI/pywi/"),
        # os.path.expandvars("$PYWICTA/pywicta/"),
        # script that is being run
        os.path.expandvars("$PROTOPIPE/protopipe/scripts/" + execute),
        # Configuration file
        os.path.expandvars(os.path.join(config_path, config_file)),
    ]

    models_to_upload = []
    configs_to_upload = []
    if estimate_energy is True and switches["output_type"] in "TRAINING":
        config_path_template = "LFN:" + os.path.join(home_grid, outdir,
                                                     model_dir, "{}.yaml")
        config_to_upload = config_path_template.format(regressor_method)
        model_path_template = "LFN:" + os.path.join(
            home_grid, outdir, model_dir, "regressor_{}_{}.pkl.gz")
        for cam_id in cam_id_list:

            model_to_upload = model_path_template.format(
                cam_id, regressor_method)  # TBC
            print("The following model(s) will be uploaded to the GRID:")
            print(model_to_upload)
            models_to_upload.append(model_to_upload)

        print(
            "The following configs(s) for such models will be uploaded to the GRID:"
        )
        print(config_to_upload)
        configs_to_upload.append(config_to_upload)
        # input_sandbox.append(model_to_upload)
    elif estimate_energy is False and switches["output_type"] in "TRAINING":
        pass
    else:  # Charge also classifer for DL2
        model_type_list = ["regressor", "classifier"]
        model_method_list = [regressor_method, classifier_method]
        config_path_template = "LFN:" + os.path.join(home_grid, outdir,
                                                     model_dir, "{}.yaml")
        model_path_template = "LFN:" + os.path.join(
            home_grid, outdir, model_dir, "{}_{}_{}.pkl.gz")
        if force_tailcut_for_extended_cleaning is True:
            force_mode = mode.replace("wave", "tail")
            print("################")
            print(force_mode)
        else:
            force_mode = mode

        for idx, model_type in enumerate(model_type_list):

            print(
                "The following configuration file will be uploaded to the GRID:"
            )

            config_to_upload = config_path_template.format(
                model_method_list[idx])
            print(config_to_upload)
            configs_to_upload.append(config_to_upload)  # upload only 1 copy

            print(
                "The following model(s) related to such configuration file will be uploaded to the GRID:"
            )

            for cam_id in cam_id_list:

                if model_type in "regressor" and use_regressor is False:
                    print("Do not upload regressor model on GRID!!!")
                    continue

                if model_type in "classifier" and use_classifier is False:
                    print("Do not upload classifier model on GRID!!!")
                    continue

                model_to_upload = model_path_template.format(
                    model_type_list[idx], cam_id, model_method_list[idx])
                print(model_to_upload)

                models_to_upload.append(model_to_upload)
                # input_sandbox.append(model_to_upload)

    # summary before submitting
    print("\nDEBUG> running as:")
    print(pilot_args_write)
    print("\nDEBUG> with input_sandbox:")
    print(input_sandbox)
    print("\nDEBUG> with output file:")
    print(output_filename.format("{job_name}"))
    print("\nDEBUG> Particles:")
    print(particle)
    print("\nDEBUG> Energy estimation:")
    print(estimate_energy)

    # ########  ##     ## ##    ## ##    ## #### ##    ##  ######
    # ##     ## ##     ## ###   ## ###   ##  ##  ###   ## ##    ##
    # ##     ## ##     ## ####  ## ####  ##  ##  ####  ## ##
    # ########  ##     ## ## ## ## ## ## ##  ##  ## ## ## ##   ####
    # ##   ##   ##     ## ##  #### ##  ####  ##  ##  #### ##    ##
    # ##    ##  ##     ## ##   ### ##   ###  ##  ##   ### ##    ##
    # ##     ##  #######  ##    ## ##    ## #### ##    ##  ######

    # list of files on the GRID SE space
    # not submitting jobs where we already have the output
    batcmd = "dirac-dms-user-lfns --BaseDir {}".format(
        os.path.join(home_grid, output_path))
    result = subprocess.check_output(batcmd, shell=True)
    try:
        grid_filelist = open(result.split()[-1]).read()
    except IOError:
        raise IOError("ERROR> cannot read GRID filelist...")

    # get jobs from today and yesterday...
    days = []
    for i in range(2):  # how many days do you want to look back?
        days.append(
            (datetime.date.today() - datetime.timedelta(days=i)).isoformat())

    # get list of run_tokens that are currently running / waiting
    running_ids = set()
    running_names = []
    for status in ["Waiting", "Running", "Checking"]:
        for day in days:
            try:
                [
                    running_ids.add(id) for id in dirac.selectJobs(
                        status=status, date=day, owner=user_name)["Value"]
                ]
            except KeyError:
                pass

    n_jobs = len(running_ids)
    if n_jobs > 0:
        print("Scanning {} running/waiting jobs... please wait...".format(
            n_jobs))
        for i, id in enumerate(running_ids):
            if ((100 * i) / n_jobs) % 5 == 0:
                print("\r{} %".format(((20 * i) / n_jobs) * 5)),
            jobname = dirac.getJobAttributes(id)["Value"]["JobName"]
            running_names.append(jobname)
        else:
            print("\n... done")

    for bunch in list_run_to_loop_on:

        # for bunch in bunches_of_run:

        # from IPython import embed
        # embed()

        # this selects the `runxxx` part of the first and last file in the run
        # list and joins them with a dash so that we get a nice identifier in
        # the output file name.
        # if there is only one file in the list, use only that one
        # run_token = re.split('_', bunch[+0])[3]  # JLK JLK
        run_token = re.split("_", bunch[0])[3]
        if len(bunch) > 1:
            run_token = "-".join([run_token, re.split("_", bunch[-1])[3]])

        print("-" * 50)
        print("-" * 50)

        # setting output name
        output_filenames = dict()
        if switches["output_type"] in "DL2":
            job_name = "protopipe_{}_{}_{}_{}_{}".format(
                config_name, switches["output_type"], particle, run_token,
                mode)
            output_filenames[mode] = output_filename.format("_".join(
                [particle, mode, run_token]))
        else:
            job_name = "protopipe_{}_{}_{}_{}_{}_{}".format(
                config_name, switches["output_type"], step, particle,
                run_token, mode)
            output_filenames[mode] = output_filename.format("_".join(
                [step, particle, mode, run_token]))

        # if job already running / waiting, skip
        if job_name in running_names:
            print("\n WARNING> {} still running\n".format(job_name))
            continue

        print("Output file name: {}".format(output_filenames[mode]))

        # if file already in GRID storage, skip
        # (you cannot overwrite it there, delete it and resubmit)
        # (assumes tail and wave will always be written out together)
        already_exist = False
        file_on_grid = os.path.join(output_path, output_filenames[mode])
        print("DEBUG> check for existing file on GRID...")
        if file_on_grid in grid_filelist:
            print("\n WARNING> {} already on GRID SE\n".format(job_name))
            continue

        if n_jobs_max == 0:
            print("WARNING> maximum number of jobs to submit reached")
            print("WARNING> breaking loop now")
            break
        else:
            n_jobs_max -= 1

        j = Job()

        # runtime in seconds times 8 (CPU normalisation factor)
        j.setCPUTime(6 * 3600 * 8)
        j.setName(job_name)
        j.setInputSandbox(input_sandbox)

        if banned_sites:
            j.setBannedSites(banned_sites)

        # Add simtel files as input data
        j.setInputData(bunch)

        for run_file in bunch:
            file_token = re.split("_", run_file)[3]

            # wait for a random number of seconds (up to five minutes) before
            # starting to add a bit more entropy in the starting times of the
            # dirac queries.
            # if too many jobs try in parallel to access the SEs,
            # the interface crashes
            # #sleep = random.randint(0, 20)  # seconds
            # #j.setExecutable('sleep', str(sleep))

            # JLK: Try to stop doing that
            # consecutively downloads the data files, processes them,
            # deletes the input
            # and goes on to the next input file;
            # afterwards, the output files are merged
            # j.setExecutable('dirac-dms-get-file', "LFN:" + run_file)

            # source the miniconda ctapipe environment and
            # run the python script with all its arguments
            if switches["output_type"] in "DL2":
                output_filename_temp = output_filename.format("_".join(
                    [particle, mode, file_token]))
            if switches["output_type"] in "TRAINING":
                output_filename_temp = output_filename.format("_".join(
                    [step, particle, mode, file_token]))
            j.setExecutable(
                "./pilot.sh",
                pilot_args_write.format(
                    outfile=output_filename_temp,
                    infile_name=os.path.basename(run_file),
                    mode=mode,
                ),
            )

            # remove the current file to clear space
            j.setExecutable("rm", os.path.basename(run_file))

        # simple `ls` for good measure
        j.setExecutable("ls", "-lh")

        # if there is more than one file per job, merge the output tables
        if len(bunch) > 1:
            names = []

            names.append(("*_{}_".format(particle), output_filenames[mode]))

            for in_name, out_name in names:
                print("in_name: {}, out_name: {}".format(in_name, out_name))
                j.setExecutable(
                    "./pilot.sh",
                    pilot_args_merge.format(in_name=in_name,
                                            out_name=out_name),
                )

                print("DEBUG> args append: {}".format(
                    pilot_args_merge.format(in_name=in_name,
                                            out_name=out_name)))

        bunch.extend(models_to_upload)
        bunch.extend(configs_to_upload)
        j.setInputData(bunch)

        print("Input data set to job = {}".format(bunch))

        outputs = []
        outputs.append(output_filenames[mode])
        print("Output file path: {}{}".format(output_path,
                                              output_filenames[mode]))

        j.setOutputData(outputs, outputSE=None, outputPath=output_path)

        # check if we should somehow stop doing what we are doing
        if switches["dry"] is True:
            print("\nThis is a DRY RUN! -- NO job has been submitted!")
            print("Name of the job: {}".format(job_name))
            print("Name of the output file: {}".format(outputs))
            print("Output path from GRID home: {}".format(output_path))
            break

        # this sends the job to the GRID and uploads all the
        # files into the input sandbox in the process
        print("\nSUBMITTING job with the following INPUT SANDBOX:")
        print(input_sandbox)
        print("Submission RESULT: {}\n".format(dirac.submitJob(j)["Value"]))

        # break if this is only a test submission
        if switches["test"] is True:
            print("This is a TEST RUN! -- Only ONE job will be submitted!")
            print("Name of the job: {}".format(job_name))
            print("Name of the output file: {}".format(outputs))
            print("Output path from GRID home: {}".format(output_path))
            break

        # since there are two nested loops, need to break again
        if switches["test"] is True:
            break

    try:
        os.remove("datapipe.tar.gz")
        os.remove("modules.tar.gz")
    except:
        pass

    # Upload analysis configuration file for provenance

    SE_LIST = ['CC-IN2P3-USER', 'DESY-ZN-USER', 'CNAF-USER', 'CEA-USER']
    analysis_config_local = os.path.join(config_path, config_file)
    # the configuration file is uploaded to the data directory because
    # the training samples (as well as their cleaning settings) are independent
    analysis_config_dirac = os.path.join(home_grid, output_path, config_file)
    print("Uploading {} to {}...".format(analysis_config_local,
                                         analysis_config_dirac))

    if switches["dry"] is False:
        # Upload this file to all Dirac Storage Elements in SE_LIST
        for se in SE_LIST:
            # the uploaded config file overwrites any old copy
            ana_cfg_upload_cmd = "dirac-dms-add-file -f {} {} {}".format(
                analysis_config_dirac, analysis_config_local, se)
            ana_cfg_upload_result = subprocess.check_output(ana_cfg_upload_cmd,
                                                            shell=True)
            print(ana_cfg_upload_result)
    else:
        print("This is a DRY RUN! -- analysis.yaml has NOT been uploaded.")

    print("\nall done -- exiting now")
    exit()
Code example #16
    # Set the name of the job (viewable in the web portal).
    j.setName(jobname)

    #
    j.setExecutable('/bin/sh', arguments='%s %s %s' % ('run.sh', '/cvmfs/cernatschool.gridpp.ac.uk/grid-klustering-001-00-07/', 'process-frames.py'))

    #
    j.setInputSandbox(inputfiles)

    #...and added to the desired storage element with the corresponding
    # LFN via the job's OutputData. You may wish to change:
    # * The Storage Element - by changing the outputSE parameter;
    # * The LFN base name   - by changing the outputPath parameter.
    j.setOutputData(kluster_file_names, \
                    outputSE='%s' % (se), \
                    outputPath='/%s/' % (gridoutdir)\
                   )

    # These are the files retrieved with the local job output.
    j.setOutputSandbox(['StdOut', 'StdErr', 'klusters.json', 'log_process_frames.log'])

    # You can set your preferred site here.
    j.setDestination(sitename)

    ## The DIRAC instance.
    dirac = Dirac()

#    # Submit the job and print the result.
#    result = dirac.submit(j)
#    print 'Submission result: ', result
Code example #17
from DIRAC.Core.Base import Script
Script.parseCommandLine()
from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.Interfaces.API.Job import Job


dirac = Dirac()


j = Job()
j.setName("compile_mrfilter")
j.setCPUTime(80)
j.setInputSandbox(["dirac_compile_mrfilter_pilot.sh"])
j.setExecutable("dirac_compile_mrfilter_pilot.sh", "")
j.setOutputData(["mr_filter"], outputSE=None,
                outputPath="cta/bin/mr_filter/v3_1/")
dirac.submit(j)
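The final line above submits the job without keeping DIRAC's return dictionary. A small sketch of a variant that captures it and then polls the job (dirac.status is also used in example #21; everything else follows the result['OK'] / result['Value'] pattern from examples #2 and #3):

result = dirac.submit(j)
if result['OK']:
    job_id = result['Value']
    # Query the current status; dirac.status() returns the usual DIRAC result dictionary.
    print('Job {} status: {}'.format(job_id, dirac.status(job_id)))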
Code example #18
                #for in_name, out_name in [('classified_events_wave', output_filename_wave),
                #                          ('classified_events_tail', output_filename_tail)]:
                j.setExecutable(
                    './pilot.sh',
                    pilot_args_append.format(in_name=in_name,
                                             out_name=out_name))

        outputs = []
        for mode in modes:
            outputs.append(output_filenames[mode])
            print("OutputData: {}{}".format(output_path,
                                            output_filenames[mode]))
        #print("OutputData: {}{}".format(output_path, output_filename_wave))
        #print("OutputData: {}{}".format(output_path, output_filename_tail))
        #j.setOutputData([output_filename_wave, output_filename_tail],
        j.setOutputData(outputs, outputSE=None, outputPath=output_path)

        # check if we should somehow stop doing what we are doing
        if "dry" in sys.argv:
            print("\nrunning dry -- not submitting")
            break

        # this sends the job to the GRID and uploads all the
        # files into the input sandbox in the process
        print("\nsubmitting job")
        print('Submission Result: {}\n'.format(dirac.submit(j)['Value']))

        # break if this is only a test submission
        if "test" in sys.argv:
            print("test run -- only submitting one job")
            break
Code example #19
    ## prepare the output location in GRID storage; the input path will also be used for GRID storage
    # outdir = grid_basedir_output + PROD_NAME + "/" + str(e_min) + "_" + str(e_max) + "/" + str(theta_min) + "_" + str(theta_max) + "/" + str(prmpar) + "/" + str(runnr)
    # outdir = "/" + PROD_NAME + "/" + str(e_min) + "_" + str(e_max) + "/" + str(theta_min) + "_" + str(theta_max) + "/" + str(prmpar) + "/" + str(runnr)
    outdir = "/" + PROD_NAME + "/" + str(e_min) + "/" + str(theta_min) + "/" + str(prmpar) + "/" + str(runnr)

    print 'SE = ',se

    lfns_list = []
    if ( group == "auger_prod"  ):
        base_output_path = prod_path
        ## add base directory to each file to have a list of lfns
        for f in output_files:
            lfn = "LFN:" + base_output_path + outdir + "/" + f
            lfns_list.append(lfn)

        j.setOutputData(lfns_list, outputSE=se)
        print 'Output - list of lfns :'
        pp.pprint (lfns_list)
    else:
##        base_output_path = PWD
        j.setOutputData(output_files, outputSE=se, outputPath=outdir)
        print 'Output files = ', output_files
        print 'outputPath = ', outdir

#####################
##   PREPARE JOB   ##
#####################
    if (DO_NOT_SUBMIT):
        sys.exit(os.EX_USAGE)

    ### ALWAYS, INFO, VERBOSE, WARN, DEBUG
Code example #20
File: dirac.job_submit.py, Project: pigay/auger-dirac
    j.setCPUTime(345600) ## 4 days

    ## download the script for preparing corsika input file for usage with cvmfs
    j.setExecutable( 'curl', arguments = ' -fsSLkO http://issaf.spacescience.ro/adrian/AUGER/make_run4cvmfs',logFile='cmd_logs.log')
    j.setExecutable( 'chmod', arguments = ' +x make_run4cvmfs',logFile='cmd_logs.log')

    ## create the simulation script configured for use with cvmfs
    ## set the make_run4cvmfs arguments to include the corsika_version and corsika_bin
    make_run4cvmfs_arg = input_file_base + " " + corsika_version + " " + corsika_bin
    j.setExecutable( './make_run4cvmfs', arguments = make_run4cvmfs_arg, logFile='cmd_logs.log')

    ## run simulation
    j.setExecutable( './execsim',logFile='cmd_logs.log')

    j.setOutputSandbox(output_files)
    j.setOutputData(output_files, outputSE=se, outputPath=outdir)

    ##j.runLocal()  ## test local

    jobID = dirac.submit(j)
    print 'Submission Result: ',jobID

    with open('jobids.list', 'a') as f_id_log:
        f_id_log.write(str(jobID['Value']) + '\n')



Code example #21
    args = visit + ' ' + insidename + ' ' + str(startsensor) + ' ' + str(
        numsensors) + ' ' + str(idx)
    outputname = 'fits_' + visit + '_' + str(idx) + '.tar'

    j.setCPUTime(1209600)
    j.setExecutable('runimsim2.1.sh', arguments=args)
    j.stderr = "std.err"
    j.stdout = "std.out"
    #!!! May need the 2.1i directory here depending on visit number !!!
    j.setInputSandbox([
        "runimsim2.1.sh", "run_imsim_nersc.py",
        "LFN:/lsst/user/j/james.perry/instcats/2.1i/" + instcatname
    ])
    j.setOutputSandbox(["std.out", "std.err"])
    j.setTag(["8Processors"])
    #j.setOutputData([visit + "/" + outputname], outputPath="", outputSE=["IN2P3-CC-disk"])
    j.setOutputData([visit + "/" + outputname],
                    outputPath="",
                    outputSE=["UKI-NORTHGRID-LANCS-HEP-disk"])
    j.setPlatform("AnyPlatform")

    j.setDestination(site)

    jobID = dirac.submitJob(j)
    print("Submitted job to " + site + " as ID " + str(jobID))
    print "Status is:", dirac.status(jobID['JobID'])

    joblistfile.write(str(jobID['JobID']) + '\n')

joblistfile.close()
Code example #22
    # As we're just copying the input sandbox to the storage element
    # via OutputData, we'll just list the files as a check for the
    # output written to StdOut.
    j.setExecutable('/bin/ls -l')

    # Here we add the names of the temporary copies of the frame data
    # files in the dataset to the input sandbox. These will be uploaded
    # to the grid with the job...
    j.setInputSandbox(file_dict.keys())

    #...and added to the desired storage element with the corresponding
    # LFN via the job's OutputData. You may wish to change:
    # * The Storage Element - by changing the outputSE parameter;
    # * The LFN base name   - by changing the outputPath parameter.
    j.setOutputData(file_dict.keys(), \
                    outputSE='%s' % (se), \
                    outputPath='/%s/' % (gridoutdir)\
                   )

    # These are the files retrieved with the local job output.
    j.setOutputSandbox(['StdOut', 'StdErr'])

    # You can set your preferred site here.
    j.setDestination(sitename)

    ## The DIRAC instance.
    dirac = Dirac()

#    # Submit the job and print the result.
#    result = dirac.submit(j)
#    print 'Submission result: ', result