Example #1
def generateDax(name="mvm", inputData=None):
    """Generate a Pegasus DAX abstract workflow"""
    dax = peg.ADAG(name)
    taskname = "matchedVisitMetrics"
    arguments = " --doraise --config instrumentName='HSC' datasetName='HSC-PDR2' " \
                "doApplyExternalPhotoCalib=True doApplyExternalSkyWcs=True externalPhotoCalibName=fgcm "

    with open(inputData, 'r') as f:
        for line in f:
            filt, tract, visits = line.strip().split(' ')
            outNonRepoPath = os.path.join(outPath, tract, filt)

            logger.debug("add job of dataId: %s %s %s to %s", filt, tract,
                         visits, outNonRepoPath)
            task = peg.Job(name=taskname)
            task.addArguments(
                inputRepo, "--output", outNonRepoPath, arguments,
                "--id ccd=0..8^10..103 tract=%s visit=%s" % (tract, visits))
            dax.addJob(task)

            logfile = peg.File("%s-%s-%s.log" % (taskname, tract, filt))
            dax.addFile(logfile)
            task.setStdout(logfile)
            task.setStderr(logfile)
            task.uses(logfile, link=peg.Link.OUTPUT)

    return dax
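
A minimal driver sketch for generateDax() above, assuming peg is Pegasus.DAX3 and that the module-level names used inside the function (inputRepo, outPath, logger) are configured elsewhere in the original script; the input file name below is hypothetical.

# Driver sketch (not part of the original example): build the DAX and
# serialize it to XML, as Example #5 does with adag.writeXML().
if __name__ == "__main__":
    dax = generateDax(name="mvm", inputData="visit_list.txt")  # hypothetical input list
    with open("mvm.dax", "w") as f:
        dax.writeXML(f)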
Example #2
def __init__(self, executable):
    self.in_workflow = False
    self.executable = executable
    self._inputs = []
    self._outputs = []
    self._dax_node = dax.Job(name=executable.logical_name,
                             version=executable.version,
                             namespace=executable.namespace)
    self._args = []
    self._options = []
Example #3
def make(self, task_name, dataId=None, options=None, repo=None):
    job = DAX3.Job(task_name)
    if repo is None:
        repo = self.repo
    args = [repo]
    args = self._add_dataId(args, dataId)
    args = self._add_options(args, options)
    configfile = os.path.join(self.config_dir, '%s-config.py' % task_name)
    args.extend(['--configfile', configfile])
    job.addArguments(*args)
    self.dax.addJob(job)
    if self.bin_dir is not None and self.tc is not None:
        self._update_tc_file(task_name)
    return job
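
A usage sketch for make(), following the call pattern shown in Example #10 below; it assumes a JobMaker instance named job_maker, and the dataId and options values are illustrative only.

# Illustrative call (values are hypothetical); make() adds the job to the
# underlying DAX and returns it so dependencies can be declared on it.
processCcd = job_maker.make('processCcd',
                            dataId=dict(visit=185783, raft='2,2'),
                            options={'--clobber-config': ''})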
Example #4
def __init__(self, executable):
    self.in_workflow = False
    self.executable = executable
    self._inputs = []
    self._outputs = []
    self._dax_node = dax.Job(name=executable.logical_name,
                             version=executable.version,
                             namespace=executable.namespace)
    self._args = []
    # Each value in _options is added separated with whitespace
    # so ['--option','value'] --> "--option value"
    self._options = []
    # For _raw_options *NO* whitespace is added.
    # so ['--option','value'] --> "--optionvalue"
    # and ['--option',' ','value'] --> "--option value"
    self._raw_options = []
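
A small sketch illustrating the joining behavior described in the comments above; this only demonstrates the documented whitespace rules, not the library's actual argument-assembly code.

# Demonstration of the whitespace rules noted in the comments above.
options = ['--option', 'value']
raw_options = ['--option', ' ', 'value']
print(' '.join(options))               # --option value
print(''.join(['--option', 'value']))  # --optionvalue
print(''.join(raw_options))            # --option value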
Example #5
def main():
    args = parse_args()
    setup_logger(args.debug)

    # TODO: handle exceptions for bad file paths
    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    log.info("Loading {}".format(workflow_file_path))
    workflow = cwl.load_document(workflow_file_path)

    adag = dax.ADAG("dag-generated-from-cwl", auto=True)
    rc = ReplicaCatalog()
    tc = TransformationCatalog(workflow_file_dir)

    # process initial input file(s)
    # TODO: need to account for the different fields for a file class
    # TODO: log warning for the fields that we are skipping
    workflow_input_strings = dict()
    workflow_files = dict()

    log.info("Collecting inputs in {}".format(args.input_file_spec_path))
    with open(args.input_file_spec_path, "r") as yaml_file:
        input_file_specs = load(yaml_file, Loader=Loader)

        for input in workflow.inputs:
            input_type = input.type

            if input_type == "File":
                workflow_files[get_basename(input.id)] = get_basename(input.id)
                # TODO: account for non-local sites
                rc.add_item(get_basename(input.id),
                            input_file_specs[get_basename(input.id)]["path"],
                            "local")
            elif input_type == "string":
                workflow_input_strings[get_basename(input.id)] = \
                                        input_file_specs[get_basename(input.id)]
            elif isinstance(input_type, cwl.InputArraySchema):
                if input_type.items == "File":
                    # TODO: account for workflow inputs of type File[]
                    pass
                elif input_type.items == "string":
                    workflow_input_strings[get_basename(input.id)] = \
                                        input_file_specs[get_basename(input.id)]

    log.info("Collecting output files")
    for step in workflow.steps:
        cwl_command_line_tool = cwl.load_document(step.run) if isinstance(step.run, str) \
                                                                    else step.run

        for output in cwl_command_line_tool.outputs:
            # TODO: account for outputs that are not files
            output_name = get_name(step.id, output.id)

            log.debug("Adding (key: {0}, value: {1}) to workflow_files".format(
                output_name, output.outputBinding.glob))

            # TODO: throw error when glob contains javascript expression
            #       or pattern as we cannot support anything that is dynamic
            workflow_files[output_name] = output.outputBinding.glob

    log.info("Building workflow steps into dax jobs")
    for step in workflow.steps:
        # convert cwl:CommandLineTool -> pegasus:Executable
        cwl_command_line_tool = cwl.load_document(step.run) if isinstance(step.run, str) \
                                                                    else step.run

        executable_name = os.path.basename(cwl_command_line_tool.baseCommand) if \
            os.path.isabs(cwl_command_line_tool.baseCommand) else cwl_command_line_tool.baseCommand

        dax_executable = dax.Executable(executable_name)

        # add executable to transformation catalog
        tc.add_item(executable_name, cwl_command_line_tool.baseCommand)

        # create job with executable
        dax_job = dax.Job(dax_executable)

        step_inputs = dict()
        for input in step.in_:
            input_id = get_basename(input.id)
            if isinstance(input.source, str):
                step_inputs[input_id] = get_basename(input.source)
            elif isinstance(input.source, list):
                step_inputs[input_id] = [
                    get_basename(file) for file in input.source
                ]

        # add input uses to job
        for input in cwl_command_line_tool.inputs:
            if input.type == "File":
                file_id = step_inputs[get_name(step.id, input.id)]
                file = dax.File(workflow_files[file_id])
                log.debug("Adding link ({0} -> {1})".format(
                    file_id, dax_job.name))

                dax_job.uses(file, link=dax.Link.INPUT)

            # TODO: better type checking for string[] and File[] ?
            elif isinstance(input.type, cwl.CommandInputArraySchema):
                if input.type.items == "File":
                    file_ids = step_inputs[get_name(step.id, input.id)]
                    for file_id in file_ids:
                        file = dax.File(workflow_files[file_id])
                        log.debug("Adding link ({0} -> {1})".format(
                            file_id, dax_job.name))

                        dax_job.uses(file, link=dax.Link.INPUT)

        # add output uses to job
        # TODO: ensure that these are of type File or File[]
        for output in step.out:
            file_id = get_basename(output)
            file = dax.File(workflow_files[file_id])
            log.debug("Adding link ({0} -> {1})".format(dax_job.name, file_id))

            dax_job.uses(file,
                         link=dax.Link.OUTPUT,
                         transfer=True,
                         register=True)

        # add arguments to job
        # TODO: place argument building up in a function
        dax_job_args = cwl_command_line_tool.arguments if \
            cwl_command_line_tool.arguments is not None else []

        # process cwl inputBindings if they exist and build up job argument list
        cwl_command_line_tool_inputs = sorted(cwl_command_line_tool.inputs,
            key=lambda input : input.inputBinding.position if input.inputBinding.position \
                is not None else 0 )

        for input in cwl_command_line_tool_inputs:
            # process args
            if input.inputBinding is not None:
                # TODO: account for inputBinding separation
                if input.inputBinding.prefix is not None:
                    dax_job_args.append(input.inputBinding.prefix)

                if input.type == "File":
                    dax_job_args.append(
                        dax.File(workflow_files[step_inputs[get_name(
                            step.id, input.id)]]))

                if input.type == "string":
                    dax_job_args.append(
                        workflow_input_strings[step_inputs[get_name(
                            step.id, input.id)]])

                # handle array type inputs
                if isinstance(input.type, cwl.CommandInputArraySchema):
                    if input.type.items == "File":
                        for file in step_inputs[get_name(step.id, input.id)]:
                            dax_job_args.append(dax.File(workflow_files[file]))
                    elif input.type.items == "string":
                        input_string_arr_id = step_inputs[get_name(
                            step.id, input.id)]

                        separator = " " if input.inputBinding.itemSeparator is None \
                                        else input.inputBinding.itemSeparator

                        dax_job_args.append(
                            # TODO: currently only accounting for input strings that
                            #       are inputs to the entire workflow
                            separator.join(
                                workflow_input_strings[input_string_arr_id]))

        log.debug("Adding job: {0}, with args: {1}".format(
            dax_job.name, dax_job_args))
        dax_job.addArguments(*dax_job_args)

        # add job to DAG
        adag.addJob(dax_job)

    rc.write_catalog("rc.txt")
    tc.write_catalog("tc.txt")

    with open(args.output_file_path, "w") as f:
        log.info("Writing DAX to {}".format(args.output_file_path))
        adag.writeXML(f)
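
A sketch of the structure main() expects from the input spec YAML, shown as the equivalent Python object that load() would return: keys are the basenames of the CWL workflow inputs, File inputs carry a "path" field, and string inputs map directly to their value. The names below are hypothetical.

# Hypothetical equivalent of the parsed input spec YAML.
input_file_specs = {
    "reference_data": {"path": "/data/inputs/reference.dat"},  # File input: "path" is looked up
    "sample_name": "run_42",                                   # string input: value used directly
}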
Example #6
import os
import pwd
import time

import Pegasus.DAX3 as DAX3

import desc.imsim_deep_pipeline as idp

USER = pwd.getpwuid(os.getuid())[0]

# Create an abstract DAG
dax = DAX3.ADAG("imsim_pipeline")

# Add some workflow-level metadata
dax.metadata("creator", "%s@%s" % (USER, os.uname()[1]))
dax.metadata("created", time.ctime())

dither_info_file = 'dither_info.pkl'
sensor_lists = idp.SensorLists(dither_info_file)

for visit, visit_info in sensor_lists.visits:
    band = visit_info.band
    for sensor_id in visit_info.sensor_ids:
        make_instcat = DAX3.Job('make_instcat')
        make_instcat.addArguments(visit, sensor_id)
        instcat = DAX3.File('instcat_%(visit)s_%(sensor_id)s.txt' % locals())
        make_instcat.uses(instcat, link=DAX3.Link.OUTPUT, transfer=True,
                          register=True)
        dax.addJob(make_instcat)

        run_imsim = DAX3.Job('run_imsim')
        run_imsim.uses(instcat, link=DAX3.Link.INPUT)
        dax.addJob(run_imsim)
        dax.depends(run_imsim, make_instcat)
        eimage = DAX3.File('lsst_e_%(visit)s_%(sensor_id)s_%(band)s.fits'
                           % locals())
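
The excerpt ends right after defining eimage. A plausible continuation, mirroring how instcat is handled above (and not part of the original code), would register it as an output of the run_imsim job:

        # Plausible continuation (not in the original excerpt): register the
        # e-image as an output of run_imsim, mirroring the instcat handling.
        run_imsim.uses(eimage, link=DAX3.Link.OUTPUT, transfer=True,
                       register=True)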
Example #7
def generateDax(name="object", inputData=None):
    """Generate a Pegasus DAX abstract workflow"""
    dax = peg.ADAG(name)

    # These config-ish files are expected in the input/ folder
    schemaAbh = peg.File("schema.abh")
    dax.addFile(schemaAbh)
    sedScript = peg.File("fixCsv.sed")
    dax.addFile(sedScript)
    partCfg = peg.File("partition.json")
    dax.addFile(partCfg)
    catYaml = peg.File("hsc.yaml")
    dax.addFile(catYaml)

    # (Ab)using the shared filesystem....!!!
    chunkBaseFolder = os.path.join("/project", "hchiang2", "qserv", "qqpoc")
    if not os.path.isdir(chunkBaseFolder):
        logging.warning("Chunk file base folder %s invalid", chunkBaseFolder)

    # Create a new database and the Object table in Qserv
    task0a = peg.Job(name="replctl-register")
    task0a.addProfile(
        peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
    task0a.addArguments("http://lsst-qserv-master03:25080", str(database),
                        "--felis", catYaml, "-v")
    dax.addJob(task0a)
    logfile = peg.File("qingest-a.log")
    dax.addFile(logfile)
    task0a.setStdout(logfile)
    task0a.setStderr(logfile)
    task0a.uses(logfile, link=peg.Link.OUTPUT)
    task0a.uses(catYaml, link=peg.Link.INPUT)

    # Start a super-transaction
    # Need to get the super transaction id from the log file
    task0c = peg.Job(name="replctl-trans")
    task0c.addProfile(
        peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
    task0c.addArguments("http://lsst-qserv-master03:25080", str(database),
                        "--start")
    dax.addJob(task0c)
    transIdFile = peg.File("qingest-c.log")
    dax.addFile(transIdFile)
    task0c.setStdout(transIdFile)
    task0c.setStderr(transIdFile)
    task0c.uses(transIdFile, link=peg.Link.OUTPUT)
    dax.depends(parent=task0a, child=task0c)

    # Commit a super-transaction
    task0d = peg.Job(name="replctl-trans")
    task0d.addProfile(
        peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
    task0d.addArguments("http://lsst-qserv-master03:25080", str(database),
                        "-a")
    dax.addJob(task0d)
    logfile = peg.File("qingest-d.log")
    dax.addFile(logfile)
    task0d.setStdout(logfile)
    task0d.setStderr(logfile)
    task0d.uses(logfile, link=peg.Link.OUTPUT)

    i = 0
    with open(inputData, 'r') as f:
        for line in f:
            inparq = line.strip()
            i += 1
            logging.debug('Add file %d: %s', i, inparq)

            taskname = 'hackType'
            task1 = peg.Job(name=taskname)
            task1.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "20GB"))
            outparq = peg.File("hack-%d.parq" % i)
            dax.addFile(outparq)
            task1.addArguments("-i", inparq, "-o", outparq)
            dax.addJob(task1)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task1.setStdout(logfile)
            task1.setStderr(logfile)
            task1.uses(logfile, link=peg.Link.OUTPUT)
            task1.uses(outparq, link=peg.Link.OUTPUT)

            taskname = 'pq2csv'
            task2 = peg.Job(name=taskname)
            task2.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "20GB"))
            outcsv = peg.File("csv-%d.csv" % i)
            dax.addFile(outcsv)
            task2.addArguments("--schema", schemaAbh, "--verbose", outparq,
                               outcsv)
            dax.addJob(task2)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task2.setStdout(logfile)
            task2.setStderr(logfile)
            task2.uses(logfile, link=peg.Link.OUTPUT)
            task2.uses(schemaAbh, link=peg.Link.INPUT)
            task2.uses(outparq, link=peg.Link.INPUT)
            task2.uses(outcsv, link=peg.Link.OUTPUT)
            dax.depends(parent=task1, child=task2)

            taskname = 'sed'
            task3 = peg.Job(name=taskname)
            task3.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
            task3.addArguments("-f", sedScript, outcsv)
            dax.addJob(task3)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            newcsv = peg.File("new-%s.csv" % (i, ))
            dax.addFile(logfile)
            task3.setStdout(newcsv)
            task3.setStderr(logfile)
            task3.uses(logfile, link=peg.Link.OUTPUT)
            task3.uses(newcsv, link=peg.Link.OUTPUT)
            task3.uses(outcsv, link=peg.Link.INPUT)
            task3.uses(sedScript, link=peg.Link.INPUT)
            dax.depends(parent=task2, child=task3)

            # My input csv files are larger than 1GB each and I am not splitting them for now
            taskname = 'partition'
            task4 = peg.Job(name=taskname)
            task4.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "15GB"))
            outdir = os.path.join(chunkBaseFolder, 'chunksSet' + str(i))
            task4.addArguments("--verbose", "-c", partCfg, "--in.path", newcsv,
                               "--out.dir", outdir)
            dax.addJob(task4)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task4.setStdout(logfile)
            task4.setStderr(logfile)
            task4.uses(logfile, link=peg.Link.OUTPUT)
            task4.uses(newcsv, link=peg.Link.INPUT)
            task4.uses(partCfg, link=peg.Link.INPUT)
            dax.depends(parent=task3, child=task4)

            # Look for chunk files in the output folder of this partitioning
            # Cannot handle smaller job units at dax creation as the folder is not yet populated;
            # if we want smaller units, consider using dynamic subworkflow
            taskname = 'allocateChunk'
            task5 = peg.Job(name=taskname)
            task5.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
            task5.addArguments(outdir, "--idFile", transIdFile)
            dax.addJob(task5)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task5.setStdout(logfile)
            task5.setStderr(logfile)
            task5.uses(logfile, link=peg.Link.OUTPUT)
            task5.uses(transIdFile, link=peg.Link.INPUT)
            dax.depends(parent=task4, child=task5)
            dax.depends(parent=task0c, child=task5)

            taskname = 'loadData'
            task6 = peg.Job(name=taskname)
            task6.addProfile(
                peg.Profile(peg.Namespace.CONDOR, "request_memory", "2GB"))
            task6.addArguments(logfile)
            dax.addJob(task6)
            task6.uses(logfile, link=peg.Link.INPUT)
            logfile6 = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile6)
            task6.setStdout(logfile6)
            task6.setStderr(logfile6)
            task6.uses(logfile6, link=peg.Link.OUTPUT)
            dax.depends(parent=task5, child=task6)
            dax.depends(parent=task6, child=task0d)

    return dax
Example #8
import os
import pwd
import time

import Pegasus.DAX3 as DAX3

USER = pwd.getpwuid(os.getuid())[0]

# Create an abstract DAG
dax = DAX3.ADAG("Strong Lensing Pipeline")

# Add some workflow-level metadata
dax.metadata("creator", "%s@%s" % (USER, os.uname()[1]))
dax.metadata("created", time.ctime())

dm_level1_catalog = DAX3.File('dm_level1_catalog')
dm_images = DAX3.File('dm_image_data')
SL_candidates = DAX3.File('SL_candidates')
SLFinder = DAX3.Job('SLFinder')
SLFinder.uses(dm_level1_catalog, link=DAX3.Link.INPUT)
SLFinder.uses(dm_images, link=DAX3.Link.INPUT)
SLFinder.uses(SL_candidates,
              link=DAX3.Link.OUTPUT,
              register=True,
              transfer=True)
dax.addJob(SLFinder)

DESC_Lenses = DAX3.File('DESC_Lenses')
SpaceWarps = DAX3.Job('SpaceWarps')
SpaceWarps.uses(SL_candidates, link=DAX3.Link.INPUT)
SpaceWarps.uses(DESC_Lenses,
                link=DAX3.Link.OUTPUT,
                register=True,
                transfer=True)
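
A plausible completion of this excerpt (not part of the original): add the SpaceWarps job to the DAG, declare its dependency on SLFinder, and serialize the workflow, mirroring the writeXML() call in Example #5. The output file name is hypothetical.

# Plausible completion (not in the original excerpt).
dax.addJob(SpaceWarps)
dax.depends(parent=SLFinder, child=SpaceWarps)
with open("strong_lensing_pipeline.dax", "w") as f:
    dax.writeXML(f)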
Example #9
def generateDax(name="object", inputData=None):
    """Generate a Pegasus DAX abstract workflow"""
    dax = peg.ADAG(name)

    # These config-ish files are expected in the input/ folder
    schemaAbh = peg.File("schema.abh")
    dax.addFile(schemaAbh)
    sedScript = peg.File("fixCsv.sed")
    dax.addFile(sedScript)
    partCfg = peg.File("Object_new.cfg")
    dax.addFile(partCfg)
    # Note this json file has the database name....!!!
    tableJson = peg.File("test.json")
    dax.addFile(tableJson)
    database = "hsc_rc2_w_2020_14_00"

    # (Ab)using the shared filesystem....!!!
    chunkBaseFolder = os.path.join("/project", "hchiang2", "qserv", "qqpoc")
    if not os.path.isdir(chunkBaseFolder):
        logging.warning("Chunk file base folder %s invalid", chunkBaseFolder)

    # Create a new database
    task0a = peg.Job(name="qingest")
    task0a.addArguments("http://lsst-qserv-master01:25080/ingest/v1/database",
                        "post", "--data", "database=" + str(database),
                        "num_stripes=340 num_sub_stripes=3 overlap=0.01667")
    dax.addJob(task0a)
    logfile = peg.File("qingest-a.log")
    dax.addFile(logfile)
    task0a.setStdout(logfile)
    task0a.setStderr(logfile)
    task0a.uses(logfile, link=peg.Link.OUTPUT)

    # Create the Object table in Qserv
    task0b = peg.Job(name="qingest")
    task0b.addArguments("http://lsst-qserv-master01:25080/ingest/v1/table",
                        "post", "--json", tableJson)
    dax.addJob(task0b)
    logfile = peg.File("qingest-b.log")
    dax.addFile(logfile)
    task0b.setStdout(logfile)
    task0b.setStderr(logfile)
    task0b.uses(logfile, link=peg.Link.OUTPUT)
    task0b.uses(tableJson, link=peg.Link.INPUT)
    dax.depends(parent=task0a, child=task0b)

    # Start a super-transaction
    # Need to get the super transaction id from the log file
    task0c = peg.Job(name="qingest")
    task0c.addArguments("http://lsst-qserv-master01:25080/ingest/v1/trans",
                        "post", "--data", "database=" + str(database))
    dax.addJob(task0c)
    transIdFile = peg.File("qingest-c.log")
    dax.addFile(transIdFile)
    task0c.setStdout(transIdFile)
    task0c.setStderr(transIdFile)
    task0c.uses(transIdFile, link=peg.Link.OUTPUT)
    dax.depends(parent=task0b, child=task0c)

    i = 0
    with open(inputData, 'r') as f:
        for line in f:
            inparq = line.strip()
            i += 1
            logging.debug('Add file %d: %s', i, inparq)

            taskname = 'hackType'
            task1 = peg.Job(name=taskname)
            outparq = peg.File("hack-%d.parq" % i)
            dax.addFile(outparq)
            task1.addArguments("-i", inparq, "-o", outparq)
            dax.addJob(task1)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task1.setStdout(logfile)
            task1.setStderr(logfile)
            task1.uses(logfile, link=peg.Link.OUTPUT)
            task1.uses(outparq, link=peg.Link.OUTPUT)

            taskname = 'pq2csv'
            task2 = peg.Job(name=taskname)
            outcsv = peg.File("csv-%d.csv" % i)
            dax.addFile(outcsv)
            task2.addArguments("--schema", schemaAbh, "--verbose", outparq,
                               outcsv)
            dax.addJob(task2)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task2.setStdout(logfile)
            task2.setStderr(logfile)
            task2.uses(logfile, link=peg.Link.OUTPUT)
            task2.uses(schemaAbh, link=peg.Link.INPUT)
            task2.uses(outparq, link=peg.Link.INPUT)
            task2.uses(outcsv, link=peg.Link.OUTPUT)
            dax.depends(parent=task1, child=task2)

            taskname = 'sed'
            task3 = peg.Job(name=taskname)
            task3.addArguments("-f", sedScript, outcsv)
            dax.addJob(task3)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            newcsv = peg.File("new-%s.csv" % (i, ))
            dax.addFile(logfile)
            task3.setStdout(newcsv)
            task3.setStderr(logfile)
            task3.uses(logfile, link=peg.Link.OUTPUT)
            task3.uses(newcsv, link=peg.Link.OUTPUT)
            task3.uses(outcsv, link=peg.Link.INPUT)
            task3.uses(sedScript, link=peg.Link.INPUT)
            dax.depends(parent=task2, child=task3)

            # My input csv files are larger than 1GB each and I am not splitting them for now
            taskname = 'partition'
            task4 = peg.Job(name=taskname)
            outdir = os.path.join(chunkBaseFolder, 'chunksSet' + str(i))
            task4.addArguments("--verbose", "-c", partCfg, "--in", newcsv,
                               "--out.dir", outdir)
            dax.addJob(task4)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task4.setStdout(logfile)
            task4.setStderr(logfile)
            task4.uses(logfile, link=peg.Link.OUTPUT)
            task4.uses(newcsv, link=peg.Link.INPUT)
            task4.uses(partCfg, link=peg.Link.INPUT)
            dax.depends(parent=task3, child=task4)

            # Look for chunk files in the output folder of this partitioning
            # Cannot handle smaller job units at dax creation as the folder is not yet populated;
            # if we want smaller units, consider using dynamic subworkflow
            taskname = 'allocateChunk'
            task5 = peg.Job(name=taskname)
            task5.addArguments(outdir, "--idFile", transIdFile)
            dax.addJob(task5)
            logfile = peg.File("%s-%s.log" % (
                taskname,
                i,
            ))
            dax.addFile(logfile)
            task5.setStdout(logfile)
            task5.setStderr(logfile)
            task5.uses(logfile, link=peg.Link.OUTPUT)
            dax.depends(parent=task4, child=task5)
            dax.depends(parent=task0c, child=task5)

    return dax
Example #10
job_maker = JobMaker(dax,
                     output_repo,
                     config_dir,
                     bin_dir='./bin',
                     tc='tc.txt',
                     clobber=True)

# Ingest the raw images.
ingestImages = job_maker.make('ingestImages',
                              repo=input_repo,
                              options={'--output': output_repo})

# Ingest the reference catalog.
ref_cat = '/global/homes/d/descdm/dc1/DC1-imsim-dithered/dc1_reference_catalog.txt'
ingestReferenceCatalog = DAX3.Job('ingestReferenceCatalog')
ingestReferenceCatalog.addArguments(ref_cat, output_repo)
dax.addJob(ingestReferenceCatalog)
dax.depends(ingestReferenceCatalog, ingestImages)
job_maker.add_tc_entry(job_maker, 'ingestReferenceCatalog')

makeDiscreteSkyMap = job_maker.make('makeDiscreteSkyMap')
# Loop over visits
for visit in visit_list(output_repo):
    # Loop over rafts
    for raft in raft_list(visit):
        dataId = dict(visit=visit, raft=raft)
        processCcd = job_maker.make('processCcd', dataId=dataId)
        dax.depends(processCcd, ingestReferenceCatalog)
        dax.depends(makeDiscreteSkyMap, processCcd)