Example #1
import json
import sys
import time

from luigi.retcodes import run_with_retcodes


def task_runner(task_name, log_file, argv):
    task_message = {
        0: "run succeeded",
        1: "unknown exception",
        2: "missing data",
        3: "task failed",
        4: "a duplicate task is already running",
        5: "task scheduling failed",
        6: "task did not run",
        98: "other error",
        99: "unknown error"
    }
    log_fd = open(log_file, 'a')
    sys.stdout = log_fd
    sys.stderr = log_fd
    now = time.time()
    year, month, day, hh, mm, ss = time.localtime(now)[:6]
    ts = "%02d/%02d/%04d %02d:%02d:%02d" % (day, month, year, hh, mm, ss)
    print(ts, task_name, argv, file=log_fd)
    try:
        # run_with_retcodes always terminates via sys.exit(), so the
        # configured return code arrives as SystemExit, not a return value.
        run_with_retcodes(argv)
        ret_code = 0
        ret_msg = task_message[0]
    except SystemExit as e:
        ret_code = int(e.code or 0)
        ret_msg = task_message.get(ret_code, task_message[99])
    except Exception as e:
        ret_code = 98
        ret_msg = str(e)  # Exception.message was removed in Python 3
    return json.dumps({"code": ret_code, "message": ret_msg})
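A minimal usage sketch of the wrapper above; the task name, log path, and Luigi CLI arguments are hypothetical:

# Hypothetical invocation; the task name, log path, and CLI args are
# illustrative only.  The wrapper returns a JSON string such as
# '{"code": 0, "message": "run succeeded"}'.
result = task_runner("MyTask", "/tmp/mytask.log",
                     ["MyTask", "--local-scheduler"])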
Example #2
import sys

from luigi.retcodes import run_with_retcodes


def luigi_run(argv=None):
    # Default to sys.argv at call time; an eager default argument would
    # freeze the arguments at import time.
    if argv is None:
        argv = sys.argv[1:]
    run_with_retcodes(argv)
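run_with_retcodes raises SystemExit with a code taken from the [retcode] section of the Luigi configuration; most of those codes default to 0, so failures are indistinguishable from success unless they are configured. A luigi.cfg sketch using the values the Luigi documentation recommends:

[retcode]
# Recommended values from the Luigi docs, in increasing severity.
already_running=10
missing_data=20
not_run=25
task_failed=30
scheduling_error=35
unhandled_exception=40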
Example #3
# Module-level context assumed by this excerpt; the disdat import paths
# are a best-guess reconstruction of the surrounding module, and
# resolve_workflow_bundles is defined elsewhere in that module.
import inspect
import json
import logging
import sys

from luigi import build, retcodes

from disdat import driver, fs

_logger = logging.getLogger(__name__)

def apply(input_bundle,
          output_bundle,
          pipe_params,
          pipe_cls,
          input_tags,
          output_tags,
          force,
          output_bundle_uuid=None,
          sysexit=True):
    """
    Given an input bundle, run the pipesline on the bundle.
    Note, we first make a copy of all tasks that are parameterized identically to the tasks we will run.
    This is so we can figure out what we will need to re-run.
    This is why we make a single uuid for the output bundle of apply (for the driver).

    Args:
        input_bundle:  The bundle with the data to be processed
        output_bundle: The new bundle to be created
        pipe_params: Dict of zero or more bundles to be handed to each instance of the task
        pipe_cls:      String <module.ClassName>
        force:         force recomputation of dependencies
        input_tags (dict):  Tags used to find the input bundle
        output_tags (dict):  Tags that need to be placed on the output bundle
        force (bool): whether to re-run this pipe
        output_bundle_uuid (str):  Optionally specify exactly the UUID of the output bundle IFF we actually need to produce it
        sysexit: Run with sys exist return codes (will raise SystemExit), else run internally

    Returns:
        None
    """

    _logger.debug("driver {}".format(driver.DriverTask))
    _logger.debug("pipe_cls {}".format(pipe_cls))
    _logger.debug("pipe params: {}".format(pipe_params))
    _logger.debug("force: {}".format(force))
    _logger.debug("input tags: {}".format(input_tags))
    _logger.debug("output tags: {}".format(output_tags))
    _logger.debug("sys.path {}".format(sys.path))

    args = [
        driver.DriverTask.task_family, '--local-scheduler', '--input-bundle',
        input_bundle, '--output-bundle', output_bundle, '--param-bundles',
        pipe_params, '--pipe-cls', pipe_cls, '--input-tags',
        json.dumps(input_tags), '--output-tags',
        json.dumps(output_tags)
    ]

    if force:
        args += ['--force']

    ## Re-execute logic -- make copy of task DAG here.
    #  Creates a cache of {pipe:path_cache_entry} in the pipesFS object.
    #  This is used throughout execution to find / name the output bundles.
    reexecute_dag = driver.DriverTask(input_bundle, output_bundle, pipe_params,
                                      pipe_cls, input_tags, output_tags, force)

    resolve_workflow_bundles(reexecute_dag)

    # at this point the path cache should be full of existing or new UUIDs.
    # we are going to replace the final pipe's UUID if the user has passed one in.
    # this happens when we run the docker container.
    # TODO: don't replace if it already exists.
    if output_bundle_uuid is not None:
        users_root_task = reexecute_dag.deps()[0]
        pfs = fs.DisdatFS()
        pce = pfs.get_path_cache(users_root_task)
        if pce.rerun:  # if we have to re-run then replace it with our UUID
            # TODO: this is the same code as new_output_hframe, FIX!!!
            new_dir, new_uuid, _ = pfs._curr_context.make_managed_path(
                output_bundle_uuid)
            fs.DisdatFS.put_path_cache(users_root_task,
                                       new_uuid,
                                       new_dir,
                                       pce.rerun,
                                       pce.is_left_edge_task,
                                       overwrite=True)

    if False:
        test = reexecute_dag
        print "----START DAG TASK---"
        print "task_id is {}".format(test.task_id)
        print "task_family is {}".format(test.task_family)
        print " class {}".format(test.__class__)
        print " module {}".format(test.__module__)
        print " inspect getfile(test) {}".format(
            inspect.getfile(test.__class__))
        print "resolve_bundles requires {}".format(fs.DisdatFS.task_path_cache)
        print "----END DAG TASK---"

    # Building the already-constructed task directly is preferable to
    # re-packing its arguments into a 'fake' CLI call, but sysexit mode
    # needs run_with_retcodes for its exit-code behavior.
    if sysexit:
        retcodes.run_with_retcodes(args)
    else:
        build([reexecute_dag], local_scheduler=True)

    # After running a pipeline, blow away our path cache.  Needed if we're run twice in the same process.
    # Probably not needed if you're using sysexit.
    fs.DisdatFS().clear_path_cache()
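
A minimal invocation sketch of apply; the bundle names, tags, and pipe class path are hypothetical, and pipe_params is inserted straight into the CLI argument list, so it is given here as a string:

# Hypothetical call; bundle names, tags, and the pipe class path are
# illustrative only.
apply(input_bundle="demo.input",
      output_bundle="demo.output",
      pipe_params="{}",  # passed through into the CLI arg list as-is
      pipe_cls="mymodule.MyPipe",
      input_tags={},
      output_tags={},
      force=False,
      sysexit=False)  # avoid SystemExit when calling in-process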