Ejemplo n.º 1
0
def get_papermill_parameters(compute_context, inputs, output_log_path):
    """Assemble the papermill parameters dict for a dagstermill solid.

    Every input value is passed through ``write_value`` (targeting a file named
    ``input-<name>`` inside a per-run marshal directory) and the result is
    stored under the input's name. A fixed set of ``__dm_*`` entries describing
    the context, handle, pipeline run, solid handle, solid subset and instance
    ref is then added.

    Args:
        compute_context (SystemComputeExecutionContext): The compute context;
            its ``environment_dict`` must be a dict.
        inputs (dict): Maps input names (strings) to input values.
        output_log_path: Passed through unchanged as ``output_log_path`` in the
            ``__dm_context`` entry.

    Returns:
        dict: The parameters, keyed by input name plus the ``__dm_*`` keys.

    Raises:
        DagstermillError: If ``ExecutionTargetHandle.get_handle`` yields no
            handle for the pipeline definition.
        AssertionError: If an input is named after one of
            ``RESERVED_INPUT_NAMES``.
    """
    check.inst_param(compute_context, 'compute_context',
                     SystemComputeExecutionContext)
    check.param_invariant(
        isinstance(compute_context.environment_dict, dict),
        'compute_context',
        'SystemComputeExecutionContext must have valid environment_dict',
    )
    check.dict_param(inputs, 'inputs', key_type=six.string_types)

    run_id = compute_context.run_id

    # Per-run directory that receives the marshalled input values.
    marshal_dir = '/tmp/dagstermill/{run_id}/marshal'.format(run_id=run_id)
    mkdir_p(marshal_dir)

    (handle, solid_subset) = ExecutionTargetHandle.get_handle(
        compute_context.pipeline_def)

    if not handle:
        raise DagstermillError(
            'Can\'t execute a dagstermill solid from a pipeline that wasn\'t instantiated using '
            'an ExecutionTargetHandle')

    dm_handle_kwargs = handle.data._asdict()

    dm_handle_kwargs['pipeline_name'] = compute_context.pipeline_def.name

    dm_context_dict = {
        'output_log_path': output_log_path,
        'marshal_dir': marshal_dir,
        'environment_dict': compute_context.environment_dict,
    }

    dm_solid_handle_kwargs = compute_context.solid_handle._asdict()

    parameters = {}

    input_def_dict = compute_context.solid_def.input_dict
    for input_name, input_value in inputs.items():
        # Raise explicitly instead of using `assert`: assert statements are
        # removed under `python -O`, which would let a reserved name through
        # and have it clobbered by (or clobber) the `__dm_*` entries below.
        if input_name in RESERVED_INPUT_NAMES:
            raise AssertionError(
                'Dagstermill solids cannot have inputs named {input_name}'.format(
                    input_name=input_name))
        dagster_type = input_def_dict[input_name].dagster_type
        parameter_value = write_value(
            dagster_type, input_value,
            os.path.join(marshal_dir, 'input-{}'.format(input_name)))
        parameters[input_name] = parameter_value

    parameters['__dm_context'] = dm_context_dict
    parameters['__dm_handle_kwargs'] = dm_handle_kwargs
    parameters['__dm_pipeline_run_dict'] = pack_value(
        compute_context.pipeline_run)
    parameters['__dm_solid_handle_kwargs'] = dm_solid_handle_kwargs
    parameters['__dm_solid_subset'] = solid_subset
    parameters['__dm_instance_ref_dict'] = pack_value(
        compute_context.instance.get_ref())

    return parameters
Ejemplo n.º 2
0
def in_pipeline_manager(
    pipeline_name='hello_world_pipeline',
    solid_handle=SolidHandle('hello_world', 'hello_world', None),
    handle_kwargs=None,
    mode=None,
    **kwargs
):
    """Yield a ``Manager`` whose pipeline context has been reconstituted.

    A fresh run id, a ``DagsterInstance.local_temp()`` instance and a temporary
    marshal directory are created, a ``NOT_STARTED`` ``PipelineRun`` is packed,
    and ``Manager.reconstitute_pipeline_context`` is called with those values.
    Entries in ``kwargs`` override the generated context values of the same
    name. The marshal directory is removed once the generator finishes or is
    closed.

    Args:
        pipeline_name (str): Name used for the pipeline run and for the
            default handle kwargs.
        solid_handle (SolidHandle): Handle whose ``_asdict()`` is passed as
            ``solid_handle_kwargs``.
        handle_kwargs (dict): Handle kwargs; when falsy, defaults pointing at
            ``dagstermill.examples.repository.define_hello_world_pipeline``
            are used.
        mode (str): Pipeline run mode; ``'default'`` is used when falsy.
        **kwargs: Extra or overriding keyword arguments for
            ``reconstitute_pipeline_context``.

    Yields:
        Manager: The manager after its pipeline context was reconstituted.
    """
    manager = Manager()

    new_run_id = str(uuid.uuid4())
    temp_instance = DagsterInstance.local_temp()
    scratch_dir = tempfile.mkdtemp()

    handle_kwargs = handle_kwargs or {
        'pipeline_name': pipeline_name,
        'module_name': 'dagstermill.examples.repository',
        'fn_name': 'define_hello_world_pipeline',
    }

    effective_mode = mode if mode else 'default'
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_name,
        run_id=new_run_id,
        mode=effective_mode,
        environment_dict=None,
        selector=None,
        reexecution_config=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
    )
    packed_run = pack_value(pipeline_run)

    try:
        with safe_tempfile_path() as log_path:
            reconstitute_args = {
                'pipeline_run_dict': packed_run,
                'solid_handle_kwargs': solid_handle._asdict(),
                'handle_kwargs': handle_kwargs,
                'marshal_dir': scratch_dir,
                'environment_dict': {},
                'output_log_path': log_path,
                'instance_ref_dict': pack_value(temp_instance.get_ref()),
            }
            # Caller-supplied kwargs take precedence over the generated values.
            reconstitute_args.update(kwargs)

            manager.reconstitute_pipeline_context(**reconstitute_args)
            yield manager
    finally:
        # Always drop the marshal directory, even if the consumer raised.
        shutil.rmtree(scratch_dir)
Ejemplo n.º 3
0
    def on_raw_log_record(self, record):
        """Store a log record as a JSON document.

        A shallow copy of the record's ``__dict__`` is taken; a truthy
        ``dagster_meta`` entry is replaced by its ``pack_value`` form. The
        resulting dict is JSON-encoded and inserted with
        ``INSERT_LOG_RECORD_STATEMENT`` on a connection from ``self.connect()``.

        Args:
            record: The log record whose ``__dict__`` is stored.
        """
        from dagster.core.serdes import pack_value

        # Copy so the caller's record is not mutated when the meta is packed.
        payload = copy.copy(record.__dict__)
        meta = payload.get('dagster_meta')
        if meta:
            payload['dagster_meta'] = pack_value(meta)

        with self.connect() as connection:
            serialized = seven.json.dumps(payload)
            connection.execute(INSERT_LOG_RECORD_STATEMENT, (serialized, ))