def get_papermill_parameters(compute_context, inputs, output_log_path):
    '''Build the parameter dict injected into a papermill-executed notebook.

    Marshals each solid input to a file under a per-run marshal directory and
    assembles the ``__dm_*`` bookkeeping entries that the notebook side uses to
    reconstitute the pipeline context.

    Args:
        compute_context (SystemComputeExecutionContext): the step's compute
            context; must carry a dict-valued ``environment_dict``.
        inputs (dict): mapping of input name (str) to input value for the
            dagstermill solid.
        output_log_path (str): path the notebook process should write its log
            output to.

    Returns:
        dict: papermill parameters — one entry per marshalled input, plus the
        reserved ``__dm_*`` context entries.

    Raises:
        DagstermillError: if the pipeline was not instantiated via an
            ExecutionTargetHandle, or if an input uses a reserved name.
    '''
    check.inst_param(compute_context, 'compute_context', SystemComputeExecutionContext)
    check.param_invariant(
        isinstance(compute_context.environment_dict, dict),
        'compute_context',
        'SystemComputeExecutionContext must have valid environment_dict',
    )
    check.dict_param(inputs, 'inputs', key_type=six.string_types)

    run_id = compute_context.run_id

    # Per-run scratch dir where input values are serialized for the notebook
    # process to read back.
    marshal_dir = '/tmp/dagstermill/{run_id}/marshal'.format(run_id=run_id)
    mkdir_p(marshal_dir)

    (handle, solid_subset) = ExecutionTargetHandle.get_handle(compute_context.pipeline_def)

    if not handle:
        raise DagstermillError(
            'Can\'t execute a dagstermill solid from a pipeline that wasn\'t instantiated using '
            'an ExecutionTargetHandle'
        )

    dm_handle_kwargs = handle.data._asdict()
    dm_handle_kwargs['pipeline_name'] = compute_context.pipeline_def.name

    dm_context_dict = {
        'output_log_path': output_log_path,
        'marshal_dir': marshal_dir,
        'environment_dict': compute_context.environment_dict,
    }

    dm_solid_handle_kwargs = compute_context.solid_handle._asdict()

    parameters = {}

    input_def_dict = compute_context.solid_def.input_dict
    for input_name, input_value in inputs.items():
        # FIX: this was a bare `assert`, which is stripped under `python -O`,
        # so a reserved-name collision could silently clobber a `__dm_*` key.
        # Raise explicitly so the failure is unconditional.
        if input_name in RESERVED_INPUT_NAMES:
            raise DagstermillError(
                'Dagstermill solids cannot have inputs named {input_name}'.format(
                    input_name=input_name
                )
            )
        dagster_type = input_def_dict[input_name].dagster_type
        parameter_value = write_value(
            dagster_type, input_value, os.path.join(marshal_dir, 'input-{}'.format(input_name))
        )
        parameters[input_name] = parameter_value

    parameters['__dm_context'] = dm_context_dict
    parameters['__dm_handle_kwargs'] = dm_handle_kwargs
    parameters['__dm_pipeline_run_dict'] = pack_value(compute_context.pipeline_run)
    parameters['__dm_solid_handle_kwargs'] = dm_solid_handle_kwargs
    parameters['__dm_solid_subset'] = solid_subset
    parameters['__dm_instance_ref_dict'] = pack_value(compute_context.instance.get_ref())

    return parameters
def in_pipeline_manager(
    pipeline_name='hello_world_pipeline',
    solid_handle=SolidHandle('hello_world', 'hello_world', None),
    handle_kwargs=None,
    mode=None,
    **kwargs
):
    '''Yield a ``Manager`` reconstituted with a (mostly default) pipeline context.

    Creates a fresh local-temp DagsterInstance and a temporary marshal
    directory, packs a NOT_STARTED ``PipelineRun``, and calls
    ``reconstitute_pipeline_context`` on the manager with the assembled
    context arguments. Any entry may be overridden via ``**kwargs``. The
    marshal directory is removed when the context exits.
    '''
    manager = Manager()

    run_id = str(uuid.uuid4())
    instance = DagsterInstance.local_temp()
    marshal_dir = tempfile.mkdtemp()

    # Fall back to the example repository's hello-world pipeline handle.
    handle_kwargs = handle_kwargs or {
        'pipeline_name': pipeline_name,
        'module_name': 'dagstermill.examples.repository',
        'fn_name': 'define_hello_world_pipeline',
    }

    packed_run = pack_value(
        PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            mode=mode or 'default',
            environment_dict=None,
            selector=None,
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    try:
        with safe_tempfile_path() as output_log_file_path:
            reconstitute_kwargs = {
                'pipeline_run_dict': packed_run,
                'solid_handle_kwargs': solid_handle._asdict(),
                'handle_kwargs': handle_kwargs,
                'marshal_dir': marshal_dir,
                'environment_dict': {},
                'output_log_path': output_log_file_path,
                'instance_ref_dict': pack_value(instance.get_ref()),
            }
            # Caller-supplied kwargs take precedence over the defaults above.
            reconstitute_kwargs.update(kwargs)
            manager.reconstitute_pipeline_context(**reconstitute_kwargs)
            yield manager
    finally:
        shutil.rmtree(marshal_dir)
def on_raw_log_record(self, record):
    '''Persist a raw logging record as a JSON row in the log table.

    Copies the record's ``__dict__``, packing any truthy ``dagster_meta``
    payload with ``pack_value`` so it survives JSON serialization, then
    executes the insert statement over a fresh connection.
    '''
    # Imported locally, mirroring the original (presumably to avoid an
    # import cycle — TODO confirm).
    from dagster.core.serdes import pack_value

    row = copy.copy(record.__dict__)
    dagster_meta = row.get('dagster_meta')
    if dagster_meta:
        row['dagster_meta'] = pack_value(dagster_meta)

    with self.connect() as con:
        con.execute(INSERT_LOG_RECORD_STATEMENT, (seven.json.dumps(row),))