def test_snapshot_command_pipeline_solid_subset():
    with get_temp_file_name() as output_file:
        runner = CliRunner()
        result = runner.invoke(
            pipeline_snapshot_command,
            [
                output_file,
                '-y',
                file_relative_path(__file__, 'repository_file.yaml'),
                'foo',
                '--solid-subset',
                'do_input',
            ],
        )
        assert result.exit_code == 0

        # Now that we have the snapshot make sure that it can be properly deserialized
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1

        external_pipeline_data = messages[0]
        assert isinstance(external_pipeline_data, ExternalPipelineData)
        assert external_pipeline_data.name == 'foo'
        assert (
            len(
                external_pipeline_data.pipeline_snapshot.solid_definitions_snapshot.solid_def_snaps
            )
            == 1
        )
def sync_get_external_pipeline_subset(pipeline_handle, solid_selection=None):
    check.inst_param(pipeline_handle, 'pipeline_handle', PipelineHandle)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

    location_handle = pipeline_handle.repository_handle.repository_location_handle
    check.param_invariant(
        isinstance(location_handle, PythonEnvRepositoryLocationHandle), 'pipeline_handle'
    )

    pointer = pipeline_handle.repository_handle.get_pointer()

    # Shell out to `dagster api snapshot pipeline_subset` in the repository location's
    # environment and read the serialized subset result back from a temp file.
    with get_temp_file_name() as output_file:
        parts = (
            [
                location_handle.executable_path,
                '-m',
                'dagster',
                'api',
                'snapshot',
                'pipeline_subset',
                output_file,
            ]
            + xplat_shlex_split(pointer.get_cli_args())
            + [pipeline_handle.pipeline_name]
        )

        if solid_selection:
            parts.append(
                '--solid-selection={solid_selection}'.format(
                    solid_selection=json.dumps(solid_selection)
                )
            )

        execute_command_in_subprocess(parts)

        external_pipeline_subset_result = read_unary_response(output_file)
        check.inst(external_pipeline_subset_result, ExternalPipelineSubsetResult)

        return external_pipeline_subset_result
def cache_file_from_s3(context, s3_coordinate: S3Coordinate) -> FileHandle:
    target_key = context.solid_config.get('file_key', s3_coordinate['key'].split('/')[-1])

    file_cache = context.resources.file_cache
    target_file_handle = file_cache.get_file_handle(target_key)

    if file_cache.overwrite or not file_cache.has_file_object(target_key):
        with get_temp_file_name() as tmp_file:
            context.resources.s3.download_file(
                Bucket=s3_coordinate['bucket'], Key=s3_coordinate['key'], Filename=tmp_file
            )

            context.log.info('File downloaded to {}'.format(tmp_file))

            with open(tmp_file, 'rb') as tmp_file_object:
                file_cache.write_file_object(target_key, tmp_file_object)
                context.log.info(
                    'File handle written at: {}'.format(target_file_handle.path_desc)
                )
    else:
        context.log.info('File {} already present in cache'.format(target_file_handle.path_desc))

    yield ExpectationResult(
        success=file_cache.has_file_object(target_key),
        label='file_handle_exists',
        metadata_entries=[
            EventMetadataEntry.path(path=target_file_handle.path_desc, label=target_key)
        ],
    )
    yield Output(target_file_handle)
def sync_launch_scheduled_execution(schedule_origin, system_tz=None):
    check.inst_param(schedule_origin, "schedule_origin", ExternalJobOrigin)

    with get_temp_file_name() as output_file:
        # Build the `dagster api launch_scheduled_execution` invocation; the subprocess
        # writes its result to output_file as a unary response.
        parts = (
            [
                sys.executable,
                "-m",
                "dagster",
                "api",
                "launch_scheduled_execution",
                output_file,
            ]
            + xplat_shlex_split(schedule_origin.get_repo_cli_args())
            + ["--schedule_name={}".format(schedule_origin.job_name)]
            + (["--override-system-timezone={}".format(system_tz)] if system_tz else [])
        )
        subprocess.check_call(parts)

        result = read_unary_response(output_file)

        if isinstance(result, ScheduledExecutionResult):
            return result
        elif isinstance(result, IPCErrorMessage):
            error = result.serializable_error_info
            raise DagsterSubprocessError(
                "Error in API subprocess: {message}\n\n{err}".format(
                    message=result.message, err=error.to_string()
                ),
                subprocess_error_infos=[error],
            )
        else:
            check.failed("Unexpected result {}".format(result))
def sync_launch_scheduled_execution(schedule_origin):
    check.inst_param(schedule_origin, "schedule_origin", ScheduleOrigin)

    with get_temp_file_name() as output_file:
        parts = (
            [
                schedule_origin.executable_path,
                "-m",
                "dagster",
                "api",
                "launch_scheduled_execution",
                output_file,
            ]
            + xplat_shlex_split(schedule_origin.get_repo_cli_args())
            + ["--schedule_name={}".format(schedule_origin.schedule_name)]
        )
        execute_command_in_subprocess(parts)

        result = read_unary_response(output_file)

        if isinstance(result, ScheduledExecutionResult):
            return result
        elif isinstance(result, IPCErrorMessage):
            error = result.serializable_error_info
            raise DagsterSubprocessError(
                "Error in API subprocess: {message}\n\n{err}".format(
                    message=result.message, err=error.to_string()
                ),
                subprocess_error_infos=[error],
            )
        else:
            check.failed("Unexpected result {}".format(result))
def test_snapshot_command_pipeline_solid_selection():
    with get_temp_file_name() as output_file:
        runner = CliRunner()
        solid_selection = ['do_input']
        result = runner.invoke(
            pipeline_subset_snapshot_command,
            [
                output_file,
                '-y',
                file_relative_path(__file__, 'repository_file.yaml'),
                'foo',
                '--solid-selection={solid_selection}'.format(
                    solid_selection=json.dumps(solid_selection)
                ),
            ],
        )
        assert result.exit_code == 0

        # Now that we have the snapshot make sure that it can be properly deserialized
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1

        subset_result = messages[0]
        assert isinstance(subset_result, ExternalPipelineSubsetResult)
        assert subset_result.external_pipeline_data.name == 'foo'
        assert (
            len(
                subset_result.external_pipeline_data.pipeline_snapshot.solid_definitions_snapshot.solid_def_snaps
            )
            == 1
        )
def execute_unary_api_cli_command(executable_path, command_name, input_obj):
    # Round-trip a single input/output pair through `dagster api <command_name>`:
    # serialize the input to a temp file, run the CLI in a subprocess, and
    # deserialize its response from the output temp file.
    with get_temp_file_name() as input_file, get_temp_file_name() as output_file:
        parts = [
            executable_path,
            '-m',
            'dagster',
            'api',
            command_name,
            input_file,
            output_file,
        ]

        write_unary_input(input_file, input_obj)

        execute_command_in_subprocess(parts)

        return read_unary_response(output_file)
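# Hypothetical usage sketch (the call site, subcommand name, and argument values below
# are illustrative assumptions, not taken from the source): a caller supplies the target
# environment's Python executable, an api subcommand name, and a serializable input object.
#
#     response = execute_unary_api_cli_command(
#         executable_path=sys.executable,
#         command_name='some_unary_command',  # hypothetical subcommand name
#         input_obj=some_serializable_args,   # hypothetical serializable input
#     )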
def test_get_temp_file_name_leak_file_descriptors():
    # Cap the number of open file descriptors so that a leaked descriptor inside
    # get_temp_file_name / get_temp_file_names would make this test fail.
    resource.setrlimit(resource.RLIMIT_NOFILE, (100, 100))

    for _ in range(100):
        with get_temp_file_name() as _:
            pass

    for _ in range(100):
        with get_temp_file_names(1) as _:
            pass
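# A minimal sketch of a context manager with the contract the test above exercises
# (an illustrative assumption, not the actual dagster.utils implementation): create a
# temp file, close the OS-level descriptor immediately so repeated use under a low
# RLIMIT_NOFILE cap does not exhaust descriptors, yield the path, and clean up on exit.
import os
import tempfile
from contextlib import contextmanager


@contextmanager
def _sketch_get_temp_file_name():
    fd, path = tempfile.mkstemp()
    os.close(fd)  # only the path is handed to callers; keeping fd open would leak
    try:
        yield path
    finally:
        if os.path.exists(path):
            os.unlink(path)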
def sync_get_external_execution_plan(
    pipeline_handle,
    environment_dict,
    mode,
    snapshot_id,
    solid_selection=None,
    step_keys_to_execute=None,
):
    check.inst_param(pipeline_handle, 'pipeline_handle', PipelineHandle)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)
    check.dict_param(environment_dict, 'environment_dict')
    check.str_param(mode, 'mode')
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)
    check.str_param(snapshot_id, 'snapshot_id')

    pointer = pipeline_handle.repository_handle.get_pointer()
    location_handle = pipeline_handle.repository_handle.repository_location_handle

    check.param_invariant(
        isinstance(location_handle, PythonEnvRepositoryLocationHandle), 'pipeline_handle'
    )

    with get_temp_file_name() as output_file:
        parts = (
            [
                location_handle.executable_path,
                '-m',
                'dagster',
                'api',
                'snapshot',
                'execution_plan',
                output_file,
            ]
            + xplat_shlex_split(pointer.get_cli_args())
            + [
                pipeline_handle.pipeline_name,
                '--environment-dict={environment_dict}'.format(
                    environment_dict=json.dumps(environment_dict)
                ),
                '--mode={mode}'.format(mode=mode),
                '--snapshot-id={snapshot_id}'.format(snapshot_id=snapshot_id),
            ]
        )

        if solid_selection:
            parts.append(
                '--solid-selection={solid_selection}'.format(
                    solid_selection=json.dumps(solid_selection)
                )
            )

        if step_keys_to_execute:
            parts.append(
                '--step-keys-to-execute={step_keys_to_execute}'.format(
                    step_keys_to_execute=json.dumps(step_keys_to_execute)
                )
            )

        execute_command_in_subprocess(parts)

        execution_plan_snapshot = read_unary_response(output_file)
        check.inst(execution_plan_snapshot, ExecutionPlanSnapshot)

        return execution_plan_snapshot
def cache_file_from_s3(_, s3_coord: S3Coordinate) -> str:
    # we default the target_key to the last component of the s3 key.
    target_key = s3_coord['key'].split('/')[-1]

    with get_temp_file_name() as tmp_file:
        boto3.client('s3').download_file(
            Bucket=s3_coord['bucket'], Key=s3_coord['key'], Filename=tmp_file
        )

        target_path = os.path.join(file_cache_folder(), target_key)

        with open(tmp_file, 'rb') as tmp_file_object:
            with open(target_path, 'wb') as target_file_object:
                shutil.copyfileobj(tmp_file_object, target_file_object)
                return target_path
def cache_file_from_s3(context, s3_coord: S3Coordinate) -> str:
    # we default the target_key to the last component of the s3 key.
    target_key = s3_coord['key'].split('/')[-1]

    with get_temp_file_name() as tmp_file:
        context.resources.s3.download_file(
            Bucket=s3_coord['bucket'], Key=s3_coord['key'], Filename=tmp_file
        )

        file_cache = context.resources.file_cache

        with open(tmp_file, 'rb') as tmp_file_object:
            # returns a handle rather than a path
            file_handle = file_cache.write_file_object(target_key, tmp_file_object)
            return file_handle.path
def cache_file_from_s3(context, s3_coord: S3Coordinate) -> str: # we default the target_key to the last component of the s3 key. target_key = s3_coord["key"].split("/")[-1] with get_temp_file_name() as tmp_file: boto3.client("s3").download_file( Bucket=s3_coord["bucket"], Key=s3_coord["key"], Filename=tmp_file ) file_cache = context.resources.file_cache with open(tmp_file, "rb") as tmp_file_object: # returns a handle rather than a path file_handle = file_cache.write_file_object(target_key, tmp_file_object) return file_handle.path
def cache_file_from_s3(s3_coord: S3Coordinate) -> str:
    # we default the target_key to the last component of the s3 key.
    target_key = s3_coord["key"].split("/")[-1]

    with get_temp_file_name() as tmp_file:
        boto3.client("s3").download_file(
            Bucket=s3_coord["bucket"], Key=s3_coord["key"], Filename=tmp_file
        )

        target_path = os.path.join(file_cache_folder(), target_key)

        with open(tmp_file, "rb") as tmp_file_object:
            with open(target_path, "wb") as target_file_object:
                shutil.copyfileobj(tmp_file_object, target_file_object)
                return target_path
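# The snippet above assumes a file_cache_folder() helper that is not shown in the source.
# A hypothetical version (an illustrative assumption, not the actual helper) could simply
# ensure a local cache directory exists and return its path.
import os
import tempfile


def _sketch_file_cache_folder():
    path = os.path.join(tempfile.gettempdir(), "file_cache")
    os.makedirs(path, exist_ok=True)  # create the cache directory on first use
    return path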
def test_snapshot_command_repository():
    with get_temp_file_name() as output_file:
        runner = CliRunner()
        result = runner.invoke(
            repository_snapshot_command,
            [output_file, '-y', file_relative_path(__file__, 'repository_file.yaml')],
        )
        assert result.exit_code == 0

        # Now that we have the snapshot make sure that it can be properly deserialized
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1

        external_repository_data = messages[0]
        assert isinstance(external_repository_data, ExternalRepositoryData)
        assert external_repository_data.name == 'bar'
        assert len(external_repository_data.external_pipeline_datas) == 2
def sync_list_repositories(executable_path, python_file, module_name):
    from dagster.cli.api import ListRepositoriesResponse

    with get_temp_file_name() as output_file:
        parts = [
            executable_path,
            '-m',
            'dagster',
            'api',
            'snapshot',
            'list_repositories',
            output_file,
        ] + (['-f', python_file] if python_file else ['--module-name', module_name])

        execute_command_in_subprocess(parts)

        response = read_unary_response(output_file)

        return check.inst(response, ListRepositoriesResponse)
def sync_get_external_repositories(repository_location_handle):
    check.inst_param(
        repository_location_handle,
        'repository_location_handle',
        RepositoryLocationHandle,
    )
    check.param_invariant(
        isinstance(repository_location_handle, PythonEnvRepositoryLocationHandle),
        'repository_location_handle',
    )

    repos = []

    # Snapshot each repository in the location by invoking `dagster api snapshot repository`
    # in the location's environment and reading the serialized data back from a temp file.
    for key, pointer in repository_location_handle.repository_code_pointer_dict.items():
        with get_temp_file_name() as output_file:
            parts = [
                repository_location_handle.executable_path,
                '-m',
                'dagster',
                'api',
                'snapshot',
                'repository',
                output_file,
            ] + xplat_shlex_split(pointer.get_cli_args())

            execute_command_in_subprocess(parts)

            external_repository_data = read_unary_response(output_file)
            check.inst(external_repository_data, ExternalRepositoryData)

            repository_handle = RepositoryHandle(
                repository_name=external_repository_data.name,
                repository_key=key,
                repository_location_handle=repository_location_handle,
            )

            repos.append(ExternalRepository(external_repository_data, repository_handle))

    return repos
def sync_get_external_repository(location_handle):
    check.inst_param(location_handle, 'location_handle', LocationHandle)

    with get_temp_file_name() as output_file:
        parts = ['dagster', 'api', 'snapshot', 'repository', output_file] + xplat_shlex_split(
            location_handle.pointer.get_cli_args()
        )

        returncode = subprocess.check_call(parts)
        check.invariant(returncode == 0, 'dagster api cli invocation did not complete successfully')

        messages = list(ipc_read_event_stream(output_file))
        check.invariant(len(messages) == 1)

        external_repository_data = messages[0]
        check.inst(external_repository_data, ExternalRepositoryData)

        return ExternalRepository(
            external_repository_data,
            RepositoryHandle(external_repository_data.name, location_handle),
        )