Example #1
def test_snapshot_command_pipeline_solid_subset():

    with get_temp_file_name() as output_file:
        runner = CliRunner()
        result = runner.invoke(
            pipeline_snapshot_command,
            [
                output_file,
                '-y',
                file_relative_path(__file__, 'repository_file.yaml'),
                'foo',
                '--solid-subset',
                'do_input',
            ],
        )

        assert result.exit_code == 0
        # Now that we have the snapshot make sure that it can be properly deserialized
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1
        external_pipeline_data = messages[0]
        assert isinstance(external_pipeline_data, ExternalPipelineData)
        assert external_pipeline_data.name == 'foo'
        pipeline_snapshot = external_pipeline_data.pipeline_snapshot
        assert len(pipeline_snapshot.solid_definitions_snapshot.solid_def_snaps) == 1
Example #2
def sync_get_external_pipeline_subset(pipeline_handle, solid_selection=None):
    check.inst_param(pipeline_handle, 'pipeline_handle', PipelineHandle)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

    location_handle = pipeline_handle.repository_handle.repository_location_handle
    check.param_invariant(
        isinstance(location_handle, PythonEnvRepositoryLocationHandle),
        'pipeline_handle')

    pointer = pipeline_handle.repository_handle.get_pointer()
    with get_temp_file_name() as output_file:
        parts = (
            [
                location_handle.executable_path,
                '-m',
                'dagster',
                'api',
                'snapshot',
                'pipeline_subset',
                output_file,
            ]
            + xplat_shlex_split(pointer.get_cli_args())
            + [pipeline_handle.pipeline_name]
        )

        if solid_selection:
            parts.append('--solid-selection={solid_selection}'.format(
                solid_selection=json.dumps(solid_selection)))

        execute_command_in_subprocess(parts)

        external_pipeline_subset_result = read_unary_response(output_file)
        check.inst(external_pipeline_subset_result,
                   ExternalPipelineSubsetResult)

        return external_pipeline_subset_result
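Example #2 splices CLI fragments from a code pointer into the argv list via xplat_shlex_split. The helper's name suggests shlex splitting that also behaves on Windows; a plausible stand-in, assuming that behavior (not dagster's actual implementation):

import os
import shlex


def xplat_shlex_split(args):
    # POSIX-mode shlex splitting eats backslashes, which mangles Windows
    # paths, so fall back to non-POSIX mode there (a sketch of the likely behavior).
    return shlex.split(args, posix=(os.name != 'nt'))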
Example #3
def cache_file_from_s3(context, s3_coordinate: S3Coordinate) -> FileHandle:
    target_key = context.solid_config.get('file_key', s3_coordinate['key'].split('/')[-1])

    file_cache = context.resources.file_cache

    target_file_handle = file_cache.get_file_handle(target_key)

    if file_cache.overwrite or not file_cache.has_file_object(target_key):
        with get_temp_file_name() as tmp_file:
            context.resources.s3.download_file(
                Bucket=s3_coordinate['bucket'], Key=s3_coordinate['key'], Filename=tmp_file
            )

            context.log.info('File downloaded to {}'.format(tmp_file))

            with open(tmp_file, 'rb') as tmp_file_object:
                file_cache.write_file_object(target_key, tmp_file_object)
                context.log.info('File handle written at : {}'.format(target_file_handle.path_desc))
    else:
        context.log.info('File {} already present in cache'.format(target_file_handle.path_desc))

    yield ExpectationResult(
        success=file_cache.has_file_object(target_key),
        label='file_handle_exists',
        metadata_entries=[
            EventMetadataEntry.path(path=target_file_handle.path_desc, label=target_key)
        ],
    )
    yield Output(target_file_handle)
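The solid above streams two events: an ExpectationResult recording whether the cached file is present, then the Output carrying the file handle. In this legacy dagster API, a solid body that yields events must also yield its Output explicitly rather than returning it. A stripped-down sketch of the same shape (the solid name and value are hypothetical):

from dagster import ExpectationResult, Output, solid


@solid
def emit_with_expectation(_context):
    value = 42  # hypothetical computed value
    yield ExpectationResult(success=value > 0, label='value_is_positive')
    yield Output(value)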
Example #4
def sync_launch_scheduled_execution(schedule_origin, system_tz=None):
    check.inst_param(schedule_origin, "schedule_origin", ExternalJobOrigin)

    with get_temp_file_name() as output_file:

        parts = (
            [
                sys.executable,
                "-m",
                "dagster",
                "api",
                "launch_scheduled_execution",
                output_file,
            ]
            + xplat_shlex_split(schedule_origin.get_repo_cli_args())
            + ["--schedule_name={}".format(schedule_origin.job_name)]
            + (["--override-system-timezone={}".format(system_tz)] if system_tz else [])
        )
        subprocess.check_call(parts)
        result = read_unary_response(output_file)
        if isinstance(result, ScheduledExecutionResult):
            return result
        elif isinstance(result, IPCErrorMessage):
            error = result.serializable_error_info
            raise DagsterSubprocessError(
                "Error in API subprocess: {message}\n\n{err}".format(
                    message=result.message, err=error.to_string()),
                subprocess_error_infos=[error],
            )
        else:
            check.failed("Unexpected result {}".format(result))
Example #5
def sync_launch_scheduled_execution(schedule_origin):
    check.inst_param(schedule_origin, "schedule_origin", ScheduleOrigin)

    with get_temp_file_name() as output_file:
        parts = (
            [
                schedule_origin.executable_path,
                "-m",
                "dagster",
                "api",
                "launch_scheduled_execution",
                output_file,
            ]
            + xplat_shlex_split(schedule_origin.get_repo_cli_args())
            + ["--schedule_name={}".format(schedule_origin.schedule_name)]
        )
        execute_command_in_subprocess(parts)
        result = read_unary_response(output_file)
        if isinstance(result, ScheduledExecutionResult):
            return result
        elif isinstance(result, IPCErrorMessage):
            error = result.serializable_error_info
            raise DagsterSubprocessError(
                "Error in API subprocess: {message}\n\n{err}".format(
                    message=result.message, err=error.to_string()),
                subprocess_error_infos=[error],
            )
        else:
            check.failed("Unexpected result {}".format(result))
Example #6
def test_snapshot_command_pipeline_solid_selection():

    with get_temp_file_name() as output_file:
        runner = CliRunner()
        solid_selection = ['do_input']
        result = runner.invoke(
            pipeline_subset_snapshot_command,
            [
                output_file,
                '-y',
                file_relative_path(__file__, 'repository_file.yaml'),
                'foo',
                '--solid-selection={solid_selection}'.format(
                    solid_selection=json.dumps(solid_selection)),
            ],
        )

        assert result.exit_code == 0
        # Now that we have the snapshot make sure that it can be properly deserialized
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1
        subset_result = messages[0]
        assert isinstance(subset_result, ExternalPipelineSubsetResult)
        assert subset_result.external_pipeline_data.name == 'foo'
        pipeline_snapshot = subset_result.external_pipeline_data.pipeline_snapshot
        assert len(pipeline_snapshot.solid_definitions_snapshot.solid_def_snaps) == 1
Example #7
def execute_unary_api_cli_command(executable_path, command_name, input_obj):
    with get_temp_file_name() as input_file, get_temp_file_name() as output_file:
        parts = [
            executable_path,
            '-m',
            'dagster',
            'api',
            command_name,
            input_file,
            output_file,
        ]

        write_unary_input(input_file, input_obj)

        execute_command_in_subprocess(parts)

        return read_unary_response(output_file)
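The unary pattern in this helper is: write exactly one request object to input_file, run the CLI in a subprocess, then read exactly one response object back from output_file. Dagster serializes its serdes-whitelisted namedtuples for this; a JSON stand-in that illustrates the shape of the round trip (not dagster's actual wire format):

import json


def write_unary_input(path, obj):
    # Stand-in writer: exactly one object per file.
    with open(path, 'w') as f:
        json.dump(obj, f)


def read_unary_response(path):
    # Stand-in reader: exactly one object per file.
    with open(path) as f:
        return json.load(f)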
Example #8
def test_get_temp_file_name_leak_file_descriptors():
    resource.setrlimit(resource.RLIMIT_NOFILE, (100, 100))
    for _ in range(100):
        with get_temp_file_name() as _:
            pass

    for _ in range(100):
        with get_temp_file_names(1) as _:
            pass
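The rlimit test above only passes if each get_temp_file_name context closes the descriptor it opens and deletes the file on exit. A minimal sketch of that contract built on tempfile.mkstemp (an assumption; the real implementation lives in dagster and may differ):

import os
import tempfile
from contextlib import contextmanager


@contextmanager
def get_temp_file_name():
    fd, path = tempfile.mkstemp()
    os.close(fd)  # close immediately so no descriptor is leaked
    try:
        yield path
    finally:
        if os.path.exists(path):
            os.unlink(path)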
Example #9
def sync_get_external_execution_plan(
    pipeline_handle,
    environment_dict,
    mode,
    snapshot_id,
    solid_selection=None,
    step_keys_to_execute=None,
):
    check.inst_param(pipeline_handle, 'pipeline_handle', PipelineHandle)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)
    check.dict_param(environment_dict, 'environment_dict')
    check.str_param(mode, 'mode')
    check.opt_list_param(step_keys_to_execute,
                         'step_keys_to_execute',
                         of_type=str)
    check.str_param(snapshot_id, 'snapshot_id')

    pointer = pipeline_handle.repository_handle.get_pointer()
    location_handle = pipeline_handle.repository_handle.repository_location_handle

    check.param_invariant(
        isinstance(location_handle, PythonEnvRepositoryLocationHandle),
        'pipeline_handle')

    with get_temp_file_name() as output_file:
        parts = (
            [
                location_handle.executable_path,
                '-m',
                'dagster',
                'api',
                'snapshot',
                'execution_plan',
                output_file,
            ]
            + xplat_shlex_split(pointer.get_cli_args())
            + [
                pipeline_handle.pipeline_name,
                '--environment-dict={environment_dict}'.format(
                    environment_dict=json.dumps(environment_dict)
                ),
                '--mode={mode}'.format(mode=mode),
                '--snapshot-id={snapshot_id}'.format(snapshot_id=snapshot_id),
            ]
        )

        if solid_selection:
            parts.append('--solid-selection={solid_selection}'.format(
                solid_selection=json.dumps(solid_selection)))

        if step_keys_to_execute:
            parts.append(
                '--step-keys-to-execute={step_keys_to_execute}'.format(
                    step_keys_to_execute=json.dumps(step_keys_to_execute)))

        execute_command_in_subprocess(parts)

        execution_plan_snapshot = read_unary_response(output_file)
        check.inst(execution_plan_snapshot, ExecutionPlanSnapshot)

        return execution_plan_snapshot
Example #10
def cache_file_from_s3(_, s3_coord: S3Coordinate) -> str:
    # we default the target_key to the last component of the s3 key.
    target_key = s3_coord['key'].split('/')[-1]

    with get_temp_file_name() as tmp_file:
        boto3.client('s3').download_file(Bucket=s3_coord['bucket'],
                                         Key=s3_coord['key'],
                                         Filename=tmp_file)

        target_path = os.path.join(file_cache_folder(), target_key)
        with open(tmp_file, 'rb') as tmp_file_object:
            with open(target_path, 'wb') as target_file_object:
                shutil.copyfileobj(tmp_file_object, target_file_object)
                return target_path
Example #11
def cache_file_from_s3(context, s3_coord: S3Coordinate) -> str:
    # we default the target_key to the last component of the s3 key.
    target_key = s3_coord['key'].split('/')[-1]

    with get_temp_file_name() as tmp_file:
        context.resources.s3.download_file(
            Bucket=s3_coord['bucket'], Key=s3_coord['key'], Filename=tmp_file
        )

        file_cache = context.resources.file_cache
        with open(tmp_file, 'rb') as tmp_file_object:
            # returns a handle rather than a path
            file_handle = file_cache.write_file_object(target_key, tmp_file_object)
            return file_handle.path
Example #12
def cache_file_from_s3(context, s3_coord: S3Coordinate) -> str:
    # we default the target_key to the last component of the s3 key.
    target_key = s3_coord["key"].split("/")[-1]

    with get_temp_file_name() as tmp_file:
        boto3.client("s3").download_file(
            Bucket=s3_coord["bucket"], Key=s3_coord["key"], Filename=tmp_file
        )

        file_cache = context.resources.file_cache
        with open(tmp_file, "rb") as tmp_file_object:
            # returns a handle rather than a path
            file_handle = file_cache.write_file_object(target_key, tmp_file_object)
            return file_handle.path
Example #13
def cache_file_from_s3(s3_coord: S3Coordinate) -> str:
    # we default the target_key to the last component of the s3 key.
    target_key = s3_coord["key"].split("/")[-1]

    with get_temp_file_name() as tmp_file:
        boto3.client("s3").download_file(
            Bucket=s3_coord["bucket"], Key=s3_coord["key"], Filename=tmp_file
        )

        target_path = os.path.join(file_cache_folder(), target_key)
        with open(tmp_file, "rb") as tmp_file_object:
            with open(target_path, "wb") as target_file_object:
                shutil.copyfileobj(tmp_file_object, target_file_object)
                return target_path
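Examples #10 through #13 all take an S3Coordinate, a dict with 'bucket' and 'key' fields. The real type is a Dagster dict type from the AWS integration; the TypedDict below is only an illustrative stand-in:

from typing import TypedDict


class S3Coordinate(TypedDict):
    bucket: str
    key: str


coord: S3Coordinate = {'bucket': 'my-bucket', 'key': 'path/to/file.csv'}
# target_key defaults to the last path component, here 'file.csv'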
Example #14
def test_snapshot_command_repository():
    with get_temp_file_name() as output_file:
        runner = CliRunner()
        result = runner.invoke(
            repository_snapshot_command,
            [
                output_file, '-y',
                file_relative_path(__file__, 'repository_file.yaml')
            ],
        )
        assert result.exit_code == 0
        # Now that we have the snapshot make sure that it can be properly deserialized
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1
        external_repository_data = messages[0]
        assert isinstance(external_repository_data, ExternalRepositoryData)
        assert external_repository_data.name == 'bar'
        assert len(external_repository_data.external_pipeline_datas) == 2
Example #15
def sync_list_repositories(executable_path, python_file, module_name):
    from dagster.cli.api import ListRepositoriesResponse

    with get_temp_file_name() as output_file:
        parts = [
            executable_path,
            '-m',
            'dagster',
            'api',
            'snapshot',
            'list_repositories',
            output_file,
        ] + (['-f', python_file] if python_file else ['--module-name', module_name])

        execute_command_in_subprocess(parts)

        response = read_unary_response(output_file)

        return check.inst(response, ListRepositoriesResponse)
Example #16
def sync_get_external_repositories(repository_location_handle):
    check.inst_param(
        repository_location_handle, 'repository_location_handle', RepositoryLocationHandle,
    )

    check.param_invariant(
        isinstance(repository_location_handle, PythonEnvRepositoryLocationHandle),
        'repository_location_handle',
    )

    repos = []

    for key, pointer in repository_location_handle.repository_code_pointer_dict.items():
        with get_temp_file_name() as output_file:

            parts = [
                repository_location_handle.executable_path,
                '-m',
                'dagster',
                'api',
                'snapshot',
                'repository',
                output_file,
            ] + xplat_shlex_split(pointer.get_cli_args())

            execute_command_in_subprocess(parts)

            external_repository_data = read_unary_response(output_file)
            check.inst(external_repository_data, ExternalRepositoryData)

            repository_handle = RepositoryHandle(
                repository_name=external_repository_data.name,
                repository_key=key,
                repository_location_handle=repository_location_handle,
            )

            repos.append(ExternalRepository(external_repository_data, repository_handle))

    return repos
Example #17
def sync_get_external_repository(location_handle):
    check.inst_param(location_handle, 'location_handle', LocationHandle)

    with get_temp_file_name() as output_file:

        parts = ['dagster', 'api', 'snapshot', 'repository', output_file] + xplat_shlex_split(
            location_handle.pointer.get_cli_args()
        )
        # subprocess.check_call raises CalledProcessError on a nonzero exit code
        # and returns 0 otherwise, so the invariant below is a redundant safeguard.
        returncode = subprocess.check_call(parts)
        check.invariant(returncode == 0, 'dagster api cli invocation did not complete successfully')

        messages = list(ipc_read_event_stream(output_file))
        check.invariant(len(messages) == 1)

        external_repository_data = messages[0]

        check.inst(external_repository_data, ExternalRepositoryData)

        return ExternalRepository(
            external_repository_data,
            RepositoryHandle(external_repository_data.name, location_handle),
        )
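Across these examples, dagster's check module supplies runtime parameter assertions: check.inst_param and check.opt_list_param validate arguments, check.param_invariant asserts a condition about a parameter, and check.inst asserts a value's type and returns it. A simplified sketch of the last one (the real module raises its own CheckError type):

def inst(obj, ttype, desc=None):
    # Return obj unchanged if it is an instance of ttype, else raise.
    if not isinstance(obj, ttype):
        raise TypeError(desc or 'Expected {}, got {}'.format(ttype.__name__, type(obj).__name__))
    return obj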