Example #1
import io
import os
import pickle
from queue import Queue
from threading import Thread

import boto3

# S3FileHandle and S3FileManager are provided by dagster_aws.s3;
# DagsterInstance, run_step_from_ref, PICKLED_EVENTS_FILE_NAME, DONE, and
# event_writing_loop come from dagster's external step execution machinery.


def main(step_run_ref_bucket, s3_dir_key):
    session = boto3.client("s3")
    file_manager = S3FileManager(session, step_run_ref_bucket, "")
    file_handle = S3FileHandle(step_run_ref_bucket, s3_dir_key)
    step_run_ref_data = file_manager.read_data(file_handle)

    step_run_ref = pickle.loads(step_run_ref_data)

    events_bucket = step_run_ref_bucket
    events_s3_key = os.path.dirname(s3_dir_key) + "/" + PICKLED_EVENTS_FILE_NAME

    def put_events(events):
        file_obj = io.BytesIO(pickle.dumps(events))
        session.put_object(Body=file_obj,
                           Bucket=events_bucket,
                           Key=events_s3_key)

    # Set up a thread to handle writing events back to the plan process, so execution doesn't get
    # blocked on remote communication
    events_queue = Queue()
    event_writing_thread = Thread(
        target=event_writing_loop,
        kwargs=dict(events_queue=events_queue, put_events_fn=put_events),
    )
    event_writing_thread.start()

    with DagsterInstance.ephemeral() as instance:
        try:
            for event in run_step_from_ref(step_run_ref, instance):
                events_queue.put(event)
        finally:
            events_queue.put(DONE)
            event_writing_thread.join()
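The event_writing_loop consumer is referenced above but not shown. Below is a minimal sketch of what the calling code implies: drain the queue, rewrite the pickled event list via put_events_fn, and stop at the DONE sentinel. The flush-per-event batching is an assumption, not dagster's actual implementation.

def event_writing_loop(events_queue, put_events_fn):
    events = []
    while True:
        event = events_queue.get()  # blocks until the step emits an event
        if event is DONE:           # sentinel enqueued in the finally block
            break
        events.append(event)
        put_events_fn(events)       # rewrite the full pickled list in S3
    put_events_fn(events)           # final flush so nothing is lost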
Example #2
import os
from unittest import mock

# S3FileHandle and S3FileManager as in Example #1 (dagster_aws.s3).


def test_s3_file_manager_read():
    state = {'called': 0}
    bar_bytes = 'bar'.encode()

    class S3Mock(mock.MagicMock):
        def download_file(self, *_args, **kwargs):
            state['called'] += 1
            assert state['called'] == 1
            state['bucket'] = kwargs.get('Bucket')
            state['key'] = kwargs.get('Key')
            file_name = kwargs.get('Filename')
            state['file_name'] = file_name
            with open(file_name, 'wb') as ff:
                ff.write(bar_bytes)

    s3_mock = S3Mock()
    file_manager = S3FileManager(s3_mock, 'some-bucket', 'some-key')
    file_handle = S3FileHandle('some-bucket', 'some-key/kdjfkjdkfjkd')
    with file_manager.read(file_handle) as file_obj:
        assert file_obj.read() == bar_bytes

    assert state['bucket'] == file_handle.s3_bucket
    assert state['key'] == file_handle.s3_key

    # Read again: the second read is served from the local cache (the mock
    # asserts that download_file is called exactly once).
    with file_manager.read(file_handle) as file_obj:
        assert file_obj.read() == bar_bytes

    assert os.path.exists(state['file_name'])

    file_manager.delete_local_temp()

    assert not os.path.exists(state['file_name'])
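For reference, here is a minimal sketch of the caching behavior this test exercises; the class name and structure are illustrative assumptions, not dagster's actual S3FileManager.

import os
import tempfile

class CachingS3Reader:
    # Illustrative only: downloads each object to a local temp file once,
    # then serves later reads from that file.
    def __init__(self, s3_session):
        self._s3 = s3_session
        self._local_paths = {}  # (bucket, key) -> local temp file path

    def read(self, bucket, key):
        cache_id = (bucket, key)
        if cache_id not in self._local_paths:
            fd, path = tempfile.mkstemp()
            os.close(fd)
            self._s3.download_file(Bucket=bucket, Key=key, Filename=path)
            self._local_paths[cache_id] = path
        return open(self._local_paths[cache_id], 'rb')

    def delete_local_temp(self):
        for path in self._local_paths.values():
            os.remove(path)
        self._local_paths.clear()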
Example #3
def get_step_events(self, step_context, run_id, step_key):
    events_file_handle = S3FileHandle(
        self.staging_bucket,
        self._artifact_s3_key(run_id, step_key, PICKLED_EVENTS_FILE_NAME),
    )
    events_data = step_context.file_manager.read_data(events_file_handle)
    return pickle.loads(events_data)
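The _artifact_s3_key helper is not shown. Given how the step scripts above lay the events file out next to the step run ref, a plausible sketch follows; the staging_prefix attribute and the key layout are guesses, not dagster's code.

def _artifact_s3_key(self, run_id, step_key, file_name):
    # assumed layout: <staging_prefix>/<run_id>/<step_key>/<file_name>
    return '/'.join([self.staging_prefix, run_id, step_key, file_name])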
Example #4
# Same imports as Example #1, minus the threading pieces. This variant
# uploads the pickled events once, synchronously, after the step finishes,
# instead of streaming them from a writer thread.
def main(step_run_ref_bucket, s3_dir_key):
    session = boto3.client('s3')
    file_manager = S3FileManager(session, step_run_ref_bucket, '')
    file_handle = S3FileHandle(step_run_ref_bucket, s3_dir_key)
    step_run_ref_data = file_manager.read_data(file_handle)

    step_run_ref = pickle.loads(step_run_ref_data)

    events = list(run_step_from_ref(step_run_ref))
    file_obj = io.BytesIO(pickle.dumps(events))
    events_key = os.path.dirname(s3_dir_key) + '/' + PICKLED_EVENTS_FILE_NAME
    session.put_object(Body=file_obj,
                       Bucket=step_run_ref_bucket,
                       Key=events_key)
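To complete the round trip, the plan process can read the events back with a plain boto3 get_object, mirroring get_step_events in Example #3:

response = session.get_object(Bucket=step_run_ref_bucket, Key=events_key)
events = pickle.loads(response['Body'].read())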
Example #5
def get_file_handle(self, file_key):
    check.str_param(file_key, 'file_key')
    return S3FileHandle(self.s3_bucket, self.get_full_key(file_key))
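A hypothetical usage, assuming get_full_key prepends the manager's key prefix (the exact key layout is not shown here):

file_manager = S3FileManager(boto3.client('s3'), 'some-bucket', 'some-prefix')
handle = file_manager.get_file_handle('step_run_ref.pkl')
assert handle.s3_bucket == 'some-bucket'
# under that assumption, handle.s3_key would be 'some-prefix/step_run_ref.pkl'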