Example #1
# Standard-library imports needed by this snippet; run_step_from_ref,
# setup_storage, serialize_value, and PICKLED_EVENTS_FILE_NAME are imported
# from the surrounding dagster / dagster-databricks modules.
import os
import pickle
import site
import tempfile
import zipfile


def main(step_run_ref_filepath, pipeline_zip):
    # Extract any zip files to a temporary directory and add that temporary directory
    # to the site path so the contained files can be imported.
    #
    # We can't rely on pip or other packaging tools because the zipped files might not
    # even be Python packages.
    with tempfile.TemporaryDirectory() as tmp:

        print('Extracting {}'.format(pipeline_zip))
        with zipfile.ZipFile(pipeline_zip) as zf:
            zf.extractall(tmp)
        site.addsitedir(tmp)

        print('Loading step run ref')
        # We can use regular local filesystem APIs to access DBFS inside the Databricks runtime.
        with open(step_run_ref_filepath, 'rb') as handle:
            step_run_ref = pickle.load(handle)

        print('Step run ref:')
        print(step_run_ref)

        print('Setting up storage credentials')
        setup_storage(step_run_ref)

        print('Running pipeline')
        events = list(run_step_from_ref(step_run_ref))

    print('Saving events to DBFS')
    events_filepath = os.path.join(
        os.path.dirname(step_run_ref_filepath), PICKLED_EVENTS_FILE_NAME)
    with open(events_filepath, 'wb') as handle:
        pickle.dump(serialize_value(events), handle)
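
For context, the pipeline_zip that main() expects can be produced on the driver side with nothing but the standard library. A minimal sketch; the function and variable names below are illustrative, not dagster API:

import os
import zipfile

def zip_pipeline_code(src_files, zip_path):
    # Bundle loose Python files so the remote main() can extract them
    # and make them importable via site.addsitedir.
    with zipfile.ZipFile(zip_path, "w") as zf:
        for path in src_files:
            zf.write(path, arcname=os.path.basename(path))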
Example #2
# Same standard-library imports as Example #1, plus DagsterInstance
# (importable from the top-level dagster package).
def main(step_run_ref_filepath, pipeline_zip):
    # Extract any zip files to a temporary directory and add that temporary directory
    # to the site path so the contained files can be imported.
    #
    # We can't rely on pip or other packaging tools because the zipped files might not
    # even be Python packages.
    with tempfile.TemporaryDirectory() as tmp:

        with zipfile.ZipFile(pipeline_zip) as zf:
            zf.extractall(tmp)
        site.addsitedir(tmp)

        # We can use regular local filesystem APIs to access DBFS inside the Databricks runtime.
        with open(step_run_ref_filepath, "rb") as handle:
            step_run_ref = pickle.load(handle)

        setup_storage(step_run_ref)

        with DagsterInstance.ephemeral() as instance:
            events = list(run_step_from_ref(step_run_ref, instance))

    events_filepath = os.path.join(
        os.path.dirname(step_run_ref_filepath), PICKLED_EVENTS_FILE_NAME)
    with open(events_filepath, "wb") as handle:
        pickle.dump(serialize_value(events), handle)
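
On the driver side, the launcher has to undo this pickle.dump(serialize_value(...)) layering to recover the events. A minimal sketch, assuming the events file has already been fetched from DBFS to a local path and that deserialize_value comes from dagster's serdes module (the exact import path varies across dagster versions):

import pickle

def load_events(local_events_path):
    with open(local_events_path, "rb") as handle:
        # Undo the pickle layer first, then the dagster serdes layer.
        return deserialize_value(pickle.load(handle))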
Example #3
# Standard-library and typing imports for this snippet; the remaining names
# (LocalFileManager, LocalFileHandle, EventLogEntry, run_step_from_ref,
# external_instance_from_step_run_ref, PICKLED_EVENTS_FILE_NAME,
# serialize_value) come from dagster's own modules.
import os
import pickle
from typing import List


def main(step_run_ref_path: str) -> None:
    file_manager = LocalFileManager(".")
    file_handle = LocalFileHandle(step_run_ref_path)
    step_run_ref = pickle.loads(file_manager.read_data(file_handle))

    all_events: List[EventLogEntry] = []

    try:
        instance = external_instance_from_step_run_ref(
            step_run_ref, event_listener_fn=all_events.append)
        # consume entire step iterator
        list(run_step_from_ref(step_run_ref, instance))
    finally:
        events_out_path = os.path.join(os.path.dirname(step_run_ref_path),
                                       PICKLED_EVENTS_FILE_NAME)
        with open(events_out_path, "wb") as events_file:
            pickle.dump(serialize_value(all_events), events_file)
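
The try/finally here guarantees that whatever events streamed in before a failure still get persisted. A small self-contained analogue of that pattern, with plain Python standing in for the dagster machinery (all names illustrative):

import pickle

def run_and_persist(step_iter, out_path):
    events = []
    try:
        for event in step_iter:
            events.append(event)
    finally:
        # Runs even if step_iter raised midway, so partial
        # progress is still written to disk.
        with open(out_path, "wb") as f:
            pickle.dump(events, f)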
Example #4
def put_events(events):
    # events_filepath is captured from the enclosing scope; each call
    # re-pickles the full serialized event list to that path.
    with open(events_filepath, "wb") as handle:
        pickle.dump(serialize_value(events), handle)
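
A callback like this can be invoked after each new event so the file on disk always holds a complete snapshot of the run so far. A hypothetical wiring, assuming events_filepath is bound in the enclosing scope exactly as in the snippet:

all_events = []

def on_new_event(event):
    all_events.append(event)
    put_events(all_events)  # re-snapshot the full list each time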
Example #5
def default_value_as_json_str(self):
    # Accessor on a config field object; check.invariant raises a CheckError
    # if no default value was provided.
    check.invariant(self.default_provided,
                    'Asking for default value when none was provided')
    return serialize_value(self.default_value)
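
Here serialize_value renders the default as its JSON text. A quick illustration of the primitive case (a sketch; dagster's serdes serializes plain scalars as ordinary JSON in the versions I have seen):

assert serialize_value(3) == "3"
assert serialize_value("hi") == '"hi"'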
Example #6
def test_deserialize_empty_set():
    assert set() == deserialize_value(serialize_value(set()))
    assert frozenset() == deserialize_value(serialize_value(frozenset()))
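
The same round trip should hold for non-empty containers, since dagster's serdes records the container type and restores it on deserialization. A sketch in the same style as the test above, not taken from the source:

def test_roundtrip_nonempty_sets():
    assert {1, 2, 3} == deserialize_value(serialize_value({1, 2, 3}))
    assert frozenset({"a", "b"}) == deserialize_value(
        serialize_value(frozenset({"a", "b"})))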