def main(step_run_ref_filepath, pipeline_zip):
    """Run a pickled step from inside the Databricks runtime and save its events.

    Args:
        step_run_ref_filepath: Path to the pickled step run ref. The pickled
            events file is written into the same directory.
        pipeline_zip: Path to a zip archive that is extracted and added to the
            site path before the step runs.
    """
    # Extract any zip files to a temporary directory and add that temporary directory
    # to the site path so the contained files can be imported.
    #
    # We can't rely on pip or other packaging tools because the zipped files might not
    # even be Python packages.
    with tempfile.TemporaryDirectory() as tmp:
        print('Extracting {}'.format(pipeline_zip))
        with zipfile.ZipFile(pipeline_zip) as zf:
            zf.extractall(tmp)
        site.addsitedir(tmp)

        print('Loading step run ref')
        # We can use regular local filesystem APIs to access DBFS inside the Databricks runtime.
        # NOTE(review): pickle.load executes arbitrary code from the file; this
        # assumes the step run ref was written by a trusted launcher -- confirm.
        with open(step_run_ref_filepath, 'rb') as handle:
            step_run_ref = pickle.load(handle)

        print('Step run ref:')
        print(step_run_ref)

        print('Setting up storage credentials')
        setup_storage(step_run_ref)

        print('Running pipeline')
        events = list(run_step_from_ref(step_run_ref))

        print('Saving events to DBFS')
        # os.path.join (rather than dirname + '/' + name) keeps the events file
        # next to the step run ref even when the ref path has no directory
        # component; plain concatenation would produce '/<name>' in that case.
        events_filepath = os.path.join(
            os.path.dirname(step_run_ref_filepath), PICKLED_EVENTS_FILE_NAME
        )
        with open(events_filepath, 'wb') as handle:
            pickle.dump(serialize_value(events), handle)
def main(step_run_ref_filepath, pipeline_zip):
    """Run a pickled step against an ephemeral instance and save its events.

    Args:
        step_run_ref_filepath: Path to the pickled step run ref. The pickled
            events file is written into the same directory.
        pipeline_zip: Path to a zip archive that is extracted and added to the
            site path before the step runs.
    """
    # Extract any zip files to a temporary directory and add that temporary directory
    # to the site path so the contained files can be imported.
    #
    # We can't rely on pip or other packaging tools because the zipped files might not
    # even be Python packages.
    with tempfile.TemporaryDirectory() as tmp:
        with zipfile.ZipFile(pipeline_zip) as zf:
            zf.extractall(tmp)
        site.addsitedir(tmp)

        # We can use regular local filesystem APIs to access DBFS inside the Databricks runtime.
        # NOTE(review): pickle.load executes arbitrary code from the file; this
        # assumes the step run ref was written by a trusted launcher -- confirm.
        with open(step_run_ref_filepath, "rb") as handle:
            step_run_ref = pickle.load(handle)

        setup_storage(step_run_ref)

        # Materialize every event while the ephemeral instance is still open.
        with DagsterInstance.ephemeral() as instance:
            events = list(run_step_from_ref(step_run_ref, instance))

        # os.path.join (rather than dirname + "/" + name) keeps the events file
        # next to the step run ref even when the ref path has no directory
        # component; plain concatenation would produce "/<name>" in that case.
        events_filepath = os.path.join(
            os.path.dirname(step_run_ref_filepath), PICKLED_EVENTS_FILE_NAME
        )
        with open(events_filepath, "wb") as handle:
            pickle.dump(serialize_value(events), handle)
def main(step_run_ref_path: str) -> None:
    """Execute the step described by a pickled step run ref and persist its events.

    The ref is read through a ``LocalFileManager`` rooted at ``"."``. Events are
    gathered by the listener callback handed to
    ``external_instance_from_step_run_ref`` and are pickled next to the ref
    file whether or not the step raises.

    Args:
        step_run_ref_path: Path of the pickled step run ref.
    """
    manager = LocalFileManager(".")
    ref_handle = LocalFileHandle(step_run_ref_path)
    pickled_ref = manager.read_data(ref_handle)
    step_run_ref = pickle.loads(pickled_ref)

    collected_events: List[EventLogEntry] = []
    try:
        instance = external_instance_from_step_run_ref(
            step_run_ref, event_listener_fn=collected_events.append
        )
        # Drain the step iterator; the events themselves arrive via the listener.
        for _ in run_step_from_ref(step_run_ref, instance):
            pass
    finally:
        # Always write out whatever was collected, even after a failure.
        output_dir = os.path.dirname(step_run_ref_path)
        events_out_path = os.path.join(output_dir, PICKLED_EVENTS_FILE_NAME)
        with open(events_out_path, "wb") as events_file:
            serialized_events = serialize_value(collected_events)
            pickle.dump(serialized_events, events_file)
def put_events(events):
    """Serialize ``events`` and pickle the result to ``events_filepath``.

    ``events_filepath`` is taken from the enclosing scope; the file is opened
    in binary write mode, replacing any previous contents.
    """
    with open(events_filepath, "wb") as events_file:
        serialized_events = serialize_value(events)
        pickle.dump(serialized_events, events_file)
def default_value_as_json_str(self):
    """Return ``self.default_value`` passed through ``serialize_value``.

    A ``check.invariant`` guards the call, so asking for the default when
    ``self.default_provided`` is falsy fails that invariant.
    Presumably the serialized form is a JSON string, per the method name.
    """
    has_default = self.default_provided
    check.invariant(has_default, 'Asking for default value when none was provided')
    serialized_default = serialize_value(self.default_value)
    return serialized_default
def test_deserialize_empty_set():
    """Empty ``set`` and ``frozenset`` values survive a serialize/deserialize round trip."""
    round_tripped_set = deserialize_value(serialize_value(set()))
    assert set() == round_tripped_set

    round_tripped_frozenset = deserialize_value(serialize_value(frozenset()))
    assert frozenset() == round_tripped_frozenset