def watch(self, run_id, key):
    """Start watching the local compute-log files for ``(run_id, key)``.

    Does nothing if a watcher is already registered for this pair. Otherwise
    the directory containing the stderr log is created if needed and a
    ``LocalComputeLogFilesystemEventHandler`` is scheduled on it with
    ``self._observer``; the value returned by ``schedule`` is stored in
    ``self._watchers``.

    Args:
        run_id: Run identifier, forwarded unchanged to the manager's path helpers.
        key: Compute-log key, forwarded unchanged to the manager's path helpers
            and to the event handler.
    """
    # Keep the derived dictionary key separate from the caller's ``key``:
    # the path helpers and the handler must receive the original value, not
    # the combined run/key identifier used to index ``self._watchers``.
    watch_key = self._key(run_id, key)
    if watch_key in self._watchers:
        return

    stdout_path = self._manager.get_local_path(run_id, key, ComputeIOType.STDOUT)
    stderr_path = self._manager.get_local_path(run_id, key, ComputeIOType.STDERR)
    update_paths = [stdout_path, stderr_path]
    complete_paths = [self._manager.complete_artifact_path(run_id, key)]

    # The observer is scheduled on a directory, so it has to exist beforehand.
    directory = os.path.dirname(stderr_path)
    ensure_dir(directory)

    self._watchers[watch_key] = self._observer.schedule(
        LocalComputeLogFilesystemEventHandler(
            self, run_id, key, update_paths, complete_paths
        ),
        str(directory),
    )
def mirror_step_io(step_context):
    """Mirror stdout/stderr into per-step files around a single ``yield``.

    When ``should_capture_stdout`` is false for the context's instance, this
    yields once and does nothing else. Otherwise both streams are mirrored to
    files derived from the run id and step key for the duration of the yield,
    and a completion marker file is touched after the mirrored section exits.

    NOTE(review): presumably wrapped with ``contextlib.contextmanager`` at the
    definition site -- confirm; the decorator is not visible here.
    """
    # https://github.com/dagster-io/dagster/issues/1698
    capture_enabled = should_capture_stdout(step_context.instance)
    if not capture_enabled:
        yield
        return

    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    base = _filebase(
        step_context.instance, step_context.run_id, step_context.step.key
    )
    stdout_file = _filepath(base, IO_TYPE_STDOUT)
    stderr_file = _filepath(base, IO_TYPE_STDERR)
    complete_marker = _filepath(base, IO_TYPE_COMPLETE)

    for log_file in (stdout_file, stderr_file):
        ensure_dir(os.path.dirname(log_file))

    # stderr is entered first and therefore exited last, as in nested form.
    with mirror_stream(sys.stderr, stderr_file), mirror_stream(sys.stdout, stdout_file):
        yield

    # touch the file to signify that compute is complete
    touch_file(complete_marker)
def test_compute_log_manager_subscription_updates():
    """A subscriber gets one empty chunk up front, then a data chunk after a write."""
    from dagster.core.storage.local_compute_log_manager import LocalComputeLogManager

    run_id = "fake_run_id"
    step_key = "spew"

    with tempfile.TemporaryDirectory() as log_root:
        manager = LocalComputeLogManager(log_root, polling_timeout=0.5)
        stdout_path = manager.get_local_path(run_id, step_key, ComputeIOType.STDOUT)

        # The directory to be watched and the log file itself must both exist
        # before subscribing.
        ensure_dir(os.path.dirname(stdout_path))
        touch_file(stdout_path)

        # Subscribe, collecting every delivered chunk.
        received = []
        observable = manager.observable(run_id, step_key, ComputeIOType.STDOUT)
        observable.subscribe(received.append)

        # Subscribing alone delivers exactly one chunk, carrying no data.
        assert len(received) == 1
        initial_chunk = received[-1]
        assert not initial_chunk.data
        assert initial_chunk.cursor == 0

        with open(stdout_path, "a+") as log_file:
            print(HELLO_SOLID, file=log_file)  # pylint:disable=print-call

        # Wait longer than the 0.5s polling timeout configured above.
        time.sleep(1)

        assert len(received) == 2
        latest_chunk = received[-1]
        assert latest_chunk.data
        assert latest_chunk.cursor > 0
def _download_to_local(self, run_id, key, io_type):
    """Download the bucket blob for ``(run_id, key, io_type)`` to its local path.

    The destination comes from ``self.get_local_path``; its parent directory
    is created if needed and the file is opened in binary write mode, so any
    existing content is truncated before the download starts.
    """
    local_path = self.get_local_path(run_id, key, io_type)
    ensure_dir(os.path.dirname(local_path))

    with open(local_path, "wb") as local_file:
        remote_key = self._bucket_key(run_id, key, io_type)
        blob = self._bucket.blob(remote_key)
        blob.download_to_file(local_file)