def add_dependencies(self, bundles, arg_names=None):
    """ Add one or more upstream bundles as dependencies

    Note: Metadata for correct re-use of re-execution semantics.

    Each dependency is recorded in this bundle's lineage (`self.pb.lineage`)
    as a `(processing_name, uuid, arg_name)` tuple through
    `LineageRecord.add_deps_to_lr`.

    Args:
        bundles (Union[list `api.Bundle`, `api.Bundle`]): Another bundle, or an
            iterable of bundles, that may have been used to produce this one.
            Any iterable is accepted (it is materialized into a list once).
        arg_names (Union[list str, str]): Optional argument names of the
            dependencies: a sequence of names when `bundles` is an iterable,
            a single string when `bundles` is a single bundle.
            Default '_arg_<i>' used, where <i> starts at the value returned by
            `LineageRecord.dependency_count` for the current lineage.

    Returns:
        self
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`.  Imported locally so this method does not rely on the
    # module having imported `collections.abc` explicitly.
    from collections.abc import Iterable

    self._check_open()

    curr_count = LineageRecord.dependency_count(self.pb.lineage)

    if isinstance(bundles, Iterable):
        # Materialize once: generators have no len() and can only be walked once.
        bundles = list(bundles)
        if arg_names is None:
            arg_names = [
                '_arg_{}'.format(i)
                for i in range(curr_count, curr_count + len(bundles))
            ]
        deps = [(b.processing_name, b.uuid, an)
                for an, b in zip(arg_names, bundles)]
    else:
        if arg_names is None:
            arg_names = '_arg_{}'.format(curr_count)
        deps = [(bundles.processing_name, bundles.uuid, arg_names)]

    LineageRecord.add_deps_to_lr(self.pb.lineage, deps)
    return self
def _close(self):
    """ Write out this bundle as a hyperframe.

    Parse the data, set presentation, create lineage, and
    write to disk. This closes the bundle so it may not be re-used.

    If any step raises an `Exception`, the bundle directory is removed with
    `PipeBase.rm_bundle_dir` and the original exception is re-raised; in that
    case the `closed` / `open` flags are left untouched.

    Returns:
        self
    """
    try:
        presentation, frames = PipeBase.parse_return_val(self.uuid, self.data, self.data_context)
        self.add_frames(frames)
        self.pb.presentation = presentation

        # TODO: we should let user decide which file under git or explicitly set hash.
        # Until then a fixed placeholder code version is recorded.  (The previously
        # disabled alternative looked the version up with DisdatFS().get_pipe_version
        # on the directory of BundleWrapperTask's module.)
        cv = disdat.fs.CodeVersion(semver="0.1.0",
                                   hash="unknown",
                                   tstamp="unknown",
                                   branch="unknown",
                                   url="unknown",
                                   dirty="unknown")

        lr = LineageRecord(hframe_name=self._set_processing_name(),  # <--- setting processing name
                           hframe_uuid=self.uuid,
                           code_repo=cv.url,
                           code_name='unknown',
                           code_semver=cv.semver,
                           code_hash=cv.hash,
                           code_branch=cv.branch,
                           code_method='unknown',  # TODO: capture pkg.mod.class.method that creates bundle
                           depends_on=self.depends_on)

        self.add_lineage(lr)
        self.replace_tags(self.tags)
        self.data_context.write_hframe(self)
    except Exception:
        # If we fail for any reason, remove bundle dir and raise
        PipeBase.rm_bundle_dir(self.local_dir, self.uuid, [])  # [] means no db-targets
        raise

    self.closed = True
    self.open = False

    return self
def make_hframe(output_frames, output_bundle_uuid, depends_on, human_name, processing_name, class_to_version, tags=None, presentation=hyperframe_pb2.DEFAULT):
    """
    Build a HyperFrameRecord (HFR) together with its LineageRecord.

    The code version is looked up from `class_to_version`, a LineageRecord is
    created from it, and both are wrapped in a new HyperFrameRecord owned by
    the current user (as reported by `getpass.getuser()`).  Nothing visible in
    this function writes to disk; it only constructs the record objects.

    Args:
        output_frames (:list:`FrameRecord`): List of frames to be placed in bundle / hframe
        output_bundle_uuid: UUID used for both the lineage record and the hyperframe
        depends_on (:list:tuple): must be the processing_name, uuid of the upstream pipes / base bundles
        human_name: Human name stored on the hyperframe
        processing_name: Processing name stored on the hyperframe and used as the lineage hframe_name
        class_to_version: A python class whose file is under git control
        tags: Tags passed through to the hyperframe (default None)
        presentation (enum): how to present this hframe when we use it as input to a function.
            Defaults to `hyperframe_pb2.DEFAULT`.

    Returns:
        `HyperFrameRecord`
    """
    # Resolve the code version for the supplied class -- only called if we ran
    code_version = get_pipe_version(class_to_version)

    lineage_fields = dict(
        hframe_name=processing_name,
        hframe_uuid=output_bundle_uuid,
        code_repo=code_version.url,
        code_name='unknown',
        code_semver=code_version.semver,
        code_hash=code_version.hash,
        code_branch=code_version.branch,
        depends_on=depends_on,
    )
    lineage = LineageRecord(**lineage_fields)

    return HyperFrameRecord(
        owner=getpass.getuser(),
        human_name=human_name,
        processing_name=processing_name,
        uuid=output_bundle_uuid,
        frames=output_frames,
        lin_obj=lineage,
        tags=tags,
        presentation=presentation,
    )
def _close(self):
    """ Finalize this bundle by writing it out as a hyperframe.

    Steps, in order: parse `self.data` into a presentation and frames, attach
    them, build a LineageRecord from the code version of `BundleWrapperTask`,
    attach lineage and tags, then hand the bundle to
    `self.data_context.write_hframe`.  Once this succeeds the bundle is
    flagged closed and may not be re-used.

    On any `Exception` the bundle directory is removed through
    `PipeBase.rm_bundle_dir` (passing `self.db_targets`) and the exception
    is re-raised.

    Returns:
        self
    """
    try:
        hframe_presentation, hframe_frames = PipeBase.parse_return_val(
            self.uuid, self.data, self.data_context)
        self.add_frames(hframe_frames)
        self.pb.presentation = hframe_presentation

        code_version = get_pipe_version(BundleWrapperTask)
        processing_name = self._set_processing_name()  # <--- setting processing name

        lineage = LineageRecord(
            hframe_name=processing_name,
            hframe_uuid=self.uuid,
            code_repo=code_version.url,
            code_name='unknown',
            code_semver=code_version.semver,
            code_hash=code_version.hash,
            code_branch=code_version.branch,
            depends_on=self.depends_on,
        )
        self.add_lineage(lineage)

        self.replace_tags(self.tags)
        self.data_context.write_hframe(self)
    except Exception:
        # If we fail for any reason, remove bundle dir and raise
        PipeBase.rm_bundle_dir(self.local_dir, self.uuid, self.db_targets)
        raise
    else:
        # Only reached when everything above succeeded
        self.closed = True
        self.open = False

    return self