Beispiel #1
0
    def add_dependencies(self, bundles, arg_names=None):
        """ Add one or more upstream bundles as dependencies

        Note: Metadata for correct re-use of re-execution semantics.

        Args:
            bundles (Union[list `api.Bundle`, `api.Bundle`]): One bundle, or an iterable of bundles,
                that may have been used to produce this one.  Each must expose
                `processing_name` and `uuid`.
            arg_names (Union[list str, str]): Optional argument names of the dependencies.  Pass a
                list when `bundles` is an iterable and a single string when it is a single bundle.
                Default '_arg_<i>' is used, where <i> continues counting from the number of
                dependencies already present in this bundle's lineage.

        Returns:
            self
        """
        # `collections.Iterable` was removed in Python 3.10; the ABC lives in
        # `collections.abc`.  Fall back to the old location for Python 2.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        self._check_open()
        curr_count = LineageRecord.dependency_count(self.pb.lineage)

        if isinstance(bundles, Iterable):
            # Materialize once so generators and other unsized iterables can be
            # counted (for default names) and then zipped.
            bundles = list(bundles)
            if arg_names is None:
                arg_names = [
                    '_arg_{}'.format(i)
                    for i in range(curr_count, curr_count + len(bundles))
                ]
            deps = [(b.processing_name, b.uuid, an)
                    for an, b in zip(arg_names, bundles)]
        else:
            if arg_names is None:
                arg_names = '_arg_{}'.format(curr_count)
            deps = [(bundles.processing_name, bundles.uuid, arg_names)]

        LineageRecord.add_deps_to_lr(self.pb.lineage, deps)
        return self
Beispiel #2
0
    def _close(self):
        """ Write out this bundle as a hyperframe.

        Parse the data, set presentation, create lineage, and
        write to disk.

        This closes the bundle so it may not be re-used.

        If any step raises an `Exception`, the bundle directory is removed
        and the exception is re-raised; the open/closed flags are left untouched.

        Returns:
            self
        """

        try:

            presentation, frames = PipeBase.parse_return_val(self.uuid, self.data, self.data_context)

            self.add_frames(frames)

            self.pb.presentation = presentation

            # TODO: we should let user decide which file under git or explicitly set hash
            # Until then no git lookup is attempted (previously sketched as
            # DisdatFS().get_pipe_version() on the directory of BundleWrapperTask's
            # module); a placeholder code version is recorded instead.
            cv = disdat.fs.CodeVersion(semver="0.1.0", hash="unknown", tstamp="unknown", branch="unknown",
                                       url="unknown", dirty="unknown")

            lr = LineageRecord(hframe_name=self._set_processing_name(),  # <--- setting processing name
                               hframe_uuid=self.uuid,
                               code_repo=cv.url,
                               code_name='unknown',
                               code_semver=cv.semver,
                               code_hash=cv.hash,
                               code_branch=cv.branch,
                               code_method='unknown',  # TODO: capture pkg.mod.class.method that creates bundle
                               depends_on=self.depends_on)

            self.add_lineage(lr)

            self.replace_tags(self.tags)

            self.data_context.write_hframe(self)

        except Exception:
            # If we fail for any reason, remove bundle dir and raise
            PipeBase.rm_bundle_dir(self.local_dir, self.uuid, [])  # [] means no db-targets
            raise

        self.closed = True
        self.open = False

        return self
Beispiel #3
0
    def make_hframe(output_frames, output_bundle_uuid, depends_on,
                    human_name, processing_name, class_to_version, tags=None, presentation=hyperframe_pb2.DEFAULT):
        """
        Build the HyperFrameRecord (HFR) describing a bundle.

        Structure of the records involved:
            HFR contains a LineageRecord
            HFR contains UUIDs of FrameRecords or FRs
            FR contains data or LinkRecords

        This function only assembles the record objects: it looks up the code
        version for `class_to_version`, creates the LineageRecord from it, and
        wraps everything in a HyperFrameRecord owned by the current user.

        Args:
            output_frames (:list:`FrameRecord`):  List of frames to be placed in bundle / hframe
            output_bundle_uuid: UUID given to both the lineage record and the hframe
            depends_on (:list:tuple):  must be the processing_name, uuid of the upstream pipes / base bundles
            human_name: Human name stored on the hframe
            processing_name: Processing name stored on the hframe and used as the lineage hframe name
            class_to_version: A python class whose file is under git control
            tags: Tags stored on the hframe (default None)
            presentation (enum):  how to present this hframe when we use it as input to a function --
                defaults to `hyperframe_pb2.DEFAULT`

        Returns:
            `HyperFrameRecord`
        """

        # Code version of the class that produced this bundle -- only called if we ran
        code_version = get_pipe_version(class_to_version)

        lineage = LineageRecord(
            hframe_name=processing_name,
            hframe_uuid=output_bundle_uuid,
            code_repo=code_version.url,
            code_name='unknown',
            code_semver=code_version.semver,
            code_hash=code_version.hash,
            code_branch=code_version.branch,
            depends_on=depends_on,
        )

        # The hframe is owned by whoever is running this process.
        current_user = getpass.getuser()

        return HyperFrameRecord(
            owner=current_user,
            human_name=human_name,
            processing_name=processing_name,
            uuid=output_bundle_uuid,
            frames=output_frames,
            lin_obj=lineage,
            tags=tags,
            presentation=presentation,
        )
Beispiel #4
0
    def _close(self):
        """ Write out this bundle as a hyperframe.

        Parse the data, set presentation, create lineage, and
        write to disk.

        This closes the bundle so it may not be re-used.

        If any step raises an `Exception`, the bundle directory (and this
        bundle's db targets) are handed to `PipeBase.rm_bundle_dir` for removal
        and the exception is re-raised; the open/closed flags are left untouched.

        Returns:
            self
        """

        try:

            presentation, frames = PipeBase.parse_return_val(
                self.uuid, self.data, self.data_context)

            self.add_frames(frames)

            self.pb.presentation = presentation

            # Code version is looked up from the BundleWrapperTask class.
            cv = get_pipe_version(BundleWrapperTask)

            lr = LineageRecord(
                hframe_name=self._set_processing_name(),  # <--- setting processing name
                hframe_uuid=self.uuid,
                code_repo=cv.url,
                code_name='unknown',
                code_semver=cv.semver,
                code_hash=cv.hash,
                code_branch=cv.branch,
                depends_on=self.depends_on)

            self.add_lineage(lr)

            self.replace_tags(self.tags)

            self.data_context.write_hframe(self)

        except Exception:
            # If we fail for any reason, remove bundle dir and raise
            PipeBase.rm_bundle_dir(self.local_dir, self.uuid, self.db_targets)
            raise

        self.closed = True
        self.open = False

        return self