Esempio n. 1
0
    def add_writer_state(self,
                         graph,
                         after,
                         features,
                         key_columns=None,
                         timestamp_key=None):
        """Add a ``WriteToStream`` step to ``graph`` that writes to a v3io stream.

        The written column list starts from the keys of ``features``. The
        ``timestamp_key`` (when given) and every entry of ``key_columns`` are
        prepended if they are not already present.

        :param graph:         graph object exposing ``add_step``
        :param after:         forwarded to ``add_step`` as ``after``
        :param features:      mapping whose keys are the feature column names
        :param key_columns:   optional iterable of key column names; ``None``
                              means no key columns
        :param timestamp_key: optional timestamp column name
        """
        from storey import V3ioDriver

        endpoint, uri = parse_v3io_path(self._target_path)
        column_list = list(features.keys())
        if timestamp_key and timestamp_key not in column_list:
            column_list = [timestamp_key] + column_list

        # key_columns defaults to None; reversed(None) would raise TypeError,
        # so treat a missing value as "no key columns".
        keys = key_columns if key_columns is not None else []
        # Walk the keys back-to-front while inserting at index 0 so that the
        # newly added keys end up at the head of the list in their given order.
        for key in reversed(keys):
            if key not in column_list:
                column_list.insert(0, key)

        graph.add_step(
            name="WriteToStream",
            after=after,
            graph_shape="cylinder",
            class_name="storey.WriteToV3IOStream",
            columns=column_list,
            storage=V3ioDriver(webapi=endpoint),
            stream_path=uri,
            **self.attributes,
        )
Esempio n. 2
0
    def add_writer_step(
        self,
        graph,
        after,
        features,
        key_columns=None,
        timestamp_key=None,
        featureset_status=None,
    ):
        """Add a ``storey.StreamTarget`` step to ``graph`` for this target.

        :param graph:             graph object exposing ``add_step``
        :param after:             forwarded to ``add_step`` as ``after``
        :param features:          forwarded to ``self._get_column_list``
        :param key_columns:       optional object exposing ``keys()`` whose
                                  keys are the key column names; ``None``
                                  means no key columns
        :param timestamp_key:     optional timestamp column name
        :param featureset_status: accepted but not used by this method
        """
        from storey import V3ioDriver

        # key_columns defaults to None; calling .keys() on it would raise
        # AttributeError, so fall back to an empty list of key names.
        key_columns = list(key_columns.keys()) if key_columns is not None else []
        endpoint, uri = parse_v3io_path(self._target_path)
        column_list = self._get_column_list(features=features,
                                            timestamp_key=timestamp_key,
                                            key_columns=key_columns)

        graph.add_step(
            name=self.name or "StreamTarget",
            after=after,
            graph_shape="cylinder",
            class_name="storey.StreamTarget",
            columns=column_list,
            storage=V3ioDriver(webapi=endpoint),
            stream_path=uri,
            **self.attributes,
        )
Esempio n. 3
0
    def get_table(self, uri):
        """Get a storey Table object by uri, creating and caching it if needed.

        Resolution order:

        1. a table already cached under ``uri`` is returned as is
        2. ``"."``, ``""`` or a ``$``-prefixed uri yields an in-memory table
        3. a ``v3io://`` / ``v3ios://`` uri yields a table on a ``V3ioDriver``
        4. a store uri that resolves to a feature set or feature vector
           yields the table object of its online target

        :param uri: table uri
        :returns: the cached or newly created Table
        :raises ImportError: if the storey package is not installed
        :raises mlrun.errors.MLRunInvalidArgumentError: if the store resource
            has no online target, or the uri matches none of the cases above
        """
        try:
            from storey import Driver, Table, V3ioDriver
        except ImportError as exc:
            raise ImportError(
                "storey package is not installed, use pip install storey"
            ) from exc

        if uri in self._tabels:
            return self._tabels[uri]

        if uri in [".", ""] or uri.startswith("$"):  # $.. indicates in-mem table
            self._tabels[uri] = Table("", Driver())
            return self._tabels[uri]

        if uri.startswith(("v3io://", "v3ios://")):
            endpoint, path = parse_v3io_path(uri)
            table = Table(path, V3ioDriver(webapi=endpoint))
            # Cache under the uri the caller passed so the lookup above hits on
            # the next call; previously only the parsed path was used as key,
            # so every call with the same v3io uri built a new Table.
            self._tabels[uri] = table
            # Keep the parsed-path key too, for callers that look tables up by
            # path (this is the key the cache used before).
            self._tabels[path] = table
            return table

        if is_store_uri(uri):
            resource = get_store_resource(uri)
            if resource.kind in [
                    mlrun.api.schemas.ObjectKind.feature_set.value,
                    mlrun.api.schemas.ObjectKind.feature_vector.value,
            ]:
                target = get_online_target(resource)
                if not target:
                    raise mlrun.errors.MLRunInvalidArgumentError(
                        f"resource {uri} does not have an online data source")
                self._tabels[uri] = target.get_table_object()
                return self._tabels[uri]

        raise mlrun.errors.MLRunInvalidArgumentError(
            f"table {uri} not found in cache")
Esempio n. 4
0
    def get_table_object(self):
        """Return a storey ``Table`` for this target's path.

        The path in ``self._target_path`` is split by ``parse_v3io_path`` into
        a web-API endpoint and a table path; the table is backed by a
        ``V3ioDriver`` on that endpoint and uses the flush interval from the
        mlrun feature-store configuration.
        """
        from storey import Table, V3ioDriver

        # TODO use options/cred
        webapi_endpoint, table_path = parse_v3io_path(self._target_path)
        driver = V3ioDriver(webapi=webapi_endpoint)
        flush_secs = mlrun.mlconf.feature_store.flush_interval
        return Table(table_path, driver, flush_interval_secs=flush_secs)
Esempio n. 5
0
    def get_table(self, uri):
        """Get a storey Table object by uri, creating and caching it if needed.

        A table already cached under ``uri`` is returned as is. ``"."`` and
        ``""`` yield a table on the base ``Driver``; ``v3io://`` / ``v3ios://``
        uris yield a table on a ``V3ioDriver``.

        :param uri: table uri
        :returns: the cached or newly created Table
        :raises ImportError: if the storey package is not installed
        :raises ValueError: if the uri matches none of the supported forms
        """
        try:
            from storey import Table, Driver, V3ioDriver
        except ImportError as exc:
            raise ImportError(
                "storey package is not installed, use pip install storey"
            ) from exc

        if uri in self._tabels:
            return self._tabels[uri]

        if uri in [".", ""]:
            self._tabels[uri] = Table("", Driver())
            return self._tabels[uri]

        if uri.startswith(("v3io://", "v3ios://")):
            endpoint, path = parse_v3io_path(uri)
            table = Table(path, V3ioDriver(webapi=endpoint))
            # Cache under the uri the caller passed so the lookup above hits on
            # the next call; previously only the parsed path was used as key,
            # so every call with the same v3io uri built a new Table.
            self._tabels[uri] = table
            # Keep the parsed-path key too, for callers that look tables up by
            # path (this is the key the cache used before).
            self._tabels[path] = table
            return table

        # todo: map store:// uri's to Table objects

        raise ValueError(f"table {uri} not found in cache")
Esempio n. 6
0
    def get_table_object(self):
        """Return a storey ``Table`` for this target's path.

        ``self._target_path`` is split by ``parse_v3io_path`` into a web-API
        endpoint and a table path; the table is backed by a ``V3ioDriver``
        created for that endpoint.
        """
        from storey import Table, V3ioDriver

        # TODO use options/cred
        webapi_endpoint, table_path = parse_v3io_path(self._target_path)
        driver = V3ioDriver(webapi=webapi_endpoint)
        return Table(table_path, driver)