def add_writer_state(self, graph, after, features, key_columns=None, timestamp_key=None):
    """Add a ``WriteToStream`` step that writes events to this target's V3IO stream.

    The written columns are the keys of ``features``, preceded by
    ``timestamp_key`` (when given and not already present) and then preceded
    by any ``key_columns`` entries that are not already present, kept in
    their original relative order.

    :param graph:         graph object exposing ``add_step``
    :param after:         passed through as the ``after`` argument of ``add_step``
    :param features:      mapping whose keys are the feature column names
    :param key_columns:   optional iterable of key column names; ``None`` means no key columns
    :param timestamp_key: optional timestamp column name
    """
    from storey import V3ioDriver

    endpoint, uri = parse_v3io_path(self._target_path)
    column_list = list(features.keys())
    if timestamp_key and timestamp_key not in column_list:
        column_list = [timestamp_key] + column_list

    # key_columns defaults to None; reversed(None) would raise TypeError,
    # so treat a missing value as "no key columns".
    # Iterating in reverse while inserting at index 0 keeps the keys in
    # their original order at the front of the list.
    for key in reversed(list(key_columns or [])):
        if key not in column_list:
            column_list.insert(0, key)

    graph.add_step(
        name="WriteToStream",
        after=after,
        graph_shape="cylinder",
        class_name="storey.WriteToV3IOStream",
        columns=column_list,
        storage=V3ioDriver(webapi=endpoint),
        stream_path=uri,
        **self.attributes,
    )
def add_writer_step(
    self,
    graph,
    after,
    features,
    key_columns=None,
    timestamp_key=None,
    featureset_status=None,
):
    """Add a ``storey.StreamTarget`` step that writes events to this target's V3IO stream.

    The step is named ``self.name``, falling back to ``"StreamTarget"``, and
    its column list is computed by ``self._get_column_list``.

    :param graph:             graph object exposing ``add_step``
    :param after:             passed through as the ``after`` argument of ``add_step``
    :param features:          feature definitions forwarded to ``_get_column_list``
    :param key_columns:       optional mapping-like object; only its ``keys()`` are used.
                              ``None`` means no key columns
    :param timestamp_key:     optional timestamp column name
    :param featureset_status: accepted for interface compatibility; not used here
    """
    from storey import V3ioDriver

    # key_columns defaults to None; calling .keys() on it would raise
    # AttributeError, so fall back to an empty key list in that case.
    key_columns = [] if key_columns is None else list(key_columns.keys())

    endpoint, uri = parse_v3io_path(self._target_path)
    column_list = self._get_column_list(
        features=features, timestamp_key=timestamp_key, key_columns=key_columns
    )

    graph.add_step(
        name=self.name or "StreamTarget",
        after=after,
        graph_shape="cylinder",
        class_name="storey.StreamTarget",
        columns=column_list,
        storage=V3ioDriver(webapi=endpoint),
        stream_path=uri,
        **self.attributes,
    )
def get_table(self, uri):
    """get storey Table object by uri

    Tables are cached in ``self._tabels``; asking for the same uri again
    returns the previously created object.

    :param uri: one of:
                ``"."``, ``""`` or a string starting with ``$`` (in-memory table),
                a ``v3io://`` / ``v3ios://`` url,
                or a store uri pointing at a feature set / feature vector
    :returns:   the cached or newly created storey ``Table``
    :raises ImportError: when the storey package cannot be imported
    :raises mlrun.errors.MLRunInvalidArgumentError: when the store resource has no
            online target, or when the uri matches none of the supported forms
    """
    try:
        from storey import Driver, Table, V3ioDriver
    except ImportError as exc:
        # chain the original error so the real import failure stays visible
        raise ImportError(
            "storey package is not installed, use pip install storey"
        ) from exc

    if uri in self._tabels:
        return self._tabels[uri]

    if uri in [".", ""] or uri.startswith("$"):
        # $.. indicates in-mem table
        self._tabels[uri] = Table("", Driver())
        return self._tabels[uri]

    if uri.startswith(("v3io://", "v3ios://")):
        endpoint, path = parse_v3io_path(uri)
        table = Table(path, V3ioDriver(webapi=endpoint))
        # Cache under the uri the caller passed so the lookup above hits on
        # the next call; previously only the parsed path was used as the key,
        # so every call with the same url built a new Table.
        self._tabels[uri] = table
        # keep the parsed-path key as well, for code that looks tables up by path
        self._tabels[path] = table
        return table

    if is_store_uri(uri):
        resource = get_store_resource(uri)
        if resource.kind in [
            mlrun.api.schemas.ObjectKind.feature_set.value,
            mlrun.api.schemas.ObjectKind.feature_vector.value,
        ]:
            target = get_online_target(resource)
            if not target:
                raise mlrun.errors.MLRunInvalidArgumentError(
                    f"resource {uri} does not have an online data source"
                )
            self._tabels[uri] = target.get_table_object()
            return self._tabels[uri]

    raise mlrun.errors.MLRunInvalidArgumentError(f"table {uri} not found in cache")
def get_table_object(self):
    """Create a storey ``Table`` for this target's path, backed by a ``V3ioDriver``.

    The endpoint and table path are parsed from ``self._target_path``; the
    flush interval is read from ``mlrun.mlconf.feature_store.flush_interval``.
    """
    from storey import Table, V3ioDriver

    # TODO use options/cred
    webapi_endpoint, table_path = parse_v3io_path(self._target_path)
    driver = V3ioDriver(webapi=webapi_endpoint)
    flush_secs = mlrun.mlconf.feature_store.flush_interval
    return Table(table_path, driver, flush_interval_secs=flush_secs)
def get_table(self, uri):
    """get storey Table object by uri

    Tables are cached in ``self._tabels``; asking for the same uri again
    returns the previously created object.

    :param uri: ``"."`` or ``""`` for a table built on the base ``Driver``,
                or a ``v3io://`` / ``v3ios://`` url
    :returns:   the cached or newly created storey ``Table``
    :raises ImportError: when the storey package cannot be imported
    :raises ValueError:  when the uri matches none of the supported forms
    """
    try:
        from storey import Table, Driver, V3ioDriver
    except ImportError as exc:
        # chain the original error so the real import failure stays visible
        raise ImportError(
            "storey package is not installed, use pip install storey"
        ) from exc

    if uri in self._tabels:
        return self._tabels[uri]

    if uri in [".", ""]:
        self._tabels[uri] = Table("", Driver())
        return self._tabels[uri]

    if uri.startswith(("v3io://", "v3ios://")):
        endpoint, path = parse_v3io_path(uri)
        table = Table(path, V3ioDriver(webapi=endpoint))
        # Cache under the uri the caller passed so the lookup above hits on
        # the next call; previously only the parsed path was used as the key,
        # so every call with the same url built a new Table.
        self._tabels[uri] = table
        # keep the parsed-path key as well, for code that looks tables up by path
        self._tabels[path] = table
        return table

    # todo: map store:// uri's to Table objects
    raise ValueError(f"table {uri} not found in cache")
def get_table_object(self):
    """Create a storey ``Table`` for this target's path, backed by a ``V3ioDriver``.

    The endpoint and table path are parsed from ``self._target_path``.
    """
    from storey import Table, V3ioDriver

    # TODO use options/cred
    webapi_endpoint, table_path = parse_v3io_path(self._target_path)
    driver = V3ioDriver(webapi=webapi_endpoint)
    return Table(table_path, driver)