def gen_data_instance(self, table, namespace):
    """Read ``table``/``namespace`` into data instances with the configured reader.

    The reader class is chosen from ``self.workflow_param.dataio_param``:
    an ``input_format`` of ``"dense"`` selects ``DenseFeatureReader``; any
    other value selects ``SparseFeatureReader``.

    Returns:
        Whatever ``reader.read_data(table, namespace)`` returns.
    """
    dataio_param = self.workflow_param.dataio_param
    reader_cls = (
        DenseFeatureReader
        if dataio_param.input_format == "dense"
        else SparseFeatureReader
    )
    return reader_cls(dataio_param).read_data(table, namespace)
def test_sparse_output_format(self):
    """Check that ``output_format = "sparse"`` produces sparse features.

    Reads ``self.table`` through a ``DenseFeatureReader`` and asserts that
    the features of row ``'a'`` are of a class named ``SparseVector``, that
    their ``sparse_vec`` has length 4, and that their ``shape`` equals 6.
    """
    dataio_param = DataIOParam()
    dataio_param.output_format = "sparse"

    reader = DenseFeatureReader(dataio_param)
    result = dict(reader.read_data(self.table, self.namespace).collect())
    features = result['a'].features

    # assertEqual (instead of assertTrue(x == y)) reports both the actual
    # and the expected value when the check fails.
    self.assertEqual(type(features).__name__, "SparseVector")
    self.assertEqual(len(features.sparse_vec), 4)
    self.assertEqual(features.shape, 6)
def test_with_label(self):
    """Check label extraction when ``with_label`` is enabled.

    Configures ``label_idx = 2`` and asserts that row ``'a'`` comes back
    with a label of ``-1`` and a feature array whose first dimension is 5.
    """
    dataio_param = DataIOParam()
    dataio_param.with_label = True
    dataio_param.label_idx = 2

    reader = DenseFeatureReader(dataio_param)
    result = dict(reader.read_data(self.table, self.namespace).collect())
    instance = result['a']

    # assertEqual (instead of assertTrue(x == y)) reports both the actual
    # and the expected value when the check fails.
    self.assertEqual(instance.label, -1)
    self.assertEqual(instance.features.shape[0], 5)
def gen_data_instance(self, table, namespace, mode="fit"):
    """Read ``table``/``namespace`` into data instances, handling the reader model.

    The reader class is chosen from ``dataio_param.input_format``:
    ``"dense"`` -> ``DenseFeatureReader``, ``"sparse"`` ->
    ``SparseFeatureReader``, anything else -> ``SparseTagReader``.

    Args:
        table: Passed through to ``reader.read_data``.
        namespace: Passed through to ``reader.read_data``.
        mode: ``"transform"`` loads the reader model from the workflow's
            model table/namespace before reading; ``"fit"`` saves the
            reader model after reading and appends each returned
            (meta, param) pair to ``self.pipeline.node_meta`` and
            ``self.pipeline.node_param``. Any other value only reads.

    Returns:
        Whatever ``reader.read_data(table, namespace, mode=mode)`` returns.
    """
    workflow_param = self.workflow_param
    dataio_param = workflow_param.dataio_param

    input_format = dataio_param.input_format
    if input_format == "dense":
        reader_cls = DenseFeatureReader
    elif input_format == "sparse":
        reader_cls = SparseFeatureReader
    else:
        reader_cls = SparseTagReader
    reader = reader_cls(dataio_param)

    LOGGER.debug("mode is {}".format(mode))

    if mode == "transform":
        reader.load_model(workflow_param.model_table,
                          workflow_param.model_namespace)

    data_instance = reader.read_data(table, namespace, mode=mode)

    if mode == "fit":
        saved_buffers = reader.save_model(workflow_param.model_table,
                                          workflow_param.model_namespace)
        # Register each saved (meta, param) pair on the pipeline.
        for meta_entry, param_entry in saved_buffers:
            self.pipeline.node_meta.append(meta_entry)
            self.pipeline.node_param.append(param_entry)

    return data_instance
def test_missing_value_fill(self):
    """Check that missing values are filled with the designated default.

    Enables ``missing_fill`` with the ``"designated"`` method and a
    ``default_value`` of 100, reads ``self.table2`` in sparse output
    format, and asserts that ``get_data(i)`` on row ``'a'`` returns 100
    for indices 1 through 4.
    """
    dataio_param = DataIOParam()
    dataio_param.missing_fill = True
    dataio_param.with_label = False
    dataio_param.output_format = "sparse"
    dataio_param.default_value = 100
    dataio_param.missing_fill_method = "designated"
    dataio_param.data_type = 'int'

    reader = DenseFeatureReader(dataio_param)
    result = dict(reader.read_data(self.table2, self.namespace).collect())
    features = result['a'].features

    for i in range(1, 5):
        # assertEqual reports the actual value on failure; the message
        # identifies which index did not get the fill value.
        self.assertEqual(features.get_data(i), 100,
                         "feature index {}".format(i))
def test_dense_output_format(self):
    """Check the reader output under a default ``DataIOParam``.

    Asserts that rows ``'a'`` and ``'b'`` are objects of a class named
    ``Instance`` and that row ``'a'`` has a weight within
    ``consts.FLOAT_ZERO`` of 1.0, no label, and a ``float64`` ``ndarray``
    of 6 features.
    """
    dataio_param = DataIOParam()
    reader = DenseFeatureReader(dataio_param)
    result = dict(reader.read_data(self.table, self.namespace).collect())

    # Classes are compared by name, exactly as the original checks did.
    self.assertEqual(type(result['a']).__name__, "Instance")
    self.assertEqual(type(result['b']).__name__, "Instance")

    instance = result['a']
    features = instance.features

    # Strict "<" tolerance check, reported with both operands on failure.
    self.assertLess(np.abs(instance.weight - 1.0), consts.FLOAT_ZERO)
    self.assertEqual(type(features).__name__, "ndarray")
    # Identity check against None instead of the original `label == None`.
    self.assertIsNone(instance.label)
    self.assertEqual(features.shape[0], 6)
    self.assertEqual(features.dtype, "float64")