Esempio n. 1
0
 def gen_data_instance(self, table, namespace):
     """Read ``table``/``namespace`` into data instances with the configured reader.

     A ``DenseFeatureReader`` is used when the data-IO parameter's
     ``input_format`` equals ``"dense"``; any other value selects a
     ``SparseFeatureReader``.  The value returned is whatever the chosen
     reader's ``read_data`` returns.
     """
     io_param = self.workflow_param.dataio_param
     reader_cls = (DenseFeatureReader if io_param.input_format == "dense"
                   else SparseFeatureReader)
     return reader_cls(io_param).read_data(table, namespace)
Esempio n. 2
0
 def test_sparse_output_format(self):
     """Check the dense reader's output when ``output_format`` is ``"sparse"``.

     Reads the fixture table and inspects the features of the instance
     stored under key ``'a'``: their type name, the number of entries in
     ``sparse_vec`` and the reported ``shape``.
     """
     dataio_param = DataIOParam()
     dataio_param.output_format = "sparse"
     reader = DenseFeatureReader(dataio_param)
     data = reader.read_data(self.table, self.namespace).collect()
     result = dict(data)
     features = result['a'].features
     # assertEqual reports both operands on failure, unlike assertTrue(a == b).
     self.assertEqual(type(features).__name__, "SparseVector")
     self.assertEqual(len(features.sparse_vec), 4)
     self.assertEqual(features.shape, 6)
Esempio n. 3
0
 def test_with_label(self):
     """Check label extraction when ``with_label`` is on and ``label_idx`` is 2.

     Reads the fixture table and verifies, for the instance stored under
     key ``'a'``, that the label is ``-1`` and that ``features.shape[0]``
     is ``5``.
     """
     dataio_param = DataIOParam()
     dataio_param.with_label = True
     dataio_param.label_idx = 2
     reader = DenseFeatureReader(dataio_param)
     data = reader.read_data(self.table, self.namespace).collect()
     result = dict(data)
     instance = result['a']
     # assertEqual reports both operands on failure, unlike assertTrue(a == b).
     self.assertEqual(instance.label, -1)
     self.assertEqual(instance.features.shape[0], 5)
Esempio n. 4
0
    def gen_data_instance(self, table, namespace, mode="fit"):
        """Build data instances from ``table``/``namespace`` using the configured reader.

        The reader class follows the data-IO parameter's ``input_format``:
        ``"dense"`` -> ``DenseFeatureReader``, ``"sparse"`` ->
        ``SparseFeatureReader``, anything else -> ``SparseTagReader``.

        When ``mode`` is ``"transform"`` the reader first loads its model from
        the workflow's model table/namespace.  When ``mode`` is ``"fit"`` the
        reader's model is saved after reading, and each (meta, param) pair
        yielded by ``save_model`` is appended to the pipeline's ``node_meta``
        and ``node_param`` lists.

        Returns whatever the reader's ``read_data`` returns.
        """
        workflow = self.workflow_param
        io_param = workflow.dataio_param
        input_format = io_param.input_format

        if input_format == "dense":
            reader_cls = DenseFeatureReader
        elif input_format == "sparse":
            reader_cls = SparseFeatureReader
        else:
            reader_cls = SparseTagReader
        reader = reader_cls(io_param)

        LOGGER.debug("mode is {}".format(mode))

        if mode == "transform":
            reader.load_model(workflow.model_table, workflow.model_namespace)

        instances = reader.read_data(table, namespace, mode=mode)
        if mode != "fit":
            return instances

        saved = reader.save_model(workflow.model_table,
                                  workflow.model_namespace)
        for meta_buffer, param_buffer in saved:
            self.pipeline.node_meta.append(meta_buffer)
            self.pipeline.node_param.append(param_buffer)

        return instances
Esempio n. 5
0
 def test_missing_value_fill(self):
     """Check "designated" missing-value filling with a default value of 100.

     Configures the reader with ``missing_fill`` enabled, no label, sparse
     output and ``data_type = 'int'``, reads the ``table2`` fixture and
     verifies that ``get_data(i)`` is ``100`` for indices 1 through 4 of
     the instance stored under key ``'a'``.
     """
     dataio_param = DataIOParam()
     dataio_param.missing_fill = True
     dataio_param.with_label = False
     dataio_param.output_format = "sparse"
     dataio_param.default_value = 100
     dataio_param.missing_fill_method = "designated"
     dataio_param.data_type = 'int'
     reader = DenseFeatureReader(dataio_param)
     data = reader.read_data(self.table2, self.namespace).collect()
     result = dict(data)
     features = result['a'].features
     for i in range(1, 5):
         # assertEqual shows the actual value; the message names the index.
         self.assertEqual(features.get_data(i), 100,
                          "feature index {}".format(i))
Esempio n. 6
0
 def test_dense_output_format(self):
     """Check the dense reader's output with default ``DataIOParam`` settings.

     Verifies that entries ``'a'`` and ``'b'`` have type name ``Instance``
     and that, for ``'a'``, the weight is within ``consts.FLOAT_ZERO`` of
     1.0, the label is ``None``, and the features are a float64 ``ndarray``
     whose first dimension is 6.
     """
     dataio_param = DataIOParam()
     reader = DenseFeatureReader(dataio_param)
     data = reader.read_data(self.table, self.namespace).collect()
     result = dict(data)
     # Specific assert methods report the offending values on failure.
     self.assertEqual(type(result['a']).__name__, "Instance")
     self.assertEqual(type(result['b']).__name__, "Instance")
     instance = result['a']
     features = instance.features
     self.assertLess(np.abs(instance.weight - 1.0), consts.FLOAT_ZERO)
     self.assertEqual(type(features).__name__, "ndarray")
     # Identity check: None is a singleton and should not be compared with ==.
     self.assertIsNone(instance.label)
     self.assertEqual(features.shape[0], 6)
     self.assertEqual(features.dtype, "float64")