def test_dense_output_format(self):
     """Run DataIO on dense input and check the instances it produces.

     Only ``input_format`` is configured; ``output_format`` is not given,
     so this presumably exercises the component's default output format
     (TODO confirm against DataIOParam). The test checks that records 'a'
     and 'b' are ``Instance`` objects and that record 'a' has a weight of
     1.0 (within ``consts.FLOAT_ZERO``), no label, and a 6-element float64
     ``ndarray`` of features.
     """
     reader = DataIO()
     reader.set_tracker(self.tracker)
     component_params = {
         "DataIOParam": {
             "input_format": "dense"
         },
         "role": {
             "guest": [9999],
             "host": [10000],
             "arbiter": [10000]
         },
         "local": {
             "role": "guest",
             "party_id": 9999
         }
     }
     reader.run(component_params, self.args1)
     result = dict(reader.save_data().collect())

     # Types are compared by class name, as the classes themselves are not
     # referenced directly in this test.
     self.assertEqual(type(result['a']).__name__, "Instance")
     self.assertEqual(type(result['b']).__name__, "Instance")

     vala = result['a']
     features = vala.features
     self.assertLess(np.abs(vala.weight - 1.0), consts.FLOAT_ZERO)
     self.assertEqual(type(features).__name__, "ndarray")
     # Identity check: no label was requested, so none should be attached.
     self.assertIsNone(vala.label)
     self.assertEqual(features.shape[0], 6)
     self.assertEqual(features.dtype, "float64")
 def test_sparse_output_format(self):
     """Feed dense input through DataIO with sparse output requested.

     Checks that the features of record 'a' are a ``SparseVector`` holding
     4 stored entries and reporting a shape of 6.
     """
     dataio = DataIO()
     dataio.set_tracker(self.tracker)

     dataio_param = {
         "output_format": "sparse",
         "input_format": "dense"
     }
     role_conf = {
         "guest": [9999],
         "host": [10000],
         "arbiter": [10000]
     }
     local_conf = {
         "role": "guest",
         "party_id": 9999
     }
     params = {
         "DataIOParam": dataio_param,
         "role": role_conf,
         "local": local_conf
     }

     dataio.run(params, self.args1)
     records = dict(dataio.save_data().collect())

     sparse_feat = records['a'].features
     feat_type = type(sparse_feat).__name__
     self.assertTrue(feat_type == "SparseVector")
     stored = len(sparse_feat.sparse_vec)
     self.assertTrue(stored == 4)
     self.assertTrue(sparse_feat.shape == 6)
 def test_with_label(self):
     """Run DataIO with ``with_label`` enabled and ``label_name`` 'x3'.

     Checks that record 'a' ends up with label -1 and that its feature
     array has 5 entries.
     """
     dataio = DataIO()
     dataio.set_tracker(self.tracker)

     dataio_param = {
         "output_format": "dense",
         "input_format": "dense",
         "with_label": True,
         "label_name": "x3"
     }
     role_conf = {
         "guest": [9999],
         "host": [10000],
         "arbiter": [10000]
     }
     local_conf = {
         "role": "guest",
         "party_id": 9999
     }
     params = {
         "DataIOParam": dataio_param,
         "role": role_conf,
         "local": local_conf
     }

     dataio.run(params, self.args1)
     records = dict(dataio.save_data().collect())

     inst_a = records['a']
     got_label = inst_a.label
     feat = inst_a.features
     self.assertTrue(got_label == -1)
     self.assertTrue(feat.shape[0] == 5)
    def test_sparse_output_format(self):
        """Pass sparse input through DataIO with sparse output requested.

        For the first 100 collected records, checks that the feature shape
        equals ``self.max_feature + 1``, that the label equals ``i % 2``,
        and that the stored sparse values equal the ``fid:val`` pairs parsed
        from the matching row of ``self.data``.
        """
        reader = DataIO()
        reader.set_tracker(self.tracker)
        component_params = {
            "DataIOParam": {
                "output_format": "sparse",
                "input_format": "sparse",
                "delimitor": ' ',
                # NOTE(review): "defualt_value" looks like a misspelling of
                # "default_value". It is left as-is because correcting it
                # could change what the component does with it -- confirm.
                "defualt_value": 2**30
            },
            "role": {
                "guest": [9999],
                "host": [10000],
                "arbiter": [10000]
            },
            "local": {
                "role": "guest",
                "party_id": 9999
            }
        }
        reader.run(component_params, self.args)
        # Materialise the collected (key, instance) pairs so that they can be
        # indexed by position in the loop below.
        insts = list(reader.save_data().collect())
        for i in range(100):
            instance = insts[i][1]
            self.assertTrue(
                instance.features.get_shape() == self.max_feature + 1)
            self.assertTrue(instance.label == i % 2)

            # The first space-separated token is skipped; the rest are
            # "fid:val" pairs (presumably token 0 is the label -- confirm).
            original_feat = {}
            for token in self.data[i][1].split(" ")[1:]:
                fid, val = token.split(":", -1)
                original_feat[int(fid)] = float(val)

            self.assertTrue(original_feat == instance.features.sparse_vec)
Esempio n. 5
0
    def test_sparse_output_format(self):
        """Pass sparse input through DataIO with sparse output requested.

        For each of the first 100 collected records, checks the feature
        container type name, its shape, the label (``i % 2``), and every
        ``fid:val`` pair of the matching ``self.data`` row within
        ``consts.FLOAT_ZERO``.
        """
        component = DataIO()
        component.set_tracker(TrackerMock())

        dataio_param = {
            "output_format": "sparse",
            "input_format": "sparse",
            "delimitor": ' '
        }
        role_conf = {
            "guest": [9999],
            "host": [10000],
            "arbiter": [10000]
        }
        local_conf = {
            "role": "guest",
            "party_id": 9999
        }
        params = {
            "DataIOParam": dataio_param,
            "role": role_conf,
            "local": local_conf
        }

        component.run(params, self.args)
        collected = list(component.save_data().collect())

        expected_shape_of = lambda: self.max_feature + 1
        for idx in range(100):
            sparse_feat = collected[idx][1].features
            self.assertTrue(type(sparse_feat).__name__ == "SparseVector")
            self.assertTrue(sparse_feat.get_shape() == expected_shape_of())
            self.assertTrue(collected[idx][1].label == idx % 2)

            tokens = self.data[idx][1].split(" ")
            # Token 0 is skipped; the remaining tokens are "fid:val" pairs.
            for token in tokens[1:]:
                fid, val = token.split(":", -1)
                gap = np.fabs(sparse_feat.get_data(int(fid)) - float(val))
                self.assertTrue(gap < consts.FLOAT_ZERO)
 def test_missing_value_fill(self):
     """Run DataIO with designated missing-value filling (default 100).

     Checks that positions 1 through 4 of record 'a''s sparse features
     all read back as 100.
     """
     dataio = DataIO()
     dataio.set_tracker(self.tracker)

     dataio_param = {
         "output_format": "sparse",
         "input_format": "dense",
         "default_value": 100,
         "with_label": False,
         "missing_fill": True,
         "missing_fill_method": "designated",
         "data_type": "int"
     }
     role_conf = {
         "guest": [9999],
         "host": [10000],
         "arbiter": [10000]
     }
     local_conf = {
         "role": "guest",
         "party_id": 9999
     }
     params = {
         "DataIOParam": dataio_param,
         "role": role_conf,
         "local": local_conf
     }

     dataio.run(params, self.args2)
     records = dict(dataio.save_data().collect())

     sparse_feat = records['a'].features
     for position in (1, 2, 3, 4):
         filled = sparse_feat.get_data(position)
         self.assertTrue(filled == 100)
Esempio n. 7
0
    def test_dense_output_format(self):
        """Pass sparse input through DataIO with dense output requested.

        For each of the first 100 collected records, checks that the
        features are an ``ndarray`` of length ``self.max_feature + 1``, that
        the label equals ``i % 2``, and that every feature value is within
        ``consts.FLOAT_ZERO`` of the dense vector rebuilt from the matching
        row of ``self.data``.
        """
        dataio = DataIO()
        dataio.set_tracker(TrackerMock())
        component_params = {
            "output_format": "dense",
            "input_format": "sparse",
            "delimitor": ' '
        }
        cpn_input = get_cpn_input(self.dataset, component_params)
        dataio.run(cpn_input)
        insts = list(dataio.save_data().collect())
        for i in range(100):
            features = insts[i][1].features
            self.assertTrue(type(features).__name__ == "ndarray")
            self.assertTrue(features.shape[0] == self.max_feature + 1)
            self.assertTrue(insts[i][1].label == i % 2)

            # Rebuild the expected dense vector: positions not named in the
            # row stay 0. Token 0 of the row is skipped; the remaining
            # tokens are "fid:val" pairs.
            ori_feat = np.zeros(self.max_feature + 1, dtype="float64")
            for token in self.data[i][1].split(" ")[1:]:
                fid, val = token.split(":", -1)
                ori_feat[int(fid)] = float(val)

            # Apply the tolerance element-wise, then require every element
            # to satisfy it.
            self.assertTrue(
                (np.abs(ori_feat - features) < consts.FLOAT_ZERO).all())
Esempio n. 8
0
    def test_tag_with_value_dense_output_format(self):
        """Run DataIO on ``tag:value`` input with dense output requested.

        The expected column order is rebuilt by sorting every tag found in
        ``self.data_with_value``. Each collected feature vector is then
        compared, element-wise and within ``consts.FLOAT_ZERO``, against the
        dense vector built from the row at the same position. This assumes
        ``collect()`` yields records in the order of ``self.data_with_value``
        (TODO confirm).
        """
        dataio = DataIO()
        dataio.set_tracker(TrackerMock())
        component_params = {
            "DataIOParam": {
                "output_format": "dense",
                "input_format": "tag",
                "delimitor": ' ',
                "data_type": "float",
                "with_label": False,
                "tag_with_value": True
            },
            "role": {
                "guest": [9999],
                "host": [10000],
                "arbiter": [10000]
            },
            "local": {
                "role": "guest",
                "party_id": 9999
            }
        }
        dataio.run(component_params, self.args2)
        tag_insts = dataio.save_data()
        features = [inst.features for key, inst in tag_insts.collect()]

        # Gather the distinct tag names (the part before ':') over all rows.
        tags = set()
        for row in self.data_with_value:
            tags.update(
                item.split(":")[0] for item in row[1].split(" ", -1))

        tags = sorted(tags)
        tag_dict = {tag: idx for idx, tag in enumerate(tags)}

        for i, row in enumerate(self.data_with_value):
            # Tags absent from this row keep the value 0.
            ori_feature = np.zeros(len(tags), dtype='float64')
            for tag_with_value in row[1].split(" ", -1):
                parts = tag_with_value.split(":", -1)
                ori_feature[tag_dict[parts[0]]] = float(parts[1])

            # Compare against this record's own vector, applying the
            # tolerance to every element.
            self.assertTrue(
                (np.abs(ori_feature - features[i]) < consts.FLOAT_ZERO).all())
Esempio n. 9
0
 def test_sparse_output_format(self):
     """Feed dense input through DataIO with sparse output requested.

     Checks that the features of record 'a' are a ``SparseVector`` holding
     4 stored entries and reporting a shape of 6.
     """
     dataio = DataIO()
     dataio.set_tracker(self.tracker)

     dataio_param = {
         "output_format": "sparse",
         "input_format": "dense"
     }
     params = {"DataIOParam": dataio_param}

     dataio.run(params, self.args1)
     records = dict(dataio.save_data().collect())

     sparse_feat = records['a'].features
     feat_type = type(sparse_feat).__name__
     self.assertTrue(feat_type == "SparseVector")
     stored = len(sparse_feat.sparse_vec)
     self.assertTrue(stored == 4)
     self.assertTrue(sparse_feat.shape == 6)
Esempio n. 10
0
 def test_with_label(self):
     """Run DataIO with ``with_label`` enabled and ``label_idx`` 2.

     Checks that record 'a' ends up with label -1 and that its feature
     array has 5 entries.
     """
     dataio = DataIO()
     dataio.set_tracker(self.tracker)

     dataio_param = {
         "output_format": "dense",
         "input_format": "dense",
         "with_label": True,
         "label_idx": 2
     }
     params = {"DataIOParam": dataio_param}

     dataio.run(params, self.args1)
     records = dict(dataio.save_data().collect())

     inst_a = records['a']
     got_label = inst_a.label
     feat = inst_a.features
     self.assertTrue(got_label == -1)
     self.assertTrue(feat.shape[0] == 5)
Esempio n. 11
0
    def test_tag_sparse_output_format(self):
        """Run DataIO on valueless tag input with sparse output requested.

        The expected column index of each tag is its rank among all sorted
        tags in ``self.data``. Every record's ``sparse_vec`` must equal a
        mapping from those indices to 1 for the tags in the matching row.
        """
        component = DataIO()
        component.set_tracker(TrackerMock())

        dataio_param = {
            "output_format": "sparse",
            "input_format": "tag",
            "delimitor": ' ',
            "data_type": "int",
            "with_label": False,
            "tag_with_value": False
        }
        role_conf = {
            "guest": [9999],
            "host": [10000],
            "arbiter": [10000]
        }
        local_conf = {
            "role": "guest",
            "party_id": 9999
        }
        params = {
            "DataIOParam": dataio_param,
            "role": role_conf,
            "local": local_conf
        }

        component.run(params, self.args1)
        output_table = component.save_data()
        feature_list = [pair[1].features for pair in
                        ((k, v) for k, v in output_table.collect())]

        # Union of every tag seen in any row.
        all_tags = set()
        for record in self.data:
            all_tags.update(record[1].split(" ", -1))

        ordered_tags = sorted(all_tags)
        index_of = {tag: pos for pos, tag in enumerate(ordered_tags)}

        for pos, record in enumerate(self.data):
            expected = {index_of.get(tag): 1
                        for tag in record[1].split(" ", -1)}
            self.assertTrue(expected == feature_list[pos].sparse_vec)
Esempio n. 12
0
    def test_tag_with_value_sparse_output_format(self):
        """Run DataIO on ``tag:value`` input with sparse output requested.

        The expected column index of each tag is its rank among all sorted
        tag names in ``self.data_with_value``. For every ``tag:value`` token
        of every row, the value stored at that index in the matching
        record's features must be within ``consts.FLOAT_ZERO`` of the
        token's value.
        """
        dataio = DataIO()
        dataio.set_tracker(TrackerMock())
        component_params = {
            "output_format": "sparse",
            "input_format": "tag",
            "delimitor": ' ',
            "data_type": "float",
            "with_label": False,
            "tag_with_value": True,
            "tag_value_delimitor": ":"
        }
        cpn_input = get_cpn_input(self.dataset2, component_params)
        dataio.run(cpn_input)
        tag_insts = dataio.save_data()
        features = [inst.features for key, inst in tag_insts.collect()]

        # Gather the distinct tag names (the part before ':') over all rows.
        tags = set()
        for row in self.data_with_value:
            tags.update(
                item.split(":")[0] for item in row[1].split(" ", -1))

        tag_dict = {tag: idx for idx, tag in enumerate(sorted(tags))}

        for i, row in enumerate(self.data_with_value):
            for tag_with_value in row[1].split(" ", -1):
                # Split once and reuse both halves.
                parts = tag_with_value.split(":")
                idx = tag_dict.get(parts[0])
                val = float(parts[1])

                self.assertTrue(
                    np.abs(val -
                           features[i].get_data(idx)) < consts.FLOAT_ZERO)
Esempio n. 13
0
    def test_sparse_output_format(self):
        """Pass sparse input through DataIO with sparse output requested.

        The component input is built with ``get_cpn_input`` from
        ``self.dataset``. For each of the first 100 collected records,
        checks the feature container type name, its shape, the label
        (``i % 2``), and every ``fid:val`` pair of the matching
        ``self.data`` row within ``consts.FLOAT_ZERO``.
        """
        component = DataIO()
        component.set_tracker(TrackerMock())

        params = {
            "output_format": "sparse",
            "input_format": "sparse",
            "delimitor": ' '
        }
        component.run(get_cpn_input(self.dataset, params))
        collected = list(component.save_data().collect())

        for idx in range(100):
            sparse_feat = collected[idx][1].features
            self.assertTrue(type(sparse_feat).__name__ == "SparseVector")
            self.assertTrue(sparse_feat.get_shape() == self.max_feature + 1)
            self.assertTrue(collected[idx][1].label == idx % 2)

            tokens = self.data[idx][1].split(" ")
            # Token 0 is skipped; the remaining tokens are "fid:val" pairs.
            for token in tokens[1:]:
                fid, val = token.split(":", -1)
                gap = np.fabs(sparse_feat.get_data(int(fid)) - float(val))
                self.assertTrue(gap < consts.FLOAT_ZERO)