コード例 #1
0
ファイル: data_io_test.py プロジェクト: zhilangtaosha/FATE
 def test_dense_output_format(self):
     """Sparse-format input with dense output must yield ndarray features.

     Runs DataIO with ``input_format="sparse"`` / ``output_format="dense"``
     and, for the first 100 collected instances, checks the feature type,
     the feature length (``self.max_feature + 1``) and the label
     (``i % 2``). The expected dense vector is rebuilt from the raw
     ``fid:value`` tokens in ``self.data`` and compared element-wise
     within ``consts.FLOAT_ZERO``.
     """
     reader = DataIO()
     component_params = {
         "DataIOParam": {
             "output_format": "dense",
             "input_format": "sparse",
             "delimitor": ' '
         },
         "role": {"guest": [9999], "host": [10000], "arbiter": [10000]},
         "local": {"role": "guest", "party_id": 9999}
     }
     reader.run(component_params, self.args)
     insts = list(reader.save_data().collect())
     for i in range(100):
         features = insts[i][1].features
         self.assertTrue(type(features).__name__ == "ndarray")
         self.assertTrue(features.shape[0] == self.max_feature + 1)
         self.assertTrue(insts[i][1].label == i % 2)

         row = self.data[i][1].split(" ")
         ori_feat = [0] * (self.max_feature + 1)
         # The first token is skipped (presumably the label - verify
         # against the fixture); the rest are "fid:value" pairs.
         for j in range(1, len(row)):
             fid, val = row[j].split(":", -1)
             ori_feat[int(fid)] = float(val)

         ori_feat = np.asarray(ori_feat, dtype="float64")

         # Apply the tolerance per element. Comparing the boolean result of
         # ``.any()`` with the tolerance would demand exact equality instead.
         self.assertTrue(
             np.all(np.abs(ori_feat - features) < consts.FLOAT_ZERO))
コード例 #2
0
    def test_tag_sparse_output_format(self):
        """Tag-format input without values must yield sparse 0/1 features.

        The expected column index of each tag is its position in the sorted
        set of all tags found in ``self.data``; every tag present in a row
        maps to the value 1 in that row's ``sparse_vec``.
        """
        params = {
            "output_format": "sparse",
            "input_format": "tag",
            "delimitor": ' ',
            "data_type": "int",
            "with_label": False,
            "tag_with_value": False
        }
        dataio = DataIO()
        dataio.run(get_cpn_input(self.dataset1, params))
        collected = dataio.save_data().collect()
        features = [inst.features for _, inst in collected]

        # Every distinct tag across all rows, in sorted order, defines the
        # expected column layout.
        all_tags = {tag for row in self.data for tag in row[1].split(" ", -1)}
        tag_index = {tag: pos for pos, tag in enumerate(sorted(all_tags))}

        for pos, row in enumerate(self.data):
            expected = {
                tag_index.get(tag): 1 for tag in row[1].split(" ", -1)
            }
            self.assertTrue(expected == features[pos].sparse_vec)
コード例 #3
0
    def test_tag_dense_output_format(self):
        """Tag-format input with dense output must yield 0/1 indicator vectors.

        The expected column index of each tag is its position in the sorted
        set of all tags found in ``self.data``. Each row's expected vector
        has 1 at the columns of the tags it contains and 0 elsewhere, and is
        compared element-wise with the features of the same row.
        """
        dataio = DataIO()
        component_params = {
            "output_format": "dense",
            "input_format": "tag",
            "delimitor": ' ',
            "data_type": "int",
            "with_label": False
        }
        cpn_input = get_cpn_input(self.dataset1, component_params)
        dataio.run(cpn_input)
        tag_insts = dataio.save_data()
        features = [inst.features for key, inst in tag_insts.collect()]

        tags = set()
        for row in self.data:
            tags |= set(row[1].split(" ", -1))

        tags = sorted(tags)
        tag_dict = dict(zip(tags, range(len(tags))))

        for i, row in enumerate(self.data):
            ori_feature = [0] * len(tags)

            for tag in row[1].split(" ", -1):
                ori_feature[tag_dict.get(tag)] = 1

            ori_feature = np.asarray(ori_feature, dtype='int')
            # Compare against this row's features only (assumes collect()
            # returns rows in self.data order, as the sparse tag test does)
            # and apply the tolerance per element so any mismatch fails.
            self.assertTrue(
                np.all(np.abs(ori_feature - features[i]) < consts.FLOAT_ZERO))
コード例 #4
0
ファイル: data_io_test.py プロジェクト: zhilangtaosha/FATE
    def test_tag_dense_output_format(self):
        """Tag-format input with dense output must yield 0/1 indicator vectors.

        Runs DataIO on ``self.args1`` and rebuilds, for every row of
        ``self.data``, the expected indicator vector: 1 at the sorted-tag
        position of each tag in the row, 0 elsewhere. The vector is compared
        element-wise with the features of the same row.
        """
        reader = DataIO()
        component_params = {
            "DataIOParam": {
                "output_format": "dense",
                "input_format": "tag",
                "delimitor": ' ',
                "data_type": "int",
                "with_label": False
            },
            "role": {"guest": [9999], "host": [10000], "arbiter": [10000]},
            "local": {"role": "guest", "party_id": 9999}
        }
        reader.run(component_params, self.args1)
        tag_insts = reader.save_data()
        features = [inst.features for key, inst in tag_insts.collect()]

        tags = set()
        for row in self.data:
            tags |= set(row[1].split(" ", -1))

        tags = sorted(tags)
        tag_dict = dict(zip(tags, range(len(tags))))

        for i, row in enumerate(self.data):
            ori_feature = [0] * len(tags)

            for tag in row[1].split(" ", -1):
                ori_feature[tag_dict.get(tag)] = 1

            ori_feature = np.asarray(ori_feature, dtype='int')
            # Compare against this row's features only (assumes collect()
            # returns rows in self.data order, as the sparse tag test does)
            # and apply the tolerance per element so any mismatch fails.
            self.assertTrue(
                np.all(np.abs(ori_feature - features[i]) < consts.FLOAT_ZERO))
コード例 #5
0
ファイル: data_io_test.py プロジェクト: zhilangtaosha/FATE
    def test_tag_sparse_output_format(self):
        """Tag-format input without values must yield sparse 0/1 features.

        Runs DataIO on ``self.args1``; each row's ``sparse_vec`` must equal
        a dict mapping the sorted-tag position of every tag in the row to 1.
        """
        io_param = {
            "output_format": "sparse",
            "input_format": "tag",
            "delimitor": ' ',
            "data_type": "int",
            "with_label": False,
            "tag_with_value": False
        }
        component_params = {
            "DataIOParam": io_param,
            "role": {"guest": [9999], "host": [10000], "arbiter": [10000]},
            "local": {"role": "guest", "party_id": 9999}
        }
        reader = DataIO()
        reader.run(component_params, self.args1)
        collected = reader.save_data().collect()
        features = [inst.features for _, inst in collected]

        # Every distinct tag across all rows, in sorted order, defines the
        # expected column layout.
        all_tags = {tag for row in self.data for tag in row[1].split(" ", -1)}
        tag_index = {tag: pos for pos, tag in enumerate(sorted(all_tags))}

        for pos, row in enumerate(self.data):
            expected = {
                tag_index.get(tag): 1 for tag in row[1].split(" ", -1)
            }
            self.assertTrue(expected == features[pos].sparse_vec)
コード例 #6
0
    def test_sparse_output_format(self):
        """Sparse input with sparse output must yield matching SparseVectors.

        For the first 100 collected instances, checks the feature type, the
        vector shape (``self.max_feature + 1``) and the label (``i % 2``),
        then checks each ``fid:value`` token of the raw row against
        ``get_data(fid)`` within ``consts.FLOAT_ZERO``.
        """
        io_param = {
            "output_format": "sparse",
            "input_format": "sparse",
            "delimitor": ' '
        }
        component_params = {
            "DataIOParam": io_param,
            "role": {"guest": [9999], "host": [10000], "arbiter": [10000]},
            "local": {"role": "guest", "party_id": 9999}
        }
        reader = DataIO()
        reader.run(component_params, self.args)
        insts = list(reader.save_data().collect())

        for i in range(100):
            inst = insts[i][1]
            sparse_feat = inst.features
            self.assertTrue(type(sparse_feat).__name__ == "SparseVector")
            self.assertTrue(sparse_feat.get_shape() == self.max_feature + 1)
            self.assertTrue(inst.label == i % 2)

            # The first token of the row is skipped; the remaining tokens
            # are "fid:value" pairs.
            tokens = self.data[i][1].split(" ")
            for token in tokens[1:]:
                fid, val = token.split(":", -1)
                delta = np.fabs(sparse_feat.get_data(int(fid)) - float(val))
                self.assertTrue(delta < consts.FLOAT_ZERO)
コード例 #7
0
    def test_tag_with_value_sparse_output_format(self):
        """Tag:value input with sparse output must keep each tag's value.

        Runs DataIO on ``self.args2``. The expected column of a tag is its
        position in the sorted set of all tag names in
        ``self.data_with_value``; for every ``tag:value`` token in a row,
        ``get_data(column)`` must equal the value within
        ``consts.FLOAT_ZERO``.
        """
        reader = DataIO()
        component_params = {
            "DataIOParam": {
                "output_format": "sparse",
                "input_format": "tag",
                "delimitor": ' ',
                "data_type": "float",
                "with_label": False,
                "tag_with_value": True,
                "tag_value_delimitor": ":"
            }
        }
        reader.run(component_params, self.args2)
        tag_insts = reader.save_data()
        features = [inst.features for key, inst in tag_insts.collect()]

        # Collect every distinct tag name (the part before ":").
        tags = set()
        for row in self.data_with_value:
            tags |= {token.split(":")[0] for token in row[1].split(" ", -1)}

        tags = sorted(tags)
        tag_dict = dict(zip(tags, range(len(tags))))

        for i, row in enumerate(self.data_with_value):
            for tag_with_value in row[1].split(" ", -1):
                # Split once and reuse both halves.
                parts = tag_with_value.split(":")
                idx = tag_dict.get(parts[0])
                val = float(parts[1])

                self.assertTrue(
                    np.abs(val - features[i].get_data(idx)) < consts.FLOAT_ZERO)
コード例 #8
0
 def test_sparse_output_format(self):
     """Dense input with sparse output must yield a SparseVector for key 'a'.

     The vector for 'a' is expected to hold 4 stored entries and report a
     shape of 6.
     """
     params = {"output_format": "sparse", "input_format": "dense"}
     dataio = DataIO()
     dataio.run(get_cpn_input(self.dataset1, params))
     by_key = dict(dataio.save_data().collect())
     sparse_feat = by_key['a'].features
     self.assertTrue(type(sparse_feat).__name__ == "SparseVector")
     self.assertTrue(len(sparse_feat.sparse_vec) == 4)
     self.assertTrue(sparse_feat.shape == 6)
コード例 #9
0
 def test_with_label(self):
     """With ``label_name="x3"`` the label is split off from the features.

     For key 'a' the label is expected to be -1 and the feature vector to
     have 5 entries.
     """
     params = {
         "output_format": "dense",
         "input_format": "dense",
         "with_label": True,
         "label_name": "x3"
     }
     dataio = DataIO()
     dataio.run(get_cpn_input(self.dataset1, params))
     by_key = dict(dataio.save_data().collect())
     inst_a = by_key['a']
     label, features = inst_a.label, inst_a.features
     self.assertTrue(label == -1)
     self.assertTrue(features.shape[0] == 5)
コード例 #10
0
    def test_dense_output_format(self):
        """Sparse-format input with dense output must yield ndarray features.

        For the first 100 collected instances, checks the feature type, the
        feature length (``self.max_feature + 1``) and the label (``i % 2``).
        The expected dense vector is rebuilt from the raw ``fid:value``
        tokens in ``self.data`` and compared element-wise within
        ``consts.FLOAT_ZERO``.
        """
        dataio = DataIO()
        dataio.set_tracker(TrackerMock())
        component_params = {
            "output_format": "dense",
            "input_format": "sparse",
            "delimitor": ' '
        }
        cpn_input = get_cpn_input(self.dataset, component_params)
        dataio.run(cpn_input)
        insts = list(dataio.save_data().collect())
        for i in range(100):
            features = insts[i][1].features
            self.assertTrue(type(features).__name__ == "ndarray")
            self.assertTrue(features.shape[0] == self.max_feature + 1)
            self.assertTrue(insts[i][1].label == i % 2)

            row = self.data[i][1].split(" ")
            ori_feat = [0] * (self.max_feature + 1)
            # The first token is skipped (presumably the label - verify
            # against the fixture); the rest are "fid:value" pairs.
            for j in range(1, len(row)):
                fid, val = row[j].split(":", -1)
                ori_feat[int(fid)] = float(val)

            ori_feat = np.asarray(ori_feat, dtype="float64")

            # Apply the tolerance per element. Comparing the boolean result
            # of ``.any()`` with the tolerance would demand exact equality.
            self.assertTrue(
                np.all(np.abs(ori_feat - features) < consts.FLOAT_ZERO))
コード例 #11
0
 def test_sparse_output_format(self):
     """Dense input with sparse output must yield a SparseVector for key 'a'.

     Runs DataIO on ``self.args1``; the vector for 'a' is expected to hold
     4 stored entries and report a shape of 6.
     """
     io_param = {"output_format": "sparse", "input_format": "dense"}
     component_params = {
         "DataIOParam": io_param,
         "role": {"guest": [9999], "host": [10000], "arbiter": [10000]},
         "local": {"role": "guest", "party_id": 9999}
     }
     reader = DataIO()
     reader.set_tracker(self.tracker)
     reader.run(component_params, self.args1)
     by_key = dict(reader.save_data().collect())
     sparse_feat = by_key['a'].features
     self.assertTrue(type(sparse_feat).__name__ == "SparseVector")
     self.assertTrue(len(sparse_feat.sparse_vec) == 4)
     self.assertTrue(sparse_feat.shape == 6)
コード例 #12
0
 def test_dense_output_format(self):
     """Dense input with default settings must yield Instance objects.

     Runs DataIO on ``self.args1`` with only ``input_format="dense"`` set.
     Entries 'a' and 'b' must be ``Instance`` objects; for 'a' the weight
     must be 1.0 within ``consts.FLOAT_ZERO``, the label must be None, and
     the features must be a float64 ndarray with 6 entries.
     """
     reader = DataIO()
     reader.set_tracker(self.tracker)
     component_params = {
         "DataIOParam": {
             "input_format": "dense"
         },
         "role": {
             "guest": [9999],
             "host": [10000],
             "arbiter": [10000]
         },
         "local": {
             "role": "guest",
             "party_id": 9999
         }
     }
     reader.run(component_params, self.args1)
     data = reader.save_data().collect()
     result = dict(data)
     self.assertTrue(type(result['a']).__name__ == "Instance")
     self.assertTrue(type(result['b']).__name__ == "Instance")
     vala = result['a']
     features = vala.features
     weight = vala.weight
     label = vala.label
     self.assertTrue(np.abs(weight - 1.0) < consts.FLOAT_ZERO)
     self.assertTrue(type(features).__name__ == "ndarray")
     # Identity check: ``== None`` can be overridden by the operand's type.
     self.assertIsNone(label)
     self.assertTrue(features.shape[0] == 6)
     self.assertTrue(features.dtype == "float64")
コード例 #13
0
    def test_sparse_output_format(self):
        """Sparse input with sparse output must reproduce each row's entries.

        For the first 100 collected instances, checks the vector shape
        (``self.max_feature + 1``) and the label (``i % 2``), then compares
        ``sparse_vec`` with a dict rebuilt from the row's ``fid:value``
        tokens.
        """
        reader = DataIO()
        reader.set_tracker(self.tracker)
        component_params = {
            "DataIOParam": {
                "output_format": "sparse",
                "input_format": "sparse",
                "delimitor": ' ',
                # NOTE(review): key is spelled "defualt_value"; looks like a
                # typo of "default_value". Left unchanged because renaming it
                # would change what DataIO receives - confirm intent.
                "defualt_value": 2**30
            },
            "role": {
                "guest": [9999],
                "host": [10000],
                "arbiter": [10000]
            },
            "local": {
                "role": "guest",
                "party_id": 9999
            }
        }
        reader.run(component_params, self.args)
        # The loop below indexes the collected pairs, so materialize them
        # as a list under the name it reads.
        insts = list(reader.save_data().collect())
        for i in range(100):
            self.assertTrue(
                insts[i][1].features.get_shape() == self.max_feature + 1)
            self.assertTrue(insts[i][1].label == i % 2)
            original_feat = {}
            row = self.data[i][1].split(" ")
            # The first token is skipped; the rest are "fid:value" pairs.
            for j in range(1, len(row)):
                fid, val = row[j].split(":", -1)
                original_feat[int(fid)] = float(val)

            self.assertTrue(original_feat == insts[i][1].features.sparse_vec)
コード例 #14
0
 def test_missing_value_fill(self):
     """Designated missing-value fill must write ``default_value`` (100).

     Runs DataIO on ``self.args2`` and expects ``get_data(i)`` to equal 100
     for indices 1 through 4 of the features stored under key 'a'.
     """
     io_param = {
         "output_format": "sparse",
         "input_format": "dense",
         "default_value": 100,
         "with_label": False,
         "missing_fill": True,
         "missing_fill_method": "designated",
         "data_type": "int"
     }
     component_params = {
         "DataIOParam": io_param,
         "role": {"guest": [9999], "host": [10000], "arbiter": [10000]},
         "local": {"role": "guest", "party_id": 9999}
     }
     reader = DataIO()
     reader.set_tracker(self.tracker)
     reader.run(component_params, self.args2)
     by_key = dict(reader.save_data().collect())
     filled = by_key['a'].features
     for col in range(1, 5):
         self.assertTrue(filled.get_data(col) == 100)
コード例 #15
0
 def test_with_label(self):
     """With ``label_name="x3"`` the label is split off from the features.

     Runs DataIO on ``self.args1``; for key 'a' the label is expected to be
     -1 and the feature vector to have 5 entries.
     """
     io_param = {
         "output_format": "dense",
         "input_format": "dense",
         "with_label": True,
         "label_name": "x3"
     }
     component_params = {
         "DataIOParam": io_param,
         "role": {"guest": [9999], "host": [10000], "arbiter": [10000]},
         "local": {"role": "guest", "party_id": 9999}
     }
     reader = DataIO()
     reader.set_tracker(self.tracker)
     reader.run(component_params, self.args1)
     by_key = dict(reader.save_data().collect())
     inst_a = by_key['a']
     label, features = inst_a.label, inst_a.features
     self.assertTrue(label == -1)
     self.assertTrue(features.shape[0] == 5)
コード例 #16
0
 def test_missing_value_fill(self):
     """Designated missing-value fill must write ``default_value`` (100).

     Runs DataIO on ``self.dataset2`` and expects ``get_data(i)`` to equal
     100 for indices 1 through 4 of the features stored under key 'a'.
     """
     params = {
         "output_format": "sparse",
         "input_format": "dense",
         "default_value": 100,
         "with_label": False,
         "missing_fill": True,
         "missing_fill_method": "designated",
         "data_type": "int"
     }
     dataio = DataIO()
     dataio.run(get_cpn_input(self.dataset2, params))
     by_key = dict(dataio.save_data().collect())
     filled = by_key['a'].features
     for col in range(1, 5):
         self.assertTrue(filled.get_data(col) == 100)
コード例 #17
0
 def test_dense_output_format(self):
     """Dense input with default settings must yield Instance objects.

     Entries 'a' and 'b' must be ``Instance`` objects; for 'a' the label
     must be None and the features a float64 ndarray with 6 entries.
     """
     params = {"input_format": "dense"}
     dataio = DataIO()
     dataio.run(get_cpn_input(self.dataset1, params))
     by_key = dict(dataio.save_data().collect())
     for name in ('a', 'b'):
         self.assertTrue(type(by_key[name]).__name__ == "Instance")

     inst_a = by_key['a']
     dense_feat = inst_a.features
     label = inst_a.label
     # The weight check (|weight - 1.0| < consts.FLOAT_ZERO) is disabled in
     # this version of the test.
     self.assertTrue(type(dense_feat).__name__ == "ndarray")
     self.assertTrue(label is None)
     self.assertTrue(dense_feat.shape[0] == 6)
     self.assertTrue(dense_feat.dtype == "float64")
コード例 #18
0
ファイル: data_io_test.py プロジェクト: pangzx1/FL1.0
    def test_tag_with_value_dense_output_format(self):
        """Tag:value input with dense output must place values by tag column.

        Runs DataIO on ``self.args2``. The expected column of a tag is its
        position in the sorted set of all tag names in
        ``self.data_with_value``. Each row's expected vector holds the
        row's values at those columns and 0 elsewhere, and is compared
        element-wise with the features of the same row.
        """
        reader = DataIO()
        component_params = {
            "DataIOParam": {
                "output_format": "dense",
                "input_format": "tag",
                "delimitor": ' ',
                "data_type": "float",
                "with_label": False,
                "tag_with_value": True
            }
        }
        reader.run(component_params, self.args2)
        tag_insts = reader.save_data()
        features = [inst.features for key, inst in tag_insts.collect()]

        # Collect every distinct tag name (the part before ":").
        tags = set()
        for row in self.data_with_value:
            tags |= {token.split(":")[0] for token in row[1].split(" ", -1)}

        tags = sorted(tags)
        tag_dict = dict(zip(tags, range(len(tags))))

        for i, row in enumerate(self.data_with_value):
            ori_feature = [0] * len(tags)

            for tag_with_value in row[1].split(" ", -1):
                parts = tag_with_value.split(":", -1)
                ori_feature[tag_dict.get(parts[0])] = float(parts[1])

            ori_feature = np.asarray(ori_feature, dtype='float64')
            # Compare against this row's features only (assumes collect()
            # returns rows in self.data_with_value order, as the sparse
            # tag-with-value test does) and apply the tolerance per element.
            self.assertTrue(
                np.all(np.abs(ori_feature - features[i]) < consts.FLOAT_ZERO))
コード例 #19
0
 def test_sparse_output_format(self):
     """Dense input with sparse output must yield a SparseVector for key 'a'.

     Runs DataIO on ``self.args1`` with only the ``DataIOParam`` section;
     the vector for 'a' is expected to hold 4 stored entries and report a
     shape of 6.
     """
     component_params = {
         "DataIOParam": {
             "output_format": "sparse",
             "input_format": "dense"
         }
     }
     reader = DataIO()
     reader.set_tracker(self.tracker)
     reader.run(component_params, self.args1)
     by_key = dict(reader.save_data().collect())
     sparse_feat = by_key['a'].features
     self.assertTrue(type(sparse_feat).__name__ == "SparseVector")
     self.assertTrue(len(sparse_feat.sparse_vec) == 4)
     self.assertTrue(sparse_feat.shape == 6)
コード例 #20
0
    def test_tag_with_value_sparse_output_format(self):
        """Tag:value input with sparse output must keep each tag's value.

        Runs DataIO on ``self.args2``. The expected column of a tag is its
        position in the sorted set of all tag names in
        ``self.data_with_value``; for every ``tag:value`` token in a row,
        ``get_data(column)`` must equal the value within
        ``consts.FLOAT_ZERO``.
        """
        dataio = DataIO()
        dataio.set_tracker(TrackerMock())
        component_params = {
            "DataIOParam": {
                "output_format": "sparse",
                "input_format": "tag",
                "delimitor": ' ',
                "data_type": "float",
                "with_label": False,
                "tag_with_value": True,
                "tag_value_delimitor": ":"
            },
            "role": {
                "guest": [9999],
                "host": [10000],
                "arbiter": [10000]
            },
            "local": {
                "role": "guest",
                "party_id": 9999
            }
        }
        dataio.run(component_params, self.args2)
        tag_insts = dataio.save_data()
        features = [inst.features for key, inst in tag_insts.collect()]

        # Collect every distinct tag name (the part before ":").
        tags = set()
        for row in self.data_with_value:
            tags |= {token.split(":")[0] for token in row[1].split(" ", -1)}

        tags = sorted(tags)
        tag_dict = dict(zip(tags, range(len(tags))))

        for i, row in enumerate(self.data_with_value):
            for tag_with_value in row[1].split(" ", -1):
                # Split once and reuse both halves.
                parts = tag_with_value.split(":")
                idx = tag_dict.get(parts[0])
                val = float(parts[1])

                self.assertTrue(
                    np.abs(val -
                           features[i].get_data(idx)) < consts.FLOAT_ZERO)
コード例 #21
0
ファイル: data_io_test.py プロジェクト: pangzx1/FL1.0
 def test_with_label(self):
     """With ``label_idx=2`` the label is split off from the features.

     Runs DataIO on ``self.args1``; for key 'a' the label is expected to be
     -1 and the feature vector to have 5 entries.
     """
     io_param = {
         "output_format": "dense",
         "input_format": "dense",
         "with_label": True,
         "label_idx": 2
     }
     reader = DataIO()
     reader.set_tracker(self.tracker)
     reader.run({"DataIOParam": io_param}, self.args1)
     by_key = dict(reader.save_data().collect())
     inst_a = by_key['a']
     label, features = inst_a.label, inst_a.features
     self.assertTrue(label == -1)
     self.assertTrue(features.shape[0] == 5)
コード例 #22
0
    def test_sparse_output_format(self):
        """Sparse input with sparse output must yield matching SparseVectors.

        For the first 100 collected instances, checks the feature type, the
        vector shape (``self.max_feature + 1``) and the label (``i % 2``),
        then checks each ``fid:value`` token of the raw row against
        ``get_data(fid)`` within ``consts.FLOAT_ZERO``.
        """
        params = {
            "output_format": "sparse",
            "input_format": "sparse",
            "delimitor": ' '
        }
        dataio = DataIO()
        dataio.set_tracker(TrackerMock())
        dataio.run(get_cpn_input(self.dataset, params))
        insts = list(dataio.save_data().collect())

        for i in range(100):
            inst = insts[i][1]
            sparse_feat = inst.features
            self.assertTrue(type(sparse_feat).__name__ == "SparseVector")
            self.assertTrue(sparse_feat.get_shape() == self.max_feature + 1)
            self.assertTrue(inst.label == i % 2)

            # The first token of the row is skipped; the remaining tokens
            # are "fid:value" pairs.
            tokens = self.data[i][1].split(" ")
            for token in tokens[1:]:
                fid, val = token.split(":", -1)
                delta = np.fabs(sparse_feat.get_data(int(fid)) - float(val))
                self.assertTrue(delta < consts.FLOAT_ZERO)