Example #1
 def setUp(self):
     session.init("test_cross_entropy")
     self.softmax_loss = SoftmaxCrossEntropyLoss()
     self.y_list = [i % 5 for i in range(100)]
     self.predict_list = [np.array([random.random() for i in range(5)]) for j in range(100)]
     self.y = session.parallelize(self.y_list, include_key=False, partition=16)
     self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
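
For context, the quantity this fixture feeds into SoftmaxCrossEntropyLoss is standard multiclass cross entropy. A minimal NumPy sketch of the per-sample value (softmax_cross_entropy is an illustrative helper, not FATE's implementation):

    import numpy as np

    def softmax_cross_entropy(y, scores):
        # -log(softmax(scores)[y]); shift by max for numerical stability
        exp = np.exp(scores - scores.max())
        probs = exp / exp.sum()
        return -np.log(probs[y])

    # mirrors the fixture: label in {0..4}, five raw scores per sample
    print(softmax_cross_entropy(2, np.array([0.1, 0.9, 0.3, 0.5, 0.2])))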
Example #2
 def setUp(self):
     session.init("test_cross_entropy")
     self.sigmoid_loss = SigmoidBinaryCrossEntropyLoss()
     self.y_list = [i % 2 for i in range(100)]
     self.predict_list = [random.random() for i in range(100)]
     self.y = session.parallelize(self.y_list, include_key=False, partition=16)
     self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
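
The corresponding binary quantity, again as a hedged NumPy sketch rather than FATE's code:

    import numpy as np

    def binary_cross_entropy(y, p):
        # y in {0, 1}, p a predicted probability in (0, 1)
        return -(y * np.log(p) + (1 - y) * np.log(1 - p))

    print(binary_cross_entropy(1, 0.9))  # ~0.105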
Example #3
    def setUp(self):
        self.feature_histogram = FeatureHistogram()
        session.init("test_feature_histogram")
        data_insts = []
        for i in range(1000):
            indices = []
            data = []
            for j in range(10):
                x = random.randint(0, 5)
                if x != 0:
                    data.append(x)
                    indices.append(j)
            sparse_vec = SparseVector(indices, data, shape=10)
            data_insts.append(
                (Instance(features=sparse_vec), (1, random.randint(0, 3))))
        self.node_map = {0: 0, 1: 1, 2: 2, 3: 3}
        self.data_insts = data_insts
        self.data_bin = session.parallelize(data_insts,
                                            include_key=False,
                                            partition=16)

        self.grad_and_hess_list = [(random.random(), random.random())
                                   for i in range(1000)]
        self.grad_and_hess = session.parallelize(self.grad_and_hess_list,
                                                 include_key=False,
                                                 partition=16)

        bin_split_points = [np.arange(6) for _ in range(10)]
        self.bin_split_points = np.array(bin_split_points)
        self.bin_sparse = [0] * 10
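
What FeatureHistogram aggregates from this fixture is, in essence, per-(node, feature, bin) sums of gradients and hessians plus a count. A rough stand-alone sketch with plain dicts in place of SparseVector and the DTable (build_histogram is illustrative only, not the FATE implementation):

    from collections import defaultdict

    def build_histogram(samples, grad_hess, node_map):
        # hist[(node, feature, bin)] = [sum_grad, sum_hess, count]
        hist = defaultdict(lambda: [0.0, 0.0, 0])
        for (features, node_id), (g, h) in zip(samples, grad_hess):
            nid = node_map[node_id]
            for fid, bin_idx in features.items():  # sparse: non-zero bins only
                cell = hist[(nid, fid, bin_idx)]
                cell[0] += g
                cell[1] += h
                cell[2] += 1
        return hist

    # toy data in the same spirit as the setUp above
    samples = [({0: 2, 3: 1}, 0), ({0: 2}, 1)]  # ({feature_id: bin}, node_id)
    grad_hess = [(0.5, 0.25), (-0.1, 0.3)]
    print(dict(build_histogram(samples, grad_hess, {0: 0, 1: 1})))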
Example #4
    def setUp(self):
        session.init("test_dataio_" + str(random.random()))
        self.data = []
        self.max_feature = -1
        for i in range(100):
            row = []
            label = i % 2
            row.append(str(label))
            seen = set()

            for j in range(20):
                x = random.randint(0, 1000)
                val = random.random()
                if x in seen:
                    continue
                self.max_feature = max(self.max_feature, x)
                seen.add(x)
                row.append(":".join(map(str, [x, val])))

            self.data.append((i, " ".join(row)))

        self.table = session.parallelize(self.data,
                                         include_key=True,
                                         partition=16)
        self.args = {"data": {"data_io_0": {"data": self.table}}}
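
The rows built above follow the sparse "label index:value index:value ..." layout. A minimal parser for that format (parse_sparse_row is a hypothetical helper, not part of the DataIO component):

    def parse_sparse_row(line):
        parts = line.split(" ")
        label = int(parts[0])
        features = {}
        for item in parts[1:]:
            idx, val = item.split(":")
            features[int(idx)] = float(val)
        return label, features

    print(parse_sparse_row("1 3:0.5 17:0.25"))  # (1, {3: 0.5, 17: 0.25})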
Example #5
 def setUp(self):
     self.job_id = str(uuid.uuid1())
     session.init(self.job_id)
     self.eps = 1e-5
     self.count = 1000
     self.feature_num = 100
     self._dense_table, self._dense_not_inst_table, self._original_data = None, None, None
Example #6
    def setUp(self):
        session.init("test_dataio_" + str(random.random()))
        self.data = []
        self.data_with_value = []
        for i in range(100):
            row = []
            row_with_value = []
            for j in range(100):
                if random.randint(1, 100) > 30:
                    continue
                str_r = ''.join(
                    random.sample(string.ascii_letters + string.digits, 10))
                row.append(str_r)
                row_with_value.append(str_r + ':' + str(random.random()))

            self.data.append((i, ' '.join(row)))
            self.data_with_value.append((i, ' '.join(row_with_value)))

        self.table1 = session.parallelize(self.data,
                                          include_key=True,
                                          partition=16)
        self.table2 = session.parallelize(self.data_with_value,
                                          include_key=True,
                                          partition=16)
        self.args1 = {"data": {"data_io_0": {"data": self.table1}}}
        self.args2 = {"data": {"data_io_1": {"data": self.table2}}}
Example #7
    def setUp(self):
        from fate_arch.session import computing_session as session
        session.init("test_encrypt_mode_calculator")

        self.list_data = []
        self.tuple_data = []
        self.numpy_data = []

        for i in range(30):
            list_value = [100 * i + j for j in range(20)]
            tuple_value = tuple(list_value)
            numpy_value = np.array(list_value, dtype="int")

            self.list_data.append(list_value)
            self.tuple_data.append(tuple_value)
            self.numpy_data.append(numpy_value)

        self.data_list = session.parallelize(self.list_data,
                                             include_key=False,
                                             partition=10)
        self.data_tuple = session.parallelize(self.tuple_data,
                                              include_key=False,
                                              partition=10)
        self.data_numpy = session.parallelize(self.numpy_data,
                                              include_key=False,
                                              partition=10)
Example #8
 def setUp(self):
     session.init("test_paillier_tensor" + str(random.random()), 0)
     self.data1 = np.ones((1000, 10))
     self.data2 = np.ones((1000, 10))
     self.paillier_tensor1 = PaillierTensor(ori_data=self.data1,
                                            partitions=10)
     self.paillier_tensor2 = PaillierTensor(ori_data=self.data2,
                                            partitions=10)
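
PaillierTensor rests on the additive homomorphism of the Paillier cryptosystem: ciphertexts can be added together (and to plaintext scalars) without decryption. A minimal demonstration using the separate python-paillier (phe) package, assuming it is installed:

    from phe import paillier  # pip install phe

    public_key, private_key = paillier.generate_paillier_keypair(n_length=1024)
    a = public_key.encrypt(1.0)
    b = public_key.encrypt(2.5)

    # addition of ciphertexts, and of a plaintext scalar, stays encrypted
    total = a + b + 0.5
    print(private_key.decrypt(total))  # 4.0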
Example #9
    def setUp(self):
        self.jobid = str(uuid.uuid1())
        session.init(self.jobid)

        from federatedml.statistic.intersect_deprecated.intersect_guest import RsaIntersectionGuest
        from federatedml.statistic.intersect_deprecated.intersect import RsaIntersect
        intersect_param = IntersectParam()
        self.rsa_operator = RsaIntersectionGuest(intersect_param)
        self.rsa_op2 = RsaIntersect(intersect_param)
Example #10
    def setUp(self):
        self.jobid = str(uuid.uuid1())
        session.init(self.jobid)

        from federatedml.statistic.intersect.intersect_host import RsaIntersectionHost
        from federatedml.statistic.intersect.intersect_host import RawIntersectionHost
        intersect_param = IntersectParam()
        self.rsa_operator = RsaIntersectionHost(intersect_param)
        self.raw_operator = RawIntersectionHost(intersect_param)
Example #11
 def setUp(self):
     session.init("test_random_sampler")
     self.data = [(i * 10 + 5, i * i) for i in range(100)]
     self.table = session.parallelize(self.data,
                                      include_key=True,
                                      partition=16)
     self.data_to_trans = [(i * 10 + 5, i * i * i) for i in range(100)]
     self.table_trans = session.parallelize(self.data_to_trans,
                                            include_key=True,
                                            partition=16)
Example #12
    def setUp(self):
        session.init("test_min_max_scaler_" + str(random.random()))
        str_time = time.strftime("%Y%m%d%H%M%S", time.localtime())

        self.test_data = [
            [
                "0.254879", "na", "0.209656", "10000", "-0.441366", "-10000",
                "-0.485934", "na", "-0.287570", "-0.733474"
            ],
            [
                "-1.142928", "", "-1.166747", "-0.923578", "0.628230",
                "-1.021418", "-1.111867", "-0.959523", "-0.096672", "-0.121683"
            ],
            [
                "-1.451067", "-1.406518", "none", "-1.092337", "none",
                "-1.168557", "-1.305831", "-1.745063", "-0.499499", "-0.302893"
            ],
            [
                "-0.879933", "null", "-0.877527", "-0.780484", "-1.037534",
                "-0.483880", "-0.555498", "-0.768581", "0.433960", "-0.200928"
            ],
            [
                "0.426758", "0.723479", "0.316885", "0.287273", "1.000835",
                "0.962702", "1.077099", "1.053586", "2.996525", "0.961696"
            ],
            [
                "0.963102", "1.467675", "0.829202", "0.772457", "-0.038076",
                "-0.468613", "-0.307946", "-0.015321", "-0.641864", "-0.247477"
            ],
            [
                "-0.662496", "0.212149", "-0.620475", "-0.632995", "-0.327392",
                "-0.385278", "-0.077665", "-0.730362", "0.217178", "-0.061280"
            ],
            [
                "-0.453343", "-2.147457", "-0.473631", "-0.483572", "0.558093",
                "-0.740244", "-0.896170", "-0.617229", "-0.308601", "-0.666975"
            ],
            [
                "-0.606584", "-0.971725", "-0.678558", "-0.591332",
                "-0.963013", "-1.302401", "-1.212855", "-1.321154",
                "-1.591501", "-1.230554"
            ],
            [
                "-0.583805", "-0.193332", "-0.633283", "-0.560041",
                "-0.349310", "-0.519504", "-0.610669", "-0.929526",
                "-0.196974", "-0.151608"
            ]
        ]
        self.test_instance = []
        for td in self.test_data:
            self.test_instance.append(td)
        self.table_instance = self.data_to_table(self.test_instance)
        self.table_instance.schema['header'] = [
            "fid" + str(i) for i in range(len(self.test_data[0]))
        ]
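
The fixture deliberately mixes numeric strings with several missing-value tokens ("na", "", "none", "null"). The transformation under test is essentially min-max scaling with missing entries passed through; a NumPy sketch (min_max_scale is an illustrative stand-in, not the FATE scaler):

    import numpy as np

    MISSING = {"na", "", "none", "null"}

    def min_max_scale(column):
        vals = np.array([float("nan") if v in MISSING else float(v)
                         for v in column])
        lo, hi = np.nanmin(vals), np.nanmax(vals)
        return (vals - lo) / (hi - lo)  # missing values stay NaN

    print(min_max_scale(["0.25", "na", "-0.5", "1.0"]))  # [0.5 nan 0. 1.]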
Example #13
 def setUp(self):
     session.init("test_least_abs_error_loss")
     self.lae_loss = LeastAbsoluteErrorLoss()
     self.y_list = [i % 2 for i in range(100)]
     self.predict_list = [random.random() for i in range(100)]
     self.y = session.parallelize(self.y_list,
                                  include_key=False,
                                  partition=16)
     self.predict = session.parallelize(self.predict_list,
                                        include_key=False,
                                        partition=16)
Example #14
 def clean_tables(self):
     from fate_arch.session import computing_session as session
     session.init(job_id=self.job_id)
     try:
         session.cleanup("*", self.job_id, True)
     except EnvironmentError:
         pass
     try:
         session.cleanup("*", self.job_id, False)
     except EnvironmentError:
         pass
Example #15
 def setUp(self):
     session.init("test_fair_loss")
     self.log_cosh_loss = LogCoshLoss()
     self.y_list = [i % 2 for i in range(100)]
     self.predict_list = [random.random() for i in range(100)]
     self.y = session.parallelize(self.y_list,
                                  include_key=False,
                                  partition=16)
     self.predict = session.parallelize(self.predict_list,
                                        include_key=False,
                                        partition=16)
Example #16
 def setUp(self):
     session.init("test_huber_loss")
     self.delta = 1
     self.huber_loss = HuberLoss(self.delta)
     self.y_list = [i % 2 for i in range(100)]
     self.predict_list = [random.random() for i in range(100)]
     self.y = session.parallelize(self.y_list,
                                  include_key=False,
                                  partition=16)
     self.predict = session.parallelize(self.predict_list,
                                        include_key=False,
                                        partition=16)
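
For reference, the textbook Huber loss that the delta parameter above controls: quadratic for small residuals, linear beyond delta. (FATE's HuberLoss may use a smooth variant; this sketch shows only the standard definition.)

    import numpy as np

    def huber_loss(y, y_pred, delta=1.0):
        r = np.abs(y - y_pred)
        return np.where(r <= delta,
                        0.5 * r ** 2,               # quadratic region
                        delta * (r - 0.5 * delta))  # linear region

    print(huber_loss(np.array([0.0, 0.0]), np.array([0.5, 3.0])))  # [0.125 2.5]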
Example #17
 def setUp(self):
     session.init("test_fair_loss")
     self.rho = 0.5
     self.tweedie_loss = TweedieLoss(self.rho)
     self.y_list = [i % 2 for i in range(100)]
     self.predict_list = [random.random() for i in range(100)]
     self.y = session.parallelize(self.y_list,
                                  include_key=False,
                                  partition=16)
     self.predict = session.parallelize(self.predict_list,
                                        include_key=False,
                                        partition=16)
Example #18
 def setUp(self):
     session.init("test_sample_weight_" + str(uuid.uuid1()))
     self.class_weight = {"0": 2, "1": 3}
     data = []
     for i in range(1, 11):
         label = 1 if i % 5 == 0 else 0
         instance = Instance(inst_id=i, features=np.random.random(3), label=label)
         data.append((i, instance))
     schema = {"header": ["x0", "x1", "x2"],
               "sid": "id", "label_name": "y"}
     self.table = session.parallelize(data, include_key=True, partition=8)
     self.table.schema = schema
     self.sample_weight_obj = SampleWeight()
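
Per sample, the class_weight dict above amounts to a label-indexed lookup. A sketch of that mapping, not SampleWeight's actual code path:

    def weights_from_class_weight(labels, class_weight):
        return [class_weight[str(label)] for label in labels]

    print(weights_from_class_weight([0, 1, 1, 0], {"0": 2, "1": 3}))
    # [2, 3, 3, 2]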
Example #19
 def setUp(self):
     session.init("test_label_transform_" + str(uuid.uuid1()))
     self.label_encoder = {"yes": 1, "no": 0}
     self.predict_label_encoder = {1: "yes", 0: "no"}
     data = []
     for i in range(1, 11):
         label = "yes" if i % 5 == 0 else "no"
         instance = Instance(inst_id=i, features=np.random.random(3), label=label)
         data.append((i, instance))
     schema = {"header": ["x0", "x1", "x2"],
               "sid": "id", "label_name": "y"}
     self.table = session.parallelize(data, include_key=True, partition=8)
     self.table.schema = schema
     self.label_transformer_obj = LabelTransformer()
Example #20
    def setUp(self):
        session.init("test_stratified_sampler")
        self.data = []
        self.data_to_trans = []
        for i in range(1000):
            self.data.append((i, Instance(label=i % 4, features=i * i)))
            self.data_to_trans.append((i, Instance(features=i**3)))

        self.table = session.parallelize(self.data,
                                         include_key=True,
                                         partition=16)
        self.table_trans = session.parallelize(self.data_to_trans,
                                               include_key=True,
                                               partition=16)
Example #21
 def setUp(self):
     self.job_id = str(uuid.uuid1())
     session.init("test_random_sampler_" + self.job_id)
     data_num = 100
     feature_num = 8
     self.prepare_data(data_num, feature_num)
     params = LocalBaselineParam()
     local_baseline_obj = LocalBaseline()
     local_baseline_obj._init_model(params)
     local_baseline_obj.need_run = True
     local_baseline_obj.header = ["x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8"]
     local_baseline_obj.model_name = "LogisticRegression"
     local_baseline_obj.model_opts = {}
     self.local_baseline_obj = local_baseline_obj
Example #22
 def setUp(self):
     session.init("123")
     self.data_num = 1000
     self.feature_num = 200
     final_result = []
     for i in range(self.data_num):
         tmp = i * np.ones(self.feature_num)
         inst = Instance(inst_id=i, features=tmp, label=0)
         tmp = (str(i), inst)
         final_result.append(tmp)
     table = session.parallelize(final_result,
                                 include_key=True,
                                 partition=3)
     self.table = table
Example #23
    def setUp(self):
        session.init("test_label_checker")

        self.small_label_set = [Instance(label=i % 5) for i in range(100)]
        self.classify_inst = session.parallelize(self.small_label_set,
                                                 include_key=False,
                                                 partition=16)
        self.regression_label = [
            Instance(label=random.random()) for i in range(100)
        ]
        self.regression_inst = session.parallelize(self.regression_label,
                                                   partition=16,
                                                   include_key=False)
        self.classify_checker = ClassifyLabelChecker()
        self.regression_checker = RegressionLabelChecker()
Example #24
 def setUp(self):
     self.job_id = str(uuid.uuid1())
     session.init("test_random_sampler_" + self.job_id)
     self.data_splitter = data_split.DataSplitter()
     param_dict = {
         "random_state": 42,
         "test_size": 0.2,
         "train_size": 0.6,
         "validate_size": 0.2,
         "stratified": True,
         "shuffle": True,
         "split_points": [0.5, 0.2]
     }
     params = DataSplitParam(**param_dict)
     self.data_splitter._init_model(params)
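
A 0.6/0.2/0.2 stratified, shuffled split like the one configured above can be approximated outside FATE with two chained scikit-learn splits (a rough equivalent assuming a discrete label, not DataSplitter's implementation):

    import numpy as np
    from sklearn.model_selection import train_test_split

    X = np.arange(100).reshape(-1, 1)
    y = np.array([i % 2 for i in range(100)])

    # peel off the 20% test set, then split the rest 75/25 -> 0.6/0.2 overall
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, shuffle=True, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=0.25, stratify=y_rest, shuffle=True,
        random_state=42)
    print(len(X_train), len(X_val), len(X_test))  # 60 20 20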
Example #25
    def setUp(self):
        self.job_id = str(uuid.uuid1())
        session.init("test_random_sampler_" + self.job_id)
        model = HeteroStepwise()
        model.__setattr__('role', consts.GUEST)
        model.__setattr__('fit_intercept', True)

        self.model = model
        data_num = 100
        feature_num = 5
        bool_list = [True, False, True, True, False]
        self.str_mask = "10110"
        self.header = ["x1", "x2", "x3", "x4", "x5"]
        self.mask = self.prepare_mask(bool_list)
        self.table = self.prepare_data(data_num, feature_num, self.header,
                                       "id", "y")
Example #26
    def setUp(self):
        self.test_data = [[0, 1.0, 10, 2, 3, 1], [1.0, 2, 9, 2, 4, 2],
                          [0, 3.0, 8, 3, 3, 3], [1.0, 4, 7, 4, 4, 4],
                          [1.0, 5, 6, 5, 5, 5], [1.0, 6, 5, 6, 6, -100],
                          [0, 7.0, 4, 7, 7, 7], [0, 8, 3.0, 8, 6, 8],
                          [0, 9, 2, 9.0, 9, 9], [0, 10, 1, 10.0, 10, 10]]
        str_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
        session.init(str_time)

        self.test_instance = []
        for td in self.test_data:
            self.test_instance.append(Instance(features=np.array(td)))
        self.table_instance = self.data_to_table(self.test_instance)
        self.table_instance.schema['header'] = [
            "fid" + str(i) for i in range(len(self.test_data[0]))
        ]
Example #27
    def setUp(self):
        session.init("test_dataio_" + str(random.random()))
        data1 = [("a", "1,2,-1,0,0,5"), ("b", "4,5,6,0,1,2")]
        schema = {"header": "x1,x2,x3,x4,x5,x6", "sid": "id"}
        self.table1 = session.parallelize(data1,
                                          include_key=True,
                                          partition=16)
        self.table1.schema = schema

        data2 = [("a", '-1,,na,null,null,2')]
        self.table2 = session.parallelize(data2,
                                          include_key=True,
                                          partition=16)
        self.table2.schema = schema

        self.dataset1 = {"data_io_0": {"data": self.table1}}

        self.dataset2 = {"data_io_1": {"data": self.table2}}
Example #28
    def sec_intermediate_result(self, ir_b, loc, sigma_a):
        """
        备注:此函数已经测试过了,正常运行
        添加噪声
        算法
        ------------------
        1.根据当前批次的数据取出对应的ID
        2.拼接ID和高斯噪声
        3.调用API接口中的join()函数进行数据扰动
        """
        #这里的sec_result是一个列表,里面存取的都是元组,元组的第一项都是当前批次数据的ID,第二项便是高斯噪声
        #这里的疑问点在于是否可以

        # First way of adding noise
        sec_result_1 = []
        for ir_b_tuple_1 in ir_b.collect():
            test_tuple_1 = (ir_b_tuple_1[0], np.random.normal(loc, sigma_a))
            sec_result_1.append(test_tuple_1)

        # # Second way of adding noise
        # gaussian_noise = np.random.normal(loc, sigma_a, ir_b.count())
        # gaussian_noise.tolist()
        # sec_result_2 = []
        # first_data_id = ir_b.first()[0]
        # for ir_b_tuple_2 in ir_b.collect():
        #     test_tuple_2 = (ir_b_tuple_2[0],gaussian_noise[int(ir_b_tuple_2[0]) - int(first_data_id)])
        #     sec_result_2.append(test_tuple_2)
        #-----------------------------------------------------------------
        # Wrap the Gaussian noise into a DTable
        computing_session.init(work_mode=0,
                               backend=0,
                               session_id="gaussian id")
        gaussian_noise = computing_session.parallelize(sec_result_1,
                                                       partition=4,
                                                       include_key=True)

        # Perturb the data inner product
        sec_result = ir_b.join(gaussian_noise, lambda x, y: x + y)
        return sec_result
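
Stripped of the DTable machinery, the ID-keyed noise join above reduces to the following (a plain dict standing in for the table; illustrative only):

    import numpy as np

    def add_gaussian_noise(table, loc, sigma):
        noise = {key: np.random.normal(loc, sigma) for key in table}
        # equivalent of ir_b.join(gaussian_noise, lambda x, y: x + y)
        return {key: table[key] + noise[key] for key in table}

    print(add_gaussian_noise({"1": 0.4, "2": 0.7}, loc=0.0, sigma=0.1))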
Example #29
    def sec_intermediate_result(self, ir_a, loc, sigma_b):
        """
        parameters
        ir_a:是一个Dtable格式数据表,值为标量
        ----------------------------
        return
        在ir_a的基础上添加噪声
        -----------------------
        算法
        同Host方,不再赘述
        """
        #第一种方法
        sec_result_1 = []
        for ir_a_tuple_1 in ir_a.collect():
            test_tuple_1 = (ir_a_tuple_1[0], np.random.normal(loc, sigma_b))
            sec_result_1.append(test_tuple_1)

        # Second method
        # gaussian_noise = np.random.normal(loc, sigma_b, ir_a.count())
        # gaussian_noise.tolist()
        # sec_result_2 = []
        # first_data_id = ir_a.first()[0]
        # for ir_b_tuple_2 in ir_a.collect():
        #     test_tuple_2 = (ir_b_tuple_2[0], gaussian_noise[int(ir_b_tuple_2[0]) - int(first_data_id)])
        #     sec_result_2.append(test_tuple_2)

        # -----------------------------------------------------------------
        # Wrap the Gaussian noise into a DTable
        computing_session.init(work_mode=0,
                               backend=0,
                               session_id="gaussian id")
        gaussian_noise = computing_session.parallelize(sec_result_1,
                                                       partition=4,
                                                       include_key=True)

        # Perturb the data inner product
        sec_result = ir_a.join(gaussian_noise, lambda x, y: x + y)
        return sec_result
Example #30
    def setUp(self):

        session.init('test', 0)
        print('generating dense tables')
        l1, l2 = [], []
        col = [i for i in range(20)]
        for i in range(100):
            inst = Instance()
            inst.features = np.random.random(20)
            l1.append(inst)
        for i in range(1000):
            inst = Instance()
            inst.features = np.random.random(20)
            l2.append(inst)
        self.dense_table1, self.dense_table2 = session.parallelize(l1, partition=4, include_key=False), \
            session.parallelize(l2, partition=4, include_key=False)
        self.dense_table1.schema['header'] = copy.deepcopy(col)
        self.dense_table2.schema['header'] = copy.deepcopy(col)
        print('generating done')

        print('generating sparse tables')
        l1, l2 = [], []
        col = [i for i in range(20)]
        for i in range(100):
            inst = Instance()
            inst.features = SparseVector(indices=copy.deepcopy(col),
                                         data=list(np.random.random(20)))
            l1.append(inst)
        for i in range(1000):
            inst = Instance()
            inst.features = SparseVector(indices=copy.deepcopy(col),
                                         data=list(np.random.random(20)))
            l2.append(inst)
        self.sp_table1, self.sp_table2 = session.parallelize(l1, partition=4, include_key=False), \
            session.parallelize(l2, partition=4, include_key=False)
        self.sp_table1.schema['header'] = copy.deepcopy(col)
        self.sp_table2.schema['header'] = copy.deepcopy(col)
        print('generating done')