def setUp(self): session.init("test_cross_entropy") self.softmax_loss = SoftmaxCrossEntropyLoss() self.y_list = [i % 5 for i in range(100)] self.predict_list = [np.array([random.random() for i in range(5)]) for j in range(100)] self.y = session.parallelize(self.y_list, include_key=False, partition=16) self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
def setUp(self): session.init("test_cross_entropy") self.sigmoid_loss = SigmoidBinaryCrossEntropyLoss() self.y_list = [i % 2 for i in range(100)] self.predict_list = [random.random() for i in range(100)] self.y = session.parallelize(self.y_list, include_key=False, partition=16) self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
def setUp(self):
    self.feature_histogram = FeatureHistogram()
    session.init("test_feature_histogram")
    data_insts = []
    for i in range(1000):
        indices = []
        data = []
        for j in range(10):
            x = random.randint(0, 5)
            if x != 0:
                data.append(x)
                indices.append(j)
        sparse_vec = SparseVector(indices, data, shape=10)
        data_insts.append((Instance(features=sparse_vec), (1, random.randint(0, 3))))
    self.node_map = {0: 0, 1: 1, 2: 2, 3: 3}
    self.data_insts = data_insts
    self.data_bin = session.parallelize(data_insts, include_key=False, partition=16)
    self.grad_and_hess_list = [(random.random(), random.random()) for i in range(1000)]
    self.grad_and_hess = session.parallelize(self.grad_and_hess_list, include_key=False, partition=16)
    bin_split_points = []
    for i in range(10):
        bin_split_points.append(np.array([k for k in range(6)]))
    self.bin_split_points = np.array(bin_split_points)
    self.bin_sparse = [0 for i in range(10)]
def setUp(self): session.init("test_dataio_" + str(random.random())) self.data = [] self.max_feature = -1 for i in range(100): row = [] label = i % 2 row.append(str(label)) dict = {} for j in range(20): x = random.randint(0, 1000) val = random.random() if x in dict: continue self.max_feature = max(self.max_feature, x) dict[x] = True row.append(":".join(map(str, [x, val]))) self.data.append((i, " ".join(row))) self.table = session.parallelize(self.data, include_key=True, partition=16) self.args = {"data": {"data_io_0": {"data": self.table}}}
def setUp(self):
    self.job_id = str(uuid.uuid1())
    session.init(self.job_id)
    self.eps = 1e-5
    self.count = 1000
    self.feature_num = 100
    self._dense_table, self._dense_not_inst_table, self._original_data = None, None, None
def setUp(self): session.init("test_dataio_" + str(random.random())) self.data = [] self.data_with_value = [] for i in range(100): row = [] row_with_value = [] for j in range(100): if random.randint(1, 100) > 30: continue str_r = ''.join( random.sample(string.ascii_letters + string.digits, 10)) row.append(str_r) row_with_value.append(str_r + ':' + str(random.random())) self.data.append((i, ' '.join(row))) self.data_with_value.append((i, ' '.join(row_with_value))) self.table1 = session.parallelize(self.data, include_key=True, partition=16) self.table2 = session.parallelize(self.data_with_value, include_key=True, partition=16) self.args1 = {"data": {"data_io_0": {"data": self.table1}}} self.args2 = {"data": {"data_io_1": {"data": self.table2}}}
def setUp(self): from fate_arch.session import computing_session as session session.init("test_encrypt_mode_calculator") self.list_data = [] self.tuple_data = [] self.numpy_data = [] for i in range(30): list_value = [100 * i + j for j in range(20)] tuple_value = tuple(list_value) numpy_value = np.array(list_value, dtype="int") self.list_data.append(list_value) self.tuple_data.append(tuple_value) self.numpy_data.append(numpy_value) self.data_list = session.parallelize(self.list_data, include_key=False, partition=10) self.data_tuple = session.parallelize(self.tuple_data, include_key=False, partition=10) self.data_numpy = session.parallelize(self.numpy_data, include_key=False, partition=10)
def setUp(self): session.init("test_paillier_tensor" + str(random.random()), 0) self.data1 = np.ones((1000, 10)) self.data2 = np.ones((1000, 10)) self.paillier_tensor1 = PaillierTensor(ori_data=self.data1, partitions=10) self.paillier_tensor2 = PaillierTensor(ori_data=self.data2, partitions=10)
def setUp(self):
    self.jobid = str(uuid.uuid1())
    session.init(self.jobid)
    from federatedml.statistic.intersect_deprecated.intersect_guest import RsaIntersectionGuest
    from federatedml.statistic.intersect_deprecated.intersect import RsaIntersect
    intersect_param = IntersectParam()
    self.rsa_operator = RsaIntersectionGuest(intersect_param)
    self.rsa_op2 = RsaIntersect(intersect_param)
def setUp(self):
    self.jobid = str(uuid.uuid1())
    session.init(self.jobid)
    from federatedml.statistic.intersect.intersect_host import RsaIntersectionHost
    from federatedml.statistic.intersect.intersect_host import RawIntersectionHost
    intersect_param = IntersectParam()
    self.rsa_operator = RsaIntersectionHost(intersect_param)
    self.raw_operator = RawIntersectionHost(intersect_param)
def setUp(self): session.init("test_random_sampler") self.data = [(i * 10 + 5, i * i) for i in range(100)] self.table = session.parallelize(self.data, include_key=True, partition=16) self.data_to_trans = [(i * 10 + 5, i * i * i) for i in range(100)] self.table_trans = session.parallelize(self.data_to_trans, include_key=True, partition=16)
def setUp(self): session.init("test_min_max_scaler_" + str(random.random())) str_time = time.strftime("%Y%m%d%H%M%S", time.localtime()) self.test_data = [ [ "0.254879", "na", "0.209656", "10000", "-0.441366", "-10000", "-0.485934", "na", "-0.287570", "-0.733474" ], [ "-1.142928", "", "-1.166747", "-0.923578", "0.628230", "-1.021418", "-1.111867", "-0.959523", "-0.096672", "-0.121683" ], [ "-1.451067", "-1.406518", "none", "-1.092337", "none", "-1.168557", "-1.305831", "-1.745063", "-0.499499", "-0.302893" ], [ "-0.879933", "null", "-0.877527", "-0.780484", "-1.037534", "-0.483880", "-0.555498", "-0.768581", "0.433960", "-0.200928" ], [ "0.426758", "0.723479", "0.316885", "0.287273", "1.000835", "0.962702", "1.077099", "1.053586", "2.996525", "0.961696" ], [ "0.963102", "1.467675", "0.829202", "0.772457", "-0.038076", "-0.468613", "-0.307946", "-0.015321", "-0.641864", "-0.247477" ], [ "-0.662496", "0.212149", "-0.620475", "-0.632995", "-0.327392", "-0.385278", "-0.077665", "-0.730362", "0.217178", "-0.061280" ], [ "-0.453343", "-2.147457", "-0.473631", "-0.483572", "0.558093", "-0.740244", "-0.896170", "-0.617229", "-0.308601", "-0.666975" ], [ "-0.606584", "-0.971725", "-0.678558", "-0.591332", "-0.963013", "-1.302401", "-1.212855", "-1.321154", "-1.591501", "-1.230554" ], [ "-0.583805", "-0.193332", "-0.633283", "-0.560041", "-0.349310", "-0.519504", "-0.610669", "-0.929526", "-0.196974", "-0.151608" ] ] self.test_instance = [] for td in self.test_data: self.test_instance.append(td) self.table_instance = self.data_to_table(self.test_instance) self.table_instance.schema['header'] = [ "fid" + str(i) for i in range(len(self.test_data[0])) ]
def setUp(self): session.init("test_least_abs_error_loss") self.lae_loss = LeastAbsoluteErrorLoss() self.y_list = [i % 2 for i in range(100)] self.predict_list = [random.random() for i in range(100)] self.y = session.parallelize(self.y_list, include_key=False, partition=16) self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
def clean_tables(self):
    from fate_arch.session import computing_session as session
    session.init(job_id=self.job_id)
    try:
        session.cleanup("*", self.job_id, True)
    except EnvironmentError:
        pass
    try:
        session.cleanup("*", self.job_id, False)
    except EnvironmentError:
        pass
def setUp(self): session.init("test_fair_loss") self.log_cosh_loss = LogCoshLoss() self.y_list = [i % 2 for i in range(100)] self.predict_list = [random.random() for i in range(100)] self.y = session.parallelize(self.y_list, include_key=False, partition=16) self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
def setUp(self): session.init("test_huber_loss") self.delta = 1 self.huber_loss = HuberLoss(self.delta) self.y_list = [i % 2 for i in range(100)] self.predict_list = [random.random() for i in range(100)] self.y = session.parallelize(self.y_list, include_key=False, partition=16) self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
def setUp(self): session.init("test_fair_loss") self.rho = 0.5 self.tweedie_loss = TweedieLoss(self.rho) self.y_list = [i % 2 for i in range(100)] self.predict_list = [random.random() for i in range(100)] self.y = session.parallelize(self.y_list, include_key=False, partition=16) self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
def setUp(self): session.init("test_sample_weight_" + str(uuid.uuid1())) self.class_weight = {"0": 2, "1": 3} data = [] for i in range(1, 11): label = 1 if i % 5 == 0 else 0 instance = Instance(inst_id=i, features=np.random.random(3), label=label) data.append((i, instance)) schema = {"header": ["x0", "x1", "x2"], "sid": "id", "label_name": "y"} self.table = session.parallelize(data, include_key=True, partition=8) self.table.schema = schema self.sample_weight_obj = SampleWeight()
def setUp(self): session.init("test_label_transform_" + str(uuid.uuid1())) self.label_encoder = {"yes": 1, "no": 0} self.predict_label_encoder = {1: "yes", 0: "no"} data = [] for i in range(1, 11): label = "yes" if i % 5 == 0 else "no" instance = Instance(inst_id=i, features=np.random.random(3), label=label) data.append((i, instance)) schema = {"header": ["x0", "x1", "x2"], "sid": "id", "label_name": "y"} self.table = session.parallelize(data, include_key=True, partition=8) self.table.schema = schema self.label_transformer_obj = LabelTransformer()
def setUp(self): session.init("test_stratified_sampler") self.data = [] self.data_to_trans = [] for i in range(1000): self.data.append((i, Instance(label=i % 4, features=i * i))) self.data_to_trans.append((i, Instance(features=i**3))) self.table = session.parallelize(self.data, include_key=True, partition=16) self.table_trans = session.parallelize(self.data_to_trans, include_key=True, partition=16)
def setUp(self): self.job_id = str(uuid.uuid1()) session.init("test_random_sampler_" + self.job_id) data_num = 100 feature_num = 8 self.prepare_data(data_num, feature_num) params = LocalBaselineParam() local_baseline_obj = LocalBaseline() local_baseline_obj._init_model(params) local_baseline_obj.need_run = True local_baseline_obj.header = ["x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8"] local_baseline_obj.model_name = "LogisticRegression" local_baseline_obj.model_opts = {} self.local_baseline_obj = local_baseline_obj
def setUp(self): session.init("123") self.data_num = 1000 self.feature_num = 200 final_result = [] for i in range(self.data_num): tmp = i * np.ones(self.feature_num) inst = Instance(inst_id=i, features=tmp, label=0) tmp = (str(i), inst) final_result.append(tmp) table = session.parallelize(final_result, include_key=True, partition=3) self.table = table
def setUp(self): session.init("test_label_checker") self.small_label_set = [Instance(label=i % 5) for i in range(100)] self.classify_inst = session.parallelize(self.small_label_set, include_key=False, partition=16) self.regression_label = [ Instance(label=random.random()) for i in range(100) ] self.regression_inst = session.parallelize(self.regression_label, partition=16, include_key=False) self.classify_checker = ClassifyLabelChecker() self.regression_checker = RegressionLabelChecker()
def setUp(self): self.job_id = str(uuid.uuid1()) session.init("test_random_sampler_" + self.job_id) self.data_splitter = data_split.DataSplitter() param_dict = { "random_state": 42, "test_size": 0.2, "train_size": 0.6, "validate_size": 0.2, "stratified": True, "shuffle": True, "split_points": [0.5, 0.2] } params = DataSplitParam(**param_dict) self.data_splitter._init_model(params)
def setUp(self): self.job_id = str(uuid.uuid1()) session.init("test_random_sampler_" + self.job_id) model = HeteroStepwise() model.__setattr__('role', consts.GUEST) model.__setattr__('fit_intercept', True) self.model = model data_num = 100 feature_num = 5 bool_list = [True, False, True, True, False] self.str_mask = "10110" self.header = ["x1", "x2", "x3", "x4", "x5"] self.mask = self.prepare_mask(bool_list) self.table = self.prepare_data(data_num, feature_num, self.header, "id", "y")
def setUp(self):
    self.test_data = [
        [0, 1.0, 10, 2, 3, 1],
        [1.0, 2, 9, 2, 4, 2],
        [0, 3.0, 8, 3, 3, 3],
        [1.0, 4, 7, 4, 4, 4],
        [1.0, 5, 6, 5, 5, 5],
        [1.0, 6, 5, 6, 6, -100],
        [0, 7.0, 4, 7, 7, 7],
        [0, 8, 3.0, 8, 6, 8],
        [0, 9, 2, 9.0, 9, 9],
        [0, 10, 1, 10.0, 10, 10]
    ]
    str_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
    session.init(str_time)
    self.test_instance = []
    for td in self.test_data:
        self.test_instance.append(Instance(features=np.array(td)))
    self.table_instance = self.data_to_table(self.test_instance)
    self.table_instance.schema['header'] = ["fid" + str(i) for i in range(len(self.test_data[0]))]
def setUp(self): session.init("test_dataio_" + str(random.random())) data1 = [("a", "1,2,-1,0,0,5"), ("b", "4,5,6,0,1,2")] schema = {"header": "x1,x2,x3,x4,x5,x6", "sid": "id"} self.table1 = session.parallelize(data1, include_key=True, partition=16) self.table1.schema = schema data2 = [("a", '-1,,na,null,null,2')] self.table2 = session.parallelize(data2, include_key=True, partition=16) self.table2.schema = schema self.dataset1 = {"data_io_0": {"data": self.table1}} self.dataset2 = {"data_io_1": {"data": self.table2}}
def sec_intermediate_result(self, ir_b, loc, sigma_a):
    """
    Note: this function has already been tested and runs correctly.
    Adds noise to the intermediate result.

    Algorithm
    ------------------
    1. Take the IDs of the current batch of data.
    2. Pair each ID with a Gaussian noise sample.
    3. Call the join() function of the API to perturb the data.
    """
    # sec_result_1 is a list of tuples; the first item of each tuple is the ID of a record
    # in the current batch, the second item is the Gaussian noise for that record.
    # (Open question here: whether it is possible to ...)
    # First way of adding noise
    sec_result_1 = []
    for ir_b_tuple_1 in ir_b.collect():
        test_tuple_1 = (ir_b_tuple_1[0], np.random.normal(loc, sigma_a))
        sec_result_1.append(test_tuple_1)

    # # Second way of adding noise
    # gaussian_noise = np.random.normal(loc, sigma_a, ir_b.count())
    # gaussian_noise.tolist()
    # sec_result_2 = []
    # first_data_id = ir_b.first()[0]
    # for ir_b_tuple_2 in ir_b.collect():
    #     test_tuple_2 = (ir_b_tuple_2[0], gaussian_noise[int(ir_b_tuple_2[0]) - int(first_data_id)])
    #     sec_result_2.append(test_tuple_2)
    # -----------------------------------------------------------------
    # Wrap the Gaussian noise into a DTable
    computing_session.init(work_mode=0, backend=0, session_id="gaussian id")
    gaussian_noise = computing_session.parallelize(sec_result_1, partition=4, include_key=True)
    # Perturb the data inner products
    sec_result = ir_b.join(gaussian_noise, lambda x, y: x + y)
    return sec_result
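# A minimal sketch (not part of the original code) of an alternative way to add the
# Gaussian noise without collecting the table to the driver and re-parallelizing it.
# It assumes the computing table exposes a mapValues() transformation alongside the
# collect()/join()/parallelize() operators used above; the function name below is
# illustrative only.
def sec_intermediate_result_mapvalues(self, ir_b, loc, sigma_a):
    # Draw one Gaussian sample per record and add it to the scalar value,
    # keeping the record keys untouched.
    return ir_b.mapValues(lambda value: value + np.random.normal(loc, sigma_a))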
def sec_intermediate_result(self, ir_a, loc, sigma_b):
    """
    Parameters
    ----------------------------
    ir_a: a DTable whose values are scalars.

    Returns
    -----------------------
    ir_a with noise added.

    Algorithm: same as on the Host side, not repeated here.
    """
    # First method
    sec_result_1 = []
    for ir_a_tuple_1 in ir_a.collect():
        test_tuple_1 = (ir_a_tuple_1[0], np.random.normal(loc, sigma_b))
        sec_result_1.append(test_tuple_1)

    # Second method
    # gaussian_noise = np.random.normal(loc, sigma_b, ir_a.count())
    # gaussian_noise.tolist()
    # sec_result_2 = []
    # first_data_id = ir_a.first()[0]
    # for ir_b_tuple_2 in ir_a.collect():
    #     test_tuple_2 = (ir_b_tuple_2[0], gaussian_noise[int(ir_b_tuple_2[0]) - int(first_data_id)])
    #     sec_result_2.append(test_tuple_2)
    # -----------------------------------------------------------------
    # Wrap the Gaussian noise into a DTable
    computing_session.init(work_mode=0, backend=0, session_id="gaussian id")
    gaussian_noise = computing_session.parallelize(sec_result_1, partition=4, include_key=True)
    # Perturb the data inner products
    sec_result = ir_a.join(gaussian_noise, lambda x, y: x + y)
    return sec_result
def setUp(self):
    session.init('test', 0)
    print('generating dense tables')
    l1, l2 = [], []
    col = [i for i in range(20)]
    for i in range(100):
        inst = Instance()
        inst.features = np.random.random(20)
        l1.append(inst)
    for i in range(1000):
        inst = Instance()
        inst.features = np.random.random(20)
        l2.append(inst)
    self.dense_table1, self.dense_table2 = session.parallelize(l1, partition=4, include_key=False), \
        session.parallelize(l2, partition=4, include_key=False)
    self.dense_table1.schema['header'] = copy.deepcopy(col)
    self.dense_table2.schema['header'] = copy.deepcopy(col)
    print('generating done')
    print('generating sparse tables')
    l1, l2 = [], []
    col = [i for i in range(20)]
    for i in range(100):
        inst = Instance()
        inst.features = SparseVector(indices=copy.deepcopy(col), data=list(np.random.random(20)))
        l1.append(inst)
    for i in range(1000):
        inst = Instance()
        inst.features = SparseVector(indices=copy.deepcopy(col), data=list(np.random.random(20)))
        l2.append(inst)
    self.sp_table1, self.sp_table2 = session.parallelize(l1, partition=4, include_key=False), \
        session.parallelize(l2, partition=4, include_key=False)
    self.sp_table1.schema['header'] = copy.deepcopy(col)
    self.sp_table2.schema['header'] = copy.deepcopy(col)
    print('generating done')