class TestHeteroLogisticGradient(unittest.TestCase):
    """Checks encrypted gradient computation on dense and sparse instances."""

    def setUp(self):
        """Generate a Paillier key pair and build the fixture tables."""
        size = 10
        partitions = 48

        cipher = PaillierEncrypt()
        cipher.generate_key()
        self.paillier_encrypt = cipher
        self.hetero_lr_gradient = hetero_lr_gradient_and_loss.Guest()

        # Encrypted 0..size-1, and the encrypted squares of the same values.
        encrypted_wx = [cipher.encrypt(value) for value in range(size)]
        self.en_wx = session.parallelize(
            encrypted_wx, partition=partitions, include_key=False)
        encrypted_squares = [
            cipher.encrypt(np.square(value)) for value in range(size)
        ]
        self.en_sum_wx_square = session.parallelize(
            encrypted_squares, partition=partitions, include_key=False)

        self.wx = np.array(list(range(size)))
        self.w = self.wx / np.array([1] * size)

        # All-ones feature vectors; labels alternate 1, -1, 1, ...
        instances = []
        for row in range(size):
            label = 1 if row % 2 == 0 else -1
            instances.append(
                Instance(features=np.array([1] * size), label=label))
        self.data_inst = session.parallelize(
            instances, partition=partitions, include_key=False)

        # Expected fore gradient.
        self.fore_gradient_local = [
            -0.5, 0.75, 0, 1.25, 0.5, 1.75, 1, 2.25, 1.5, 2.75
        ]
        # Expected gradients without and with the intercept term.
        self.gradient = [1.125] * size
        self.gradient_fit_intercept = [1.125] * (size + 1)
        self.loss = 4.505647

    def test_compute_partition_gradient(self):
        """Dense and sparse inputs must decrypt to the same expected gradient."""

        def residual(enc_wx, instance):
            return 0.25 * enc_wx - 0.5 * instance.label

        fore_gradient = self.en_wx.join(self.data_inst, residual)
        sparse_data = self._make_sparse_data()
        gradient_computer = hetero_linear_model_gradient.HeteroGradientBase()

        for fit_intercept in (True, False):
            expected = (self.gradient_fit_intercept
                        if fit_intercept else self.gradient)

            encrypted_dense = gradient_computer.compute_gradient(
                self.data_inst, fore_gradient, fit_intercept)
            dense_result = [
                self.paillier_encrypt.decrypt(item) for item in encrypted_dense
            ]
            self.assertListEqual(dense_result, expected)

            encrypted_sparse = gradient_computer.compute_gradient(
                sparse_data, fore_gradient, fit_intercept)
            sparse_result = [
                self.paillier_encrypt.decrypt(item) for item in encrypted_sparse
            ]
            self.assertListEqual(dense_result, sparse_result)

    def _make_sparse_data(self):
        """Return ``self.data_inst`` with every feature vector as a SparseVector."""

        def to_sparse(instance):
            dense = instance.features
            width = len(dense)
            sparse = SparseVector(
                indices=list(range(width)), data=dense, shape=width)
            return Instance(inst_id=None, features=sparse, label=instance.label)

        return self.data_inst.mapValues(to_sparse)
def test_cipher_add_sub_mul(self):
    """Run add / subtract / scalar-multiply on PackingCipherTensor and print results.

    The results are only printed after decryption; nothing is asserted.
    """
    encrypter = PaillierEncrypt()
    encrypter.generate_key(1024)

    # Encrypt 1..8 in order, then split into two four-element operands.
    ciphers = [encrypter.encrypt(value) for value in range(1, 9)]
    a = PackingCipherTensor(ciphers[:4])
    b = PackingCipherTensor(ciphers[4:])
    # One tensor wraps a bare ciphertext, the other a one-element list.
    c = PackingCipherTensor(encrypter.encrypt(1))
    d = PackingCipherTensor([encrypter.encrypt(5)])

    total = a + b
    difference = b - a
    mixed_total = c + d
    left_scaled = 123 * c
    right_scaled = d * 456
    vector_scaled = a * 114

    for tensor in (total, difference, mixed_total):
        print(encrypter.recursive_decrypt(tensor.ciphers))
    for tensor in (left_scaled, right_scaled):
        print(encrypter.decrypt(tensor.ciphers))
    print(encrypter.recursive_decrypt(vector_scaled.ciphers))

    print('cipher test done')
    print('*' * 30)
class TestHeteroLogisticGradient(unittest.TestCase):
    """Tests HeteroLogisticGradient against pre-computed expected values."""

    def setUp(self):
        """Generate a Paillier key pair and build the eggroll fixture tables."""
        size = 10

        cipher = PaillierEncrypt()
        cipher.generate_key()
        self.paillier_encrypt = cipher
        self.hetero_lr_gradient = HeteroLogisticGradient(cipher)

        # Encrypted 0..size-1, and the encrypted squares of the same values.
        encrypted_wx = [cipher.encrypt(value) for value in range(size)]
        self.wx = eggroll.parallelize(encrypted_wx)
        encrypted_squares = [
            cipher.encrypt(np.square(value)) for value in range(size)
        ]
        self.en_sum_wx_square = eggroll.parallelize(encrypted_squares)

        self.w = list(range(size))

        # All-ones feature lists; labels alternate 1, -1, 1, ...
        instances = []
        for row in range(size):
            label = 1 if row % 2 == 0 else -1
            instances.append(Instance(features=[1] * size, label=label))
        self.data_inst = eggroll.parallelize(instances, partition=1)

        # Expected fore gradient.
        self.fore_gradient_local = [
            -0.5, 0.75, 0, 1.25, 0.5, 1.75, 1, 2.25, 1.5, 2.75
        ]
        # Expected gradients without and with the intercept term.
        self.gradient = [1.125] * size
        self.gradient_fit_intercept = [1.125] * (size + 1)
        self.loss = 4.505647

    def test_compute_fore_gradient(self):
        """The decrypted fore gradient must equal the expected list."""
        fore_gradient = self.hetero_lr_gradient.compute_fore_gradient(
            self.data_inst, self.wx)
        decrypted = []
        for pair in fore_gradient.collect():
            # Each collected entry is indexed at [1] for the encrypted value.
            decrypted.append(self.paillier_encrypt.decrypt(pair[1]))
        self.assertListEqual(decrypted, self.fore_gradient_local)

    def test_compute_gradient(self):
        """Check the gradient first without, then with, the intercept term."""
        fore_gradient = self.hetero_lr_gradient.compute_fore_gradient(
            self.data_inst, self.wx)

        cases = (
            (False, self.gradient),
            (True, self.gradient_fit_intercept),
        )
        for fit_intercept, expected in cases:
            encrypted = self.hetero_lr_gradient.compute_gradient(
                self.data_inst, fore_gradient, fit_intercept=fit_intercept)
            decrypted = [
                self.paillier_encrypt.decrypt(item) for item in encrypted
            ]
            self.assertListEqual(decrypted, expected)

    def test_compute_gradient_and_loss(self):
        """Check the gradient exactly and the loss to within 1e-5."""
        fore_gradient = self.hetero_lr_gradient.compute_fore_gradient(
            self.data_inst, self.wx)
        encrypted_gradient, encrypted_loss = (
            self.hetero_lr_gradient.compute_gradient_and_loss(
                self.data_inst, fore_gradient, self.wx,
                self.en_sum_wx_square, False))

        decrypted_gradient = [
            self.paillier_encrypt.decrypt(item) for item in encrypted_gradient
        ]
        self.assertListEqual(decrypted_gradient, self.gradient)

        decrypted_loss = self.paillier_encrypt.decrypt(encrypted_loss)
        self.assertLess(np.abs(self.loss - decrypted_loss), 1e-5)
def test_diff_mode(self, round=10, mode="strict", re_encrypted_rate=0.2):
    """Encrypt shifted copies of the data and check they decrypt correctly.

    On each iteration ``i`` is added to every value of ``self.data_numpy``,
    the table is encrypted through an ``EncryptModeCalculator`` and decrypted
    again. The entries at keys 0..29 are then compared, element-wise and
    within 1e-5, against ``self.numpy_data`` shifted by the same ``i``.

    Args:
        round: Number of encrypt/decrypt iterations. The name shadows the
            builtin but is kept so existing keyword callers still work.
        mode: Passed through to ``EncryptModeCalculator``.
        re_encrypted_rate: Passed through to ``EncryptModeCalculator``.
    """
    from federatedml.secureprotol.encrypt_mode import EncryptModeCalculator
    from federatedml.secureprotol import PaillierEncrypt

    encrypter = PaillierEncrypt()
    encrypter.generate_key(1024)
    encrypted_calculator = EncryptModeCalculator(encrypter, mode,
                                                 re_encrypted_rate)

    for i in range(round):
        data_i = self.data_numpy.mapValues(lambda v: v + i)
        data_i = encrypted_calculator.encrypt(data_i)
        decrypt_data_i = dict(
            data_i.mapValues(lambda arr: np.array(
                [encrypter.decrypt(val) for val in arr])).collect())
        for j in range(30):
            residual = np.fabs(self.numpy_data[j] - decrypt_data_i[j] + i)
            # Compare each element with the tolerance first and reduce
            # afterwards. The previous form, `residual.all() < 1e-5`,
            # compared the boolean "all elements are non-zero" with the
            # tolerance, so it never checked the magnitudes at all.
            self.assertTrue(np.all(residual < 1e-5))
class HeteroFeatureBinningGuest(BaseHeteroFeatureBinning):
    """Guest side of hetero feature binning.

    The guest generates a Paillier key pair, sends the public key and the
    encrypted labels to the host, and decrypts the per-bin sums it gets back.
    """

    def __init__(self):
        super().__init__()
        encryptor = PaillierEncrypt()
        encryptor.generate_key()
        self.encryptor = encryptor
        self.local_transform_result = None
        self.party_name = consts.GUEST

    def fit(self, data_instances):
        """
        Fit split points locally, then compute IV for both parties.

        When the labels are not binary only the split points are fitted and
        the input table is returned unchanged. Otherwise the encrypted labels
        are sent to the host, local IV is computed, the host's encrypted bin
        sums are decrypted into IV/WOE attributes, and the table is
        transformed; ``self.data_output`` is returned.
        """
        LOGGER.info("Start feature binning fit and transform")
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        self.binning_obj.fit_split_points(data_instances)
        LOGGER.debug("After fit, binning_obj split_points: {}".format(
            self.binning_obj.split_points))

        # IV is only computed for binary labels; bail out early otherwise.
        if not data_overview.is_binary_labels(data_instances):
            LOGGER.warning("Iv is not supported for Multiple-label data.")
            return data_instances

        # 1. Share the public key with the host.
        self.__synchronize_encryption()

        # 2. Normalise labels to {0, 1} and extract them.
        data_instances = data_instances.mapValues(self.load_data)
        self.set_schema(data_instances)
        label_table = data_instances.mapValues(lambda inst: inst.label)

        # 3. Send the encrypted labels to the host.
        encrypt_label = functools.partial(self.encrypt,
                                          encryptor=self.encryptor)
        encrypted_label_table = label_table.mapValues(encrypt_label)
        self.transfer_variable.encrypted_label.remote(encrypted_label_table,
                                                      role=consts.HOST,
                                                      idx=0)
        LOGGER.info("Sent encrypted_label_table to host")

        # 4. Do the local binning now, while the host may still be computing.
        data_instances = self.fit_local(data_instances, label_table)

        # 5. Receive the host's encrypted bin sums and derive IV/WOE.
        encrypted_bin_sum = self.transfer_variable.encrypted_bin_sum.get(
            idx=0)
        LOGGER.info("Get encrypted_bin_sum from host")

        result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        host_iv_attrs = self.binning_obj.cal_iv_woe(
            result_counts, self.model_param.adjustment_factor)

        # Only a single host is handled here.
        self.host_results[consts.HOST] = host_iv_attrs
        self.set_schema(data_instances)

        LOGGER.debug("Before transform, binning_obj split_points: {}".format(
            self.binning_obj.split_points))
        self.transform(data_instances)
        LOGGER.info("Finish feature binning fit and transform")
        return self.data_output

    @staticmethod
    def encrypt(x, encryptor):
        """Return the encryptions of ``x`` and of ``1 - x`` as a pair."""
        encrypted_value = encryptor.encrypt(x)
        encrypted_complement = encryptor.encrypt(1 - x)
        return encrypted_value, encrypted_complement

    def transform_local(self, data_instances, label_table=None):
        """Recompute local IV using the split points of ``self.binning_result``."""
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        split_points = {
            col_name: iv_attr.split_points
            for col_name, iv_attr in self.binning_result.items()
        }
        self.local_transform_result = self.binning_obj.cal_local_iv(
            data_instances, split_points=split_points,
            label_table=label_table)

        for col_name, iv_attr in self.local_transform_result.items():
            LOGGER.info("The local feature {} 's iv is {}".format(
                col_name, iv_attr.iv))

        self.set_schema(data_instances)
        return data_instances

    def __synchronize_encryption(self):
        """Send the Paillier public key to the host and record that it was sent."""
        public_key = self.encryptor.get_public_key()
        self.transfer_variable.paillier_pubkey.remote(public_key,
                                                      role=consts.HOST,
                                                      idx=0)
        LOGGER.info("send pubkey to host")
        self.has_synchronized = True

    def __decrypt_bin_sum(self, encrypted_bin_sum):
        """Decrypt every (event, non_event) pair in place and return the mapping."""
        decrypt = self.encryptor.decrypt
        for col_name, count_list in encrypted_bin_sum.items():
            encrypted_bin_sum[col_name] = [
                (decrypt(encrypted_event), decrypt(encrypted_non_event))
                for encrypted_event, encrypted_non_event in count_list
            ]
        return encrypted_bin_sum

    def fit_local(self, data_instances, label_table=None):
        """Compute local IV attributes and store them in ``self.binning_result``."""
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        self.binning_result = self.binning_obj.cal_local_iv(
            data_instances, label_table=label_table)

        self.set_schema(data_instances)
        return data_instances

    @staticmethod
    def load_data(data_instance):
        """Map every label other than 1 to 0; 1 is treated as the event label."""
        if not data_instance.label == 1:
            data_instance.label = 0
        return data_instance
class HeteroFeatureBinningGuest(BaseHeteroFeatureBinning):
    """Guest side of hetero feature binning, exchanging data via ``federation``.

    The guest generates a Paillier key pair, sends the public key and the
    encrypted labels to the host, and decrypts the per-bin sums it gets back.
    """

    def __init__(self, params: FeatureBinningParam):
        super().__init__(params)
        encryptor = PaillierEncrypt()
        encryptor.generate_key()
        self.encryptor = encryptor
        self.local_transform_result = None
        self.party_name = consts.GUEST
        self._init_binning_obj()

    def fit(self, data_instances):
        """
        Compute binning results locally and for the host.

        Sends the encrypted labels to the host, computes local IV, then
        decrypts the host's bin sums into IV/WOE attributes that are stored
        in ``self.host_results``. Returns the label-normalised table.
        """
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        # 1. Share the public key with the host.
        self.__synchronize_encryption()

        # 2. Normalise labels to {0, 1} and extract them.
        data_instances = data_instances.mapValues(self.load_data)
        self.set_schema(data_instances)
        label_table = data_instances.mapValues(lambda inst: inst.label)

        # 3. Send the encrypted labels to the host.
        encrypt_label = functools.partial(self.encrypt,
                                          encryptor=self.encryptor)
        encrypted_label_table = label_table.mapValues(encrypt_label)
        label_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_label)
        federation.remote(encrypted_label_table,
                          name=self.transfer_variable.encrypted_label.name,
                          tag=label_tag,
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Sent encrypted_label_table to host")

        # 4. Do the local binning now, while the host may still be computing.
        data_instances = self.fit_local(data_instances, label_table)

        # 5. Receive the host's encrypted bin sums and derive IV/WOE.
        bin_sum_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_bin_sum)
        encrypted_bin_sum = federation.get(
            name=self.transfer_variable.encrypted_bin_sum.name,
            tag=bin_sum_tag,
            idx=0)
        LOGGER.info("Get encrypted_bin_sum from host")

        result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        host_iv_attrs = self.binning_obj.cal_iv_woe(
            result_counts, self.bin_param.adjustment_factor)

        # Only a single host is handled here.
        self.host_results[consts.HOST] = host_iv_attrs
        for cols_name, iv_attr in host_iv_attrs.items():
            shown = iv_attr.display_result(self.bin_param.display_result)
            LOGGER.info(
                "[Result][FeatureBinning][Host] feature {} 's result is : {}".
                format(cols_name, shown))

        self.set_schema(data_instances)
        return data_instances

    def transform(self, data_instances):
        """Repeat the label exchange and log local and host IV for the given table."""
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        # 1. Share the public key with the host.
        self.__synchronize_encryption()

        # 2. Normalise labels to {0, 1} and extract them.
        data_instances = data_instances.mapValues(self.load_data)
        label_table = data_instances.mapValues(lambda inst: inst.label)
        self.set_schema(data_instances)

        # 3. Send the encrypted labels to the host.
        encrypt_label = functools.partial(self.encrypt,
                                          encryptor=self.encryptor)
        encrypted_label_table = label_table.mapValues(encrypt_label)
        label_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_label)
        federation.remote(encrypted_label_table,
                          name=self.transfer_variable.encrypted_label.name,
                          tag=label_tag,
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Sent encrypted_label_table to host for transform")

        # 4. Transform locally, without persisting the result.
        self.transform_local(data_instances,
                             label_table=label_table,
                             save_result=False)

        # 5. Receive the host's encrypted bin sums and derive IV/WOE.
        bin_sum_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_bin_sum)
        encrypted_bin_sum = federation.get(
            name=self.transfer_variable.encrypted_bin_sum.name,
            tag=bin_sum_tag,
            idx=0)

        result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        host_iv_attrs = self.binning_obj.cal_iv_woe(
            result_counts, self.bin_param.adjustment_factor)

        for col_name, iv_attr in host_iv_attrs.items():
            LOGGER.info("The remote feature {} 's iv is {}".format(
                col_name, iv_attr.iv))

        self.set_schema(data_instances)
        return data_instances

    @staticmethod
    def encrypt(x, encryptor):
        """Return the encryptions of ``x`` and of ``1 - x`` as a pair."""
        encrypted_value = encryptor.encrypt(x)
        encrypted_complement = encryptor.encrypt(1 - x)
        return encrypted_value, encrypted_complement

    def transform_local(self,
                        data_instances,
                        label_table=None,
                        save_result=True):
        """Recompute local IV with the fitted split points; optionally save it."""
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        split_points = {
            col_name: iv_attr.split_points
            for col_name, iv_attr in self.binning_result.items()
        }
        self.local_transform_result = self.binning_obj.cal_local_iv(
            data_instances, split_points=split_points,
            label_table=label_table)

        if save_result:
            self.save_model(name=self.bin_param.transform_table,
                            namespace=self.bin_param.result_namespace,
                            binning_result=self.local_transform_result,
                            host_results={})

        for col_name, iv_attr in self.local_transform_result.items():
            LOGGER.info("The local feature {} 's iv is {}".format(
                col_name, iv_attr.iv))

        self.set_schema(data_instances)
        return data_instances

    def __synchronize_encryption(self):
        """Send the Paillier public key to the host and record that it was sent."""
        public_key = self.encryptor.get_public_key()
        key_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.paillier_pubkey)
        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=key_tag,
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("send pubkey to host")
        self.has_synchronized = True

    def __decrypt_bin_sum(self, encrypted_bin_sum):
        """Decrypt every (event, non_event) pair in place and return the mapping."""
        decrypt = self.encryptor.decrypt
        for col_name, count_list in encrypted_bin_sum.items():
            encrypted_bin_sum[col_name] = [
                (decrypt(encrypted_event), decrypt(encrypted_non_event))
                for encrypted_event, encrypted_non_event in count_list
            ]
        return encrypted_bin_sum

    def fit_local(self, data_instances, label_table=None):
        """Compute local IV attributes, log them, and store them in ``self.binning_result``."""
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        iv_attrs = self.binning_obj.cal_local_iv(data_instances,
                                                 label_table=label_table)
        for col_name, iv_attr in iv_attrs.items():
            shown = iv_attr.display_result(self.bin_param.display_result)
            LOGGER.info(
                "[Result][FeatureBinning][Guest] feature {} 's result is : {}".
                format(col_name, shown))

        self.binning_result = iv_attrs
        self.set_schema(data_instances)
        return data_instances

    @staticmethod
    def load_data(data_instance):
        """Map every label other than 1 to 0; 1 is treated as the event label."""
        if not data_instance.label == 1:
            data_instance.label = 0
        return data_instance
class HeteroFeatureBinningGuest(BaseHeteroFeatureBinning):
    """Guest side of hetero feature binning with list-shaped IV results.

    The guest generates a Paillier key pair, sends the public key and the
    encrypted labels to the host, and decrypts the per-bin sums it gets back.
    """

    def __init__(self, params: FeatureBinningParam):
        super().__init__(params)
        encryptor = PaillierEncrypt()
        encryptor.generate_key()
        self.encryptor = encryptor
        self.iv_attrs = None
        self.host_iv_attrs = None

    def fit(self, data_instances):
        """
        Compute IV attributes locally and for the host.

        Returns a dict with the local attributes under ``'local'`` and the
        host's attributes under ``'remote'``.
        """
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        # 1. Share the public key with the host.
        self.__synchronize_encryption()

        # 2. Normalise labels to {0, 1} and extract them.
        data_instances = data_instances.mapValues(self.load_data)
        label_table = data_instances.mapValues(lambda inst: inst.label)

        # 3. Send the encrypted labels to the host.
        encrypt_label = functools.partial(self.encrypt,
                                          encryptor=self.encryptor)
        encrypted_label_table = label_table.mapValues(encrypt_label)
        label_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_label)
        federation.remote(encrypted_label_table,
                          name=self.transfer_variable.encrypted_label.name,
                          tag=label_tag,
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Sent encrypted_label_table to host")

        # 4. Do the local binning now, while the host may still be computing.
        local_iv = self.fit_local(data_instances, label_table)

        # 5. Receive the host's encrypted bin sums and derive IV/WOE.
        bin_sum_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_bin_sum)
        encrypted_bin_sum = federation.get(
            name=self.transfer_variable.encrypted_bin_sum.name,
            tag=bin_sum_tag,
            idx=0)
        LOGGER.info("Get encrypted_bin_sum from host")

        result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        host_iv_attrs = self.binning_obj.cal_iv_woe(
            result_counts, self.bin_param.adjustment_factor)
        self.host_iv_attrs = host_iv_attrs

        for position, iv_attr in enumerate(host_iv_attrs):
            LOGGER.info("The remote iv of {}th measured feature is {}".format(
                position, iv_attr.iv))

        return {'local': local_iv, 'remote': host_iv_attrs}

    def transform(self, data_instances):
        """Repeat the label exchange for the given table and restore its header."""
        self._abnormal_detection(data_instances)
        # Keep the header so it can be written back to the mapped table.
        self.header = data_instances.schema.get('header')
        self._parse_cols(data_instances)

        # 1. Share the public key with the host.
        self.__synchronize_encryption()

        # 2. Normalise labels to {0, 1} and extract them.
        data_instances = data_instances.mapValues(self.load_data)
        label_table = data_instances.mapValues(lambda inst: inst.label)

        # 3. Send the encrypted labels to the host.
        encrypt_label = functools.partial(self.encrypt,
                                          encryptor=self.encryptor)
        encrypted_label_table = label_table.mapValues(encrypt_label)
        label_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_label)
        federation.remote(encrypted_label_table,
                          name=self.transfer_variable.encrypted_label.name,
                          tag=label_tag,
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Sent encrypted_label_table to host for transform")

        # 4. Transform locally; labels were already normalised above.
        self.transform_local(data_instances, reformated=True)

        # 5. Receive the host's encrypted bin sums and derive IV/WOE.
        bin_sum_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_bin_sum)
        encrypted_bin_sum = federation.get(
            name=self.transfer_variable.encrypted_bin_sum.name,
            tag=bin_sum_tag,
            idx=0)

        result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        host_iv_attrs = self.binning_obj.cal_iv_woe(
            result_counts, self.bin_param.adjustment_factor)
        self.host_iv_attrs = host_iv_attrs

        for position, iv_attr in enumerate(host_iv_attrs):
            LOGGER.info("The remote iv of {}th measured feature is {}".format(
                position, iv_attr.iv))

        data_instances.schema['header'] = self.header
        return data_instances

    @staticmethod
    def encrypt(x, encryptor):
        """Return the encryptions of ``x`` and of ``1 - x`` as a pair."""
        encrypted_value = encryptor.encrypt(x)
        encrypted_complement = encryptor.encrypt(1 - x)
        return encrypted_value, encrypted_complement

    def transform_local(self, data_instances, reformated=False):
        """Recompute ``self.iv_attrs`` on new data with the fitted split points.

        When ``reformated`` is false the labels are normalised first.
        Nothing is returned.
        """
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        if not reformated:
            # Labels have not been normalised by the caller yet.
            data_instances = data_instances.mapValues(self.load_data)

        split_points = [list(iv_attr.split_points) for iv_attr in self.iv_attrs]
        self.iv_attrs = self.binning_obj.cal_local_iv(data_instances,
                                                      self.cols,
                                                      split_points)

        for position, col in enumerate(self.cols):
            LOGGER.info("The local iv of {}th feature is {}".format(
                col, self.iv_attrs[position].iv))

    def __synchronize_encryption(self):
        """Send the Paillier public key to the host and record that it was sent."""
        public_key = self.encryptor.get_public_key()
        key_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.paillier_pubkey)
        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=key_tag,
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("send pubkey to host")
        self.has_synchronized = True

    def __decrypt_bin_sum(self, encrypted_bin_sum):
        """Decrypt every (event, non_event) pair in place and return the input."""
        decrypt = self.encryptor.decrypt
        for feature_sum in encrypted_bin_sum:
            for position, pair in enumerate(feature_sum):
                encrypted_event, encrypted_non_event = pair
                event_count = decrypt(encrypted_event)
                non_event_count = decrypt(encrypted_non_event)
                feature_sum[position] = (event_count, non_event_count)
        return encrypted_bin_sum

    def fit_local(self, data_instances, label_table=None):
        """Compute local IV attributes, log them, store and return them."""
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        iv_attrs = self.binning_obj.cal_local_iv(data_instances,
                                                 self.cols,
                                                 label_table=label_table)
        for position, col in enumerate(self.cols):
            LOGGER.info("The local iv of {}th feature is {}".format(
                col, iv_attrs[position].iv))

        self.iv_attrs = iv_attrs
        return iv_attrs

    @staticmethod
    def load_data(data_instance):
        """Map every label other than 1 to 0; 1 is treated as the event label."""
        if not data_instance.label == 1:
            data_instance.label = 0
        return data_instance
class TestHeteroLogisticGradient(unittest.TestCase):
    """Tests the guest-side hetero LR gradient and loss helper."""

    def setUp(self):
        """Generate a Paillier key pair and build the session fixture tables."""
        size = 10

        cipher = PaillierEncrypt()
        cipher.generate_key()
        self.paillier_encrypt = cipher
        self.hetero_lr_gradient = hetero_lr_gradient_and_loss.Guest()

        # Encrypted 0..size-1, and the encrypted squares of the same values.
        encrypted_wx = [cipher.encrypt(value) for value in range(size)]
        self.wx = session.parallelize(encrypted_wx)
        encrypted_squares = [
            cipher.encrypt(np.square(value)) for value in range(size)
        ]
        self.en_sum_wx_square = session.parallelize(encrypted_squares)

        self.w = list(range(size))

        # All-ones feature lists; labels alternate 1, -1, 1, ...
        instances = []
        for row in range(size):
            label = 1 if row % 2 == 0 else -1
            instances.append(Instance(features=[1] * size, label=label))
        self.data_inst = session.parallelize(instances, partition=1)

        # Expected fore gradient.
        self.fore_gradient_local = [
            -0.5, 0.75, 0, 1.25, 0.5, 1.75, 1, 2.25, 1.5, 2.75
        ]
        # Expected gradients without and with the intercept term.
        self.gradient = [1.125] * size
        self.gradient_fit_intercept = [1.125] * (size + 1)
        self.loss = 4.505647

    def test_compute_fore_gradient(self):
        """Aggregate forwards with a stub calculator and compare decrypted values."""
        paillier = self.paillier_encrypt

        class EncryptedCalculator(object):
            """Minimal stand-in exposing only an ``encrypt`` method for a table."""
            encrypter = paillier

            def encrypt_row(self, row):
                return np.array([self.encrypter.encrypt(row)])

            def encrypt(self, input_data):
                return input_data.mapValues(self.encrypt_row)

        model_weights = LinearModelWeights(l=self.w, fit_intercept=False)
        # The callee receives a list of calculators plus the index to use.
        encrypted_calculator = [EncryptedCalculator()]
        batch_index = 0

        fore_gradient = self.hetero_lr_gradient.compute_and_aggregate_forwards(
            self.data_inst, model_weights, encrypted_calculator, batch_index)

        decrypted = []
        for pair in fore_gradient.collect():
            # Each collected entry is indexed at [1] for the encrypted value.
            decrypted.append(self.paillier_encrypt.decrypt(pair[1]))
        self.assertListEqual(decrypted, self.fore_gradient_local)

    def test_compute_gradient(self):
        """Check the gradient first without, then with, the intercept term."""
        fore_gradient = self.hetero_lr_gradient.compute_fore_gradient(
            self.data_inst, self.wx)

        cases = (
            (False, self.gradient),
            (True, self.gradient_fit_intercept),
        )
        for fit_intercept, expected in cases:
            encrypted = self.hetero_lr_gradient.compute_gradient(
                self.data_inst, fore_gradient, fit_intercept=fit_intercept)
            decrypted = [
                self.paillier_encrypt.decrypt(item) for item in encrypted
            ]
            self.assertListEqual(decrypted, expected)

    def test_compute_gradient_and_loss(self):
        """Check the gradient exactly and the loss to within 1e-5."""
        fore_gradient = self.hetero_lr_gradient.compute_fore_gradient(
            self.data_inst, self.wx)
        encrypted_gradient, encrypted_loss = (
            self.hetero_lr_gradient.compute_gradient_and_loss(
                self.data_inst, fore_gradient, self.wx,
                self.en_sum_wx_square, False))

        decrypted_gradient = [
            self.paillier_encrypt.decrypt(item) for item in encrypted_gradient
        ]
        self.assertListEqual(decrypted_gradient, self.gradient)

        decrypted_loss = self.paillier_encrypt.decrypt(encrypted_loss)
        self.assertLess(np.abs(self.loss - decrypted_loss), 1e-5)