def select_backward_sample(self, selective_ids):
    """Cache the encrypted-input and activation rows picked by the
    backward-selection strategy.

    Selected rows of ``self.input`` are re-keyed so that they append
    after the rows already held in ``self.input_cached``; the matching
    activation rows are stacked onto ``self.activation_cached``.
    """
    base = self.input_cached.shape[0]
    # new key = position within this selection + number of rows already cached
    key_to_offset = {sid: pos + base for pos, sid in enumerate(selective_ids)}
    picked = self.input.get_obj() \
        .filter(lambda k, v: k in key_to_offset) \
        .map(lambda k, v: (key_to_offset[k], v))
    if base == 0:
        # first selection: the picked rows become the whole cache
        self.input_cached = PaillierTensor(tb_obj=picked)
        self.activation_cached = self.activation_input[selective_ids]
    else:
        # append to the existing cache
        self.input_cached = PaillierTensor(
            tb_obj=self.input_cached.get_obj().union(picked))
        self.activation_cached = np.vstack(
            (self.activation_cached, self.activation_input[selective_ids]))
def decrypt_guest_data(self, epoch_idx, local_round=-1):
    """Receive the guest's masked constant and gradient table, decrypt
    both with the local encrypter, and send the plaintexts back."""
    suffix = (epoch_idx, local_round,)
    enc_const = self.transfer_variable.guest_side_const.get(
        suffix=suffix, idx=0)
    grad_table = self.transfer_variable.guest_side_gradients.get(
        suffix=suffix, idx=0)
    grad_pt = PaillierTensor(tb_obj=grad_table, partitions=self.partitions)
    plain_grad = grad_pt.decrypt(self.encrypter)
    plain_const = self.encrypter.recursive_decrypt(enc_const)
    self.transfer_variable.decrypted_guest_const.remote(
        plain_const, suffix=suffix)
    self.transfer_variable.decrypted_guest_gradients.remote(
        plain_grad.get_obj(), suffix=suffix)
def backward(self, epoch, batch):
    """Host-side backward step of the interactive layer.

    Decrypts the guest's weight gradient (adding a fresh noise term
    scaled by 1/learning_rate before returning it), ships the encrypted
    accumulated noise to the guest, accumulates the new noise locally,
    and finally returns the decrypted gradient w.r.t. the host input as
    a numpy array.
    """
    enc_w_grad = self.get_guest_encrypted_weight_gradient_from_guest(
        epoch, batch)
    LOGGER.info("decrypt weight gradient of epoch {} batch {}".format(
        epoch, batch))
    w_grad = self.encrypter.recursive_decrypt(enc_w_grad)
    noise = self.rng_generator.generate_random_number(
        (self.input_shape, self.output_unit))
    # mask the decrypted gradient so the guest never sees it in the clear
    w_grad += noise / self.learning_rate
    self.send_guest_decrypted_weight_gradient_to_guest(w_grad, epoch, batch)
    LOGGER.info("encrypt acc_noise of epoch {} batch {}".format(
        epoch, batch))
    self.send_encrypted_acc_noise_to_guest(
        self.encrypter.recursive_encrypt(self.acc_noise), epoch, batch)
    self.acc_noise += noise
    input_grad_pt = PaillierTensor(
        tb_obj=self.get_host_backward_from_guest(epoch, batch))
    return input_grad_pt.decrypt(self.encrypter).numpy()
def setUp(self):
    """Start a fresh session and build two all-ones 1000x10 tensors."""
    session.init("test_paillier_tensor" + str(random.random()), 0)
    for data_attr, tensor_attr in (("data1", "paillier_tensor1"),
                                   ("data2", "paillier_tensor2")):
        arr = np.ones((1000, 10))
        setattr(self, data_attr, arr)
        setattr(self, tensor_attr, PaillierTensor(ori_data=arr, partitions=10))
def decrypt_host_data(self, epoch_idx, local_round=-1):
    """Decrypt the masked gradient table received from host and send the
    plaintext back."""
    recv_suffix = (epoch_idx, local_round, 'host_de_send')
    send_suffix = (epoch_idx, local_round, 'host_de_get')
    masked_tb = self.transfer_variable.host_side_gradients.get(
        suffix=recv_suffix, idx=0)
    masked_pt = PaillierTensor(tb_obj=masked_tb, partitions=self.partitions)
    self.transfer_variable.decrypted_host_gradients.remote(
        masked_pt.decrypt(self.encrypter).get_obj(), suffix=send_suffix)
def forward_interactive(self, encrypted_host_input, epoch, batch, train=True):
    """Run the noise-masked forward exchange with the host for one batch.

    Computes the encrypted dense output of the host-side sub-model,
    optionally applies drop-out column selection, adds guest-generated
    noise before sending the result to the host for decryption, then
    subtracts the noise from the decrypted reply and returns it as a
    PaillierTensor (re-expanded to full width when drop-out was used).
    """
    LOGGER.info(
        "get encrypted dense output of host model of epoch {} batch {}".
        format(epoch, batch))
    dense_out = self.host_model.forward_dense(encrypted_host_input,
                                              self.fixed_point_encoder)
    mask_table = None
    if train:
        self._create_drop_out(dense_out.shape)
        if self.drop_out:
            mask_table = self.drop_out.generate_mask_table()
    self.encrypted_host_dense_output = dense_out
    if mask_table:
        # keep only the columns that survive drop-out
        dense_out = dense_out.select_columns(mask_table)
    noise = self.rng_generator.fast_generate_random_number(
        dense_out.shape, dense_out.partitions, keep_table=mask_table)
    if self.fixed_point_encoder:
        dense_out += noise.encode(self.fixed_point_encoder)
    else:
        dense_out += noise
    self.send_guest_encrypted_forward_output_with_noise_to_host(
        dense_out.get_obj(), epoch, batch)
    if mask_table:
        self.send_interactive_layer_drop_out_table(mask_table, epoch, batch)
    LOGGER.info(
        "get decrypted dense output of host model of epoch {} batch {}".
        format(epoch, batch))
    decrypted = self.get_guest_decrypted_forward_from_host(epoch, batch)
    denoised = PaillierTensor(tb_obj=decrypted) - noise
    if mask_table:
        # re-expand the dropped columns back to the full output width
        expanded = denoised.get_obj().join(mask_table, self.expand_columns)
        return PaillierTensor(tb_obj=expanded)
    return denoised
def fast_generate_random_number(self, shape, partition=10,
                                mixed_rate=MIXED_RATE, keep_table=None):
    """Build a distributed random PaillierTensor.

    When ``keep_table`` is given, one random row is generated per entry
    of that table (shaped by each entry's keep array); otherwise one
    random row of shape ``shape[1:]`` is generated for each of the
    ``shape[0]`` first-axis indices.
    """
    if keep_table:
        rand_tb = keep_table.mapValues(
            lambda keep: self.generate_random_number(keep=keep,
                                                     mixed_rate=mixed_rate))
    else:
        rand_tb = computing_session.parallelize(
            [None] * shape[0], include_key=False, partition=partition)
        rand_tb = rand_tb.mapValues(
            lambda _: self.generate_random_number(shape[1:],
                                                  mixed_rate=mixed_rate))
    return PaillierTensor(tb_obj=rand_tb)
def forward(self, guest_input, epoch=0, batch=0, train=True):
    """Guest-side forward pass of the interactive layer.

    Fetches the host's encrypted bottom output, lazily builds the
    interactive model on first use, runs the masked forward exchange,
    combines host and guest dense outputs, applies the activation and
    (during training) drop-out, and returns the activation output.
    """
    LOGGER.info(
        "interactive layer start forward propagation of epoch {} batch {}".
        format(epoch, batch))
    host_in = PaillierTensor(
        tb_obj=self.get_host_encrypted_forward_from_host(epoch, batch))
    if not self.partitions:
        self.partitions = host_in.partitions
    self.encrypted_host_input = host_in
    self.guest_input = guest_input
    if self.guest_model is None:
        # first batch: build the interactive model from observed shapes
        LOGGER.info("building interactive layers' training model")
        self.host_input_shape = host_in.shape[1]
        self.guest_input_shape = guest_input.shape[
            1] if guest_input is not None else 0
        self.__build_model()
    if not self.sync_output_unit:
        self.sync_output_unit = True
        self.sync_interactive_layer_output_unit(
            self.host_model.output_shape[0])
    host_output = self.forward_interactive(host_in, epoch, batch, train)
    guest_output = self.guest_model.forward_dense(guest_input)
    if self.guest_model.empty:
        dense_output_data = host_output
    else:
        dense_output_data = host_output + PaillierTensor(
            ori_data=guest_output, partitions=self.partitions)
    self.dense_output_data = dense_output_data
    self.guest_output = guest_output
    self.host_output = host_output
    LOGGER.info(
        "start to get interactive layer's activation output of epoch {} batch {}"
        .format(epoch, batch))
    activation_out = self.host_model.forward_activation(
        self.dense_output_data.numpy())
    LOGGER.info(
        "end to get interactive layer's activation output of epoch {} batch {}"
        .format(epoch, batch))
    if train and self.drop_out:
        activation_out = self.drop_out.forward(activation_out)
    return activation_out
def exchange_components(self, comp_to_send, epoch_idx):
    """Send guest components to host and receive host components.

    Sends [y_overlap_2_phi_2, y_overlap_phi, mapping_comp_a] and
    receives [overlap_ub, overlap_ub_2, mapping_comp_b]; in encrypted
    mode the received tables are wrapped as PaillierTensors.
    """
    if self.mode == 'encrypted':
        comp_to_send = self.encrypt_tensor(comp_to_send)
    sfx = (epoch_idx, )
    send_vars = (self.transfer_variable.y_overlap_2_phi_2,
                 self.transfer_variable.y_overlap_phi,
                 self.transfer_variable.mapping_comp_a)
    for var, comp in zip(send_vars, comp_to_send):
        var.remote(comp, suffix=sfx)
    recv_vars = (self.transfer_variable.overlap_ub,
                 self.transfer_variable.overlap_ub_2,
                 self.transfer_variable.mapping_comp_b)
    host_components = [var.get(idx=0, suffix=sfx) for var in recv_vars]
    if self.mode == 'encrypted':
        return [PaillierTensor(tb_obj=tb, partitions=self.partitions)
                for tb in host_components]
    return host_components
def decrypt_inter_result(self, loss_grad_b, epoch_idx, local_round=-1):
    """Mask ``loss_grad_b`` with random noise, have the guest decrypt
    it, subtract the noise from the reply, and return the plaintext."""
    mask = PaillierTensor(
        ori_data=self.rng_generator.generate_random_number(
            loss_grad_b.shape),
        partitions=self.partitions)
    masked = loss_grad_b + mask
    self.transfer_variable.host_side_gradients.remote(
        masked.get_obj(),
        suffix=(epoch_idx, local_round, 'host_de_send'))
    plain_tb = self.transfer_variable.decrypted_host_gradients \
        .get(suffix=(epoch_idx, local_round, 'host_de_get'), idx=0)
    return PaillierTensor(tb_obj=plain_tb, partitions=self.partitions) - mask
def forward_interactive(self, encrypted_host_input, epoch, batch):
    """Noise-masked forward exchange with the host (variant without
    drop-out or fixed-point encoding).

    Adds guest noise to the encrypted dense output, lets the host
    decrypt it, then removes the noise from the reply.
    """
    LOGGER.info(
        "get encrypted dense output of host model of epoch {} batch {}".
        format(epoch, batch))
    dense_out = self.host_model.forward_dense(encrypted_host_input)
    self.encrypted_host_dense_output = dense_out
    noise = self.rng_generator.fast_generate_random_number(
        dense_out.shape, dense_out.partitions)
    dense_out += noise
    self.send_guest_encrypted_forward_output_with_noise_to_host(
        dense_out.get_obj(), epoch, batch)
    LOGGER.info(
        "get decrypted dense output of host model of epoch {} batch {}".
        format(epoch, batch))
    decrypted = self.get_guest_decrypted_forward_from_host(epoch, batch)
    return PaillierTensor(tb_obj=decrypted) - noise
def exchange_components(self, comp_to_send, epoch_idx):
    """Receive guest components and send host components.

    Receives [y_overlap_2_phi_2, y_overlap_phi, mapping_comp_a] first
    (matching the guest's send order), then sends
    [overlap_ub, overlap_ub_2, mapping_comp_b]; in encrypted mode the
    received tables are wrapped as PaillierTensors.
    """
    if self.mode == 'encrypted':
        comp_to_send = self.encrypt_tensor(comp_to_send)
    sfx = (epoch_idx, )
    recv_vars = (self.transfer_variable.y_overlap_2_phi_2,
                 self.transfer_variable.y_overlap_phi,
                 self.transfer_variable.mapping_comp_a)
    guest_components = [var.get(idx=0, suffix=sfx) for var in recv_vars]
    send_vars = (self.transfer_variable.overlap_ub,
                 self.transfer_variable.overlap_ub_2,
                 self.transfer_variable.mapping_comp_b)
    for var, comp in zip(send_vars, comp_to_send):
        var.remote(comp, suffix=sfx)
    if self.mode == 'encrypted':
        return [PaillierTensor(tb_obj=tb, partitions=self.partitions)
                for tb in guest_components]
    return guest_components
def encrypt_tensor(self, components, return_dtable=True):
    """Wrap each numpy component as a PaillierTensor and encrypt it.

    Lazily creates one encrypt-mode calculator per component (three in
    total). Returns the underlying tables when ``return_dtable`` is
    true, otherwise the PaillierTensor objects themselves.
    """
    if not self.encrypt_calculators:
        self.encrypt_calculators = [
            self.generated_encrypted_calculator() for _ in range(3)
        ]
    encrypted = []
    for comp, calculator in zip(components, self.encrypt_calculators):
        tensor = PaillierTensor(ori_data=comp, partitions=self.partitions)
        enc = tensor.encrypt(calculator)
        encrypted.append(enc.get_obj() if return_dtable else enc)
    return encrypted
def fast_generate_random_number(self, shape, partition=10):
    """Distributed random tensor: one random row of shape ``shape[1:]``
    per first-axis index, built across ``partition`` partitions."""
    placeholder = session.parallelize([None] * shape[0],
                                      include_key=False,
                                      partition=partition)
    rand_tb = placeholder.mapValues(
        lambda _: self.generate_random_number(shape[1:]))
    return PaillierTensor(tb_obj=rand_tb)
def compute_backward_gradients(self, host_components, data_loader: FTLDataLoader, epoch_idx, local_round=-1): """ compute backward gradients using host components """ # they are Paillier tensors or np array overlap_ub, overlap_ub_2, mapping_comp_b = host_components[0], host_components[1], host_components[2] y_overlap_2_phi = np.expand_dims(self.overlap_y_2 * self.phi, axis=1) if self.mode == 'plain': loss_grads_const_part1 = 0.25 * np.squeeze(np.matmul(y_overlap_2_phi, overlap_ub_2), axis=1) loss_grads_const_part2 = self.overlap_y * overlap_ub const = np.sum(loss_grads_const_part1, axis=0) - 0.5 * np.sum(loss_grads_const_part2, axis=0) grad_a_nonoverlap = self.alpha * const * data_loader.y[data_loader.get_non_overlap_indexes()] / self.data_num grad_a_overlap = self.alpha * const * self.overlap_y / self.data_num + mapping_comp_b return np.concatenate([grad_a_overlap, grad_a_nonoverlap], axis=0) elif self.mode == 'encrypted': loss_grads_const_part1 = overlap_ub_2.matmul_3d(0.25 * y_overlap_2_phi, multiply='right') loss_grads_const_part1 = loss_grads_const_part1.squeeze(axis=1) if self.overlap_y_pt is None: self.overlap_y_pt = PaillierTensor(self.overlap_y, partitions=self.partitions) loss_grads_const_part2 = overlap_ub * self.overlap_y_pt encrypted_const = loss_grads_const_part1.reduce_sum() - 0.5 * loss_grads_const_part2.reduce_sum() grad_a_overlap = self.overlap_y_pt.map_ndarray_product((self.alpha/self.data_num * encrypted_const)) + mapping_comp_b const, grad_a_overlap = self.decrypt_inter_result(encrypted_const, grad_a_overlap, epoch_idx=epoch_idx , local_round=local_round) self.decrypt_host_data(epoch_idx, local_round=local_round) grad_a_nonoverlap = self.alpha * const * data_loader.y[data_loader.get_non_overlap_indexes()]/self.data_num return np.concatenate([grad_a_overlap.numpy(), grad_a_nonoverlap], axis=0)
def backward(self, epoch, batch):
    """Host-side backward step with optional backward-sample selection
    and fixed-point decoding.

    Returns ``(input_gradient, selective_ids)``; the gradient is an
    empty list when the selection strategy says this round needs no
    backward pass.
    """
    selective_ids = []
    proceed = True
    if self.do_backward_select_strategy:
        selective_ids, proceed = self.sync_backward_select_info(
            epoch, batch)
    if not proceed:
        return [], selective_ids
    enc_w_grad = self.get_guest_encrypted_weight_gradient_from_guest(
        epoch, batch)
    LOGGER.info("decrypt weight gradient of epoch {} batch {}".format(
        epoch, batch))
    w_grad = self.encrypter.recursive_decrypt(enc_w_grad)
    noise = self.rng_generator.generate_random_number(
        (self.input_shape, self.output_unit))
    # mask the decrypted gradient so the guest never sees it in the clear
    w_grad += noise / self.learning_rate
    self.send_guest_decrypted_weight_gradient_to_guest(w_grad, epoch, batch)
    LOGGER.info("encrypt acc_noise of epoch {} batch {}".format(
        epoch, batch))
    self.send_encrypted_acc_noise_to_guest(
        self.encrypter.recursive_encrypt(self.acc_noise), epoch, batch)
    self.acc_noise += noise
    input_grad = PaillierTensor(
        tb_obj=self.get_host_backward_from_guest(epoch, batch)).decrypt(
            self.encrypter)
    if self.fixed_point_encoder:
        result = input_grad.decode(self.fixed_point_encoder).numpy()
    else:
        result = input_grad.numpy()
    return result, selective_ids
def decrypt_inter_result(self, encrypted_const, grad_a_overlap, epoch_idx,
                         local_round=-1):
    """Mask the encrypted const and overlap gradient with random noise,
    have the host decrypt them, then subtract the masks and return the
    plaintext ``(const, grad_a_overlap)``."""
    const_mask = self.rng_generator.generate_random_number(
        encrypted_const.shape)
    masked_const = encrypted_const + const_mask
    grad_mask = PaillierTensor(
        ori_data=self.rng_generator.generate_random_number(
            grad_a_overlap.shape),
        partitions=self.partitions)
    masked_grad = grad_a_overlap + grad_mask
    sfx = (epoch_idx, local_round,)
    self.transfer_variable.guest_side_const.remote(masked_const, suffix=sfx)
    self.transfer_variable.guest_side_gradients.remote(
        masked_grad.get_obj(), suffix=sfx)
    const = self.transfer_variable.decrypted_guest_const.get(
        suffix=sfx, idx=0) - const_mask
    grad = self.transfer_variable.decrypted_guest_gradients.get(
        suffix=sfx, idx=0)
    grad_a_overlap = PaillierTensor(
        tb_obj=grad, partitions=self.partitions) - grad_mask
    return const, grad_a_overlap
def update_host(self, activation_gradient, weight_gradient, acc_noise):
    """Apply one update to the host-side dense model and return the
    gradient w.r.t. its input (noise-corrected)."""
    grad_pt = PaillierTensor(ori_data=activation_gradient,
                             partitions=self.partitions)
    input_gradient = self.host_model.get_input_gradient(grad_pt, acc_noise)
    self.host_model.update_weight(weight_gradient)
    self.host_model.update_bias(activation_gradient)
    return input_gradient
def forward(self, host_input, epoch=0, batch=0):
    """Host-side forward pass of the interactive layer (older variant
    without fixed-point encoding or drop-out).

    Encrypts the host bottom output for the guest, decrypts the guest's
    encrypted forward, adds the accumulated-noise correction term, and
    sends the result back to the guest.
    """
    if batch >= len(self.train_encrypted_calculator):
        self.train_encrypted_calculator.append(
            self.generated_encrypted_calculator())
    LOGGER.info(
        "forward propagation: encrypt host_bottom_output of epoch {} batch {}"
        .format(epoch, batch))
    host_input = PaillierTensor(ori_data=host_input,
                                partitions=self.partitions)
    enc_host_input = host_input.encrypt(
        self.train_encrypted_calculator[batch])
    self.send_host_encrypted_forward_to_guest(enc_host_input.get_obj(),
                                              epoch, batch)
    enc_guest_fwd = PaillierTensor(
        tb_obj=self.get_guest_encrypted_forwrad_from_guest(epoch, batch))
    dec_guest_fwd = enc_guest_fwd.decrypt(self.encrypter)
    if self.acc_noise is None:
        # first batch: learn shapes and start noise accumulation at zero
        self.input_shape = host_input.shape[1]
        self.output_unit = enc_guest_fwd.shape[1]
        self.acc_noise = np.zeros((self.input_shape, self.output_unit))
    # NOTE: original author left the marker "some bugs here" at this point
    noisy_fwd = dec_guest_fwd + host_input * self.acc_noise
    self.send_decrypted_guest_forward_with_noise_to_guest(
        noisy_fwd.get_obj(), epoch, batch)
def forward(self, host_input, epoch=0, batch=0, train=True):
    """Host-side forward pass of the interactive layer (variant with
    fixed-point decoding and drop-out mask support).

    Encrypts the host bottom output for the guest, decrypts (and
    optionally decodes) the guest's encrypted forward, adds the
    accumulated-noise correction (restricted to kept columns when a
    drop-out mask is active), and sends the result back to the guest.
    """
    if batch >= len(self.train_encrypted_calculator):
        self.train_encrypted_calculator.append(
            self.generated_encrypted_calculator())
    LOGGER.info(
        "forward propagation: encrypt host_bottom_output of epoch {} batch {}"
        .format(epoch, batch))
    host_input = PaillierTensor(ori_data=host_input,
                                partitions=self.partitions)
    enc_host_input = host_input.encrypt(
        self.train_encrypted_calculator[batch])
    self.send_host_encrypted_forward_to_guest(enc_host_input.get_obj(),
                                              epoch, batch)
    guest_fwd = PaillierTensor(
        tb_obj=self.get_guest_encrypted_forwrad_from_guest(
            epoch, batch)).decrypt(self.encrypter)
    if self.fixed_point_encoder:
        guest_fwd = guest_fwd.decode(self.fixed_point_encoder)
    if self.acc_noise is None:
        # first batch: learn shapes and start noise accumulation at zero
        self.input_shape = host_input.shape[1]
        self.output_unit = self.get_interactive_layer_output_unit()
        self.acc_noise = np.zeros((self.input_shape, self.output_unit))
    mask_table = None
    if train and self.drop_out_keep_rate and self.drop_out_keep_rate < 1:
        mask_table = self.get_interactive_layer_drop_out_table(epoch, batch)
    noise_term = host_input * self.acc_noise
    if mask_table:
        noisy_fwd = guest_fwd + noise_term.select_columns(mask_table)
        self.mask_table = mask_table
    else:
        noisy_fwd = guest_fwd + noise_term
    self.send_decrypted_guest_forward_with_noise_to_guest(
        noisy_fwd.get_obj(), epoch, batch)
class TestPaillierTensor(unittest.TestCase):
    """Unit tests for basic PaillierTensor arithmetic, transpose,
    aggregation, and encrypt/decrypt round-trips."""

    def setUp(self):
        session.init(str(random.randint(0, 10000000)), 0)
        self.data1 = np.ones((1000, 10))
        self.data2 = np.ones((1000, 10))
        self.paillier_tensor1 = PaillierTensor(ori_data=self.data1, partitions=10)
        self.paillier_tensor2 = PaillierTensor(ori_data=self.data2, partitions=10)

    def test_tensor_add(self):
        """tensor + tensor keeps shape and sums element-wise."""
        paillier_tensor = self.paillier_tensor1 + self.paillier_tensor2
        self.assertTrue(isinstance(paillier_tensor, PaillierTensor))
        self.assertTrue(paillier_tensor.shape == self.paillier_tensor1.shape)
        arr = paillier_tensor.numpy()
        self.assertTrue(abs(arr.sum() - 20000) < consts.FLOAT_ZERO)

    def test_ndarray_add(self):
        """tensor + broadcast ndarray keeps shape and sums element-wise."""
        paillier_tensor = self.paillier_tensor1 + np.ones(10)
        self.assertTrue(isinstance(paillier_tensor, PaillierTensor))
        self.assertTrue(paillier_tensor.shape == self.paillier_tensor1.shape)
        arr = paillier_tensor.numpy()
        self.assertTrue(abs(arr.sum() - 20000) < consts.FLOAT_ZERO)

    def test_tensor_sub(self):
        """tensor - tensor of equal values is all zeros."""
        paillier_tensor = self.paillier_tensor1 - self.paillier_tensor2
        self.assertTrue(isinstance(paillier_tensor, PaillierTensor))
        self.assertTrue(paillier_tensor.shape == self.paillier_tensor1.shape)
        arr = paillier_tensor.numpy()
        self.assertTrue(abs(arr.sum()) < consts.FLOAT_ZERO)

    def test_ndarray_sub(self):
        """tensor - broadcast ndarray of equal values is all zeros.

        BUG FIX: this method was previously also named
        ``test_tensor_sub``, which shadowed the tensor-tensor
        subtraction test above so it never ran.
        """
        paillier_tensor = self.paillier_tensor1 - np.ones(10)
        self.assertTrue(isinstance(paillier_tensor, PaillierTensor))
        self.assertTrue(paillier_tensor.shape == self.paillier_tensor1.shape)
        arr = paillier_tensor.numpy()
        self.assertTrue(abs(arr.sum()) < consts.FLOAT_ZERO)

    def test_constant_mul(self):
        """tensor * scalar keeps shape and scales element-wise."""
        paillier_tensor = self.paillier_tensor1 * 10
        self.assertTrue(isinstance(paillier_tensor, PaillierTensor))
        self.assertTrue(paillier_tensor.shape == self.paillier_tensor1.shape)
        arr = paillier_tensor.numpy()
        self.assertTrue(abs(arr.sum() - 100000) < consts.FLOAT_ZERO)

    def test_inverse(self):
        """Transpose swaps the two axes."""
        paillier_tensor = self.paillier_tensor2.T
        self.assertTrue(isinstance(paillier_tensor, PaillierTensor))
        self.assertTrue(paillier_tensor.shape == tuple([10, 1000]))

    def test_get_partition(self):
        self.assertTrue(self.paillier_tensor1.partitions == 10)

    def test_mean(self):
        self.assertTrue(abs(self.paillier_tensor1.mean() - 1.0) < consts.FLOAT_ZERO)

    def test_encrypt_and_decrypt(self):
        """encrypt followed by decrypt recovers the original values."""
        from federatedml.secureprotol import PaillierEncrypt
        from federatedml.secureprotol.encrypt_mode import EncryptModeCalculator
        encrypter = PaillierEncrypt()
        encrypter.generate_key(1024)
        encrypted_calculator = EncryptModeCalculator(encrypter, "fast")
        encrypter_tensor = self.paillier_tensor1.encrypt(encrypted_calculator)
        decrypted_tensor = encrypter_tensor.decrypt(encrypter)
        self.assertTrue(isinstance(encrypter_tensor, PaillierTensor))
        self.assertTrue(isinstance(decrypted_tensor, PaillierTensor))
        arr = decrypted_tensor.numpy()
        self.assertTrue(abs(arr.sum() - 10000) < consts.FLOAT_ZERO)
class FTLGuest(FTL):
    """Guest-side party of heterogeneous Federated Transfer Learning.

    Holds the labels, computes Φ (phi) and the guest components, exchanges
    masked/encrypted components with the single host party, and drives the
    training loop (fit) and prediction.
    """

    def __init__(self):
        super(FTLGuest, self).__init__()
        self.phi = None  # Φ_A
        self.phi_product = None  # (Φ_A)‘(Φ_A) [feature_dim, feature_dim]
        self.overlap_y = None  # y_i ∈ N_c
        self.overlap_y_2 = None  # (y_i ∈ N_c )^2
        self.overlap_ua = None  # u_i ∈ N_AB
        self.constant_k = None  # κ
        self.feat_dim = None  # output feature dimension
        self.send_components = None  # components to send
        self.convergence = None
        self.overlap_y_pt = None  # paillier tensor
        self.history_loss = []  # list to record history loss
        self.role = consts.GUEST

    def init_intersect_obj(self):
        # Build the RSA intersection object used to find overlap samples.
        intersect_obj = intersect_guest.RsaIntersectionGuest()
        intersect_obj.guest_party_id = self.component_properties.local_partyid
        intersect_obj.host_party_id_list = self.component_properties.host_party_idlist
        intersect_obj.load_params(self.intersect_param)
        LOGGER.debug('intersect done')
        return intersect_obj

    def check_convergence(self, loss):
        # Lazily build a "diff" convergence checker, then test this loss.
        LOGGER.info("check convergence")
        if self.convergence is None:
            self.convergence = converge_func_factory("diff", self.tol)
        return self.convergence.is_converge(loss)

    def compute_phi_and_overlap_ua(self, data_loader: FTLDataLoader):
        """
        compute Φ and ua of overlap samples
        """
        phi = None  # [1, feature_dim] Φ_A
        overlap_ua = []
        for i in range(len(data_loader)):
            batch_x, batch_y = data_loader[i]
            ua_batch = self.nn.predict(batch_x)  # [batch_size, feature_dim]
            relative_overlap_index = data_loader.get_relative_overlap_index(i)
            if len(relative_overlap_index) != 0:
                if self.verbose:
                    LOGGER.debug('batch {}/{} overlap index is {}'.format(i, len(data_loader), relative_overlap_index))
                overlap_ua.append(ua_batch[relative_overlap_index])
            # accumulate sum over batches of y * u_a
            phi_tmp = np.expand_dims(np.sum(batch_y * ua_batch, axis=0), axis=0)
            if phi is None:
                phi = phi_tmp
            else:
                phi += phi_tmp
        phi = phi / self.data_num
        return phi, overlap_ua

    def batch_compute_components(self, data_loader: FTLDataLoader):
        """
        compute guest components
        """
        phi, overlap_ua = self.compute_phi_and_overlap_ua(data_loader)  # Φ_A [1, feature_dim]
        phi_product = np.matmul(phi.transpose(), phi)  # (Φ_A)‘(Φ_A) [feature_dim, feature_dim]
        if self.overlap_y is None:
            self.overlap_y = data_loader.get_overlap_y()  # {C(y)=y} [1, feat_dim]
        if self.overlap_y_2 is None:
            self.overlap_y_2 = self.overlap_y * self.overlap_y  # {D(y)=y^2} # [1, feat_dim]
        overlap_ua = np.concatenate(overlap_ua, axis=0)  # [overlap_num, feat_dim]
        # 3 components will be sent to host
        y_overlap_2_phi_2 = 0.25 * np.expand_dims(self.overlap_y_2, axis=2) * phi_product
        y_overlap_phi = -0.5 * self.overlap_y * phi
        mapping_comp_a = -overlap_ua * self.constant_k
        return phi, phi_product, overlap_ua, [y_overlap_2_phi_2, y_overlap_phi, mapping_comp_a]

    def exchange_components(self, comp_to_send, epoch_idx):
        """
        send guest components and get host components
        """
        if self.mode == 'encrypted':
            comp_to_send = self.encrypt_tensor(comp_to_send)

        # sending [y_overlap_2_phi_2, y_overlap_phi, mapping_comp_a]
        self.transfer_variable.y_overlap_2_phi_2.remote(comp_to_send[0], suffix=(epoch_idx, ))
        self.transfer_variable.y_overlap_phi.remote(comp_to_send[1], suffix=(epoch_idx, ))
        self.transfer_variable.mapping_comp_a.remote(comp_to_send[2], suffix=(epoch_idx, ))

        # receiving [overlap_ub, overlap_ub_2, mapping_comp_b]
        overlap_ub = self.transfer_variable.overlap_ub.get(idx=0, suffix=(epoch_idx, ))
        overlap_ub_2 = self.transfer_variable.overlap_ub_2.get(idx=0, suffix=(epoch_idx, ))
        mapping_comp_b = self.transfer_variable.mapping_comp_b.get(idx=0, suffix=(epoch_idx, ))
        host_components = [overlap_ub, overlap_ub_2, mapping_comp_b]

        if self.mode == 'encrypted':
            host_paillier_tensors = [PaillierTensor(tb_obj=tb, partitions=self.partitions) for tb in host_components]
            return host_paillier_tensors
        else:
            return host_components

    def decrypt_inter_result(self, encrypted_const, grad_a_overlap, epoch_idx, local_round=-1):
        """
        add random mask to encrypted inter-result, get decrypted data from host
        add subtract random mask
        """
        rand_0 = self.rng_generator.generate_random_number(encrypted_const.shape)
        encrypted_const = encrypted_const + rand_0
        rand_1 = PaillierTensor(ori_data=self.rng_generator.generate_random_number(grad_a_overlap.shape),
                                partitions=self.partitions)
        grad_a_overlap = grad_a_overlap + rand_1
        self.transfer_variable.guest_side_const.remote(encrypted_const, suffix=(epoch_idx, local_round,))
        self.transfer_variable.guest_side_gradients.remote(grad_a_overlap.get_obj(),
                                                           suffix=(epoch_idx, local_round,))
        const = self.transfer_variable.decrypted_guest_const.get(suffix=(epoch_idx, local_round, ), idx=0)
        grad = self.transfer_variable.decrypted_guest_gradients.get(suffix=(epoch_idx, local_round, ), idx=0)
        # remove the masks from the decrypted replies
        const = const - rand_0
        grad_a_overlap = PaillierTensor(tb_obj=grad, partitions=self.partitions) - rand_1
        return const, grad_a_overlap

    def decrypt_host_data(self, epoch_idx, local_round=-1):
        # Decrypt the host's masked gradient and send the plaintext back.
        inter_grad = self.transfer_variable.host_side_gradients.get(suffix=(epoch_idx, local_round, 'host_de_send'),
                                                                    idx=0)
        inter_grad_pt = PaillierTensor(tb_obj=inter_grad, partitions=self.partitions)
        self.transfer_variable.decrypted_host_gradients.remote(inter_grad_pt.decrypt(self.encrypter).get_obj(),
                                                               suffix=(epoch_idx, local_round, 'host_de_get'))

    def decrypt_loss_val(self, encrypted_loss, epoch_idx):
        # Round-trip the encrypted loss through the host for decryption.
        self.transfer_variable.encrypted_loss.remote(encrypted_loss, suffix=(epoch_idx, 'send_loss'))
        decrypted_loss = self.transfer_variable.decrypted_loss.get(idx=0, suffix=(epoch_idx, 'get_loss'))
        return decrypted_loss

    def compute_backward_gradients(self, host_components, data_loader: FTLDataLoader, epoch_idx, local_round=-1):
        """
        compute backward gradients using host components
        """
        # they are Paillier tensors or np array
        overlap_ub, overlap_ub_2, mapping_comp_b = host_components[0], host_components[1], host_components[2]
        y_overlap_2_phi = np.expand_dims(self.overlap_y_2 * self.phi, axis=1)
        if self.mode == 'plain':
            loss_grads_const_part1 = 0.25 * np.squeeze(np.matmul(y_overlap_2_phi, overlap_ub_2), axis=1)
            loss_grads_const_part2 = self.overlap_y * overlap_ub
            const = np.sum(loss_grads_const_part1, axis=0) - 0.5 * np.sum(loss_grads_const_part2, axis=0)
            grad_a_nonoverlap = self.alpha * const * data_loader.y[data_loader.get_non_overlap_indexes()] / self.data_num
            grad_a_overlap = self.alpha * const * self.overlap_y / self.data_num + mapping_comp_b
            return np.concatenate([grad_a_overlap, grad_a_nonoverlap], axis=0)
        elif self.mode == 'encrypted':
            # same math as the plain branch, done on encrypted tables
            loss_grads_const_part1 = overlap_ub_2.matmul_3d(0.25 * y_overlap_2_phi, multiply='right')
            loss_grads_const_part1 = loss_grads_const_part1.squeeze(axis=1)
            if self.overlap_y_pt is None:
                self.overlap_y_pt = PaillierTensor(self.overlap_y, partitions=self.partitions)
            loss_grads_const_part2 = overlap_ub * self.overlap_y_pt
            encrypted_const = loss_grads_const_part1.reduce_sum() - 0.5 * loss_grads_const_part2.reduce_sum()
            grad_a_overlap = self.overlap_y_pt.map_ndarray_product((self.alpha / self.data_num * encrypted_const)) + mapping_comp_b
            # masked decryption round-trip through the host
            const, grad_a_overlap = self.decrypt_inter_result(encrypted_const, grad_a_overlap,
                                                              epoch_idx=epoch_idx, local_round=local_round)
            self.decrypt_host_data(epoch_idx, local_round=local_round)
            grad_a_nonoverlap = self.alpha * const * data_loader.y[data_loader.get_non_overlap_indexes()] / self.data_num
            return np.concatenate([grad_a_overlap.numpy(), grad_a_nonoverlap], axis=0)

    def compute_loss(self, host_components, epoch_idx, overlap_num):
        """
        compute training loss
        """
        overlap_ub, overlap_ub_2, mapping_comp_b = host_components[0], host_components[1], host_components[2]
        if self.mode == 'plain':
            loss_overlap = np.sum((-self.overlap_ua * self.constant_k) * overlap_ub)
            ub_phi = np.matmul(overlap_ub, self.phi.transpose())
            part1 = -0.5 * np.sum(self.overlap_y * ub_phi)
            part2 = 1.0 / 8 * np.sum(ub_phi * ub_phi)
            part3 = len(self.overlap_y) * np.log(2)
            loss_y = part1 + part2 + part3
            return self.alpha * (loss_y / overlap_num) + loss_overlap / overlap_num
        elif self.mode == 'encrypted':
            loss_overlap = overlap_ub.element_wise_product((-self.overlap_ua * self.constant_k))
            # NOTE(review): 'sum' shadows the builtin of the same name
            sum = np.sum(loss_overlap.reduce_sum())
            ub_phi = overlap_ub.T.fast_matmul_2d(self.phi.transpose())
            part1 = -0.5 * np.sum((self.overlap_y * ub_phi))
            ub_2 = overlap_ub_2.reduce_sum()
            enc_phi_uB_2_phi = np.matmul(np.matmul(self.phi, ub_2), self.phi.transpose())
            part2 = 1 / 8 * np.sum(enc_phi_uB_2_phi)
            part3 = len(self.overlap_y) * np.log(2)
            loss_y = part1 + part2 + part3
            en_loss = (self.alpha / self.overlap_num) * loss_y + sum / overlap_num
            loss_val = self.decrypt_loss_val(en_loss, epoch_idx)
            return loss_val

    @staticmethod
    def sigmoid(x):
        # element-wise sigmoid over a 1-D iterable, returned as ndarray
        return np.array(list(map(sigmoid, x)))

    def generate_summary(self):
        # Summary dict persisted with the model: loss history, best
        # iteration, and (if validation ran) validation metrics.
        summary = {'loss_history': self.history_loss,
                   "best_iteration": -1 if self.validation_strategy is None else self.validation_strategy.best_iteration}
        if self.validation_strategy:
            summary['validation_metrics'] = self.validation_strategy.summary()
        return summary

    def check_host_number(self):
        # FTL supports exactly one host party.
        host_num = len(self.component_properties.host_party_idlist)
        LOGGER.info('host number is {}'.format(host_num))
        if host_num != 1:
            raise ValueError('only 1 host party is allowed')

    def fit(self, data_inst, validate_data=None):
        """Train the FTL model: intersect, build the NN, then iterate
        component exchange / gradient update / loss tracking per epoch."""
        LOGGER.debug('in training, partitions is {}'.format(data_inst.partitions))
        LOGGER.info('start to fit a ftl model, '
                    'run mode is {},'
                    'communication efficient mode is {}'.format(self.mode, self.comm_eff))
        self.check_host_number()
        data_loader, self.x_shape, self.data_num, self.overlap_num = self.prepare_data(self.init_intersect_obj(),
                                                                                       data_inst, guest_side=True)
        self.input_dim = self.x_shape[0]
        # cache data_loader for faster validation
        self.cache_dataloader[self.get_dataset_key(data_inst)] = data_loader
        self.partitions = data_inst.partitions
        LOGGER.debug('self partitions is {}'.format(self.partitions))
        self.initialize_nn(input_shape=self.x_shape)
        self.feat_dim = self.nn._model.output_shape[1]
        self.constant_k = 1 / self.feat_dim
        self.validation_strategy = self.init_validation_strategy(data_inst, validate_data)
        self.callback_meta("loss", "train", MetricMeta(name="train", metric_type="LOSS",
                                                       extra_metas={"unit_name": "iters"}))
        # compute intermediate result of first epoch
        self.phi, self.phi_product, self.overlap_ua, self.send_components = self.batch_compute_components(data_loader)
        for epoch_idx in range(self.epochs):
            LOGGER.debug('fitting epoch {}'.format(epoch_idx))
            host_components = self.exchange_components(self.send_components, epoch_idx=epoch_idx)
            loss = None
            # several local gradient rounds per exchange in comm-efficient mode
            for local_round_idx in range(self.local_round):
                if self.comm_eff:
                    LOGGER.debug('running local iter {}'.format(local_round_idx))
                grads = self.compute_backward_gradients(host_components, data_loader, epoch_idx=epoch_idx,
                                                        local_round=local_round_idx)
                self.update_nn_weights(grads, data_loader, epoch_idx, decay=self.comm_eff)
                if local_round_idx == 0:
                    loss = self.compute_loss(host_components, epoch_idx, len(data_loader.get_overlap_indexes()))
                if local_round_idx + 1 != self.local_round:
                    self.phi, self.overlap_ua = self.compute_phi_and_overlap_ua(data_loader)
            self.callback_metric("loss", "train", [Metric(epoch_idx, loss)])
            self.history_loss.append(loss)
            # updating variables for next epochs
            if epoch_idx + 1 == self.epochs:
                # only need to update phi in last epochs
                self.phi, _ = self.compute_phi_and_overlap_ua(data_loader)
            else:
                # compute phi, phi_product, overlap_ua etc. for next epoch
                self.phi, self.phi_product, self.overlap_ua, self.send_components = self.batch_compute_components(
                    data_loader)
            # check early_stopping_rounds
            if self.validation_strategy is not None:
                self.validation_strategy.validate(self, epoch_idx)
                if self.validation_strategy.need_stop():
                    LOGGER.debug('early stopping triggered')
                    break
            # check n_iter_no_change
            if self.n_iter_no_change is True:
                if self.check_convergence(loss):
                    self.sync_stop_flag(epoch_idx, stop_flag=True)
                    break
                else:
                    self.sync_stop_flag(epoch_idx, stop_flag=False)
            LOGGER.debug('fitting epoch {} done, loss is {}'.format(epoch_idx, loss))
        self.callback_meta("loss", "train", MetricMeta(name="train", metric_type="LOSS",
                                                       extra_metas={"Best": min(self.history_loss)}))
        self.set_summary(self.generate_summary())
        LOGGER.debug('fitting ftl model done')

    def predict(self, data_inst):
        """Predict on the overlap samples: score = sigmoid(host_u · Φ')."""
        LOGGER.debug('guest start to predict')
        data_loader_key = self.get_dataset_key(data_inst)
        data_inst_ = data_overview.header_alignment(data_inst, self.store_header)
        if data_loader_key in self.cache_dataloader:
            data_loader = self.cache_dataloader[data_loader_key]
        else:
            data_loader, _, _, _ = self.prepare_data(self.init_intersect_obj(), data_inst_, guest_side=True)
            self.cache_dataloader[data_loader_key] = data_loader
        LOGGER.debug('try to get predict u from host, suffix is {}'.format((0, 'host_u')))
        host_predicts = self.transfer_variable.predict_host_u.get(idx=0, suffix=(0, 'host_u'))
        predict_score = np.matmul(host_predicts, self.phi.transpose())
        predicts = self.sigmoid(predict_score)  # convert to predict scores
        predicts = list(map(float, predicts))
        predict_tb = session.parallelize(zip(data_loader.get_overlap_keys(), predicts,), include_key=True,
                                         partition=data_inst.partitions)
        threshold = self.predict_param.threshold
        predict_result = self.predict_score_to_output(data_inst_, predict_tb, classes=[0, 1], threshold=threshold)
        LOGGER.debug('ftl guest prediction done')
        return predict_result

    def export_model(self):
        # NOTE(review): the param key below says "FTLHostParam" although
        # this is the guest side — confirm this is intended.
        model_param = self.get_model_param()
        model_param.phi_a.extend(self.phi.tolist()[0])
        return {"FTLGuestMeta": self.get_model_meta(), "FTLHostParam": model_param}

    def load_model(self, model_dict):
        # Pick Meta/Param messages out of the nested model dict by suffix.
        model_param = None
        model_meta = None
        for _, value in model_dict["model"].items():
            for model in value:
                if model.endswith("Meta"):
                    model_meta = value[model]
                if model.endswith("Param"):
                    model_param = value[model]
        LOGGER.info("load model")
        self.set_model_meta(model_meta)
        self.set_model_param(model_param)
        self.phi = np.array([model_param.phi_a])
def test_tensor_op(self): arr1 = np.ones((10, 1, 3)) arr1[0] = np.array([[2, 3, 4]]) arr2 = np.ones((10, 3, 3)) arr3 = np.ones([1, 1, 3]) arr4 = np.ones([50, 1]) arr5 = np.ones([32]) pt = PaillierTensor(ori_data=arr1) pt2 = PaillierTensor(ori_data=arr2) pt3 = PaillierTensor(ori_data=arr3) pt4 = PaillierTensor(ori_data=arr4) pt5 = PaillierTensor(ori_data=arr5) encrypter = PaillierEncrypt() encrypter.generate_key(EncryptParam().key_length) encrypted_calculator = EncryptModeCalculator( encrypter, EncryptedModeCalculatorParam().mode, EncryptedModeCalculatorParam().re_encrypted_rate) rs1 = pt * arr2 rs2 = pt * pt2 rs3 = pt.matmul_3d(pt2) enpt = pt2.encrypt(encrypted_calculator) enrs = enpt.matmul_3d(arr1, multiply='right') rng_generator = random_number_generator.RandomNumberGenerator() enpt2 = pt4.encrypt(encrypted_calculator) random_num = rng_generator.generate_random_number(enpt2.shape)
class HostDenseModel(DenseModel):
    """Host-side interactive dense layer.

    Works on the host's forward input held as a PaillierTensor
    (``self.input``), so linear algebra is routed through PaillierTensor
    operations (``fast_matmul_2d``, ``*``) rather than plain numpy.
    """

    def __init__(self):
        super(HostDenseModel, self).__init__()
        # role tag distinguishing this side from the guest counterpart
        self.role = "host"

    def select_backward_sample(self, selective_ids):
        """Append the rows named by ``selective_ids`` to the backward cache.

        Selected rows are re-keyed to consecutive offsets continuing after
        the rows already cached, so the cache behaves like one growing
        batch. Both the (possibly encrypted) inputs (``input_cached``) and
        the matching plaintext activations (``activation_cached``) grow in
        lockstep.
        """
        cached_shape = self.input_cached.shape[0]
        # new rows get keys cached_shape, cached_shape+1, ... so they sit
        # directly after the rows already in the cache
        offsets = [i + cached_shape for i in range(len(selective_ids))]
        id_map = dict(zip(selective_ids, offsets))
        if cached_shape == 0:
            # empty cache: the selected, re-keyed rows become the cache
            self.input_cached = (self.input.get_obj().filter(
                lambda k, v: k in id_map).map(lambda k, v: (id_map[k], v)))
            self.input_cached = PaillierTensor(tb_obj=self.input_cached)
            self.activation_cached = self.activation_input[selective_ids]
        else:
            # non-empty cache: re-key the selected rows past the cached ones
            # and union them into the existing table
            selective_input = (self.input.get_obj().filter(
                lambda k, v: k in id_map).map(lambda k, v: (id_map[k], v)))
            self.input_cached = PaillierTensor(
                tb_obj=self.input_cached.get_obj().union(selective_input))
            self.activation_cached = np.vstack(
                (self.activation_cached, self.activation_input[selective_ids]))

    def forward_dense(self, x, encoder=None):
        """Forward pass ``x * W (+ bias)``, remembering ``x`` for the
        weight-gradient step.

        When ``encoder`` is supplied, the weights and bias are passed
        through ``encoder.encode`` before being combined with ``x``.
        """
        self.input = x
        if encoder is not None:
            output = x * encoder.encode(self.model_weight)
        else:
            output = x * self.model_weight
        if self.bias is not None:
            if encoder is not None:
                output += encoder.encode(self.bias)
            else:
                output += self.bias
        return output

    def get_input_gradient(self, delta, acc_noise, encoder=None):
        """Gradient w.r.t. this layer's input: ``delta * (W + acc_noise)^T``.

        Without an encoder the product is expanded into two terms
        (``delta*W.T + delta*noise.T``) — algebraically the same result by
        distributivity; with an encoder, ``delta`` is encoded first and the
        factored form is used.
        """
        if not encoder:
            error = delta * self.model_weight.T + delta * acc_noise.T
        else:
            error = delta.encode(encoder) * (self.model_weight + acc_noise).T
        return error

    def get_weight_gradient(self, delta, encoder=None):
        """Weight gradient via ``input.fast_matmul_2d(delta)`` averaged over
        the batch.

        Under the selective-backprop strategy, the current batch is first
        sliced off the front of ``input_cached`` (keys < batch_size) and the
        remainder is shifted down to start at key 0 for the next round.
        """
        if self.do_backward_selective_strategy:
            self.input = self.input_cached.filter(
                lambda k, v: k < self.batch_size)
            # NOTE(review): this map callback takes a single `kv` tuple while
            # every other map in this class uses a two-arg `lambda k, v` —
            # confirm which signature the underlying table's map() expects;
            # one of the two forms is likely wrong on this code path.
            # NOTE(review): filter/map are invoked on input_cached (a
            # PaillierTensor) directly, whereas select_backward_sample goes
            # through .get_obj() first — verify PaillierTensor forwards
            # these table methods.
            self.input_cached = self.input_cached.filter(
                lambda k, v: k >= self.batch_size).map(
                lambda kv: (kv[0] - self.batch_size, kv[1]))
        if encoder:
            delta_w = self.input.fast_matmul_2d(encoder.encode(delta))
        else:
            delta_w = self.input.fast_matmul_2d(delta)
        # normalize by the number of samples in the (current) input
        delta_w /= self.input.shape[0]
        return delta_w

    def update_weight(self, delta):
        """SGD step on the weights; ``delta`` is used as-is (already
        batch-averaged by get_weight_gradient)."""
        self.model_weight -= delta * self.lr

    def update_bias(self, delta):
        """SGD step on the bias; ``delta`` is averaged over the batch axis
        here before scaling by the learning rate."""
        self.bias -= np.mean(delta, axis=0) * self.lr