def fit(self, data_instance):
    # local correlation on this party's own (standardized) columns
    data = self._select_columns(data_instance)
    n, normed = self._standardized(data)
    self.local_corr = table_dot(normed, normed)
    self.local_corr /= n
    if self.model_param.cross_parties:
        with SPDZ("pearson", local_party=self.local_party,
                  all_parties=self.parties,
                  use_mix_rand=self.model_param.use_mix_rand) as spdz:
            # each party secret-shares its own standardized table and receives
            # shares of the other party's; the guest holds "x", the host "y"
            source = [normed, self.other_party]
            if self.local_party.role == "guest":
                x, y = (FixedPointTensor.from_source("x", source[0]),
                        FixedPointTensor.from_source("y", source[1]))
            else:
                y, x = (FixedPointTensor.from_source("y", source[0]),
                        FixedPointTensor.from_source("x", source[1]))
            m1 = len(x.value.first()[1])
            m2 = len(y.value.first()[1])
            self.shapes.append(m1)
            self.shapes.append(m2)
            # cross-party correlation, computed on the shares
            self.corr = spdz.dot(x, y, "corr").get() / n
    else:
        self.shapes.append(self.local_corr.shape[0])
        self.parties = [self.local_party]
    self._callback()
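# --- Illustrative sketch (not part of the FATE source): what the secure
# protocol above computes. With columns standardized to zero mean and unit
# variance, normed.T @ normed / n is exactly the Pearson matrix, which is
# what table_dot(normed, normed) / n and spdz.dot(x, y, "corr") / n mirror.
import numpy as np

def pearson_plaintext(a):
    """Pearson correlation of the columns of a (n samples x m features)."""
    n = a.shape[0]
    normed = (a - a.mean(axis=0)) / a.std(axis=0)
    return normed.T @ normed / n

# sanity check against numpy's reference implementation
_x = np.random.rand(100, 4)
assert np.allclose(pearson_plaintext(_x), np.corrcoef(_x, rowvar=False))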
def create_and_get(job_id, idx, data):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        # party 0 supplies the plaintext; the other party passes party 0's
        # identity to receive its shares of the same tensor
        if idx == 0:
            x = FixedPointTensor.from_source("x", data)
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
        return x.get()
def mul_plaintext(job_id, idx, data_list):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data_list[0])
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
        y = data_list[1]
        return (x * y).get(), (y * x).get()
def broadcast_reconstruct_share(self, tensor_name=None):
    from federatedml.secureprotol.spdz import SPDZ

    spdz = SPDZ.get_instance()
    share_val = self.value.copy()
    name = tensor_name or self.tensor_name
    if name is None:
        raise ValueError("name not specified")
    # remote share to other parties
    spdz.communicator.broadcast_rescontruct_share(share_val, name)
    return share_val
def einsum(job_id, idx, einsum_expr, data_list):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data_list[0])
            y = FixedPointTensor.from_source("y", all_parties[1])
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
            y = FixedPointTensor.from_source("y", data_list[1])
        return x.einsum(y, einsum_expr).get()
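# --- Illustrative sketch (not part of the FATE source): the plaintext
# meaning of an einsum expression that the secure x.einsum(y, einsum_expr)
# result should match, e.g. "ij,jk->ik" is an ordinary matrix product.
import numpy as np

_a = np.arange(6).reshape(2, 3)
_b = np.arange(12).reshape(3, 4)
assert np.array_equal(np.einsum("ij,jk->ik", _a, _b), _a @ _b)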
def mat_mul(job_id, idx, data_list):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data_list[0])
            y = FixedPointTensor.from_source("y", all_parties[1])
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
            y = FixedPointTensor.from_source("y", data_list[1])
        return (x @ y).get()
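# --- Illustrative sketch (not part of the FATE source): secure
# multiplication in SPDZ-style protocols rests on Beaver triples.
# Given a precomputed triple (a, b, c) with c = a*b mod q, parties can open
# the masked values e = x - a and f = y - b without leaking x or y, then
# recombine: c + e*b + f*a + e*f = x*y mod q.
import random

q = 2 ** 61 - 1  # a Mersenne prime, used here only as an example field
a, b = random.randrange(q), random.randrange(q)
c = (a * b) % q
x, y = 7, 5
e, f = (x - a) % q, (y - b) % q            # safe to reveal: uniformly masked
assert (c + e * b + f * a + e * f) % q == (x * y) % q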
def add_and_sub(job_id, idx, data_list):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data_list[0])
            y = FixedPointTensor.from_source("y", all_parties[1])
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
            y = FixedPointTensor.from_source("y", data_list[1])
        a = (x + y).get()
        b = (x - y).get()
        return a, b
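# --- Illustrative sketch (not part of the FATE source): why add_and_sub
# needs no interaction. Additive shares are linear, so each party adds or
# subtracts its own shares locally and the result still reconstructs.
q = 2 ** 61 - 1
x_shares = [123, (7 - 123) % q]          # two-party shares of 7
y_shares = [456, (5 - 456) % q]          # two-party shares of 5
sum_shares = [(u + v) % q for u, v in zip(x_shares, y_shares)]
assert sum(sum_shares) % q == 12         # reconstructs 7 + 5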
def add_and_sub_plaintext(job_id, idx, data_list):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data_list[0])
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
        y = data_list[1]

        a = (x + y).get()
        a1 = (y + x).get()
        b = (x - y).get()
        b1 = (y - x).get()
        return a, a1, b, b1
def rescontruct(self, tensor_name=None):
    from federatedml.secureprotol.spdz import SPDZ

    spdz = SPDZ.get_instance()
    share_val = self.value
    name = tensor_name or self.tensor_name
    if name is None:
        raise ValueError("name not specified")
    # remote share to other parties
    spdz.communicator.broadcast_rescontruct_share(share_val, name)
    # get shares from other parties
    for other_share in spdz.communicator.get_rescontruct_shares(name):
        share_val = _table_binary_op(share_val, other_share,
                                     self.q_field, operator.add)
    return share_val
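# --- Illustrative sketch (not part of the FATE source): the arithmetic that
# rescontruct() performs over tables. A secret is split into random shares
# that sum to it mod q_field; summing every party's share recovers it.
import random

def make_shares(secret, n_parties, q_field):
    shares = [random.randrange(q_field) for _ in range(n_parties - 1)]
    shares.append((secret - sum(shares)) % q_field)
    return shares

def recombine(shares, q_field):
    return sum(shares) % q_field

_q = 2 ** 61 - 1
assert recombine(make_shares(42, 3, _q), _q) == 42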
def fit(self, data_instance):
    # local correlation (and optionally VIF) on this party's own columns
    data = self._select_columns(data_instance)
    n, normed = self._standardized(data)
    self.local_corr = table_dot(normed, normed)
    self.local_corr /= n
    if self.model_param.calc_local_vif:
        self.local_vif = self._vif_from_pearson_matrix(self.local_corr)
    self._summary["local_corr"] = self.local_corr.tolist()
    self._summary["num_local_features"] = n

    if self.model_param.cross_parties:
        with SPDZ(
            "pearson",
            local_party=self.local_party,
            all_parties=self.parties,
            use_mix_rand=self.model_param.use_mix_rand,
        ) as spdz:
            # guest shares "x", host shares "y"; each receives the other's
            source = [normed, self.other_party]
            if self.local_party.role == "guest":
                x, y = (
                    FixedPointTensor.from_source("x", source[0]),
                    FixedPointTensor.from_source("y", source[1]),
                )
            else:
                y, x = (
                    FixedPointTensor.from_source("y", source[0]),
                    FixedPointTensor.from_source("x", source[1]),
                )
            m1 = len(x.value.first()[1])
            m2 = len(y.value.first()[1])
            self.shapes.append(m1)
            self.shapes.append(m2)

            # cross-party correlation, computed on the shares
            self.corr = spdz.dot(x, y, "corr").get() / n
            self._summary["corr"] = self.corr.tolist()
            self._summary["num_remote_features"] = (
                m2 if self.local_party.role == "guest" else m1)
    else:
        self.shapes.append(self.local_corr.shape[0])
        self.parties = [self.local_party]

    self._callback()
    self.set_summary(self._summary)
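# --- Illustrative sketch (not part of the FATE source): one standard way to
# derive VIF from a Pearson matrix, as _vif_from_pearson_matrix presumably
# does. When the correlation matrix R is invertible, the i-th variance
# inflation factor is the i-th diagonal entry of R^{-1}.
import numpy as np

def vif_from_pearson(corr):
    return np.diag(np.linalg.inv(corr))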
def fit(self, data_instance):
    data = self._select_columns(data_instance)
    n, normed = self._standardized(data)
    self.local_corr = table_dot(normed, normed)

    with SPDZ("pearson") as spdz:
        source = [normed, self._other_party]
        if self._local_party.role == "guest":
            x, y = (FixedPointTensor.from_source("x", source[0]),
                    FixedPointTensor.from_source("y", source[1]))
        else:
            y, x = (FixedPointTensor.from_source("y", source[0]),
                    FixedPointTensor.from_source("x", source[1]))
        m1 = len(x.value.first()[1])
        m2 = len(y.value.first()[1])
        self.shapes.append(m1)
        self.shapes.append(m2)
        self.corr = spdz.dot(x, y, "corr").get() / n

    self.local_corr /= n
    self._callback()
def reconstruct(self, tensor_name=None, broadcast=True):
    from federatedml.secureprotol.spdz import SPDZ

    spdz = SPDZ.get_instance()
    share_val = self.value.copy()
    LOGGER.debug(f"share_val: {share_val}")
    name = tensor_name or self.tensor_name
    if name is None:
        raise ValueError("name not specified")
    # remote share to other parties
    if broadcast:
        spdz.communicator.broadcast_rescontruct_share(share_val, name)
    # get shares from other parties and sum them in
    for other_share in spdz.communicator.get_rescontruct_shares(name):
        share_val += other_share
    # reduce into the field when the share type supports modulo; otherwise
    # return the raw sum (preserves the original broad-except behaviour)
    try:
        share_val %= self.q_field
    except BaseException:
        pass
    return share_val
def get_spdz(cls):
    from federatedml.secureprotol.spdz import SPDZ
    return SPDZ.get_instance()
def fit_binary(self, data_instances, validate_data=None):
    LOGGER.info("Starting to hetero_sshe_logistic_regression")
    self.callback_list.on_train_begin(data_instances, validate_data)

    model_shape = self.get_features_shape(data_instances)
    instances_count = data_instances.count()

    if not self.component_properties.is_warm_start:
        w = self._init_weights(model_shape)
        self.model_weights = LinearModelWeights(
            l=w, fit_intercept=self.model_param.init_param.fit_intercept)
        last_models = copy.deepcopy(self.model_weights)
    else:
        last_models = copy.deepcopy(self.model_weights)
        w = last_models.unboxed
        self.callback_warm_start_init_iter(self.n_iter_)

    self.batch_generator.initialize_batch_generator(
        data_instances, batch_size=self.batch_size)

    with SPDZ(
            "sshe_lr",
            local_party=self.local_party,
            all_parties=self.parties,
            q_field=self.q_field,
            use_mix_rand=self.model_param.use_mix_rand,
    ) as spdz:
        spdz.set_flowid(self.flowid)
        self.secure_matrix_obj.set_flowid(self.flowid)

        if self.role == consts.GUEST:
            self.labels = data_instances.mapValues(
                lambda x: np.array([x.label], dtype=int))

        # split the initial weights into secret shares held by both parties
        w_self, w_remote = self.share_model(w, suffix="init")
        last_w_self, last_w_remote = w_self, w_remote
        LOGGER.debug(
            f"first_w_self shape: {w_self.shape}, w_remote_shape: {w_remote.shape}")

        # encode every batch once, up front, into fixed-point tensors
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.cipher_tool = []
        encoded_batch_data = []
        for batch_data in batch_data_generator:
            if self.fit_intercept:
                batch_features = batch_data.mapValues(
                    lambda x: np.hstack((x.features, 1.0)))
            else:
                batch_features = batch_data.mapValues(lambda x: x.features)
            self.batch_num.append(batch_data.count())

            encoded_batch_data.append(
                fixedpoint_table.FixedPointTensor(
                    self.fixedpoint_encoder.encode(batch_features),
                    q_field=self.fixedpoint_encoder.n,
                    endec=self.fixedpoint_encoder))

            self.cipher_tool.append(
                EncryptModeCalculator(
                    self.cipher,
                    self.encrypted_mode_calculator_param.mode,
                    self.encrypted_mode_calculator_param.re_encrypted_rate))

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)
            LOGGER.info(f"start to n_iter: {self.n_iter_}")

            loss_list = []

            self.optimizer.set_iters(self.n_iter_)
            if not self.reveal_every_iter:
                self.self_optimizer.set_iters(self.n_iter_)
                self.remote_optimizer.set_iters(self.n_iter_)

            for batch_idx, batch_data in enumerate(encoded_batch_data):
                current_suffix = (str(self.n_iter_), str(batch_idx))

                # forward pass on plaintext weights or on the share pair
                if self.reveal_every_iter:
                    y = self.forward(weights=self.model_weights,
                                     features=batch_data,
                                     suffix=current_suffix,
                                     cipher=self.cipher_tool[batch_idx])
                else:
                    y = self.forward(weights=(w_self, w_remote),
                                     features=batch_data,
                                     suffix=current_suffix,
                                     cipher=self.cipher_tool[batch_idx])

                # backward pass: only the guest holds the labels
                if self.role == consts.GUEST:
                    error = y - self.labels
                    self_g, remote_g = self.backward(
                        error=error,
                        features=batch_data,
                        suffix=current_suffix,
                        cipher=self.cipher_tool[batch_idx])
                else:
                    self_g, remote_g = self.backward(
                        error=y,
                        features=batch_data,
                        suffix=current_suffix,
                        cipher=self.cipher_tool[batch_idx])

                # loss computing
                suffix = ("loss",) + current_suffix
                if self.reveal_every_iter:
                    batch_loss = self.compute_loss(
                        weights=self.model_weights,
                        suffix=suffix,
                        cipher=self.cipher_tool[batch_idx])
                else:
                    batch_loss = self.compute_loss(
                        weights=(w_self, w_remote),
                        suffix=suffix,
                        cipher=self.cipher_tool[batch_idx])

                if batch_loss is not None:
                    batch_loss = batch_loss * self.batch_num[batch_idx]
                    loss_list.append(batch_loss)

                if self.reveal_every_iter:
                    # reconstruct the full gradient and update in plaintext
                    new_g = self.reveal_models(self_g, remote_g,
                                               suffix=current_suffix)
                    if new_g is not None:
                        self.model_weights = self.optimizer.update_model(
                            self.model_weights, new_g, has_applied=False)
                    else:
                        self.model_weights = LinearModelWeights(
                            l=np.zeros(self_g.shape),
                            fit_intercept=self.model_param.init_param.fit_intercept)
                else:
                    # keep the gradients secret-shared and update both shares
                    if self.optimizer.penalty == consts.L2_PENALTY:
                        self_g = self_g + self.self_optimizer.alpha * w_self
                        remote_g = remote_g + self.remote_optimizer.alpha * w_remote

                    self_g = self.self_optimizer.apply_gradients(self_g)
                    remote_g = self.remote_optimizer.apply_gradients(remote_g)

                    w_self -= self_g
                    w_remote -= remote_g
                    LOGGER.debug(
                        f"w_self shape: {w_self.shape}, w_remote_shape: {w_remote.shape}")

            if self.role == consts.GUEST:
                loss = np.sum(loss_list) / instances_count
                self.loss_history.append(loss)
                if self.need_call_back_loss:
                    self.callback_loss(self.n_iter_, loss)
            else:
                loss = None

            if self.converge_func_name in ["diff", "abs"]:
                self.is_converged = self.check_converge_by_loss(
                    loss, suffix=(str(self.n_iter_),))
            elif self.converge_func_name == "weight_diff":
                if self.reveal_every_iter:
                    self.is_converged = self.check_converge_by_weights(
                        last_w=last_models.unboxed,
                        new_w=self.model_weights.unboxed,
                        suffix=(str(self.n_iter_),))
                    last_models = copy.deepcopy(self.model_weights)
                else:
                    self.is_converged = self.check_converge_by_weights(
                        last_w=(last_w_self, last_w_remote),
                        new_w=(w_self, w_remote),
                        suffix=(str(self.n_iter_),))
                    last_w_self, last_w_remote = (copy.deepcopy(w_self),
                                                  copy.deepcopy(w_remote))
            else:
                raise ValueError(
                    f"Cannot recognize early_stop function: {self.converge_func_name}")

            LOGGER.info("iter: {}, is_converged: {}".format(
                self.n_iter_, self.is_converged))
            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1

            if self.stop_training:
                break
            if self.is_converged:
                break

        # Finally reconstruct
        if not self.reveal_every_iter:
            new_w = self.reveal_models(w_self, w_remote, suffix=("final",))
            if new_w is not None:
                self.model_weights = LinearModelWeights(
                    l=new_w,
                    fit_intercept=self.model_param.init_param.fit_intercept)

    LOGGER.debug(f"loss_history: {self.loss_history}")
    self.set_summary(self.get_model_summary())
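# --- Illustrative sketch (not part of the FATE source): the plaintext
# computation that fit_binary carries out on secret shares. One mini-batch
# step of logistic regression: sigmoid forward, error backward, and a plain
# gradient descent update (learning_rate is an illustrative parameter).
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lr_batch_update(w, features, labels, learning_rate=0.1):
    y_pred = sigmoid(features @ w)            # forward, cf. self.forward(...)
    error = y_pred - labels                   # cf. error = y - self.labels
    grad = features.T @ error / len(labels)   # backward, cf. self.backward(...)
    return w - learning_rate * grad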
def _test_spdz(self):
    table_list = []
    table_int_data_x, table_float_data_x = None, None
    table_int_data_y, table_float_data_y = None, None
    if self.local_party.role == "guest":
        table_int_data_x = session.parallelize(
            self.int_data_x, include_key=False, partition=self.data_partition)
        table_int_data_x = table_int_data_x.mapValues(lambda x: np.array([x]))
        table_float_data_x = session.parallelize(
            self.float_data_x, include_key=False, partition=self.data_partition)
        table_float_data_x = table_float_data_x.mapValues(lambda x: np.array([x]))
    else:
        table_int_data_y = session.parallelize(
            self.int_data_y, include_key=False, partition=self.data_partition)
        table_int_data_y = table_int_data_y.mapValues(lambda y: np.array([y]))
        table_float_data_y = session.parallelize(
            self.float_data_y, include_key=False, partition=self.data_partition)
        table_float_data_y = table_float_data_y.mapValues(lambda y: np.array([y]))

    for tensor_type in ["numpy", "table"]:
        table = PrettyTable()
        table.set_style(ORGMODE)
        field_name = [
            "DataType",
            "One time consumption",
            f"{self.data_num} times consumption",
            "relative acc",
            "log2 acc",
            "operations per second",
        ]
        self._summary["field_name"] = field_name
        table.field_names = field_name
        with SPDZ(local_party=self.local_party, all_parties=self.parties) as spdz:
            for op_type in self.op_test_list:
                start_time = time.time()
                for epoch in range(self.test_round):
                    LOGGER.info(
                        f"test spdz, tensor_type: {tensor_type}, "
                        f"op_type: {op_type}, epoch: {epoch}")
                    tag = "_".join([tensor_type, op_type, str(epoch)])
                    spdz.set_flowid(tag)
                    if self.local_party.role == "guest":
                        if tensor_type == "table":
                            if op_type.startswith("int"):
                                fixed_point_x = TableTensor.from_source(
                                    "int_x_" + tag, table_int_data_x)
                                fixed_point_y = TableTensor.from_source(
                                    "int_y_" + tag, self.other_party)
                            else:
                                fixed_point_x = TableTensor.from_source(
                                    "float_x_" + tag, table_float_data_x)
                                fixed_point_y = TableTensor.from_source(
                                    "float_y_" + tag, self.other_party)
                        else:
                            if op_type.startswith("int"):
                                fixed_point_x = NumpyTensor.from_source(
                                    "int_x_" + tag, self.int_data_x)
                                fixed_point_y = NumpyTensor.from_source(
                                    "int_y_" + tag, self.other_party)
                            else:
                                fixed_point_x = NumpyTensor.from_source(
                                    "float_x_" + tag, self.float_data_x)
                                fixed_point_y = NumpyTensor.from_source(
                                    "float_y_" + tag, self.other_party)
                    else:
                        if tensor_type == "table":
                            if op_type.startswith("int"):
                                fixed_point_y = TableTensor.from_source(
                                    "int_y_" + tag, table_int_data_y)
                                fixed_point_x = TableTensor.from_source(
                                    "int_x_" + tag, self.other_party)
                            else:
                                fixed_point_y = TableTensor.from_source(
                                    "float_y_" + tag, table_float_data_y)
                                fixed_point_x = TableTensor.from_source(
                                    "float_x_" + tag, self.other_party)
                        else:
                            if op_type.startswith("int"):
                                fixed_point_y = NumpyTensor.from_source(
                                    "int_y_" + tag, self.int_data_y)
                                fixed_point_x = NumpyTensor.from_source(
                                    "int_x_" + tag, self.other_party)
                            else:
                                fixed_point_y = NumpyTensor.from_source(
                                    "float_y_" + tag, self.float_data_y)
                                fixed_point_x = NumpyTensor.from_source(
                                    "float_x_" + tag, self.other_party)
                    ret = self.calculate_ret(op_type, tensor_type,
                                             fixed_point_x, fixed_point_y)
                total_time = time.time() - start_time
                self.output_table(op_type, table, tensor_type, total_time, ret)
        table_list.append(table)

    self.tracker.log_component_summary(self._summary)
    for table in table_list:
        LOGGER.info(table)
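# --- Illustrative sketch (not part of the FATE source): how the benchmark
# columns above could be derived from one wall-clock measurement; the helper
# name and dictionary fields are assumptions, not the FATE API.
def throughput(total_time, rounds):
    return {
        "one_time_consumption": total_time / rounds,
        "operations_per_second": rounds / total_time,
    }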