def compute_half_d(self, data_instances, w, cipher, batch_index, current_suffix):
    """Compute the per-instance residual ``w.x + b - y`` and cache it.

    The result of ``data_instances.mapValues`` is stored on ``self.half_d``
    and returned.  When ``self.use_sample_weight`` is truthy, each residual
    is additionally multiplied by the instance's ``weight``.

    ``cipher``, ``batch_index`` and ``current_suffix`` are accepted but not
    used by this implementation.
    """
    def _residual(inst):
        return vec_dot(inst.features, w.coef_) + w.intercept_ - inst.label

    def _weighted_residual(inst):
        return (vec_dot(inst.features, w.coef_)
                + w.intercept_ - inst.label) * inst.weight

    mapper = _weighted_residual if self.use_sample_weight else _residual
    self.half_d = data_instances.mapValues(mapper)
    return self.half_d
def compute_forwards(self, data_instances, model_weights):
    """Return a table of per-instance linear scores ``w.x + b``.

    When ``self.use_sample_weight`` is truthy, each score is multiplied by
    the instance's ``weight``.
    """
    def _score(inst):
        return (vec_dot(inst.features, model_weights.coef_)
                + model_weights.intercept_)

    def _weighted_score(inst):
        return (vec_dot(inst.features, model_weights.coef_)
                + model_weights.intercept_) * inst.weight

    if self.use_sample_weight:
        return data_instances.mapValues(_weighted_score)
    return data_instances.mapValues(_score)
def compute_half_g(self, data_instances, w, cipher, batch_index):
    """Compute ``w.x + b`` per instance and its encrypted counterpart.

    When ``self.use_sample_weight`` is truthy, each value is multiplied by
    the instance's ``weight`` before encryption.

    Returns a ``(half_g, encrypt_half_g)`` pair, where ``encrypt_half_g`` is
    ``cipher[batch_index].encrypt(half_g)``.
    """
    def _score(inst):
        return vec_dot(inst.features, w.coef_) + w.intercept_

    def _weighted_score(inst):
        return (vec_dot(inst.features, w.coef_) + w.intercept_) * inst.weight

    mapper = _weighted_score if self.use_sample_weight else _score
    half_g = data_instances.mapValues(mapper)
    return half_g, cipher[batch_index].encrypt(half_g)
def compute_forwards(self, data_instances, model_weights):
    """Compute ``forwards = 1/4 * w.x`` per instance and cache it.

    The intercept is not added here.  When ``self.use_sample_weight`` is
    truthy, each value is multiplied by the instance's ``weight``.  The
    resulting table is stored on ``self.forwards`` and returned.
    """
    def _quarter_wx(inst):
        return 0.25 * vec_dot(inst.features, model_weights.coef_)

    def _weighted_quarter_wx(inst):
        return 0.25 * vec_dot(inst.features, model_weights.coef_) * inst.weight

    mapper = _weighted_quarter_wx if self.use_sample_weight else _quarter_wx
    self.forwards = data_instances.mapValues(mapper)
    return self.forwards
def compute_half_d(self, data_instances, w, cipher, batch_index, current_suffix):
    """Compute ``0.25 * (w.x + b) - 0.5 * y`` per instance and cache it.

    When ``self.use_sample_weight`` is truthy, both terms are multiplied by
    the instance's ``weight``.  The resulting table is stored on
    ``self.half_d`` and returned unencrypted.

    ``cipher``, ``batch_index`` and ``current_suffix`` are accepted but not
    used by this implementation.
    """
    def _half_d(inst):
        return (0.25 * (vec_dot(inst.features, w.coef_) + w.intercept_)
                - 0.5 * inst.label)

    def _weighted_half_d(inst):
        return (0.25 * (vec_dot(inst.features, w.coef_) + w.intercept_)
                * inst.weight - 0.5 * inst.label * inst.weight)

    mapper = _weighted_half_d if self.use_sample_weight else _half_d
    self.half_d = data_instances.mapValues(mapper)
    return self.half_d
def compute_loss(self, data_instances, w, n_iter_, batch_index, loss_norm=None):
    """Compute the hetero-LR loss and hand it to ``self.sync_loss_info``.

    loss = (1/N)*∑(log2 - 1/2*ywx + 1/8*(wx)^2), where y is the label, w the
    model weight and x the features, with
        (wx)^2 = (Wg*Xg + Wh*Xh)^2 = (Wg*Xg)^2 + (Wh*Xh)^2 + 2*Wg*Xg*Wh*Xh
    so that
        loss = log2 - (1/N)*0.5*∑ywx
               + (1/N)*0.125*[∑(Wg*Xg)^2 + ∑(Wh*Xh)^2 + 2*∑(Wg*Xg*Wh*Xh)]
    where Wh*Xh is a table obtained from the host and ∑(Wh*Xh)^2 is a sum
    obtained from the host.

    Reads ``self.host_forwards`` and ``self.half_d``.  When more than one
    host forward is present, no loss is computed and an empty list is
    synced.  Returns ``None``.
    """
    suffix = (n_iter_, batch_index)
    n = data_instances.count()

    def _guest_wx(inst):
        return vec_dot(inst.features, w.coef_) + w.intercept_

    def _guest_wx_square(inst):
        return np.square(vec_dot(inst.features, w.coef_) + w.intercept_)

    quarter_wx = self.host_forwards[0].join(self.half_d, lambda x, y: x + y)
    ywx = quarter_wx.join(
        data_instances, lambda wx, d: wx * (4 * d.label) + 2
    ).reduce(reduce_add)
    self_wx_square = data_instances.mapValues(_guest_wx_square).reduce(reduce_add)
    half_wx = data_instances.mapValues(_guest_wx)

    loss_list = []
    wx_squares = self.get_host_loss_intermediate(suffix=suffix)
    # The host penalty term is only fetched when a guest penalty is supplied.
    host_loss_regular = (
        self.get_host_loss_regular(suffix=suffix) if loss_norm is not None else []
    )

    if len(self.host_forwards) > 1:
        LOGGER.info("More than one host exist, loss is not available")
    else:
        host_forward = self.host_forwards[0]
        wx_square = wx_squares[0]
        wxg_wxh = half_wx.join(
            host_forward, lambda wxg, wxh: wxg * wxh
        ).reduce(reduce_add)
        loss = np.log(2) - 0.5 * (1 / n) * ywx + 0.125 * (1 / n) * (
            self_wx_square + wx_square + 2 * wxg_wxh)
        if loss_norm is not None:
            loss += loss_norm
            loss += host_loss_regular[0]
        loss_list.append(loss)

    LOGGER.debug("In compute_loss, loss list are: {}".format(loss_list))
    self.sync_loss_info(loss_list, suffix=suffix)
def compute_mu(self, data_instances, coef_, intercept_=0, exposure=None):
    """Return a table of ``exp(w.x + b [+ log(exposure)])`` per instance.

    Without ``exposure`` the value is ``exp(w.x + b)``.  With ``exposure``,
    each exposure value is passed through ``BasePoissonRegression.safe_log``
    and the result is joined with ``data_instances`` and added inside the
    exponent.
    """
    if exposure is None:
        return data_instances.mapValues(
            lambda inst: np.exp(vec_dot(inst.features, coef_) + intercept_))

    def _log_exposure(value):
        return BasePoissonRegression.safe_log(value)

    def _mu_with_offset(inst, log_exp):
        return np.exp(vec_dot(inst.features, coef_) + intercept_ + log_exp)

    offset = exposure.mapValues(_log_exposure)
    return data_instances.join(offset, _mu_with_offset)
def predict(self, data_instances):
    """Predict on ``data_instances``.

    Runs abnormal detection, schema initialisation and header alignment
    first.  When ``self.need_one_vs_rest`` is truthy, prediction is
    delegated to ``self.one_vs_rest_obj.predict``.  Otherwise the sigmoid of
    ``w.x + b`` is computed per instance and converted with
    ``self.predict_score_to_output`` using classes ``[0, 1]`` and the
    threshold from ``self.model_param.predict_param``.
    """
    self._abnormal_detection(data_instances)
    self.init_schema(data_instances)
    data_instances = self.align_data_header(data_instances, self.header)
    LOGGER.info("Start predict is a one_vs_rest task: {}".format(
        self.need_one_vs_rest))

    if self.need_one_vs_rest:
        return self.one_vs_rest_obj.predict(data_instances)

    def _probability(inst):
        return activation.sigmoid(
            vec_dot(inst.features, self.model_weights.coef_)
            + self.model_weights.intercept_)

    pred_prob = data_instances.mapValues(_probability)
    return self.predict_score_to_output(
        data_instances,
        pred_prob,
        classes=[0, 1],
        threshold=self.model_param.predict_param.threshold,
    )
def compute_and_aggregate_forwards(self, data_instances, model_weights,
                                   encrypted_calculator, batch_index,
                                   offset=None):
    """Aggregate guest and host forwards and derive the fore gradient.

    gradient = (1/N)*∑(1/2*ywx-1)*1/2yx = (1/N)*∑(0.25 * wx - 0.5 * y) * x,
    where y = 1 or -1.
    Define wx as guest_forward or host_forward.
    Define (0.25 * wx - 0.5 * y) as fore_gradient.

    The plain ``w.x + b`` table is stored on ``self.forwards``.  Its
    encrypted form, summed with every table in ``self.host_forwards``, is
    stored on ``self.aggregated_forwards``.  ``offset`` is accepted but not
    used here.

    Returns the fore-gradient table.
    """
    def _guest_wx(inst):
        return (vec_dot(inst.features, model_weights.coef_)
                + model_weights.intercept_)

    guest_wx = data_instances.mapValues(_guest_wx)
    self.forwards = guest_wx

    self.aggregated_forwards = encrypted_calculator[batch_index].encrypt(guest_wx)
    for host_forward in self.host_forwards:
        self.aggregated_forwards = self.aggregated_forwards.join(
            host_forward, lambda g, h: g + h)

    return self.aggregated_forwards.join(
        data_instances, lambda wx, d: 0.25 * wx - 0.5 * d.label)
def compute_forwards(self, data_instances, model_weights):
    """Return a table of ``forwards = w.x + b``, one value per instance."""
    def _linear_score(inst):
        return (vec_dot(inst.features, model_weights.coef_)
                + model_weights.intercept_)

    return data_instances.mapValues(_linear_score)
def compute_and_aggregate_forwards(self, data_instances, model_weights,
                                   encrypted_calculator, batch_index,
                                   current_suffix, offset=None):
    r"""Aggregate guest and host Poisson forwards into the fore gradient.

    Compute gradients:
        gradient = (1/N) * \sum(exp(wx) - y) * x

    Define exp(wx) as mu, named guest_forward or host_forward.
    Define (mu - y) as fore_gradient.
    Then, gradient = fore_gradient * x.

    The guest ``mu`` table is stored on ``self.forwards``, the tables
    fetched with ``self.get_host_forward`` on ``self.host_forwards``, and
    the element-wise product of the guest table with the first host table
    on ``self.aggregated_forwards``.  ``encrypted_calculator`` and
    ``batch_index`` are accepted but not used here.

    Parameters
    ----------
    data_instances: table of instances exposing ``features`` and ``label``
    model_weights: object exposing ``coef_`` and ``intercept_``
    current_suffix: suffix passed to ``self.get_host_forward``
    offset: table joined with ``data_instances`` and added inside the
        exponent; required

    Returns
    -------
    Table of ``mu - label`` per instance.

    Raises
    ------
    ValueError
        If ``offset`` is ``None``.
    """
    if offset is None:
        raise ValueError(
            "Offset should be provided when compute poisson forwards")

    def _guest_mu(inst, log_offset):
        return np.exp(
            vec_dot(inst.features, model_weights.coef_)
            + model_weights.intercept_ + log_offset)

    self.forwards = data_instances.join(offset, _guest_mu)
    self.host_forwards = self.get_host_forward(suffix=current_suffix)
    # Only the first host's forward is combined with the guest's.
    self.aggregated_forwards = self.forwards.join(
        self.host_forwards[0], lambda g, h: g * h)
    return self.aggregated_forwards.join(
        data_instances, lambda mu, d: mu - d.label)
def compute_loss(self, data_instances, model_weights, encrypted_calculator,
                 optimizer, n_iter_, batch_index, cipher_operator):
    """Send this party's encrypted loss intermediates for hetero Poisson.

    The per-instance ``w.x + b`` table is encrypted with
    ``encrypted_calculator[batch_index]`` and passed to
    ``self.remote_loss_intermediate``.  If ``optimizer.loss_norm`` returns a
    value other than ``None``, it is encrypted with ``cipher_operator`` and
    passed to ``self.remote_loss_regular``.

    Parameters
    ----------
    data_instances: table of instances, input data
    model_weights: model weight object, stores ``intercept_`` and ``coef_``
    encrypted_calculator: encrypted calculator objects, indexed by batch
    optimizer: optimizer object providing ``loss_norm``
    n_iter_: int, current iteration number
    batch_index: int, index of the encrypted calculator to use
    cipher_operator: cipher used to encrypt the penalty term
    """
    suffix = (n_iter_, batch_index)

    def _linear_score(inst):
        return (vec_dot(inst.features, model_weights.coef_)
                + model_weights.intercept_)

    plain_wx = data_instances.mapValues(_linear_score)
    encrypted_wx = encrypted_calculator[batch_index].encrypt(plain_wx)
    self.remote_loss_intermediate(encrypted_wx, suffix=suffix)

    penalty = optimizer.loss_norm(model_weights)
    if penalty is None:
        return
    encrypted_penalty = cipher_operator.encrypt(penalty)
    self.remote_loss_regular(encrypted_penalty, suffix=suffix)
def compute_loss(self, data_instances, model_weights, n_iter_, batch_index,
                 offset, loss_norm=None):
    """Compute the hetero Poisson loss and hand it to ``self.sync_loss_info``.

    As implemented:
        loss = (sum(mu_g * mu_h) - sum(y * (wx_g + offset + wx_h))) / N
    plus ``loss_norm`` and the first host penalty term when ``loss_norm`` is
    not ``None``.  ``mu_g`` is read from ``self.forwards`` and ``mu_h`` from
    ``self.host_forwards[0]``.

    Parameters
    ----------
    data_instances: table of instances, input data
    model_weights: model weight object, stores ``intercept_`` and ``coef_``
    n_iter_: int, current iteration number
    batch_index: int, current batch index (part of the transfer suffix)
    offset: table of log(exposure) values, joined with ``data_instances``
    loss_norm: penalty term, defaults to ``None``

    Raises
    ------
    ValueError
        If ``self.host_forwards`` holds more than one entry.
    """
    suffix = (n_iter_, batch_index)
    n = data_instances.count()

    def _wx_and_label(inst, log_exposure):
        linear = (vec_dot(inst.features, model_weights.coef_)
                  + model_weights.intercept_ + log_exposure)
        return (linear, inst.label)

    guest_wx_y = data_instances.join(offset, _wx_and_label)

    loss_list = []
    host_wxs = self.get_host_loss_intermediate(suffix)
    if loss_norm is None:
        host_loss_regular = []
    else:
        host_loss_regular = self.get_host_loss_regular(suffix=suffix)

    if len(self.host_forwards) > 1:
        raise ValueError(
            "More than one host exists. Poisson regression does not support multi-host."
        )

    host_mu = self.host_forwards[0]
    host_wx = host_wxs[0]
    loss_wx = guest_wx_y.join(
        host_wx, lambda g, h: g[1] * (g[0] + h)).reduce(reduce_add)
    loss_mu = self.forwards.join(
        host_mu, lambda g, h: g * h).reduce(reduce_add)

    loss = (loss_mu - loss_wx) / n
    if loss_norm is not None:
        loss = loss + loss_norm + host_loss_regular[0]
    loss_list.append(loss)
    self.sync_loss_info(loss_list, suffix=suffix)
def compute_sqn_forwards(self, data_instances, delta_s, cipher_operator):
    """Return per-instance encrypted ``x . delta_s + delta_s.intercept_``.

    To compute the Hessian matrix, y and s are needed:
        g = (1/N)*∑(0.25 * wx - 0.5 * y) * x
        y = ∇2^F(w_t)s_t = g' * s = (1/N)*∑(0.25 * x * s) * x
        define forward_hess = ∑(0.25 * x * s)

    Each value is encrypted individually with ``cipher_operator.encrypt``.
    No 0.25 factor and no reduction is applied here.
    """
    def _encrypted_forward(inst):
        plain = (fate_operator.vec_dot(inst.features, delta_s.coef_)
                 + delta_s.intercept_)
        return cipher_operator.encrypt(plain)

    return data_instances.mapValues(_encrypted_forward)
def predict(self, data_instances):
    """Predict on ``data_instances`` with the local model weights.

    Runs abnormal detection, schema initialisation and header alignment,
    then computes the sigmoid of ``w.x + b`` per instance and converts it
    with ``self.predict_score_to_output`` using classes ``[0, 1]`` and the
    threshold from ``self.model_param.predict_param``.
    """
    self._abnormal_detection(data_instances)
    self.init_schema(data_instances)
    data_instances = self.align_data_header(data_instances, self.header)

    def _probability(inst):
        return activation.sigmoid(
            vec_dot(inst.features, self.model_weights.coef_)
            + self.model_weights.intercept_)

    pred_prob = data_instances.mapValues(_probability)
    return self.predict_score_to_output(
        data_instances,
        pred_prob,
        classes=[0, 1],
        threshold=self.model_param.predict_param.threshold,
    )
def predict(self, data_instances):
    """Predict on ``data_instances``, through the arbiter when encrypting.

    Runs abnormal detection, schema initialisation and header alignment
    first.  A Paillier public key is requested from ``self.cipher`` only
    when ``self.component_properties.has_arbiter`` is truthy.

    * ``self.use_encrypt`` truthy: the public key is set on
      ``self.cipher_operator``, the aggregated model is fetched, ``w.x + b``
      is computed with ``self.compute_wx`` and sent to the arbiter, and the
      received result is joined with ``data_instances`` into ``Instance``
      objects whose features are ``[label, p, None, {"0": None, "1": None}]``.
    * otherwise: the sigmoid of ``w.x + b`` is computed locally from
      ``self.model_weights`` and converted with
      ``self.predict_score_to_output``.

    Raises
    ------
    ValueError
        If ``self.use_encrypt`` is truthy but no arbiter is configured.
    """
    # Plain literals: these messages have no placeholders, so no f-prefix.
    LOGGER.info('Start predict task')
    self._abnormal_detection(data_instances)
    self.init_schema(data_instances)
    data_instances = self.align_data_header(data_instances, self.header)
    suffix = ('predict', )

    if self.component_properties.has_arbiter:
        pubkey = self.cipher.gen_paillier_pubkey(enable=self.use_encrypt,
                                                 suffix=suffix)
    else:
        if self.use_encrypt:
            raise ValueError("In use_encrypt case, arbiter should be set")
        pubkey = None

    if self.use_encrypt:
        self.cipher_operator.set_public_key(pubkey)
        final_model = self.transfer_variable.aggregated_model.get(
            idx=0, suffix=suffix)
        model_weights = LogisticRegressionWeights(final_model.unboxed,
                                                  self.fit_intercept)
        wx = self.compute_wx(data_instances, model_weights.coef_,
                             model_weights.intercept_)
        self.transfer_variable.predict_wx.remote(wx, consts.ARBITER, 0,
                                                 suffix=suffix)
        predict_result = self.transfer_variable.predict_result.get(
            idx=0, suffix=suffix)
        predict_result = predict_result.join(
            data_instances,
            lambda p, d: Instance(
                features=[d.label, p, None, {"0": None, "1": None}],
                inst_id=d.inst_id))
    else:
        pred_prob = data_instances.mapValues(
            lambda v: activation.sigmoid(
                vec_dot(v.features, self.model_weights.coef_)
                + self.model_weights.intercept_))
        predict_result = self.predict_score_to_output(
            data_instances,
            pred_prob,
            classes=[0, 1],
            threshold=self.model_param.predict_param.threshold)
    return predict_result
def compute_forward_hess(self, data_instances, delta_s, host_forwards):
    """Compute the Hessian forwards and the Hessian-vector product.

    To compute the Hessian matrix, y and s are needed:
        g = (1/N)*∑(0.25 * wx - 0.5 * y) * x
        y = ∇2^F(w_t)s_t = g' * s = (1/N)*∑(0.25 * x * s) * x
        define forward_hess = (1/N)*∑(0.25 * x * s)

    The guest term ``0.25 * (x . delta_s + intercept)`` is summed with
    ``0.25 * h`` for every table in ``host_forwards``.  The sum is passed to
    ``self.compute_gradient`` together with ``delta_s.fit_intercept``.

    Returns ``(forwards, np.array(hess_vector))``.
    """
    def _guest_forward(inst):
        return (vec_dot(inst.features, delta_s.coef_)
                + delta_s.intercept_) * 0.25

    def _add_host(guest_value, host_value):
        return guest_value + (host_value * 0.25)

    forwards = data_instances.mapValues(_guest_forward)
    for host_forward in host_forwards:
        forwards = forwards.join(host_forward, _add_host)

    hess_vector = self.compute_gradient(
        data_instances, forwards, delta_s.fit_intercept)
    return forwards, np.array(hess_vector)
def compute_forward_hess(self, data_instances, delta_s, host_forwards):
    """Compute the Hessian forwards and the Hessian-vector product.

    To compute the Hessian matrix, y and s are needed:
        g = (1/N)*∑(wx - y) * x
        y = ∇2^F(w_t)s_t = g' * s = (1/N)*∑(x * s) * x
        define forward_hess = (1/N)*∑(x * s)

    The guest term ``x . delta_s + intercept`` is summed with every table in
    ``host_forwards``.  When ``self.use_sample_weight`` is truthy, the sum
    is multiplied by each instance's ``weight``.  The result is passed to
    ``self.compute_gradient`` together with ``delta_s.fit_intercept``.

    Returns ``(forwards, np.array(hess_vector))``.
    """
    def _guest_forward(inst):
        return vec_dot(inst.features, delta_s.coef_) + delta_s.intercept_

    forwards = data_instances.mapValues(_guest_forward)
    for host_forward in host_forwards:
        forwards = forwards.join(host_forward, lambda g, h: g + h)

    if self.use_sample_weight:
        forwards = forwards.join(
            data_instances, lambda value, inst: value * inst.weight)

    hess_vector = self.compute_gradient(
        data_instances, forwards, delta_s.fit_intercept)
    return forwards, np.array(hess_vector)
def compute_and_aggregate_forwards(self, data_instances, model_weights,
                                   encrypted_calculator, batch_index,
                                   current_suffix, offset=None):
    r"""Aggregate guest and host linear forwards into the fore gradient.

    Compute gradients:
        gradient = (1/N)*\sum(wx - y)*x

    Define wx as guest_forward or host_forward.
    Define (wx - y) as fore_gradient.

    The plain ``w.x + b`` table is stored on ``self.forwards``, the tables
    fetched with ``self.get_host_forward`` on ``self.host_forwards``, and
    the encrypted guest table summed with every host table on
    ``self.aggregated_forwards``.

    Parameters
    ----------
    data_instances: table of instances, input data
    model_weights: LinearRegressionWeights
        Stores coef_ and intercept_ of the model
    encrypted_calculator: encrypted calculator objects, indexed by batch
    batch_index: int, index of the encrypted calculator to use
    current_suffix: tuple or string, used in transfer_variable
    offset: used in Poisson only; ignored here

    Returns
    -------
    Table of ``aggregated_wx - label`` per instance.
    """
    def _guest_wx(inst):
        return (vec_dot(inst.features, model_weights.coef_)
                + model_weights.intercept_)

    wx = data_instances.mapValues(_guest_wx)
    self.forwards = wx
    self.aggregated_forwards = encrypted_calculator[batch_index].encrypt(wx)

    self.host_forwards = self.get_host_forward(suffix=current_suffix)
    for host_forward in self.host_forwards:
        self.aggregated_forwards = self.aggregated_forwards.join(
            host_forward, lambda g, h: g + h)

    return self.aggregated_forwards.join(
        data_instances, lambda agg_wx, d: agg_wx - d.label)
def compute_wx(self, data_instances, coef_, intercept_=0):
    """Return a table of ``coef_ . features + intercept_`` per instance."""
    def _linear_score(inst):
        return vec_dot(inst.features, coef_) + intercept_

    return data_instances.mapValues(_linear_score)
def _vec_dot(v, coef, intercept):
    """Return ``fate_operator.vec_dot(v.features, coef)`` plus ``intercept``."""
    weighted_sum = fate_operator.vec_dot(v.features, coef)
    return weighted_sum + intercept
def compute_half_g(self, data_instances, w, cipher, batch_index):
    """Compute ``0.25 * w.x + b`` per instance and its encrypted counterpart.

    The 0.25 factor is applied to the dot product only; the intercept is
    added unscaled.

    Returns a ``(half_g, encrypt_half_g)`` pair, where ``encrypt_half_g`` is
    ``cipher[batch_index].encrypt(half_g)``.
    """
    def _quarter_wx_plus_b(inst):
        return vec_dot(inst.features, w.coef_) * 0.25 + w.intercept_

    half_g = data_instances.mapValues(_quarter_wx_plus_b)
    return half_g, cipher[batch_index].encrypt(half_g)
def compute_loss(self, data_instances, w, n_iter_, batch_index,
                 loss_norm=None, batch_masked=False):
    """Compute the hetero-LR loss and hand it to ``self.sync_loss_info``.

    loss = (1/N)*∑(log2 - 1/2*ywx + 1/8*(wx)^2), where y is the label, w the
    model weight and x the features, with
        (wx)^2 = (Wg*Xg + Wh*Xh)^2 = (Wg*Xg)^2 + (Wh*Xh)^2 + 2*Wg*Xg*Wh*Xh
    so that
        loss = log2 - (1/N)*0.5*∑ywx
               + (1/N)*0.125*[∑(Wg*Xg)^2 + ∑(Wh*Xh)^2 + 2*∑(Wg*Xg*Wh*Xh)]
    where Wh*Xh is a table obtained from the host and ∑(Wh*Xh)^2 is a sum
    obtained from the host.

    Reads ``self.host_forwards`` and ``self.half_d``.  The cross term is
    weighted by 8 rather than 2 in this implementation.  When
    ``batch_masked`` is truthy, each host squares table is first joined with
    ``data_instances`` and summed.  When more than one host forward is
    present, no loss is computed and an empty list is synced.  Returns
    ``None``.
    """
    suffix = (n_iter_, batch_index)
    n = data_instances.count()

    host_wx_y = data_instances.join(
        self.host_forwards[0], lambda inst, fwd: (fwd, inst.label))
    self_wx_y = self.half_d.join(
        data_instances, lambda fwd, inst: (fwd, inst.label))

    def _sum_ywx(wx_y):
        # Add values whose label is 1 and subtract all the others.
        positive, negative = 0, 0
        for _, (value, label) in wx_y:
            if label == 1:
                positive += value
            else:
                negative -= value
        return positive + negative

    ywx = (host_wx_y.applyPartitions(_sum_ywx).reduce(reduce_add)
           + self_wx_y.applyPartitions(_sum_ywx).reduce(reduce_add))
    ywx = ywx * 4 + 2 * n

    def _guest_wx(inst):
        return vec_dot(inst.features, w.coef_) + w.intercept_

    half_wx = data_instances.mapValues(_guest_wx)
    self_wx_square = half_wx.mapValues(
        lambda value: np.square(value)).reduce(reduce_add)

    loss_list = []
    wx_squares = self.get_host_loss_intermediate(suffix=suffix)
    if batch_masked:
        # Restrict each host table to the keys of this batch, then sum it.
        wx_squares = [
            data_instances.join(
                square_table, lambda inst, enc_h_squares: enc_h_squares
            ).reduce(lambda x, y: x + y)
            for square_table in wx_squares
        ]

    if loss_norm is not None:
        host_loss_regular = self.get_host_loss_regular(suffix=suffix)
    else:
        host_loss_regular = []

    if len(self.host_forwards) > 1:
        LOGGER.info("More than one host exist, loss is not available")
    else:
        host_forward = self.host_forwards[0]
        wx_square = wx_squares[0]
        wxg_wxh = half_wx.join(
            host_forward, lambda wxg, wxh: wxg * wxh).reduce(reduce_add)
        loss = np.log(2) - 0.5 * (1 / n) * ywx + 0.125 * (1 / n) * (
            self_wx_square + wx_square + 8 * wxg_wxh)
        if loss_norm is not None:
            loss += loss_norm
            loss += host_loss_regular[0]
        loss_list.append(loss)

    LOGGER.debug("In compute_loss, loss list are: {}".format(loss_list))
    self.sync_loss_info(loss_list, suffix=suffix)
def compute_forwards(self, data_instances, model_weights):
    """Return a table of ``mu = exp(w.x + b)``, one value per instance."""
    def _mu(inst):
        return np.exp(
            vec_dot(inst.features, model_weights.coef_)
            + model_weights.intercept_)

    return data_instances.mapValues(_mu)