def feature_difference(self, y: BinaryFeatureVector,
                       xa: BinaryFeatureVector) -> List:
    """
    Subtract the CSR matrix of ``xa`` from the CSR matrix of ``y`` and
    return the ``indices`` attribute of the resulting sparse matrix.

    :param y: feature vector providing the minuend (via ``get_csr_matrix``)
    :param xa: feature vector providing the subtrahend
    :return: the ``indices`` array of ``y - xa``; presumably the feature
             positions at which the two vectors differ (verify against
             callers, which may expect a plain list)
    """
    delta = y.get_csr_matrix() - xa.get_csr_matrix()
    return delta.indices
def csr_mat_to_instances(csr_mat, labels, binary=False):
    """
    Build one Instance per row of a CSR matrix.

    :param csr_mat: sparse matrix exposing ``data``, ``indices``,
                    ``indptr`` and ``shape``
    :param labels: indexable collection; ``labels[i]`` is attached to row i
    :param binary: when True each row becomes a BinaryFeatureVector built
                   from its stored column indices only; otherwise a
                   RealFeatureVector that also receives the stored values
    :return: list of Instance objects, in row order
    """
    values = csr_mat.data
    columns = csr_mat.indices
    row_ptr = csr_mat.indptr
    num_rows, num_features = csr_mat.shape

    result = []
    for row in range(num_rows):
        label = labels[row]
        # indptr[row]:indptr[row + 1] delimits this row's stored entries.
        start, stop = row_ptr[row], row_ptr[row + 1]
        row_columns = list(columns[start:stop])
        if binary:
            vector = BinaryFeatureVector(num_features, row_columns)
        else:
            vector = RealFeatureVector(num_features, row_columns,
                                       values[start:stop])
        result.append(Instance(label, vector))
    return result
def _generate_inst(self):
    """
    Rebuild ``self.inst`` from ``self.x`` and ``self.y``.

    Every position of ``self.x`` whose value is at least 0.5 is treated as
    an active feature. The new Instance carries label ``self.y`` and a
    BinaryFeatureVector of size ``len(self.x)``. The result is stored on
    ``self.inst``; nothing is returned.
    """
    active = [pos for pos in range(len(self.x)) if self.x[pos] >= 0.5]

    # Generate new instance
    self.inst = Instance(self.y, BinaryFeatureVector(len(self.x), active))
def load_dataset(emailData: EmailDataset) -> List[Instance]:
    """
    Convert a dataset object into a list of instances.

    Reads the CSR arrays (``indptr``, ``indices``, ``data``) of
    ``emailData.features`` and builds one Instance per row, labelled with
    ``emailData.labels[i]``.

    :param emailData: dataset exposing ``shape``, ``features``,
                      ``num_instances``, ``binary`` and ``labels``
    :return: list of Instance objects; each holds a BinaryFeatureVector
             when ``emailData.binary`` is truthy, else a RealFeatureVector
    """
    num_features = emailData.shape[1]
    row_ptr = emailData.features.indptr
    columns = emailData.features.indices
    values = emailData.features.data

    instances = []
    for row in range(emailData.num_instances):
        # Slice bounds of this row's stored entries in the CSR arrays.
        start, stop = row_ptr[row], row_ptr[row + 1]
        if emailData.binary:
            vector = BinaryFeatureVector(num_features,
                                         columns[start:stop].tolist())
        else:
            row_values = values[start:stop].tolist()
            vector = RealFeatureVector(num_features,
                                       columns[start:stop].tolist(),
                                       row_values)
        instances.append(Instance(emailData.labels[row], vector))
    return instances
def nd_arr_to_instances(nd_arr, labels=None, binary=False):
    """
    Return a list of instances built from the rows of a dense 2-D array.

    :param nd_arr: 2-D array, one row per instance. When ``labels`` is
                   None the first column is taken as the labels and the
                   remaining columns as the features.
    :param labels: optional indexable collection of labels; ``labels[i]``
                   is attached to row i. When omitted, labels are sliced
                   from ``nd_arr[:, :1]`` (so each label is a length-1 row
                   slice, not a scalar).
    :param binary: when True each row becomes a BinaryFeatureVector of its
                   non-zero column indices; otherwise a RealFeatureVector
                   that also receives the non-zero values.
    :return: list of Instance objects, in row order
    """
    num_instances = nd_arr.shape[0]
    if labels is None:
        labels = nd_arr[:, :1]
        data = nd_arr[:, 1:]
        num_features = nd_arr.shape[1] - 1
    else:
        data = nd_arr
        num_features = nd_arr.shape[1]

    instance_lst = []
    for i in range(num_instances):
        row = data[i]
        mat_indices = [x for x in range(num_features) if row[x] != 0]
        # The vector dimension is the number of features, not the number
        # of rows (the previous code passed num_instances here).
        if binary:
            feature_vector = BinaryFeatureVector(num_features, mat_indices)
        else:
            # Take values from the *current* row at exactly the indices
            # selected above so indices and data stay aligned (the previous
            # code filtered on row 0 for every instance).
            mat_data = [row[x] for x in mat_indices]
            feature_vector = RealFeatureVector(num_features, mat_indices,
                                               mat_data)
        instance_lst.append(Instance(labels[i], feature_vector))
    return instance_lst
def attack(self, instances) -> List[Instance]:
    """
    Performs a data modification attack.

    Runs gradient descent on ``self.fvs`` (step size ``self.beta``),
    projecting after every step and recomputing theta, until the distance
    between ``self.theta`` and ``self.target_theta`` is no longer greater
    than ``self.alpha`` or ``self.max_iter`` iterations have run. At least
    one iteration is always performed. Finally each row of ``self.fvs`` is
    thresholded at 0.5 and written into a deep copy of the input instances.

    :param instances: the input instances
    :return: the attacked instances (``self.return_instances``)
    :raises ValueError: if ``instances`` is empty
    """
    if len(instances) == 0:
        raise ValueError('Need at least one instance.')

    self.instances = instances
    self.return_instances = deepcopy(self.instances)
    # NOTE(review): presumably initialises self.fvs, self.old_fvs and
    # self.theta, which are read below -- confirm in _calculate_constants.
    self._calculate_constants()

    fv_dist = 0.0
    theta_dist = np.linalg.norm(self.theta - self.target_theta)
    iteration = 0

    # Do-while: the first pass is unconditional, later passes need both
    # "not yet close enough" and "iteration budget not exhausted".
    while True:
        print('Iteration: ', iteration, ' - FV distance: ', fv_dist,
              ' - theta distance: ', theta_dist, sep='')

        # Gradient descent
        gradient = self._calc_gradient()
        if self.verbose:
            print('\nGRADIENT\n', gradient, '\n', sep='')

        self.fvs -= (gradient * self.beta)
        self._project_fvs()

        # Update variables
        self._calc_theta()
        fv_dist = np.linalg.norm(self.fvs - self.old_fvs)
        theta_dist = np.linalg.norm(self.theta - self.target_theta)
        self.old_fvs = deepcopy(self.fvs)

        iteration += 1
        if not (theta_dist > self.alpha and iteration < self.max_iter):
            break

    print('Iteration: FINAL - FV distance: ', fv_dist,
          ' - theta distance: ', theta_dist, ' - alpha: ', self.alpha,
          ' - beta: ', self.beta, sep='')

    if self.verbose:
        print('\nTarget Theta:\n', self.target_theta, '\n\nTheta:\n',
              self.theta, '\n')

    # Go from floating-point values in [0, 1] to integers in {0, 1}
    for row_idx in range(len(self.fvs)):
        row = self.fvs[row_idx]
        active = [col for col in range(len(row)) if row[col] >= 0.5]
        target = self.return_instances[row_idx]
        target.feature_vector = BinaryFeatureVector(
            target.get_feature_count(), active)

    return self.return_instances
def coordinate_greedy(self, instance: Instance) -> Instance:
    """
    Greedy coordinate-wise search for a modified version of ``instance``.

    Visits the feature indices in random order. For each index it asks
    ``minimize_transform`` for a candidate point and keeps it only when
    ``transform_cost`` (measured against the starting point) strictly
    decreases. The search stops early once ``self.max_change`` candidates
    have failed to improve the cost.

    :param instance: the instance to modify
    :return: ``instance`` itself if the learner still predicts the
             positive class for the modified point; otherwise a new
             Instance (label -1) holding a BinaryFeatureVector of the
             non-zero positions of the modified point
    """
    order = list(range(0, self.num_features))
    origin = instance.get_csr_matrix().toarray()[0]
    # Same array object as the origin until a candidate is accepted.
    current = origin

    stalled = 0
    shuffle(order)
    for coord in order:
        candidate = self.minimize_transform(current, coord)
        cost_before = self.transform_cost(current, origin)
        cost_after = self.transform_cost(candidate, origin)

        # using difference instead of log ratio for convergence check
        delta = cost_after - cost_before
        if delta >= 0:
            stalled += 1
            if stalled >= self.max_change:
                break
            continue
        current = candidate

    nonzero = [pos for pos in range(0, self.num_features)
               if current[pos] != 0]
    modified = Instance(
        -1, BinaryFeatureVector(self.num_features, nonzero))

    still_positive = (self.learn_model.predict(modified) ==
                      self.learn_model.positive_classification)
    return instance if still_positive else modified