def _non_zero_columns_search(array):
    """Return the indices of the columns holding at least one non-zero value.

    Parameters
    ----------
    array : CArray
        Two-dimensional array (presumably a CArray; only ``shape[1]``,
        column slicing ``array[:, c]`` and ``any()`` on the slice are
        used here).

    Returns
    -------
    CArray
        Integer array with, in increasing order, the index of every
        column for which ``any()`` is true. Empty if no column matches.

    """
    # Gather the indices in a plain list and build the CArray once, rather
    # than re-creating the result with an append for each matching column.
    # The truth value of `any()` is tested directly (no `== True`).
    non_zero_col = [c for c in range(array.shape[1]) if array[:, c].any()]
    return CArray(non_zero_col, dtype=int)
def refine_roc(fpr, tpr, th):
    """Force a ROC curve to start at (0, 0) and to end at (1, 1).

    If the first point is not (0, 0) a new one is prepended, and if the
    last point is not (1, 1) a new one is appended. The thresholds are
    extended accordingly, so the three outputs keep the same length.

    Parameters
    ----------
    fpr : CArray
        False Positive Rates, as returned by `.BaseRoc.compute()`.
    tpr : CArray
        True Positive Rates, as returned by `.BaseRoc.compute()`.
    th : CArray
        Thresholds, as returned by `.BaseRoc.compute()`.

    Returns
    -------
    fpr : CArray
        False Positive Rates, with the bounds added where missing.
    tpr : CArray
        True Positive Rates, with the bounds added where missing.
    th : CArray
        Thresholds, with one extra value for each added point.

    """
    first_fpr, first_tpr = fpr[0], tpr[0]
    starts_at_origin = \
        first_tpr == first_fpr and first_tpr == 0 and first_fpr == 0
    if not starts_at_origin:
        # Prepend (0, 0), with a threshold slightly above the first one
        fpr = CArray(0).append(fpr)
        tpr = CArray(0).append(tpr)
        th = CArray(th[0] + 1e-3).append(th)

    last_fpr, last_tpr = fpr[-1], tpr[-1]
    ends_at_one = last_tpr == last_fpr and last_tpr == 1 and last_fpr == 1
    if not ends_at_one:
        # Append (1, 1), with a threshold slightly below the last one
        fpr = fpr.append(1)
        tpr = tpr.append(1)
        th = th.append(th[-1] - 1e-3)

    return fpr, tpr, th
def apply_feasible_manipulations(self, t, x: CArray) -> CArray:
    """Apply the padding practical manipulation on the input sample.

    The manipulation vector is rescaled by 255, cast to integer and
    appended to the input sample.

    Parameters
    ----------
    t : CArray
        The vector of manipulations in [0,1].
    x : CArray
        The input space sample to perturb.

    Returns
    -------
    CArray
        The adversarial malware, i.e. `x` followed by the integer
        values obtained from `t`.

    """
    # The builtin `int` replaces `np.int`: the latter was only an alias of
    # the former, deprecated in NumPy 1.20 and removed in NumPy 1.24,
    # where accessing it raises AttributeError.
    byte_values = (t * 255).astype(int)
    return x.append(byte_values)
def _test_evasion_multiclass(self, expected_x):
    """Run the PGD-LS evasion at increasing `dmax` and check the result.

    The attack is run from a fixed dataset sample for every integer
    `dmax` from 0 to 2, each run starting from the point returned by the
    previous one. The final optimal point is compared with `expected_x`
    and the collected sequences are passed to `self._make_plots`.

    Parameters
    ----------
    expected_x : array-like
        Expected optimal point, compared with `eva.x_opt` up to the
        4th decimal.

    """
    # EVASION
    self.multiclass.verbose = 2

    # Box constraint taken from the normalizer feature range, if any
    if self.normalizer is None:
        lb = ub = None
    else:
        lb = self.normalizer.feature_range[0]
        ub = self.normalizer.feature_range[1]

    dmax = 2

    self.solver_params = {'eta': 1e-1, 'eta_min': 1.0}

    eva = CAttackEvasionPGDLS(
        classifier=self.multiclass,
        surrogate_classifier=self.multiclass,
        surrogate_data=self.ds,
        distance='l2',
        dmax=dmax,
        lb=lb,
        ub=ub,
        solver_params=self.solver_params,
        y_target=self.y_target)

    eva.verbose = 0  # 2

    # Starting sample: a wrongly classified point from class 3 region.
    # Other candidates that have been used:
    #   0  - class 2 region
    #   68 - class 1 region
    #   53 - class 3 region, evasion goes up usually
    #   49 - class 0 region, wrongly classified point
    #   27 - class 0 region, correctly classified point
    p_idx = 1

    x0 = self.ds.X[p_idx, :]
    y0 = self.ds.Y[p_idx].item()

    # Column of the scores to track: target class if set, else true class
    score_col = y0 if self.y_target is None else self.y_target

    x_seq = CArray.empty((0, x0.shape[1]))
    scores = CArray([])
    f_seq = CArray([])

    x = x0
    for d in range(dmax + 1):

        self.logger.info("Evasion at dmax: " + str(d))

        eva.dmax = d
        x, f_opt = eva._run(x0=x0, y0=y0, x_init=x)
        y_pred, score = self.multiclass.predict(
            x, return_decision_function=True)

        # Only the values reached at each dmax are stored. The full
        # optimization path would require eva.x_seq and eva.f_seq
        f_seq = f_seq.append(f_opt)
        x_seq = x_seq.append(x, axis=0)

        s = score[:, score_col]
        scores = scores.append(s)

    self.logger.info("Predicted label after evasion: " + str(y_pred))
    self.logger.info("Score after evasion: {:}".format(s))
    self.logger.info(
        "Objective function after evasion: {:}".format(f_opt))

    # Compare optimal point with expected
    self.assert_array_almost_equal(
        eva.x_opt.todense().ravel(), expected_x, decimal=4)

    self._make_plots(x_seq, dmax, eva, x0, scores, f_seq)
def _make_plot(self, p_idx, eva, dmax):
    """Run `eva` at increasing `dmax` from one sample and save a figure.

    Nothing is done if `self.make_figures` is False. Otherwise the
    attack is run for every integer distance from 0 to `dmax`, each run
    starting from the point returned by the previous one, and a 2x2
    figure is saved in `self.images_folder` with: the decision regions,
    the attack objective function, the tracked classifier score and the
    objective function value at each distance.

    Parameters
    ----------
    p_idx : int
        Index in `self.ds` of the sample to start from.
    eva : attack instance
        Object exposing `dmax`, `_run`, `objective_function` and
        `objective_function_gradient`.
    dmax : int
        Maximum distance to reach.

    """
    if self.make_figures is False:
        self.logger.debug("Skipping figures...")
        return

    x0 = self.ds.X[p_idx, :]
    y0 = self.ds.Y[p_idx].item()

    # Column of the scores to track: target class if set, else true class
    score_col = y0 if self.y_target is None else self.y_target

    x_seq = CArray.empty((0, x0.shape[1]))
    scores = CArray([])
    f_seq = CArray([])

    x = x0
    for d in range(dmax + 1):

        self.logger.info("Evasion at dmax: " + str(d))

        eva.dmax = d
        x, f_opt = eva._run(x0=x0, y0=y0, x_init=x)
        y_pred, score = self.multiclass.predict(
            x, return_decision_function=True)

        # Only the values reached at each dmax are stored. The full
        # optimization path would require eva.x_seq and eva.f_seq
        f_seq = f_seq.append(f_opt)
        x_seq = x_seq.append(x, axis=0)

        s = score[:, score_col]
        scores = scores.append(s)

    self.logger.info("Predicted label after evasion: {:}".format(y_pred))
    self.logger.info("Score after evasion: {:}".format(s))
    self.logger.info(
        "Objective function after evasion: {:}".format(f_opt))

    fig = CFigure(height=9, width=10, markersize=6, fontsize=12)

    # Plot bounds must include both the dataset and the evasion path
    (x_lo, x_hi), (y_lo, y_hi) = self.ds.get_bounds()
    x_lo = min(x_lo, x_seq[:, 0].min())
    x_hi = max(x_hi, x_seq[:, 0].max())
    y_lo = min(y_lo, x_seq[:, 1].min())
    y_hi = max(y_hi, x_seq[:, 1].max())
    ds_bounds = [(x_lo, x_hi), (y_lo, y_hi)]

    # Style of the evasion path, shared by the first two panels
    path_kwargs = dict(
        path_style='-',
        start_style='o', start_facecolor='w', start_edgewidth=2,
        final_style='o', final_facecolor='k', final_edgewidth=2)

    def _plot_constraint(figure, color, alpha):
        """Plot the distance constraint on the current subplot."""
        figure.sp.plot_fun(func=self._rescaled_distance,
                           multipoint=True,
                           plot_background=False,
                           n_grid_points=20,
                           levels_color=color,
                           grid_limits=ds_bounds,
                           levels=[0],
                           colorbar=False,
                           levels_linewidth=2.0,
                           levels_style=':',
                           alpha_levels=alpha,
                           c=x0, r=dmax)

    # Panel 1: multiclass decision regions
    fig.subplot(2, 2, 1)
    fig = self._plot_decision_function(fig, plot_background=True)
    fig.sp.plot_path(x_seq, **path_kwargs)
    _plot_constraint(fig, 'k', .4)
    fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

    # Panel 2: multiclass evasion objective function
    fig.subplot(2, 2, 2)
    fig = self._plot_decision_function(fig)
    fig.sp.plot_fgrads(eva.objective_function_gradient,
                       grid_limits=ds_bounds,
                       n_grid_points=20,
                       color='k',
                       alpha=.5)
    fig.sp.plot_path(x_seq, **path_kwargs)
    _plot_constraint(fig, 'w', .5)
    fig.sp.plot_fun(lambda z: eva.objective_function(z),
                    multipoint=True,
                    grid_limits=ds_bounds,
                    colorbar=False,
                    n_grid_points=20,
                    plot_levels=False)
    fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

    # Panels 3 and 4: values collected at each dmax
    if self.y_target is None:
        score_title = "Classifier Score for True Class (Indiscr. Evasion)"
    else:
        score_title = "Classifier Score for Target Class (Targ. Evasion)"
    line_panels = ((3, score_title, scores),
                   (4, "Objective Function", f_seq))
    for position, title, values in line_panels:
        fig.subplot(2, 2, position)
        fig.sp.title(title)
        fig.sp.plot(values)
        fig.sp.grid()
        fig.sp.xlim(0, dmax)
        fig.sp.xlabel("dmax")

    fig.tight_layout()

    k_name = 'lin' if self.kernel is None else self.kernel.class_type
    file_name = \
        "pgd_ls_reject_threshold_{:}c_kernel-{:}_target-{:}.pdf".format(
            self.ds.num_classes, k_name, self.y_target)
    fig.savefig(fm.join(self.images_folder, file_name))
def compute_indices(self, dataset):
    """Compute training set and test set indices for each fold.

    For each fold the classes are shuffled and scanned until the
    desired number of training classes is reached. A class is skipped
    if it has fewer than `n_train_samples + 1` samples. From each
    selected class `n_train_samples` random samples go to the training
    set, and all the remaining samples of the dataset go to the test set.

    Parameters
    ----------
    dataset : CDataset
        Dataset to split.

    Returns
    -------
    CDataSplitter
        Instance of the dataset splitter with tr/ts indices.

    """
    # Discard the indices stored by any previous call
    self._tr_idx = []
    self._ts_idx = []
    self._tr_classes = []

    # Half of the classes are used if no custom number is selected
    if self.n_train_classes is None:
        n_train_classes = int(dataset.num_classes / 2)
    else:
        n_train_classes = int(self.n_train_classes)

    for fold in range(self.num_folds):

        # A specified random state is shifted by 1234 for each fold to
        # get different folds. None is left as is (numpy will manage it)
        seed = None if self.random_state is None \
            else self.random_state + 1234 * fold

        # Only 'n_train_classes' classes will be used for training, but
        # all of them are shuffled so that the next ones act as backup
        # when a class is skipped for having too few samples
        shuffled_classes = CArray.randsample(
            dataset.classes, dataset.num_classes, random_state=seed)

        tr_idx = CArray([], dtype=int)
        tr_classes = CArray([], dtype=shuffled_classes.dtype)

        for candidate in shuffled_classes:

            if tr_classes.size >= n_train_classes:
                break  # desired number of training classes reached

            # Indices of the samples of the candidate class
            class_idx = CArray(dataset.Y.find(dataset.Y == candidate))

            # At least n_train_samples + 1 samples are required
            if class_idx.size < self.n_train_samples + 1:
                self.logger.warning(
                    "skipping class {:} for training set. "
                    "{:} samples is less than {:}.".format(
                        candidate, class_idx.size,
                        self.n_train_samples + 1))
                continue

            # Random subselection of the samples of the class
            picked = CArray.randsample(
                class_idx, self.n_train_samples, random_state=seed)

            tr_idx = tr_idx.append(picked)
            tr_classes = tr_classes.append(candidate)

        # The training classes are stored sorted
        self._tr_classes.append(tr_classes.sort())

        # The training indices are stored sorted too
        tr_idx.sort(inplace=True)

        # Every sample not chosen for training goes to test
        ts_idx = CArray(
            [i for i in range(dataset.num_samples) if i not in tr_idx])

        self._tr_idx.append(tr_idx)
        self._ts_idx.append(ts_idx)

    return self
def test_append(self):
    """Test for CArray.append() method."""
    self.logger.info("Test for CArray.append() method.")

    def _check_all_axes(a1, a2):
        """Check `a1.append(a2)` with axis None, 0 and 1."""
        self.logger.info("a1: {:} ".format(a1))
        self.logger.info("a2: {:} ".format(a2))

        # axis=None (default): the result should be ravelled
        flat_res = a1.append(a2)
        self.logger.info("a1.append(a2): {:}".format(flat_res))
        if a1.isdense:
            self.assertEqual(1, flat_res.ndim)
        else:
            # a sparse result cannot be 1-d, so shape[0] is checked
            self.assertEqual(1, flat_res.shape[0])

        # The result must be the elements of a1 followed by those of a2
        expected_head = a1.todense().ravel()
        expected_tail = a2.todense().ravel()
        if a1.issparse:
            # result will be sparse, so always 2d
            expected_head = expected_head.atleast_2d()
            expected_tail = expected_tail.atleast_2d()
        self.assert_array_equal(flat_res[:a1.size], expected_head)
        self.assert_array_equal(flat_res[a1.size:], expected_tail)

        rows1, cols1 = a1.atleast_2d().shape
        rows2, cols2 = a2.atleast_2d().shape

        # axis=0: a2 is stacked below a1
        vert_res = a1.append(a2, axis=0)
        self.logger.info("a1.append(a2, axis=0): {:}".format(vert_res))
        self.assertEqual(cols1, vert_res.shape[1])
        self.assertEqual(rows1 + rows2, vert_res.shape[0])
        self.assert_array_equal(vert_res[rows1:, :], a2)

        # axis=1: a2 is stacked to the right of a1
        horiz_res = a1.append(a2, axis=1)
        self.logger.info("a1.append(a2, axis=1): {:}".format(horiz_res))
        self.assertEqual(cols1 + cols2, horiz_res.shape[1])
        self.assertEqual(rows1, horiz_res.shape[0])
        self.assert_array_equal(horiz_res[:, cols1:], a2)

    dense, sparse = self.array_dense, self.array_sparse
    combinations = (
        (dense, dense), (sparse, sparse), (sparse, dense), (dense, sparse))
    for first, second in combinations:
        _check_all_axes(first, second)

    # check append on empty arrays
    empty_sparse = CArray([], tosparse=True)
    empty_dense = CArray([], tosparse=False)
    for axis in (None, 0, 1):
        appended = empty_sparse.append(empty_dense, axis=axis)
        self.assertTrue((appended == empty_dense).all())
def apply_feasible_manipulations(self, t, x: CArray):
    """Append the manipulation vector, as integer values, to the sample.

    Parameters
    ----------
    t : CArray
        The vector of manipulations. It is multiplied by 255 before the
        cast to integer (presumably values are in [0,1] - to be
        confirmed against the caller).
    x : CArray
        The sample to which the values are appended.

    Returns
    -------
    CArray
        The result of appending the integer values obtained from `t`
        to `x`.

    """
    # The builtin `int` replaces `np.int`: the latter was only an alias of
    # the former, deprecated in NumPy 1.20 and removed in NumPy 1.24,
    # where accessing it raises AttributeError.
    byte_values = (t * 255).astype(int)
    return x.append(byte_values)