def create_points_grid(grid_limits, n_grid_points):
    """Build a padded, equally spaced 2D grid of points.

    Each axis range is enlarged on both sides by 5% of its extent
    and then sampled with `n_grid_points` equally spaced values.

    Parameters
    ----------
    grid_limits : list of tuple
        List with a tuple of min/max limits for each axis.
        If None, [(0, 1), (0, 1)] limits will be used.
    n_grid_points : int
        Number of grid points sampled along each axis.

    Returns
    -------
    pad_grid_point_features : CArray
        Grid points stacked row-wise: first column from the x mesh,
        second column from the y mesh.
    pad_xgrid : CArray
        First array returned by `CArray.meshgrid`.
    pad_ygrid : CArray
        Second array returned by `CArray.meshgrid`.

    """
    limits = grid_limits if grid_limits is not None else [(0, 1), (0, 1)]
    x_axis, y_axis = limits[0], limits[1]

    def _padded_axis(low, high):
        # Enlarge the axis by 5% of its extent on each side, then sample it
        margin = 0.05 * (high - low)
        return CArray.linspace(low - margin, high + margin,
                               num=n_grid_points)

    x_samples = _padded_axis(x_axis[0], x_axis[1])
    y_samples = _padded_axis(y_axis[0], y_axis[1])

    # Expand the per-axis samples to the full mesh
    x_mesh, y_mesh = CArray.meshgrid((x_samples, y_samples))

    # Flatten each mesh to a column and place the columns side by side
    x_column = x_mesh.reshape((x_mesh.size, 1))
    y_column = y_mesh.reshape((y_mesh.size, 1))
    grid_points = CArray.concatenate(x_column, y_column, axis=1)

    return grid_points, x_mesh, y_mesh
def _performance_score(self, y_true, score):
    """Compute the Partial Area Under the ROC Curve (AUC).

    The ROC curve computed by `CRoc` is interpolated on `self.n_points`
    equally spaced false positive rates in [0, `self.fpr`], and the
    area under the interpolated curve is returned.

    Parameters
    ----------
    y_true : CArray
        Flat array with true binary labels in range
        {0, 1} or {-1, 1} for each pattern.
    score : CArray
        Flat array with target scores for each pattern, can either be
        probability estimates of the positive class or confidence values.

    Returns
    -------
    metric : float
        Returns metric value as float.

    Notes
    -----
    This implementation is restricted to the binary classification task.

    """
    roc_curve = CRoc().compute(y_true, score)
    roc_fpr, roc_tpr = roc_curve[0:2]

    # Resample the curve only on the [0, self.fpr] portion of the x axis
    partial_fpr = CArray.linspace(0, self.fpr, self.n_points)
    partial_tpr = partial_fpr.interp(roc_fpr, roc_tpr)

    return skm.auc(partial_fpr.tondarray(), partial_tpr.tondarray())
def plot_hyperplane(img, clf, min_v, max_v, linestyle, label):
    """Plot the hyperplane associated to the OVO clf.

    Parameters
    ----------
    img : figure object
        Object whose `sp.plot` method is used for drawing.
    clf : classifier
        Classifier exposing the `w` (indexed at 0 and 1) and `b`
        attributes of the separating hyperplane.
    min_v, max_v : scalar
        Range of the x coordinate; the line is drawn 5 units beyond
        both ends.
    linestyle : str
        Format argument forwarded to `sp.plot`.
    label : str
        Label forwarded to `sp.plot`.

    """
    overshoot = 5  # make sure the line is long enough
    x_values = CArray.linspace(min_v - overshoot, max_v + overshoot)

    # Solve w[0] * x + w[1] * y + b = 0 for y
    weights = clf.w
    y_values = -(weights[0] * x_values + clf.b) / weights[1]

    img.sp.plot(x_values, y_values.ravel(), linestyle, label=label)
def estimate_density(self, x, n_points=1000):
    """Estimate density of input array.

    A `KernelDensity` estimator configured with this object's
    parameters is fitted on `x` and evaluated on `n_points` equally
    spaced coordinates covering a slightly widened [min, max] range
    of `x`.

    Parameters
    ----------
    x : CArray
        Input data. It is passed through `atleast_2d()` before fitting.
        NOTE(review): presumably expected as a single-feature column
        of samples, since the evaluation grid is a column -- confirm.
    n_points : int, optional
        Number of coordinates where the density is evaluated.
        Default 1000.

    Returns
    -------
    x : CArray
        Arrays with coordinates used to estimate density.
    df : CArray
        Density function values (exponential of the estimator's
        `score_samples` output).

    """
    kde = KernelDensity(bandwidth=self.bandwidth,
                        algorithm=self.algorithm,
                        kernel=self.kernel,
                        metric=self.metric,
                        atol=self.atol,
                        rtol=self.rtol,
                        breadth_first=self.breadth_first,
                        leaf_size=self.leaf_size,
                        metric_params=self.metric_params).fit(
        x.atleast_2d().get_data())

    # Widen the evaluation range by 1% of each bound's magnitude.
    # Scaling both bounds by 1.01 only widens the range when
    # min < 0 < max: a positive minimum (or a negative maximum) would be
    # moved *inside* the data range. Pick the factor from the sign so
    # the range always grows (a bound equal to 0 stays at 0).
    low, high = x.min(), x.max()
    lower_bound = low * 1.01 if low < 0 else low * 0.99
    upper_bound = high * 1.01 if high > 0 else high * 0.99

    x = CArray.linspace(lower_bound, upper_bound, n_points)
    x = x.atleast_2d().T  # one coordinate per row

    # score_samples output is exponentiated to obtain density values
    df = CArray(kde.score_samples(x.get_data()))
    df = df.exp()

    return x, df
def test_margin(self):
    """Fit an SGD classifier on two blobs and save a plot of its margins."""
    self.logger.info("Testing margin separation of SGD...")

    # Dataset of 50 points drawn from 2 blobs
    dataset = CDLRandomBlobs(n_samples=50, centers=2,
                             random_state=0, cluster_std=0.60).load()

    # Train the classifier
    clf = CClassifierSGD(loss=CLossHinge(),
                         regularizer=CRegularizerL2(),
                         alpha=0.01, max_iter=200, random_state=0)
    clf.fit(dataset.X, dataset.Y)

    # 10x10 evaluation grid over [-1, 5] on both axes
    x_ticks = CArray.linspace(-1, 5, 10)
    y_ticks = CArray.linspace(-1, 5, 10)
    X1, X2 = np.meshgrid(x_ticks.tondarray(), y_ticks.tondarray())

    # Decision function value (y=1) at every grid node
    Z = CArray.empty(X1.shape)
    for node in np.ndindex(X1.shape):
        point = CArray([X1[node], X2[node]])
        Z[node] = clf.decision_function(point, y=1)

    # Draw the decision boundary (solid) and the two margins (dashed)
    fig = CFigure(linewidth=1)
    fig.sp.contour(X1, X2, Z, [-1.0, 0.0, 1.0],
                   colors='k',
                   linestyles=['dashed', 'solid', 'dashed'])
    fig.sp.scatter(dataset.X[:, 0].ravel(),
                   dataset.X[:, 1].ravel(),
                   c=dataset.Y, s=40)

    output_path = fm.join(
        fm.abspath(__file__), 'figs', 'test_c_classifier_sgd2.pdf')
    fig.savefig(output_path)
def test_margin(self):
    """Compare the boundaries of an unweighted and a class-weighted SVM.

    Both classifiers are trained on the same unbalanced dataset and
    their separating lines are saved to a figure.
    """
    self.logger.info("Testing margin separation of SVM...")

    import numpy as np

    # Unbalanced dataset: 1000 points of class 0 around the origin and
    # 100 points of class 1 around (2, 2)
    rng = np.random.RandomState(0)
    n_samples_1 = 1000
    n_samples_2 = 100
    X = np.r_[1.5 * rng.randn(n_samples_1, 2),
              0.5 * rng.randn(n_samples_2, 2) + [2, 2]]
    y = [0] * (n_samples_1) + [1] * (n_samples_2)

    dataset = CDataset(X, y)

    # Classifier without class weights
    clf = CClassifierSVM()
    clf.fit(dataset.X, dataset.Y)

    xx = CArray.linspace(-5, 5)

    def _separating_line(classifier):
        # y = -(w[0] / w[1]) * x - b / w[1]
        weights = classifier.w
        slope = -weights[0] / weights[1]
        return slope * xx - classifier.b / weights[1]

    yy = _separating_line(clf)

    # Classifier giving class 1 ten times the weight of class 0
    wclf = CClassifierSVM(class_weight={0: 1, 1: 10})
    wclf.fit(dataset.X, dataset.Y)
    wyy = _separating_line(wclf)

    fig = CFigure(linewidth=1)
    fig.sp.plot(xx, yy.ravel(), 'k-', label='no weights')
    fig.sp.plot(xx, wyy.ravel(), 'k--', label='with weights')
    fig.sp.scatter(X[:, 0].ravel(), X[:, 1].ravel(), c=y)
    fig.sp.legend()

    output_path = fm.join(
        fm.abspath(__file__), 'figs', 'test_c_classifier_svm.pdf')
    fig.savefig(output_path)
from secml.array import CArray
from secml.figure import CFigure


def f(x, y):
    """Evaluate the example surface at the given coordinates."""
    polynomial = 1 - x / 2 + x**5 + y**3
    envelope = (-x**2 - y**2).exp()
    return polynomial * envelope


fig = CFigure()

# Sample both axes with 256 points in [-3, 3] and build the mesh
x_axis = CArray.linspace(-3, 3, 256)
y_axis = CArray.linspace(-3, 3, 256)
X, Y = CArray.meshgrid((x_axis, y_axis))

# Labelled contour lines of the surface
Z = f(X, Y)
C = fig.sp.contour(X, Y, Z, linewidths=.5, cmap='hot')
fig.sp.clabel(C, inline=1, fontsize=10)

# Hide the ticks on both axes
fig.sp.xticks(())
fig.sp.yticks(())

fig.show()
from secml.array import CArray
from secml.figure import CFigure

# 256 samples in [-3.14, 3.14], end point included
X = CArray.linspace(-3.14, 3.14, 256, endpoint=True)
C = X.cos()
S = X.sin()

fig = CFigure(fontsize=14)

# Cosine with explicit line properties, sine with the defaults
fig.sp.plot(X, C,
            color='red', alpha=0.5,
            linewidth=1.0, linestyle='-',
            label="cosine")
fig.sp.plot(X, S, label="sine")

# Ticks at multiples of (approximately) pi / 2 on x and at -1, 0, 1 on y
x_tick_values = [-3.14, -3.14 / 2, 0, 3.14 / 2, 3.14]
y_tick_values = [-1, 0, +1]
fig.sp.xticks(CArray(x_tick_values))
fig.sp.yticks(CArray(y_tick_values))

fig.sp.grid()
fig.sp.legend(loc=0)

fig.show()
def average(fpr, tpr, n_points=1000):
    """Compute the average of the input tpr/fpr pairs.

    Every (fpr, tpr) pair is interpolated on a common grid of
    `n_points` equally spaced False Positive Rates in [0, 1];
    the interpolated True Positive Rates are then averaged.

    Parameters
    ----------
    fpr, tpr : CArray or list of CArray
        CArray or list of CArrays with False/True Positive Rates
        as output of `.CRoc`.
    n_points : int, optional
        Default 1000, is the number of points to be used for interpolation.

    Returns
    -------
    mean_fpr : CArray
        Flat array with the `n_points` increasing False Positive Rates
        of the common interpolation grid.
    mean_tpr : CArray
        Flat array with True Positive Rates averaged over all available
        repetitions. First and last elements are forced to 0 and 1.
    std_dev_tpr : CArray
        Flat array with standard deviation of True Positive Rates.
        Last element is forced to 0.

    Raises
    ------
    ValueError
        If no array is given or if the number of fpr and tpr arrays
        differs.

    """
    # Normalize both inputs to lists
    fpr_list = fpr if isinstance(fpr, list) else [fpr]
    tpr_list = tpr if isinstance(tpr, list) else [tpr]

    n_fpr, n_tpr = len(fpr_list), len(tpr_list)

    # Input consistency checks
    if n_fpr == 0:
        raise ValueError("At least 1 array with false/true "
                         "positives must be specified.")
    if n_fpr != n_tpr:
        raise ValueError("Number of True Positive Rates and "
                         "False Positive Rates must be the same.")

    # Common x axis shared by all the interpolated curves
    mean_fpr = CArray.linspace(0, 1, n_points)

    # One row per repetition, kept to compute the standard deviation
    all_roc_tpr = CArray.zeros(shape=(n_tpr, n_points))

    mean_tpr = 0.0  # replaced by an array sum on the first iteration
    for row, (fpr_i, tpr_i) in enumerate(zip(fpr_list, tpr_list)):
        interp_tpr = mean_fpr.interp(fpr_i, tpr_i)
        all_roc_tpr[row, :] = interp_tpr
        mean_tpr += interp_tpr
        mean_tpr[0] = 0.0  # First should be (0,0) to prevent side effects

    mean_tpr /= n_tpr
    mean_tpr[-1] = 1.0  # Last point should be (1,1) to prevent side effects

    # NOTE(review): only the last std value is zeroed, although the first
    # mean value is pinned too -- confirm whether this is intended.
    std_dev_tpr = all_roc_tpr.std(axis=0, keepdims=False)
    std_dev_tpr[-1] = 0

    return mean_fpr, mean_tpr, std_dev_tpr
def plot_fun(self, func, multipoint=False,
             plot_background=True, plot_levels=True,
             levels=None, levels_color='k', levels_style=None,
             levels_linewidth=1.0, n_colors=50, cmap='jet',
             alpha=1.0, alpha_levels=1.0, vmin=None, vmax=None,
             colorbar=True, n_grid_points=30,
             grid_limits=None, func_args=(), **func_kwargs):
    """Plot a function (used for decision functions or boundaries).

    The function is evaluated on a grid of points and drawn as a
    filled contour plot (background) and/or as contour lines (levels).

    Parameters
    ----------
    func : unbound function
        Function to be plotted.
    multipoint : bool, optional
        If True, all grid points will be passed to the function.
        If False (default), function is iterated over each
        point of the grid.
    plot_background : bool, optional
        Specifies whether to plot the value of func at each point
        in the background using a colorbar.
    plot_levels : bool, optional
        Specify if function levels should be plotted (default True).
    levels : list or None, optional
        List of levels to be plotted.
        If None, 0 (zero) level will be plotted.
    levels_color : str or tuple or None, optional
        If None, the colormap specified by cmap will be used.
        If a string, like 'k', all levels will be plotted in this color.
        If a tuple of colors (string, float, rgb, etc),
        different levels will be plotted in different colors
        in the order specified. Default 'k'.
    levels_style : [ None | 'solid' | 'dashed' | 'dashdot' | 'dotted' ]
        If levels_style is None, the default is 'solid'.
        levels_style can also be an iterable of the above strings
        specifying a set of levels_style to be used. If this iterable
        is shorter than the number of contour levels it will be
        repeated as necessary.
    levels_linewidth : float or list of floats, optional
        The line width of the contour lines. Default 1.0.
    n_colors : int, optional
        Number of color levels of background plot. Default 50.
    cmap : str or list or `matplotlib.pyplot.cm`, optional
        Colormap to use (default 'jet'). Could be a list of colors.
    alpha : float, optional
        The alpha blending value of the background. Default 1.0.
    alpha_levels : float, optional
        The alpha blending value of the levels. Default 1.0.
    vmin, vmax : float or None, optional
        Limits of the colors used for function plotting.
        If None, colors are determined by the colormap.
    colorbar : bool, optional
        True if colorbar should be displayed.
    n_grid_points : int, optional
        Number of grid points.
    grid_limits : list of tuple, optional
        List with a tuple of min/max limits for each axis.
        If None, [(0, 1), (0, 1)] limits will be used.
    func_args, func_kwargs
        Other arguments or keyword arguments to pass to `func`.

    Returns
    -------
    ch : object or None
        Output of `self.contourf` when `plot_background` is True,
        None otherwise.

    Examples
    --------
    .. plot:: pyplots/plot_fun.py
        :include-source:

    """
    if levels is None:
        levels = [0]

    # Grid of points where the function will be evaluated
    grid_points, x_mesh, y_mesh = create_points_grid(
        grid_limits, n_grid_points)

    # Evaluate the function: all points at once or one row at a time
    if multipoint is True:
        fun_values = func(grid_points, *func_args, **func_kwargs)
    else:
        fun_values = grid_points.apply_along_axis(
            func, 1, *func_args, **func_kwargs)

    # Bring the values back to the shape of the mesh
    mesh_shape = (x_mesh.shape[0], x_mesh.shape[1])
    fun_surface = fun_values.reshape(mesh_shape)

    # Clipping values to show a correct color plot
    lower_clip = vmin if vmin is not None else -inf
    upper_clip = vmax if vmax is not None else inf
    fun_surface = fun_surface.clip(lower_clip, upper_clip)

    if is_list(cmap):
        # Convert list of colors to colormap
        from matplotlib.colors import ListedColormap
        cmap = ListedColormap(cmap)

    ch = None
    if plot_background is True:
        # Fully colored plot using `n_colors` levels
        ch = self.contourf(x_mesh, y_mesh, fun_surface,
                           n_colors, cmap=cmap, alpha=alpha,
                           vmin=vmin, vmax=vmax, zorder=0)

        if colorbar is True:
            # 20 ticks equally spaced over the plotted values
            colorbar_ticks = CArray.linspace(
                fun_surface.min(), fun_surface.max(), 20)
            self.colorbar(ch, ticks=colorbar_ticks)

    if plot_levels is True:
        self.contour(x_mesh, y_mesh, fun_surface,
                     levels=levels,
                     colors=levels_color,
                     linestyles=levels_style,
                     linewidths=levels_linewidth,
                     alpha=alpha_levels)

    # Customizing figure
    self.apply_params_fun()

    return ch
from secml.array import CArray
from secml.figure import CFigure

fig = CFigure(fontsize=16)
fig.title('Errorbars can go negative!')

# Symmetric-log scale on both axes
fig.sp.xscale("symlog", nonposx='clip')
fig.sp.yscale("symlog", nonposy='clip')

# 20 values 10 ** e, with e equally spaced in [0, 2]
exponents = CArray.linspace(0.0, 2.0, 20)
x = CArray(10.0).pow(exponents)
y = x ** 2.0

# Error bars proportional to x, and to y plus a constant offset
x_error = 0.1 * x
y_error = 5.0 + 0.75 * y
fig.sp.errorbar(x, y, xerr=x_error, yerr=y_error)

fig.sp.ylim(bottom=0.1)

fig.sp.grid()
fig.show()