Esempio n. 1
0
    def plot_ds(self, dataset, colors=None, markers='o', *args, **kwargs):
        """Draw the samples of a dataset, one color/marker per class.

        For each class, the samples having that label are drawn with a
        single `self.plot` call (no connecting line), using the first two
        columns of `dataset.X` as coordinates.

        Parameters
        ----------
        dataset : CDataset
            Dataset containing the samples to plot.
        colors : list or None, optional
            Colors from which the colormap is built.
            If a list, a `ListedColormap` is created from it.
            If None and the number of classes is <= 6, the first colors of
            ['blue', 'red', 'lightgreen', 'black', 'gray', 'cyan'] are used.
            If None and the number of classes is > 6, 'jet' colormap is used.
            The color of each class is obtained by mapping the class
            indices `0 .. n_classes - 1` through the colormap.
        markers : list or str, optional
            Marker to use for plotting. Default is 'o' (circle).
            If a string, the same marker is used for every class.
            If a list, it must contain one marker for each class.
        args, kwargs : any
            Extra positional/keyword arguments forwarded to `self.plot`.

        Raises
        ------
        ValueError
            If `markers` is a list whose length differs from the
            number of classes of the dataset.

        """
        classes = dataset.classes
        n_classes = classes.size

        # Choose the colormap from which class colors are extracted
        if colors is not None:
            from matplotlib.colors import ListedColormap
            cmap = ListedColormap(colors)
        elif n_classes > 6:
            cmap = 'jet'
        else:
            from matplotlib.colors import ListedColormap
            default_colors = [
                'blue', 'red', 'lightgreen', 'black', 'gray', 'cyan']
            cmap = ListedColormap(default_colors[:n_classes])

        # One RGBA row per class (array of shape n_classes X 4)
        class_colors = cm.ScalarMappable(cmap=cmap).to_rgba(range(n_classes))

        per_class_markers = is_list(markers)
        if per_class_markers and len(markers) != n_classes:
            raise ValueError(
                "{:} markers must be specified.".format(n_classes))

        for cls_idx, cls in enumerate(classes.tolist()):
            marker = markers[cls_idx] if per_class_markers else markers
            # Indices of the samples belonging to the current class
            cls_samples = dataset.Y.find(dataset.Y == cls)
            self.plot(dataset.X[cls_samples, 0],
                      dataset.X[cls_samples, 1],
                      *args,
                      linestyle='None',
                      color=class_colors[cls_idx],
                      marker=marker,
                      **kwargs)

        # Apply the dataset-specific figure parameters
        self.apply_params_ds()
Esempio n. 2
0
    def __setattr__(self, key, value):
        """Set an attribute of the header, normalizing array-like values.

        Parameters
        ----------
        key : str
            Name of the attribute to set.
        value : any
            Value to assign to the attribute.
            Lists are converted to CArray, and any CArray (given as input
            or obtained from a list) is stored after calling `.ravel()`.
            Any other object is stored as is.

        """
        # Lists become CArrays to facilitate indexing
        if is_list(value):
            value = CArray(value)

        # Arrays are always stored as vector-like
        if isinstance(value, CArray):
            value = value.ravel()

        super(CDatasetHeader, self).__setattr__(key, value)

        # After setting a writable attribute, re-validate the parameters
        if is_writable(self, key):
            self._validate_params()
        def check_init_builtin(totest_list):
            """Check CArray creation from each builtin object of a list.

            Every element is converted to CArray both as dense and as
            sparse; the sparsity flag and the resulting shape/size are
            then checked depending on the type of the input element.

            Parameters
            ----------
            totest_list : list
                Objects (list of lists, lists, scalars or booleans)
                to be used to initialize a CArray.

            Raises
            ------
            TypeError
                If an element is not a list of lists, a list,
                a scalar or a boolean.

            """
            for elem in totest_list:
                for sparse_flag in (False, True):
                    arr = CArray(elem, tosparse=sparse_flag)
                    self.assertTrue(arr.issparse == sparse_flag)

                    if is_list_of_lists(elem):
                        # 2-D input: rows and columns must be preserved
                        self.assertTrue(arr.shape[0] == len(elem))
                        self.assertTrue(arr.shape[1] == len(elem[0]))
                        continue

                    if is_list(elem):
                        if arr.issparse is True:
                            # Sparse case: list length on the second axis
                            self.assertTrue(arr.shape[1] == len(elem))
                        elif arr.isdense is True:
                            # Dense case: a flat array is expected
                            self.assertTrue(arr.ndim == 1)
                            self.assertTrue(arr.shape[0] == len(elem))
                        continue

                    if is_scalar(elem) or is_bool(elem):
                        self.assertTrue(arr.size == 1)
                        continue

                    raise TypeError("test_init_builtin should not be used "
                                    "to test {:}".format(type(elem)))
Esempio n. 4
0
    def _check_is_fitted_scaler(scaler, attributes, msg=None, check_all=True):
        """Check if the input scaler is trained (fitted).

        The check is performed on the `sklearn_scaler` attribute of the
        input object, by verifying if all or any of the given attributes
        exist (via `hasattr`) on it.

        Parameters
        ----------
        scaler : object
            Instance to check. Must expose a `sklearn_scaler` attribute.
        attributes : str or list of str
            Attribute or list of attributes to check.
            Es.: `['classes', 'n_features', ...], 'classes'`
        msg : str or None, optional
            If None, the default error message is:
            "this `{name}` is not trained. Call `._fit()` first.".
            If '{name}' is present in the message string, it is
            substituted by the class name of the checked object.
        check_all : bool, optional
            Specify whether to check (True) if all of the given attributes
            exist or (False) just any of them. Default True.

        Raises
        ------
        TypeError
            If `attributes` is neither a string nor a list.
        NotFittedError
            If `check_all` is True and any of the attributes is missing;
            if `check_all` is False and all of the attributes are missing.

        """
        from secml.core.type_utils import is_list, is_str
        from secml.core.exceptions import NotFittedError

        if msg is None:
            # NOTE(review): the message names `._fit()`; presumably the
            # public entry point is `.fit()` - confirm before changing.
            msg = "this `{name}` is not trained. Call `._fit()` first."

        if is_str(attributes):
            attributes = [attributes]
        elif not is_list(attributes):
            raise TypeError(
                "the attribute(s) to check must be a string or a list "
                "of strings")

        obj = scaler.sklearn_scaler

        # One flag per attribute, True when the attribute is missing
        missing = [not hasattr(obj, attr) for attr in attributes]

        if check_all is True:
            not_fitted = any(missing)
        else:
            not_fitted = all(missing)

        if not_fitted:
            raise NotFittedError(msg.format(name=scaler.__class__.__name__))
    def _discretize_data(ds, eta):
        """Discretize the features of the input dataset on a grid of step eta.

        Each value is divided by the step, rounded, and multiplied back
        by the step. The `X` attribute of the input dataset is modified.

        Parameters
        ----------
        ds : CDataset
            Dataset whose features should be discretized.
        eta : list or scalar
            Discretization step. If a list, one step for each feature
            must be given. If a scalar, it is used for all the features.

        Returns
        -------
        CDataset
            The input dataset, with discretized features.

        Raises
        ------
        ValueError
            If `eta` is a list and its length differs from `ds.n_features`.

        """
        if is_list(eta):
            if len(eta) != ds.n_features:
                raise ValueError('len(eta) != n_features')
            # A different step for each feature (column)
            for feat_idx, step in enumerate(eta):
                column = ds.X[:, feat_idx]
                ds.X[:, feat_idx] = (column / step).round() * step
        else:
            # The same step for all the features
            ds.X = (ds.X / eta).round() * eta

        return ds
Esempio n. 6
0
        def check_init_builtin(totest_elem):
            """Check CArray creation from a single builtin object.

            The element is converted to CArray both as dense and as
            sparse; the sparsity flag, the resulting shape/size and the
            stored `input_shape` are checked depending on the input type.

            Parameters
            ----------
            totest_elem : list of lists or list or scalar or bool
                Object to be used to initialize a CArray.

            Raises
            ------
            TypeError
                If the element is not a list of lists, a list,
                a scalar or a boolean.

            """
            for sparse_flag in (False, True):
                arr = CArray(totest_elem, tosparse=sparse_flag)
                self.assertEqual(arr.issparse, sparse_flag)

                if is_list_of_lists(totest_elem):
                    if is_list_of_lists(totest_elem[0]):
                        # N-Dimensional input: trailing dims are expected
                        # to be accumulated on the second axis
                        in_shape = arr.input_shape
                        self.assertEqual(in_shape[0], len(totest_elem))
                        self.assertEqual(in_shape[1], len(totest_elem[0]))
                        self.assertEqual(arr.shape[0], len(totest_elem))
                        self.assertEqual(arr.shape[1], sum(in_shape[1:]))
                    else:
                        # Plain 2-D input: rows and columns are preserved
                        self.assertEqual(arr.shape[0], len(totest_elem))
                        self.assertEqual(arr.shape[1], len(totest_elem[0]))
                    continue

                if is_list(totest_elem):
                    if arr.issparse is True:
                        # Sparse case: list length on the second axis
                        self.assertEqual(arr.shape[1], len(totest_elem))
                    elif arr.isdense is True:
                        # Dense case: a flat array is expected
                        self.assertTrue(arr.ndim == 1)
                        self.assertEqual(arr.shape[0], len(totest_elem))
                    self.assertEqual(arr.input_shape, (len(totest_elem), ))
                    continue

                if is_scalar(totest_elem) or is_bool(totest_elem):
                    self.assertEqual(arr.size, 1)
                    self.assertEqual(arr.input_shape, (1, ))
                    continue

                raise TypeError(
                    "test_init_builtin should not be used "
                    "to test {:}".format(type(totest_elem)))
Esempio n. 7
0
    def _performance_score(self, y_true, score, rep_idx=0):
        """Compute the False Negative Rate at the stored threshold.

        The samples with true label equal to 1 are the positives.
        The returned value is 1 minus the fraction of positives whose
        score minus the threshold is greater than or equal to zero.

        Parameters
        ----------
        y_true : CArray
            Ground truth (true) labels or target scores.
        score : CArray
            Flat array with target scores for each pattern, can either be
            probability estimates of the positive class or confidence values.
        rep_idx : int, optional
            Index of the threshold to use when `self.th` is a list.
            Ignored otherwise. Default 0.

        Returns
        -------
        metric : float
            Returns metric value as float.

        """
        # Pick the threshold (one per repetition if a list is stored)
        if is_list(self.th) is True:
            threshold = self.th[rep_idx]
        else:
            threshold = self.th

        positives = CArray(y_true == 1)

        # Number of positives scored at or above the threshold
        n_above = float(CArray(score[positives] - threshold >= 0).sum())

        return 1 - (n_above / positives.sum())
Esempio n. 8
0
    def _discretize_data(ds, eta):
        """Discretize the features of a dataset and drop duplicate samples.

        Each value is divided by the step, rounded, and multiplied back
        by the step (this modifies the `X` attribute of the input
        dataset). Then only the first occurrence of each distinct
        sample is kept.

        Parameters
        ----------
        ds : CDataset
            Dataset whose features should be discretized.
        eta : list or scalar
            Discretization step. If a list, one step for each feature
            must be given. If a scalar, it is used for all the features.

        Returns
        -------
        CDataset
            Dataset with the unique discretized samples.

        Raises
        ------
        ValueError
            If `eta` is a list and its length differs from `ds.n_features`.

        """
        if not is_list(eta):
            # The same step for all the features
            ds.X = (ds.X / eta).round() * eta
        else:
            if len(eta) != ds.n_features:
                raise ValueError('len(eta) != n_features')
            # A different step for each feature (column)
            for feat_idx, step in enumerate(eta):
                ds.X[:, feat_idx] = \
                    (ds.X[:, feat_idx] / step).round() * step

        # Discretization is likely to produce duplicate samples:
        # keep the index of the first occurrence of each unique row
        rows = [tuple(sample) for sample in ds.X.tondarray()]
        _, first_idx = np.unique(rows, axis=0, return_index=True)

        return ds[first_idx.tolist(), :]
Esempio n. 9
0
 def __init__(self, th=0.0):
     """Store the threshold: lists are kept as is, the rest cast to float.

     Parameters
     ----------
     th : scalar or list, optional
         Threshold value, or list of threshold values. Default 0.0.

     """
     if is_list(th) is False:
         self.th = float(th)
     else:
         self.th = th
Esempio n. 10
0
    def plot_fun(self,
                 func,
                 multipoint=False,
                 plot_background=True,
                 plot_levels=True,
                 levels=None,
                 levels_color='k',
                 levels_style=None,
                 levels_linewidth=1.0,
                 n_colors=50,
                 cmap='jet',
                 alpha=1.0,
                 alpha_levels=1.0,
                 vmin=None,
                 vmax=None,
                 colorbar=True,
                 n_grid_points=30,
                 grid_limits=None,
                 func_args=(),
                 **func_kwargs):
        """Plot a function (used for decision functions or boundaries).

        The function is evaluated on a grid of points, the obtained
        values are clipped to `[vmin, vmax]` and then drawn as a filled
        contour plot (background) and/or as contour lines (levels).

        Parameters
        ----------
        func : unbound function
            Function to be plotted.
        multipoint : bool, optional
            If True, all grid points will be passed to the function
            in a single call. If False (default), the function is
            applied to each point of the grid separately.
        plot_background : bool, optional
            Specifies whether to plot the value of func at each point
            in the background, as a filled contour plot. Default True.
        plot_levels : bool, optional
            Specify if function levels should be plotted (default True).
        levels : list or None, optional
            List of levels to be plotted.
            If None, 0 (zero) level will be plotted.
        levels_color : str or tuple or None, optional
            If None, the colormap specified by cmap will be used.
            If a string, like 'k', all levels will be plotted in this color.
            If a tuple of colors (string, float, rgb, etc),
            different levels will be plotted in different colors
            in the order specified. Default 'k'.
        levels_style : [ None | 'solid' | 'dashed' | 'dashdot' | 'dotted' ]
            If levels_style is None, the default is 'solid'.
            levels_style can also be an iterable of the above strings
            specifying a set of levels_style to be used. If this iterable
            is shorter than the number of contour levels it will be
            repeated as necessary.
        levels_linewidth : float or list of floats, optional
            The line width of the contour lines. Default 1.0.
        n_colors : int, optional
            Number of color levels of background plot. Default 50.
        cmap : str or list or `matplotlib.pyplot.cm`, optional
            Colormap to use (default 'jet'). If a list of colors,
            it is converted to a `ListedColormap`.
        alpha : float, optional
            The alpha blending value of the background. Default 1.0.
        alpha_levels : float, optional
            The alpha blending value of the levels. Default 1.0.
        vmin, vmax : float or None, optional
            Limits of the colors used for function plotting.
            Function values are also clipped to these limits.
            If None, no clipping is applied on that side and
            colors are determined by the colormap.
        colorbar : bool, optional
            True if colorbar should be displayed. Only considered
            if `plot_background` is True.
        n_grid_points : int, optional
            Number of grid points.
        grid_limits : list of tuple, optional
            List with a tuple of min/max limits for each axis.
            If None, [(0, 1), (0, 1)] limits will be used.
        func_args, func_kwargs
            Other arguments or keyword arguments to pass to `func`.

        Returns
        -------
        object or None
            The object returned by `self.contourf` if the background
            has been plotted, None otherwise.

        Examples
        --------
        .. plot:: pyplots/plot_fun.py
            :include-source:

        """
        if levels is None:
            levels = [0]

        # Build the grid of points on which the function is evaluated
        grid_points, xgrid, ygrid = \
            create_points_grid(grid_limits, n_grid_points)

        # Evaluate the function, either on the whole grid or point by point
        if multipoint is True:
            fun_values = func(grid_points, *func_args, **func_kwargs)
        else:
            fun_values = grid_points.apply_along_axis(
                func, 1, *func_args, **func_kwargs)

        # Arrange the values as the grid, then clip them to the requested
        # limits to show a correct color plot
        lower = vmin if vmin is not None else -inf
        upper = vmax if vmax is not None else inf
        z_values = fun_values.reshape(
            (xgrid.shape[0], xgrid.shape[1])).clip(lower, upper)

        # A list of colors must be converted to a colormap
        if is_list(cmap):
            from matplotlib.colors import ListedColormap
            cmap = ListedColormap(cmap)

        background = None
        if plot_background is True:
            # Draw the filled background using `n_colors` levels
            background = self.contourf(xgrid,
                                       ygrid,
                                       z_values,
                                       n_colors,
                                       cmap=cmap,
                                       alpha=alpha,
                                       vmin=vmin,
                                       vmax=vmax,
                                       zorder=0)

            if colorbar is True:
                # 20 evenly spaced ticks between min and max of the values
                ticks = CArray.linspace(z_values.min(), z_values.max(), 20)
                self.colorbar(background, ticks=ticks)

        if plot_levels is True:
            self.contour(xgrid,
                         ygrid,
                         z_values,
                         levels=levels,
                         colors=levels_color,
                         linestyles=levels_style,
                         linewidths=levels_linewidth,
                         alpha=alpha_levels)

        # Apply the function-specific figure parameters
        self.apply_params_fun()

        return background