    def __init__(self, mean=None, std=None, with_std=True, preprocess=None):

        if mean is not None:
            self._in_mean = (mean, ) if is_scalar(mean) else tuple(mean)
        else:  # mean is None
            self._in_mean = None
        if std is not None:
            self._in_std = (std, ) if is_scalar(std) else tuple(std)
        else:  # std is None
            self._in_std = None

        # Input validation
        if with_std is True:
            if (mean is None and std is not None) or \
                    (mean is not None and std is None) or \
                    (mean is not None and std is not None and
                     len(self._in_mean) != len(self._in_std)):
                raise ValueError("if `with_std` is True, `mean` and `std` "
                                 "should be both None or both scalar or "
                                 "both tuple of the same length")

        self._mean = None
        self._std = None

        self._with_std = with_std

        # Properties of the linear normalizer
        self._w = None
        self._b = None

        super(CNormalizerMeanStd, self).__init__(preprocess=preprocess)
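A minimal usage sketch for the constructor above. The import paths (`secml.array.CArray`, `secml.ml.features.normalization.CNormalizerMeanStd`) and the `fit_transform` call are assumptions based on recent secml releases and may differ in other versions.

# Sketch only: module paths and the fit_transform API are assumed
# from recent secml releases and may differ in your version.
from secml.array import CArray
from secml.ml.features.normalization import CNormalizerMeanStd

x = CArray([[0., 2.], [4., 6.]])

# mean/std given explicitly: both must be scalars, or tuples of equal length
norm_fixed = CNormalizerMeanStd(mean=0.5, std=0.25)
print(norm_fixed.fit_transform(x))

# mean=None, std=None: statistics are estimated from the data at fit time
norm_auto = CNormalizerMeanStd()
print(norm_auto.fit_transform(x))

# Mismatched lengths are rejected by the validation block above
try:
    CNormalizerMeanStd(mean=(0.1, 0.2), std=0.5)
except ValueError as e:
    print("rejected:", e)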
Code example #2
File: test_c_array_init.py - Project: pralab/secml
    def test_init_reshape(self):
        """Test CArray reshape during initialization."""
        arrays = [[[2, 3], [22, 33]], [2, 3], [[2], [3]], 3]

        for a in arrays:
            for sparse in (False, True):
                out_def = CArray(a)
                size = out_def.size  # Expected size
                in_shape = out_def.shape  # Expected input_shape

                for shape in [size, (size, ), (1, size), (size, 1)]:
                    out_res = CArray(a, tosparse=sparse, shape=shape)

                    # Resulting shape will always be (1, n) for sparse
                    if is_scalar(shape):
                        shape = (1, shape) if out_res.issparse else (shape, )
                    if out_res.issparse and len(shape) < 2:
                        shape = (1, shape[0])

                    self.logger.info("Expected 'shape' {:}, got {:}".format(
                        shape, out_res.shape))
                    self.assertEqual(out_res.shape, shape)

                    # The input_shape should not be altered by reshaping
                    self.logger.info(
                        "Expected 'input_shape' {:}, got {:}".format(
                            in_shape, out_res.input_shape))
                    self.assertEqual(out_res.input_shape, in_shape)

                with self.assertRaises(ValueError):
                    # Shape with wrong size, expect error
                    CArray(a, tosparse=sparse, shape=(2, size))
        def check_init_builtin(totest_list):

            for totest_elem in totest_list:
                for tosparse in [False, True]:
                    init_array = CArray(totest_elem, tosparse=tosparse)
                    self.assertTrue(init_array.issparse == tosparse)
                    if is_list_of_lists(totest_elem):
                        self.assertTrue(
                            init_array.shape[0] == len(totest_elem))
                        self.assertTrue(
                            init_array.shape[1] == len(totest_elem[0]))
                    elif is_list(totest_elem):
                        if init_array.issparse is True:
                            self.assertTrue(
                                init_array.shape[1] == len(totest_elem))
                        elif init_array.isdense is True:
                            self.assertTrue(init_array.ndim == 1)
                            self.assertTrue(
                                init_array.shape[0] == len(totest_elem))
                    elif is_scalar(totest_elem) or is_bool(totest_elem):
                        self.assertTrue(init_array.size == 1)
                    else:
                        raise TypeError("test_init_builtin should not be used "
                                        "to test {:}".format(
                                            type(totest_elem)))
    def _cmp_kernel(self, k_fun, a1, a2):
        k = k_fun(a1, a2)
        if isinstance(k, CArray):
            self.logger.info("k shape with inputs {:} {:} is: {:}"
                             "".format(a1.shape, a2.shape, k.shape))
            self.assertEqual(k.shape, (CArray(a1).atleast_2d().shape[0],
                                       CArray(a2).atleast_2d().shape[0]))
        else:
            self.assertTrue(is_scalar(k))
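The helper above encodes the shape convention that a kernel between an (n, d) and an (m, d) input yields an (n, m) CArray. A short sketch of that convention, assuming `CKernelLinear` is importable from `secml.ml.kernels` (the kernel module name has changed across secml versions) and exposes the usual `.k()` call:

# Sketch only: the kernel module path and the .k() call are assumed from
# secml's kernel API; older versions use secml.ml.kernel instead.
from secml.array import CArray
from secml.ml.kernels import CKernelLinear

a1 = CArray([[1., 0., 2.], [3., 1., 0.]])  # 2 samples, 3 features
a2 = CArray([[0., 1., 1.]])                # 1 sample, 3 features

k = CKernelLinear().k(a1, a2)
print(k.shape)  # expected (2, 1): one row per sample of a1, one column per sample of a2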
Code example #5
    def _test_fun_result(self, fun, x, res_expected):
        """Test if function returns the correct value.

        Parameters
        ----------
        fun : CFunction
        x : CArray
        res_expected : scalar

        """
        self.logger.info("Checking value of {:} @ {:}".format(
            fun.class_type, x))

        res = fun.fun(x)

        self.logger.info("Correct result: {:}".format(res_expected))
        self.logger.info("Function result: {:}".format(res))

        self.assertTrue(is_scalar(res))
        self.assertAlmostEqual(res_expected, res, places=4)
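For context, a hedged sketch of building a `CFunction` around a plain callable and checking its scalar output directly; the `secml.optim.function.CFunction` constructor taking a `fun=` argument is an assumption based on secml's optimisation API.

# Sketch only: the CFunction import path and fun= constructor argument
# are assumed; adapt to your secml version.
from secml.array import CArray
from secml.optim.function import CFunction

# A simple quadratic: f(x) = sum(x_i^2), so f([1, 2, 3]) = 14
quad = CFunction(fun=lambda x: (x ** 2).sum())

x0 = CArray([1., 2., 3.])
print(quad.fun(x0))  # expected scalar result: 14.0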
Code example #6
File: test_c_array_init.py - Project: pralab/secml
        def check_init_builtin(totest_elem):

            for tosparse in [False, True]:
                init_array = CArray(totest_elem, tosparse=tosparse)
                self.assertEqual(init_array.issparse, tosparse)

                if is_list_of_lists(totest_elem):
                    if not is_list_of_lists(totest_elem[0]):
                        self.assertEqual(
                            init_array.shape[0], len(totest_elem))
                        self.assertEqual(
                            init_array.shape[1], len(totest_elem[0]))
                    else:  # N-Dimensional input
                        in_shape = init_array.input_shape
                        self.assertEqual(in_shape[0], len(totest_elem))
                        self.assertEqual(in_shape[1], len(totest_elem[0]))
                        self.assertEqual(
                            init_array.shape[0], len(totest_elem))
                        self.assertEqual(
                            init_array.shape[1], sum(in_shape[1:]))

                elif is_list(totest_elem):
                    if init_array.issparse is True:
                        self.assertEqual(
                            init_array.shape[1], len(totest_elem))
                    elif init_array.isdense is True:
                        self.assertTrue(init_array.ndim == 1)
                        self.assertEqual(
                            init_array.shape[0], len(totest_elem))
                    self.assertEqual(
                        init_array.input_shape, (len(totest_elem), ))

                elif is_scalar(totest_elem) or is_bool(totest_elem):
                    self.assertEqual(init_array.size, 1)
                    self.assertEqual(init_array.input_shape, (1, ))

                else:
                    raise TypeError(
                        "test_init_builtin should not be used "
                        "to test {:}".format(type(totest_elem)))
Code example #7
    def _get_best_params(self, res_vect, params, params_matrix, pick='first'):
        """Returns the best parameters given input performance scores.

        The best parameters have the closest associated performance score
        to the metric's best value.

        Parameters
        ----------
        res_vect : CArray
            Array with the performance results associated
            with each parameter combination.
        params : dict
            Dictionary with the parameters to be evaluated.
        params_matrix : CArray
            Indices of each combination of parameters to evaluate.
        pick : {'first', 'last', 'random'}, optional
            Defines which of the best parameter sets to pick.
            Usually, 'first' (default) corresponds to the smallest
            parameters, while 'last' corresponds to the biggest.
            The order is consistent with the parameters dict passed as input.

        Returns
        -------
        best_params_dict : dict
            Dictionary with the parameters that have obtained
            the best performance score.
        best_score : list
            Performance scores associated with the best parameters,
            one for each binary classifier.

        """
        if not is_scalar(self.metric.best_value):
            raise TypeError(
                "XVal only works with metrics whose best value is a scalar")

        # Get the index of the results closest to the best value
        diff = abs(res_vect - self.metric.best_value)

        best_params_list = []
        best_score = []
        # Get the best parameters for each binary classifier
        for i in range(res_vect.shape[1]):

            # diff has one row for each parameter combination and
            # one column for each binary classifier
            candidates_idx = diff[:, i].find_2d(
                diff[:, i] == diff[:, i].min())[0]

            # Get the value of the result closest to the best value
            best_score.append(res_vect[candidates_idx[0], i])

            # Get the index of the corresponding parameters
            best_params_idx = params_matrix[candidates_idx, :]

            # Build the list of candidate parameters for binary clf
            clf_best_params_list = []
            for c_idx in range(best_params_idx.shape[0]):
                # For each candidate get corresponding parameters
                best_params_dict = dict()
                for j, par in enumerate(params):
                    par_idx = best_params_idx[c_idx, j].item()
                    best_params_dict[par] = params[par][par_idx]

                clf_best_params_list.append(best_params_dict)

            # Choose which candidate parameters to assign to the classifier
            if pick == 'first':  # Usually the smallest
                clf_best_params_dict = clf_best_params_list[0]
            elif pick == 'last':  # Usually the biggest
                clf_best_params_dict = clf_best_params_list[-1]
            elif pick == 'random':
                import random
                clf_best_params_dict = random.choice(clf_best_params_list)
            else:
                raise ValueError("pick strategy '{:}' not known".format(pick))

            best_params_list.append(clf_best_params_dict)

        # For each param, build the tuple of the best values, one per binary clf
        best_params_dict = dict()
        for par in params:
            this_param_list = []
            for params_dict in best_params_list:
                this_param_list.append(params_dict[par])
            best_params_dict[par] = tuple(this_param_list)

        return best_params_dict, best_score
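To make the layout assumed above concrete: a purely illustrative sketch (values and parameter names are made up, not taken from secml) where `res_vect` has one row per parameter combination and one column per binary classifier, and each row of `params_matrix` holds the indices of that combination in the `params` lists.

# Illustrative sketch only: grid and scores are hypothetical, they just
# mirror the data layout expected by the method above.
from secml.array import CArray

# Hypothetical parameter grid: 3 values of 'C' x 2 values of 'gamma'
params = {'C': [0.1, 1.0, 10.0], 'gamma': [0.01, 0.1]}

# Row i -> (index into params['C'], index into params['gamma'])
params_matrix = CArray([[0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1]])

# One score per combination (rows) and per binary classifier (columns);
# with accuracy as metric, self.metric.best_value would be 1.0
res_vect = CArray([[0.70, 0.80],
                   [0.75, 0.90],
                   [0.90, 0.90],
                   [0.85, 0.95],
                   [0.90, 0.85],
                   [0.60, 0.95]])

# For classifier 0 the best score 0.90 occurs at rows 2 and 4; pick='first'
# selects row 2, i.e. C=1.0, gamma=0.01 for that binary classifier.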
        def _check_repeat(array):
            self.logger.info("Array:\n{:}".format(array))

            for axis in (None, 0, 1):

                if axis is None or array.ndim < 2:
                    repeats_add = CArray.randint(2, shape=array.size)
                elif axis == 0:
                    repeats_add = CArray.randint(2, shape=array.shape[0])
                elif axis == 1:
                    repeats_add = CArray.randint(2, shape=array.shape[1])
                else:
                    repeats_add = None

                for repeats in (0, 1, 2, repeats_add):

                    with self.assertRaises(TypeError):
                        array.repeat(repeats=np.array([1, 2]), axis=axis)

                    if axis == 1 and array.ndim < 2:
                        # No columns to repeat
                        with self.assertRaises(ValueError):
                            array.repeat(repeats=repeats, axis=axis)
                        continue

                    res = array.repeat(repeats=repeats, axis=axis)
                    self.logger.info("array.repeat({:}, axis={:}):"
                                     "\n{:}".format(repeats, axis, res))

                    self.assertIsInstance(res, CArray)
                    self.assertEqual(res.isdense, array.isdense)
                    self.assertEqual(res.issparse, array.issparse)
                    self.assertEqual(res.dtype, array.dtype)

                    if axis is None or array.ndim < 2:
                        # A flat array is always returned
                        if is_scalar(repeats):
                            repeats_mul = array.size * repeats
                        else:
                            repeats_mul = repeats.sum()
                        self.assertEqual(res.shape, (repeats_mul, ))
                    elif axis == 0:
                        if is_scalar(repeats):
                            repeats_mul = array.shape[0] * repeats
                        else:
                            repeats_mul = repeats.sum()
                        self.assertEqual(res.shape,
                                         (repeats_mul, array.shape[1]))
                    elif axis == 1:
                        if is_scalar(repeats):
                            repeats_mul = array.shape[1] * repeats
                        else:
                            repeats_mul = repeats.sum()
                        self.assertEqual(res.shape,
                                         (array.shape[0], repeats_mul))

                    if is_scalar(repeats):
                        repeats_size = array.size * repeats
                    else:
                        if axis is None or array.ndim < 2:
                            repeats_size = repeats.sum()
                        elif axis == 0:
                            repeats_size = repeats.sum() * array.shape[1]
                        elif axis == 1:
                            repeats_size = repeats.sum() * array.shape[0]
                        else:
                            repeats_size = None
                    self.assertEqual(res.size, repeats_size)

                    if not is_scalar(repeats):
                        repeats = repeats.tondarray()
                    np_res = array.tondarray().repeat(repeats=repeats,
                                                      axis=axis)
                    self.assertFalse((res.tondarray() != np_res).any())
    def _get_best_params(self, res_vect, params, params_matrix, pick='first'):
        """Returns the best parameters given input performance scores.

        The best parameters have the closest associated performance score
        to the metric's best value.

        Parameters
        ----------
        res_vect : CArray
            Array with the performance results associated
            with each parameter combination.
        params : dict
            Dictionary with the parameters to be evaluated.
        params_matrix : CArray
            Indices of each combination of parameters to evaluate.
        pick : {'first', 'last', 'random'}, optional
            Defines which of the best parameter sets to pick.
            Usually, 'first' (default) corresponds to the smallest
            parameters, while 'last' corresponds to the biggest.
            The order is consistent with the parameters dict passed as input.

        Returns
        -------
        best_params_dict : dict
            Dictionary with the parameters that have obtained
            the best performance score.
        best_score : scalar
            Performance score associated with the best parameters.

        """
        if not is_scalar(self.metric.best_value):
            raise TypeError(
                "XVal only works with metrics whose best value is a scalar")

        # Get the index of the results closest to the best value
        diff = abs(res_vect - self.metric.best_value)
        candidates_idx = diff.find(diff == diff.nanmin())

        if len(candidates_idx) < 1:
            raise ValueError("all metric outputs are equal to NaN!")

        # Get the value of the result closest to the best value
        best_score = res_vect[candidates_idx[0]]

        # Get the index of the corresponding parameters
        best_params_idx = params_matrix[candidates_idx, :]

        # Build the list of candidate parameters
        best_params_list = []
        for c_idx in range(best_params_idx.shape[0]):
            # For each candidate get corresponding parameters
            best_params_dict = dict()
            for j, par in enumerate(params):
                value_idx = best_params_idx[c_idx, j].item()
                best_params_dict[par] = params[par][value_idx]

            best_params_list.append(best_params_dict)

        # Choose which candidate parameters to assign to the classifier
        if pick == 'first':  # Usually the smallest
            best_params_dict = best_params_list[0]
        elif pick == 'last':  # Usually the biggest
            best_params_dict = best_params_list[-1]
        elif pick == 'random':
            import random
            best_params_dict = random.choice(best_params_list)
        else:
            raise ValueError("pick strategy '{:}' not known".format(pick))

        return best_params_dict, best_score
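A hedged sketch of how `params` and `params_matrix` fit together for this flat, single-output variant: rows of `params_matrix` enumerate the Cartesian product of parameter indices, so each entry of `res_vect` scores one complete assignment. The grid below is illustrative only, not secml's own construction code.

# Illustrative sketch only: this is not secml's own grid-construction code,
# just the index layout that _get_best_params expects.
import itertools

from secml.array import CArray

params = {'C': [0.1, 1.0, 10.0], 'kernel_gamma': [0.01, 0.1]}  # hypothetical grid

# Rows enumerate the Cartesian product of indices into each parameter list
params_matrix = CArray(
    [list(idx) for idx in
     itertools.product(*(range(len(v)) for v in params.values()))])

# One performance score per row; with e.g. accuracy, the metric best value is 1.0
res_vect = CArray([0.70, 0.75, 0.90, 0.85, 0.90, 0.60])

# |res_vect - 1.0| is minimal (0.10) at rows 2 and 4: pick='first' returns the
# row-2 assignment, pick='last' the row-4 one, pick='random' either of them.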