예제 #1
0
    def _get_param_names(cls):
        """Get parameter names for the estimator

        See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
        and sklearn/base.py for more information.
        """

        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = signature(init)
        # Consider the constructor parameters excluding 'self'
        parameters = [p for p in init_signature.parameters.values()
                      if p.name != 'self' and p.kind != p.VAR_KEYWORD]
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError("scikit-learn estimators should always "
                                   "specify their parameters in the signature"
                                   " of their __init__ (no varargs)."
                                   " %s with constructor %s doesn't "
                                   " follow this convention."
                                   % (cls, init_signature))
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])
예제 #2
0
def check_samplers_pandas(name, Sampler):
    pd = pytest.importorskip("pandas")
    # Check that the samplers handle pandas dataframe and pandas series
    X, y = make_classification(n_samples=1000,
                               n_classes=3,
                               n_informative=4,
                               weights=[0.2, 0.3, 0.5],
                               random_state=0)
    X_pd, y_pd = pd.DataFrame(X), pd.Series(y)
    sampler = Sampler()
    if isinstance(Sampler(), SMOTE):
        samplers = [
            Sampler(random_state=0, kind=kind)
            for kind in ('regular', 'borderline1', 'borderline2', 'svm')
        ]
    elif isinstance(Sampler(), NearMiss):
        samplers = [Sampler(version=version) for version in (1, 2, 3)]
    else:
        sampler_attr = signature(Sampler.__init__).parameters.keys()
        if 'random_state' in sampler_attr:
            samplers = [Sampler(random_state=0)]
        else:
            samplers = [Sampler()]
    for sampler in samplers:
        X_res_pd, y_res_pd = sampler.fit_sample(X_pd, y_pd)
        X_res, y_res = sampler.fit_sample(X, y)
        assert_allclose(X_res_pd, X_res)
        assert_allclose(y_res_pd, y_res)
예제 #3
0
파일: base.py 프로젝트: flaviassantos/pyod
    def _get_param_names(cls):
        # noinspection PyPep8
        """Get parameter names for the estimator

        See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
        and sklearn/base.py for more information.
        """

        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = signature(init)
        # Consider the constructor parameters excluding 'self'
        parameters = [p for p in init_signature.parameters.values()
                      if p.name != 'self' and p.kind != p.VAR_KEYWORD]
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError("scikit-learn estimators should always "
                                   "specify their parameters in the signature"
                                   " of their __init__ (no varargs)."
                                   " %s with constructor %s doesn't "
                                   " follow this convention."
                                   % (cls, init_signature))
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])
예제 #4
0
def test_kernel_theta():
    # Check that parameter vector theta of kernel is set correctly.
    for kernel in kernels:
        if isinstance(kernel, KernelOperator) \
           or isinstance(kernel, Exponentiation):  # skip non-basic kernels
            continue
        theta = kernel.theta
        _, K_gradient = kernel(X, eval_gradient=True)

        # Determine kernel parameters that contribute to theta
        init_sign = signature(kernel.__class__.__init__).parameters.values()
        args = [p.name for p in init_sign if p.name != 'self']
        theta_vars = map(lambda s: s.rstrip("_bounds"),
                         filter(lambda s: s.endswith("_bounds"), args))
        assert_equal(
            set(hyperparameter.name
                for hyperparameter in kernel.hyperparameters),
            set(theta_vars))

        # Check that values returned in theta are consistent with
        # hyperparameter values (being their logarithms)
        for i, hyperparameter in enumerate(kernel.hyperparameters):
            assert_equal(theta[i],
                         np.log(getattr(kernel, hyperparameter.name)))

        # Fixed kernel parameters must be excluded from theta and gradient.
        for i, hyperparameter in enumerate(kernel.hyperparameters):
            # create copy with certain hyperparameter fixed
            params = kernel.get_params()
            params[hyperparameter.name + "_bounds"] = "fixed"
            kernel_class = kernel.__class__
            new_kernel = kernel_class(**params)
            # Check that theta and K_gradient are identical with the fixed
            # dimension left out
            _, K_gradient_new = new_kernel(X, eval_gradient=True)
            assert_equal(theta.shape[0], new_kernel.theta.shape[0] + 1)
            assert_equal(K_gradient.shape[2], K_gradient_new.shape[2] + 1)
            if i > 0:
                assert_equal(theta[:i], new_kernel.theta[:i])
                assert_array_equal(K_gradient[..., :i],
                                   K_gradient_new[..., :i])
            if i + 1 < len(kernel.hyperparameters):
                assert_equal(theta[i + 1:], new_kernel.theta[i:])
                assert_array_equal(K_gradient[..., i + 1:],
                                   K_gradient_new[..., i:])

        # Check that values of theta are modified correctly
        for i, hyperparameter in enumerate(kernel.hyperparameters):
            theta[i] = np.log(42)
            kernel.theta = theta
            assert_almost_equal(getattr(kernel, hyperparameter.name), 42)

            setattr(kernel, hyperparameter.name, 43)
            assert_almost_equal(kernel.theta[i], np.log(43))
def test_kernel_theta():
    # Check that parameter vector theta of kernel is set correctly.
    for kernel in kernels:
        if isinstance(kernel, KernelOperator) \
           or isinstance(kernel, Exponentiation):  # skip non-basic kernels
            continue
        theta = kernel.theta
        _, K_gradient = kernel(X, eval_gradient=True)

        # Determine kernel parameters that contribute to theta
        init_sign = signature(kernel.__class__.__init__).parameters.values()
        args = [p.name for p in init_sign if p.name != 'self']
        theta_vars = map(lambda s: s[0:-len("_bounds")],
                         filter(lambda s: s.endswith("_bounds"), args))
        assert_equal(
            set(hyperparameter.name
                for hyperparameter in kernel.hyperparameters),
            set(theta_vars))

        # Check that values returned in theta are consistent with
        # hyperparameter values (being their logarithms)
        for i, hyperparameter in enumerate(kernel.hyperparameters):
            assert_equal(theta[i],
                         np.log(getattr(kernel, hyperparameter.name)))

        # Fixed kernel parameters must be excluded from theta and gradient.
        for i, hyperparameter in enumerate(kernel.hyperparameters):
            # create copy with certain hyperparameter fixed
            params = kernel.get_params()
            params[hyperparameter.name + "_bounds"] = "fixed"
            kernel_class = kernel.__class__
            new_kernel = kernel_class(**params)
            # Check that theta and K_gradient are identical with the fixed
            # dimension left out
            _, K_gradient_new = new_kernel(X, eval_gradient=True)
            assert_equal(theta.shape[0], new_kernel.theta.shape[0] + 1)
            assert_equal(K_gradient.shape[2], K_gradient_new.shape[2] + 1)
            if i > 0:
                assert_equal(theta[:i], new_kernel.theta[:i])
                assert_array_equal(K_gradient[..., :i],
                                   K_gradient_new[..., :i])
            if i + 1 < len(kernel.hyperparameters):
                assert_equal(theta[i + 1:], new_kernel.theta[i:])
                assert_array_equal(K_gradient[..., i + 1:],
                                   K_gradient_new[..., i:])

        # Check that values of theta are modified correctly
        for i, hyperparameter in enumerate(kernel.hyperparameters):
            theta[i] = np.log(42)
            kernel.theta = theta
            assert_almost_equal(getattr(kernel, hyperparameter.name), 42)

            setattr(kernel, hyperparameter.name, 43)
            assert_almost_equal(kernel.theta[i], np.log(43))
예제 #6
0
def precision_recall_plot():
    # In matplotlib < 1.5, plt.fill_between does not have a 'step' argument
    step_kwargs = ({'step': 'post'}
                   if 'step' in signature(plt.fill_between).parameters
                   else {})
    plt.step(recall, precision, color='b', alpha=0.2,
             where='post')
    plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
    plt.show()
예제 #7
0
def _get_args(function, varargs=False):
    """Helper to get function arguments"""

    try:
        params = signature(function).parameters
    except ValueError:
        # Error on builtin C function
        return []
    args = [key for key, param in params.items()
            if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)]
    if varargs:
        varargs = [param.name for param in params.values()
                   if param.kind == param.VAR_POSITIONAL]
        if len(varargs) == 0:
            varargs = None
        return args, varargs
    else:
        return args
예제 #8
0
def _get_args(function, varargs=False):
    """Helper to get function arguments"""

    try:
        params = signature(function).parameters
    except ValueError:
        # Error on builtin C function
        return []
    args = [key for key, param in params.items()
            if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)]
    if varargs:
        varargs = [param.name for param in params.values()
                   if param.kind == param.VAR_POSITIONAL]
        if len(varargs) == 0:
            varargs = None
        return args, varargs
    else:
        return args
예제 #9
0
def check_samplers_sparse(name, Sampler):
    # check that sparse matrices can be passed through the sampler leading to
    # the same results than dense
    X, y = make_classification(n_samples=1000,
                               n_classes=3,
                               n_informative=4,
                               weights=[0.2, 0.3, 0.5],
                               random_state=0)
    X_sparse = sparse.csr_matrix(X)
    if isinstance(Sampler(), SMOTE):
        samplers = [
            Sampler(random_state=0, kind=kind)
            for kind in ('regular', 'borderline1', 'borderline2', 'svm')
        ]
    elif isinstance(Sampler(), NearMiss):
        samplers = [Sampler(version=version) for version in (1, 2, 3)]
    elif isinstance(Sampler(), ClusterCentroids):
        # set KMeans to full since it support sparse and dense
        samplers = [
            Sampler(random_state=0,
                    voting='soft',
                    estimator=KMeans(random_state=1, algorithm='full'))
        ]
    else:
        sampler_attr = signature(Sampler.__init__).parameters.keys()
        if 'random_state' in sampler_attr:
            samplers = [Sampler(random_state=0)]
        else:
            samplers = [Sampler()]
    for sampler in samplers:
        X_res_sparse, y_res_sparse = sampler.fit_sample(X_sparse, y)
        X_res, y_res = sampler.fit_sample(X, y)
        if not isinstance(sampler, BaseEnsembleSampler):
            assert sparse.issparse(X_res_sparse)
            assert_allclose(X_res_sparse.A, X_res)
            assert_allclose(y_res_sparse, y_res)
        else:
            for x_sp, x, y_sp, y in zip(X_res_sparse, X_res, y_res_sparse,
                                        y_res):
                assert sparse.issparse(x_sp)
                assert_allclose(x_sp.A, x)
                assert_allclose(y_sp, y)
예제 #10
0
    def get_params(self, deep=True):
        """Get parameters of this kernel.

        Parameters
        ----------
        deep : boolean, optional
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.
        """
        params = dict()

        # introspect the constructor arguments to find the model parameters
        # to represent
        cls = self.__class__
        init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
        init_sign = signature(init)
        args, varargs = [], []
        for parameter in init_sign.parameters.values():
            if (parameter.kind != parameter.VAR_KEYWORD
                    and parameter.name != 'self'):
                args.append(parameter.name)
            if parameter.kind == parameter.VAR_POSITIONAL:
                varargs.append(parameter.name)

        if len(varargs) != 0:
            raise RuntimeError("scikit-learn kernels should always "
                               "specify their parameters in the signature"
                               " of their __init__ (no varargs)."
                               " %s doesn't follow this convention." % (cls, ))
        for arg in args:
            params[arg] = getattr(self, arg, None)
        return params
print('Average precision-recall score: {0:0.2f}'.format(
      average_precision))

###############################################################################
# Plot the Precision-Recall curve
# ................................
from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt
from sklearn.externals.funcsigs import signature

precision, recall, _ = precision_recall_curve(y_test, y_score)

# In matplotlib < 1.5, plt.fill_between does not have a 'step' argument
step_kwargs = ({'step': 'post'}
               if 'step' in signature(plt.fill_between).parameters
               else {})
plt.step(recall, precision, color='b', alpha=0.2,
         where='post')
plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
          average_precision))

###############################################################################
# In multi-label settings
# ------------------------
예제 #12
0
def plot_curve(Y, f, c):
    precision = dict()
    recall = dict()
    average_precision = dict()

    n_classes = len(c)

    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(Y[:, i], f[:, i])
        average_precision[i] = average_precision_score(Y[:, i], f[:, i])

    precision["micro"], recall["micro"], _ = precision_recall_curve(
        Y.ravel(), f.ravel())
    average_precision["micro"] = average_precision_score(Y, f, average="micro")
    print('Average precision score, micro-averaged over all classes: {0:0.2f}'.
          format(average_precision["micro"]))

    # Plot average precision_recall_curve
    plt.figure()
    # In matplotlib < 1.5, plt.fill_between does not have a 'step' argument
    step_kwargs = ({
        'step': 'post'
    } if 'step' in signature(plt.fill_between).parameters else {})
    plt.step(recall['micro'],
             precision['micro'],
             color='r',
             alpha=0.2,
             where='post')
    plt.fill_between(recall["micro"],
                     precision["micro"],
                     alpha=0.2,
                     color='b',
                     **step_kwargs)

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title(
        'Average precision score, micro-averaged over all Gender tags: AP={0:0.2f}'
        .format(average_precision["micro"]))

    # plot precision-recall curve for each class and iso-f1 curves
    # setup plot details
    colors = cycle(
        ['navY', 'turquoise', 'darkorange', 'cornflowerblue', 'teal'])

    plt.figure(figsize=(7, 8))
    f_scores = np.linspace(0.2, 0.8, num=4)
    lines = []
    labels = []

    for f_score in f_scores:
        x = np.linspace(0.01, 1)
        y = f_score * x / (2 * x - f_score)
        l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2)
        plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))

    lines.append(l)
    labels.append('iso-f1 curves')
    l, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
    lines.append(l)
    labels.append('micro-average Precision-recall (area = {0:0.2f})'
                  ''.format(average_precision["micro"]))

    for i, color in zip(range(n_classes), colors):
        l, = plt.plot(recall[i], precision[i], color=color, lw=2)
        lines.append(l)
        labels.append('Precision-recall for class {0} (area = {1:0.2f})'
                      ''.format(i, average_precision[i]))

    fig = plt.gcf()
    fig.subplots_adjust(bottom=0.25)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(lines, labels, loc=(0, -.38), prop=dict(size=14))

    plt.show()