def _get_param_names(cls): """Get parameter names for the estimator See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html and sklearn/base.py for more information. """ # fetch the constructor or the original constructor before # deprecation wrapping if any init = getattr(cls.__init__, 'deprecated_original', cls.__init__) if init is object.__init__: # No explicit constructor to introspect return [] # introspect the constructor arguments to find the model parameters # to represent init_signature = signature(init) # Consider the constructor parameters excluding 'self' parameters = [p for p in init_signature.parameters.values() if p.name != 'self' and p.kind != p.VAR_KEYWORD] for p in parameters: if p.kind == p.VAR_POSITIONAL: raise RuntimeError("scikit-learn estimators should always " "specify their parameters in the signature" " of their __init__ (no varargs)." " %s with constructor %s doesn't " " follow this convention." % (cls, init_signature)) # Extract and sort argument names excluding 'self' return sorted([p.name for p in parameters])
def check_samplers_pandas(name, Sampler): pd = pytest.importorskip("pandas") # Check that the samplers handle pandas dataframe and pandas series X, y = make_classification(n_samples=1000, n_classes=3, n_informative=4, weights=[0.2, 0.3, 0.5], random_state=0) X_pd, y_pd = pd.DataFrame(X), pd.Series(y) sampler = Sampler() if isinstance(Sampler(), SMOTE): samplers = [ Sampler(random_state=0, kind=kind) for kind in ('regular', 'borderline1', 'borderline2', 'svm') ] elif isinstance(Sampler(), NearMiss): samplers = [Sampler(version=version) for version in (1, 2, 3)] else: sampler_attr = signature(Sampler.__init__).parameters.keys() if 'random_state' in sampler_attr: samplers = [Sampler(random_state=0)] else: samplers = [Sampler()] for sampler in samplers: X_res_pd, y_res_pd = sampler.fit_sample(X_pd, y_pd) X_res, y_res = sampler.fit_sample(X, y) assert_allclose(X_res_pd, X_res) assert_allclose(y_res_pd, y_res)
def _get_param_names(cls): # noinspection PyPep8 """Get parameter names for the estimator See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html and sklearn/base.py for more information. """ # fetch the constructor or the original constructor before # deprecation wrapping if any init = getattr(cls.__init__, 'deprecated_original', cls.__init__) if init is object.__init__: # No explicit constructor to introspect return [] # introspect the constructor arguments to find the model parameters # to represent init_signature = signature(init) # Consider the constructor parameters excluding 'self' parameters = [p for p in init_signature.parameters.values() if p.name != 'self' and p.kind != p.VAR_KEYWORD] for p in parameters: if p.kind == p.VAR_POSITIONAL: raise RuntimeError("scikit-learn estimators should always " "specify their parameters in the signature" " of their __init__ (no varargs)." " %s with constructor %s doesn't " " follow this convention." % (cls, init_signature)) # Extract and sort argument names excluding 'self' return sorted([p.name for p in parameters])
def test_kernel_theta(): # Check that parameter vector theta of kernel is set correctly. for kernel in kernels: if isinstance(kernel, KernelOperator) \ or isinstance(kernel, Exponentiation): # skip non-basic kernels continue theta = kernel.theta _, K_gradient = kernel(X, eval_gradient=True) # Determine kernel parameters that contribute to theta init_sign = signature(kernel.__class__.__init__).parameters.values() args = [p.name for p in init_sign if p.name != 'self'] theta_vars = map(lambda s: s.rstrip("_bounds"), filter(lambda s: s.endswith("_bounds"), args)) assert_equal( set(hyperparameter.name for hyperparameter in kernel.hyperparameters), set(theta_vars)) # Check that values returned in theta are consistent with # hyperparameter values (being their logarithms) for i, hyperparameter in enumerate(kernel.hyperparameters): assert_equal(theta[i], np.log(getattr(kernel, hyperparameter.name))) # Fixed kernel parameters must be excluded from theta and gradient. for i, hyperparameter in enumerate(kernel.hyperparameters): # create copy with certain hyperparameter fixed params = kernel.get_params() params[hyperparameter.name + "_bounds"] = "fixed" kernel_class = kernel.__class__ new_kernel = kernel_class(**params) # Check that theta and K_gradient are identical with the fixed # dimension left out _, K_gradient_new = new_kernel(X, eval_gradient=True) assert_equal(theta.shape[0], new_kernel.theta.shape[0] + 1) assert_equal(K_gradient.shape[2], K_gradient_new.shape[2] + 1) if i > 0: assert_equal(theta[:i], new_kernel.theta[:i]) assert_array_equal(K_gradient[..., :i], K_gradient_new[..., :i]) if i + 1 < len(kernel.hyperparameters): assert_equal(theta[i + 1:], new_kernel.theta[i:]) assert_array_equal(K_gradient[..., i + 1:], K_gradient_new[..., i:]) # Check that values of theta are modified correctly for i, hyperparameter in enumerate(kernel.hyperparameters): theta[i] = np.log(42) kernel.theta = theta assert_almost_equal(getattr(kernel, hyperparameter.name), 42) setattr(kernel, hyperparameter.name, 43) assert_almost_equal(kernel.theta[i], np.log(43))
def test_kernel_theta(): # Check that parameter vector theta of kernel is set correctly. for kernel in kernels: if isinstance(kernel, KernelOperator) \ or isinstance(kernel, Exponentiation): # skip non-basic kernels continue theta = kernel.theta _, K_gradient = kernel(X, eval_gradient=True) # Determine kernel parameters that contribute to theta init_sign = signature(kernel.__class__.__init__).parameters.values() args = [p.name for p in init_sign if p.name != 'self'] theta_vars = map(lambda s: s[0:-len("_bounds")], filter(lambda s: s.endswith("_bounds"), args)) assert_equal( set(hyperparameter.name for hyperparameter in kernel.hyperparameters), set(theta_vars)) # Check that values returned in theta are consistent with # hyperparameter values (being their logarithms) for i, hyperparameter in enumerate(kernel.hyperparameters): assert_equal(theta[i], np.log(getattr(kernel, hyperparameter.name))) # Fixed kernel parameters must be excluded from theta and gradient. for i, hyperparameter in enumerate(kernel.hyperparameters): # create copy with certain hyperparameter fixed params = kernel.get_params() params[hyperparameter.name + "_bounds"] = "fixed" kernel_class = kernel.__class__ new_kernel = kernel_class(**params) # Check that theta and K_gradient are identical with the fixed # dimension left out _, K_gradient_new = new_kernel(X, eval_gradient=True) assert_equal(theta.shape[0], new_kernel.theta.shape[0] + 1) assert_equal(K_gradient.shape[2], K_gradient_new.shape[2] + 1) if i > 0: assert_equal(theta[:i], new_kernel.theta[:i]) assert_array_equal(K_gradient[..., :i], K_gradient_new[..., :i]) if i + 1 < len(kernel.hyperparameters): assert_equal(theta[i + 1:], new_kernel.theta[i:]) assert_array_equal(K_gradient[..., i + 1:], K_gradient_new[..., i:]) # Check that values of theta are modified correctly for i, hyperparameter in enumerate(kernel.hyperparameters): theta[i] = np.log(42) kernel.theta = theta assert_almost_equal(getattr(kernel, hyperparameter.name), 42) setattr(kernel, hyperparameter.name, 43) assert_almost_equal(kernel.theta[i], np.log(43))
def precision_recall_plot(): # In matplotlib < 1.5, plt.fill_between does not have a 'step' argument step_kwargs = ({'step': 'post'} if 'step' in signature(plt.fill_between).parameters else {}) plt.step(recall, precision, color='b', alpha=0.2, where='post') plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs) plt.xlabel('Recall') plt.ylabel('Precision') plt.ylim([0.0, 1.05]) plt.xlim([0.0, 1.0]) plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision)) plt.show()
def _get_args(function, varargs=False): """Helper to get function arguments""" try: params = signature(function).parameters except ValueError: # Error on builtin C function return [] args = [key for key, param in params.items() if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)] if varargs: varargs = [param.name for param in params.values() if param.kind == param.VAR_POSITIONAL] if len(varargs) == 0: varargs = None return args, varargs else: return args
def _get_args(function, varargs=False): """Helper to get function arguments""" try: params = signature(function).parameters except ValueError: # Error on builtin C function return [] args = [key for key, param in params.items() if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)] if varargs: varargs = [param.name for param in params.values() if param.kind == param.VAR_POSITIONAL] if len(varargs) == 0: varargs = None return args, varargs else: return args
def check_samplers_sparse(name, Sampler): # check that sparse matrices can be passed through the sampler leading to # the same results than dense X, y = make_classification(n_samples=1000, n_classes=3, n_informative=4, weights=[0.2, 0.3, 0.5], random_state=0) X_sparse = sparse.csr_matrix(X) if isinstance(Sampler(), SMOTE): samplers = [ Sampler(random_state=0, kind=kind) for kind in ('regular', 'borderline1', 'borderline2', 'svm') ] elif isinstance(Sampler(), NearMiss): samplers = [Sampler(version=version) for version in (1, 2, 3)] elif isinstance(Sampler(), ClusterCentroids): # set KMeans to full since it support sparse and dense samplers = [ Sampler(random_state=0, voting='soft', estimator=KMeans(random_state=1, algorithm='full')) ] else: sampler_attr = signature(Sampler.__init__).parameters.keys() if 'random_state' in sampler_attr: samplers = [Sampler(random_state=0)] else: samplers = [Sampler()] for sampler in samplers: X_res_sparse, y_res_sparse = sampler.fit_sample(X_sparse, y) X_res, y_res = sampler.fit_sample(X, y) if not isinstance(sampler, BaseEnsembleSampler): assert sparse.issparse(X_res_sparse) assert_allclose(X_res_sparse.A, X_res) assert_allclose(y_res_sparse, y_res) else: for x_sp, x, y_sp, y in zip(X_res_sparse, X_res, y_res_sparse, y_res): assert sparse.issparse(x_sp) assert_allclose(x_sp.A, x) assert_allclose(y_sp, y)
def get_params(self, deep=True): """Get parameters of this kernel. Parameters ---------- deep : boolean, optional If True, will return the parameters for this estimator and contained subobjects that are estimators. Returns ------- params : mapping of string to any Parameter names mapped to their values. """ params = dict() # introspect the constructor arguments to find the model parameters # to represent cls = self.__class__ init = getattr(cls.__init__, 'deprecated_original', cls.__init__) init_sign = signature(init) args, varargs = [], [] for parameter in init_sign.parameters.values(): if (parameter.kind != parameter.VAR_KEYWORD and parameter.name != 'self'): args.append(parameter.name) if parameter.kind == parameter.VAR_POSITIONAL: varargs.append(parameter.name) if len(varargs) != 0: raise RuntimeError("scikit-learn kernels should always " "specify their parameters in the signature" " of their __init__ (no varargs)." " %s doesn't follow this convention." % (cls, )) for arg in args: params[arg] = getattr(self, arg, None) return params
print('Average precision-recall score: {0:0.2f}'.format( average_precision)) ############################################################################### # Plot the Precision-Recall curve # ................................ from sklearn.metrics import precision_recall_curve import matplotlib.pyplot as plt from sklearn.externals.funcsigs import signature precision, recall, _ = precision_recall_curve(y_test, y_score) # In matplotlib < 1.5, plt.fill_between does not have a 'step' argument step_kwargs = ({'step': 'post'} if 'step' in signature(plt.fill_between).parameters else {}) plt.step(recall, precision, color='b', alpha=0.2, where='post') plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs) plt.xlabel('Recall') plt.ylabel('Precision') plt.ylim([0.0, 1.05]) plt.xlim([0.0, 1.0]) plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format( average_precision)) ############################################################################### # In multi-label settings # ------------------------
def plot_curve(Y, f, c): precision = dict() recall = dict() average_precision = dict() n_classes = len(c) for i in range(n_classes): precision[i], recall[i], _ = precision_recall_curve(Y[:, i], f[:, i]) average_precision[i] = average_precision_score(Y[:, i], f[:, i]) precision["micro"], recall["micro"], _ = precision_recall_curve( Y.ravel(), f.ravel()) average_precision["micro"] = average_precision_score(Y, f, average="micro") print('Average precision score, micro-averaged over all classes: {0:0.2f}'. format(average_precision["micro"])) # Plot average precision_recall_curve plt.figure() # In matplotlib < 1.5, plt.fill_between does not have a 'step' argument step_kwargs = ({ 'step': 'post' } if 'step' in signature(plt.fill_between).parameters else {}) plt.step(recall['micro'], precision['micro'], color='r', alpha=0.2, where='post') plt.fill_between(recall["micro"], precision["micro"], alpha=0.2, color='b', **step_kwargs) plt.xlabel('Recall') plt.ylabel('Precision') plt.ylim([0.0, 1.05]) plt.xlim([0.0, 1.0]) plt.title( 'Average precision score, micro-averaged over all Gender tags: AP={0:0.2f}' .format(average_precision["micro"])) # plot precision-recall curve for each class and iso-f1 curves # setup plot details colors = cycle( ['navY', 'turquoise', 'darkorange', 'cornflowerblue', 'teal']) plt.figure(figsize=(7, 8)) f_scores = np.linspace(0.2, 0.8, num=4) lines = [] labels = [] for f_score in f_scores: x = np.linspace(0.01, 1) y = f_score * x / (2 * x - f_score) l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2) plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02)) lines.append(l) labels.append('iso-f1 curves') l, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2) lines.append(l) labels.append('micro-average Precision-recall (area = {0:0.2f})' ''.format(average_precision["micro"])) for i, color in zip(range(n_classes), colors): l, = plt.plot(recall[i], precision[i], color=color, lw=2) lines.append(l) labels.append('Precision-recall for class {0} (area = {1:0.2f})' ''.format(i, average_precision[i])) fig = plt.gcf() fig.subplots_adjust(bottom=0.25) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('Extension of Precision-Recall curve to multi-class') plt.legend(lines, labels, loc=(0, -.38), prop=dict(size=14)) plt.show()