def score_with_sample_weight(estimator, X, y=None, sample_weight=None):
    """Return ``estimator.score(X, y)``, passing ``sample_weight`` when given.

    ``sample_weight`` is only forwarded to ``score`` when it is not None,
    so estimators whose ``score`` lacks that argument still work in the
    unweighted case.

    On scikit-learn older than 0.19, a ``Pipeline`` scored with weights is
    first rebuilt as a ``_PipelinePatched`` from the same steps.
    NOTE(review): presumably the stock ``Pipeline.score`` of those versions
    does not accept ``sample_weight`` -- confirm against ``_PipelinePatched``.
    """
    weighted = sample_weight is not None

    if sklearn_version() < '0.19':
        if weighted and isinstance(estimator, Pipeline):
            estimator = _PipelinePatched(estimator.steps)

    if weighted:
        return estimator.score(X, y, sample_weight=sample_weight)
    return estimator.score(X, y)
def test_invertable_hashing_vectorizer(always_signed, binary, alternate_sign):
    """Check InvertableHashingVectorizer feature names for three fit modes.

    The same corpus is fed to a fresh InvertableHashingVectorizer
    (1) with a single ``fit`` call on two documents,
    (2) with one ``partial_fit`` call per document, and
    (3) with one ``partial_fit`` call per word;
    after each mode the result is verified with ``check_feature_names``.
    """
    n_features = 8
    n_words = 4 * n_features

    # The keyword that controls hashed-feature signs depends on the installed
    # scikit-learn: ``non_negative`` below 0.19, ``alternate_sign`` otherwise.
    if sklearn_version() < '0.19':
        sign_option = {'non_negative': not alternate_sign}
    else:
        sign_option = {'alternate_sign': alternate_sign}
    vec = HashingVectorizer(n_features=n_features, binary=binary,
                            **sign_option)

    # word_0 occurs once, word_1 twice, and so on.
    words = ['word_{}'.format(idx) for idx in range(n_words)]
    corpus = []
    for count, word in enumerate(words, 1):
        corpus.extend(repeat(word, count))

    middle = len(corpus) // 2
    first_doc = ' '.join(corpus[:middle])
    second_doc = ' '.join(corpus[middle:])

    def verify(fitted):
        check_feature_names(vec, fitted, always_signed, corpus,
                            alternate_sign)

    # Mode 1: both documents in a single fit() call.
    fitted_at_once = InvertableHashingVectorizer(vec)
    fitted_at_once.fit([first_doc, second_doc])
    verify(fitted_at_once)

    # Mode 2: one partial_fit() call per document.
    fitted_per_doc = InvertableHashingVectorizer(vec)
    fitted_per_doc.partial_fit([first_doc])
    fitted_per_doc.partial_fit([second_doc])
    verify(fitted_per_doc)

    # Mode 3: one partial_fit() call per individual word.
    fitted_per_word = InvertableHashingVectorizer(vec)
    for token in corpus:
        fitted_per_word.partial_fit([token])
    verify(fitted_per_word)
def _default_clf(self):
    """Build the default classifier: an elastic-net SGDClassifier.

    The classifier uses ``loss='log'``, ``penalty='elasticnet'`` and
    ``alpha=1e-3``, seeded from ``self.rng_``. When scikit-learn is 0.19
    or newer, ``tol=1e-3`` is passed as well.
    """
    rng = self.rng_
    # ``tol`` is only passed on scikit-learn >= 0.19.
    tol_option = {'tol': 1e-3} if sklearn_version() >= '0.19' else {}
    return SGDClassifier(loss='log', penalty='elasticnet', alpha=1e-3,
                         random_state=rng, **tol_option)
import inspect
import json
from pprint import pprint

from hypothesis.strategies import integers
from hypothesis.extra.numpy import arrays
import numpy as np

from eli5.base import Explanation
from eli5.formatters import format_as_text, format_as_html, format_as_dict
from eli5.formatters.html import html_escape
from eli5.formatters.text import format_signed
from eli5.sklearn.utils import sklearn_version


# Shared keyword arguments with a fixed random seed; ``tol`` is added only
# when scikit-learn is 0.19 or newer.
# NOTE(review): presumably passed to SGD-based estimators -- verify at the
# call sites.
SGD_KWARGS = {'random_state': 42}
if sklearn_version() >= '0.19':
    SGD_KWARGS['tol'] = 1e-3


def rnd_len_arrays(dtype, min_len=0, max_len=3, elements=None):
    """ Generate numpy arrays of random length.

    Returns a hypothesis strategy: a length ``n`` is drawn from
    ``integers(min_value=min_len, max_value=max_len)`` and then mapped to
    ``arrays(dtype, n, elements=elements)``, so ``dtype`` and ``elements``
    are passed through to ``hypothesis.extra.numpy.arrays`` unchanged.
    """
    lengths = integers(min_value=min_len, max_value=max_len)
    return lengths.flatmap(lambda n: arrays(dtype, n, elements=elements))


def format_as_all(res, clf, **kwargs):
    """ Format explanation as text and html, check JSON-encoding,
    print text explanation, save html, return text and html.
    """
    expl_dict = format_as_dict(res)
    pprint(expl_dict)