Beispiel #1
0
def score_with_sample_weight(estimator, X, y=None, sample_weight=None):
    """Return ``estimator.score(X, y)``, forwarding ``sample_weight`` if given.

    When ``sample_weight`` is ``None`` the keyword is not passed at all, so
    estimators whose ``score`` does not accept it can still be scored.

    On sklearn older than 0.19, a ``Pipeline`` that has to be scored with
    weights is first rebuilt as a ``_PipelinePatched`` from its ``steps``
    (presumably because the old ``Pipeline.score`` does not take
    ``sample_weight`` -- confirm against ``_PipelinePatched``).
    """
    on_old_sklearn = sklearn_version() < '0.19'
    if (on_old_sklearn
            and isinstance(estimator, Pipeline)
            and sample_weight is not None):
        estimator = _PipelinePatched(estimator.steps)

    # Only mention sample_weight in the call when the caller supplied one.
    score_kwargs = {}
    if sample_weight is not None:
        score_kwargs['sample_weight'] = sample_weight
    return estimator.score(X, y, **score_kwargs)
def test_invertable_hashing_vectorizer(always_signed, binary, alternate_sign):
    """Check InvertableHashingVectorizer feature names for three fit modes.

    The same corpus is fed to a fresh ``InvertableHashingVectorizer`` in
    three ways -- one ``fit`` call, one ``partial_fit`` per document, and one
    ``partial_fit`` per word -- and ``check_feature_names`` is run after each.
    """
    n_features = 8
    n_words = 4 * n_features

    vec_params = {'n_features': n_features, 'binary': binary}
    # sklearn before 0.19 is given the inverse flag ``non_negative``;
    # 0.19 and later are given ``alternate_sign`` directly.
    if sklearn_version() < '0.19':
        vec_params['non_negative'] = not alternate_sign
    else:
        vec_params['alternate_sign'] = alternate_sign
    vec = HashingVectorizer(**vec_params)

    # The k-th word (counting from 1) occurs k times in the corpus.
    words = ['word_{}'.format(idx) for idx in range(n_words)]
    corpus = []
    for count, word in enumerate(words, 1):
        corpus.extend([word] * count)

    middle = len(corpus) // 2
    doc1 = ' '.join(corpus[:middle])
    doc2 = ' '.join(corpus[middle:])

    def assert_feature_names(fitted):
        check_feature_names(vec, fitted, always_signed, corpus, alternate_sign)

    # 1. Everything in a single fit() call.
    fitted_at_once = InvertableHashingVectorizer(vec)
    fitted_at_once.fit([doc1, doc2])
    assert_feature_names(fitted_at_once)

    # 2. One partial_fit() call per document.
    fitted_per_doc = InvertableHashingVectorizer(vec)
    for doc in (doc1, doc2):
        fitted_per_doc.partial_fit([doc])
    assert_feature_names(fitted_per_doc)

    # 3. One partial_fit() call per word occurrence.
    fitted_per_word = InvertableHashingVectorizer(vec)
    for token in corpus:
        fitted_per_word.partial_fit([token])
    assert_feature_names(fitted_per_word)
Beispiel #3
0
 def _default_clf(self):
     """Create the default ``SGDClassifier`` instance.

     It uses ``'log'`` loss with an elastic-net penalty, ``alpha=1e-3`` and
     ``self.rng_`` as the random state. On sklearn 0.19 or newer ``tol`` is
     additionally set to ``1e-3``.
     """
     sgd_params = {
         'loss': 'log',
         'penalty': 'elasticnet',
         'alpha': 1e-3,
         'random_state': self.rng_,
     }
     # ``tol`` is only passed to sklearn versions 0.19 and up.
     if sklearn_version() >= '0.19':
         sgd_params.update(tol=1e-3)
     return SGDClassifier(**sgd_params)
Beispiel #4
0
import inspect
import json
from pprint import pprint

from hypothesis.strategies import integers
from hypothesis.extra.numpy import arrays
import numpy as np

from eli5.base import Explanation
from eli5.formatters import format_as_text, format_as_html, format_as_dict
from eli5.formatters.html import html_escape
from eli5.formatters.text import format_signed
from eli5.sklearn.utils import sklearn_version

# Keyword arguments with a fixed random seed (presumably shared by SGD-based
# estimators elsewhere -- confirm against users of this constant); an
# explicit ``tol`` is added only on sklearn 0.19 and newer.
SGD_KWARGS = dict(random_state=42)
if sklearn_version() >= '0.19':
    SGD_KWARGS.update(tol=1e-3)


def rnd_len_arrays(dtype, min_len=0, max_len=3, elements=None):
    """ Build a strategy for numpy arrays whose length is itself random.

    A length is first drawn from ``integers(min_len, max_len)``; an
    ``arrays`` strategy of that length is then created with the given
    ``dtype`` and ``elements``.
    """
    def arrays_of_length(size):
        return arrays(dtype, size, elements=elements)

    length_strategy = integers(min_value=min_len, max_value=max_len)
    return length_strategy.flatmap(arrays_of_length)


def format_as_all(res, clf, **kwargs):
    """ Format explanation as text and html, check JSON-encoding,
    print text explanation, save html, return text and html.
    """
    expl_dict = format_as_dict(res)
    pprint(expl_dict)