Exemple #1
0
def test_tm_njobs():
    """Fit and predict with distinct ``n_jobs`` / ``tm_n_jobs`` values.

    After fitting and predicting, the model must still expose the two
    values exactly as they were passed to the constructor.
    """
    texts, labels = get_data()
    options = dict(
        tm_n_jobs=2,
        n_jobs=1,
        TH=True,
        lang="es",
        stacked_method="sklearn.svm.LinearSVC",
    )
    model = EvoMSA(**options).fit(texts, labels)
    model.predict(texts)
    # The two settings are checked independently so a failure names the one
    # that is wrong.
    assert model.n_jobs == 1
    assert model.tm_n_jobs == 2
Exemple #2
0
def test_cache():
    """Check the files EvoMSA writes when it is given a ``cache`` prefix.

    A text model is trained and saved through ``StoreDelete``; EvoMSA is
    then instantiated twice over the same cache prefix. The test asserts
    that the ``tm`` directory, the text-model cache file, every path yielded
    by ``cache.ml_train()`` / ``cache.ml_kfold()`` and the cache file
    produced by ``predict(..., cache=...)`` exist on disk.
    """
    import hashlib

    def store_textmodel(data, output):
        # Fit a TextModel on the tweets read from `data` and save it to
        # `output`.
        from b4msa.textmodel import TextModel
        from microtc.utils import tweet_iterator, save_model

        textmodel = TextModel()
        tweets = list(tweet_iterator(data))
        save_model(textmodel.fit(tweets), output)

    with StoreDelete(store_textmodel, TWEETS, "textmodel_cache.tm") as store:
        train_prefix = os.path.join("tm", "train.json")
        evo = EvoMSA(models=[[store._output, "sklearn.svm.LinearSVC"]],
                     cache=train_prefix)
        assert os.path.isdir("tm")

        # Expected cache name: the prefix followed by the MD5 hex digest of
        # the text-model path.
        digest = hashlib.md5(store._output.encode()).hexdigest()
        tm_cache = train_prefix + "-" + digest
        print(evo.cache.textModels)
        assert evo.cache.textModels[1] == tm_cache

        X, y = get_data()
        evo.first_stage(X, y)
        assert os.path.isfile(tm_cache)

        # Collect the paths now; their existence is only checked after the
        # second model has been fitted.
        train_files = list(evo.cache.ml_train())
        kfold_files = list(evo.cache.ml_kfold())

        stacked_args = dict(popsize=10,
                            early_stopping_rounds=10,
                            n_estimators=3)
        evo = EvoMSA(models=[[store._output, "sklearn.svm.LinearSVC"]],
                     stacked_method_args=stacked_args,
                     cache=train_prefix)
        evo = evo.fit(X, y, test_set=X[:30])

        hy = evo.predict(X[:10])
        print(len(hy), hy)
        assert len(hy) == 10

        for path in train_files + kfold_files:
            print(path)
            assert os.path.isfile(path)

        # Predicting with a different prefix must create a file with the
        # same digest suffix under the new prefix.
        test_prefix = os.path.join("tm", "test")
        evo.predict(X, cache=test_prefix)
        test_cache = test_prefix + '-' + digest
        print(test_cache)
        assert os.path.isfile(test_cache)
Exemple #3
0
def test_EvoMSA_evodag_class():
    """Use ``NearestCentroid`` as the stacked method and check consistency.

    The fitted ``_evodag_model`` must be a ``NearestCentroid`` instance, and
    the labels from ``predict`` must equal the labels decoded from the
    arg-max of ``predict_proba``.
    """
    from sklearn.neighbors import NearestCentroid
    import numpy as np

    texts, labels = get_data()
    settings = dict(
        models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']],
        stacked_method="sklearn.neighbors.NearestCentroid",
        TR=False,
        n_jobs=2,
    )
    model = EvoMSA(**settings).fit(texts, labels)
    assert isinstance(model._evodag_model, NearestCentroid)

    predicted = model.predict(texts)
    probabilities = model.predict_proba(texts)
    # Map the most probable column of each row back to its original label.
    best_column = probabilities.argmax(axis=1)
    decoded = model._le.inverse_transform(best_column)
    print(predicted, decoded)
    assert np.all(predicted == decoded)
Exemple #4
0
def test_EvoMSA_predict():
    """Fit a Corpus/Bernoulli model and check its training-set predictions.

    The prediction vector must have 1000 entries and agree with the
    training labels on more than 80% of them.
    """
    import numpy as np

    texts, labels = get_data()
    stacked_args = dict(popsize=10,
                        early_stopping_rounds=10,
                        time_limit=15,
                        n_estimators=10)
    evo = EvoMSA(
        stacked_method_args=stacked_args,
        models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']],
        n_jobs=1,
    ).fit(texts, labels)

    hy = evo.predict(texts)
    assert len(hy) == 1000

    # Fraction of training examples whose predicted label matches the truth.
    accuracy = (np.array(labels) == hy).mean()
    print(accuracy, hy)
    print(evo.predict_proba(texts))
    assert accuracy > 0.8
Exemple #5
0
def test_binary_labels_json():
    """Train on 0/1 labels and dump every prediction to JSON.

    The original labels are collapsed to two classes (``P`` -> 1, everything
    else -> 0). Each prediction is converted with ``str`` and serialised via
    ``json.dumps``; the test has no explicit assertion and passes when no
    call raises.
    """
    import json

    texts, labels = get_data()
    to_binary = dict(NONE=0, N=0, NEU=0, P=1)
    labels = [to_binary[label] for label in labels]
    evo = EvoMSA(
        evodag_args=dict(popsize=10,
                         early_stopping_rounds=10,
                         time_limit=5,
                         n_estimators=5),
        n_jobs=2,
    ).fit(texts, labels)

    predictions = evo.predict(texts)
    for klass in predictions:
        print(type(klass), str(klass))
        encoded = json.dumps({"klass": str(klass)})
    # Shows the last serialised prediction; relies on `predictions` being
    # non-empty.
    print(encoded)
Exemple #6
0
def test_EvoMSA_predict():
    """Fit on two datasets at once and check predictions on the full one.

    ``fit`` receives a list with the complete dataset and a second dataset
    restricted to the examples labelled ``P`` or ``N``. Predictions over the
    complete dataset must have 1000 entries and match its labels on more
    than 80% of them.
    """
    import numpy as np

    X, y = get_data()
    # Second dataset: only the examples labelled 'P' or 'N'.
    polar_texts = [text for text, label in zip(X, y) if label in ['P', 'N']]
    polar_labels = [label for label in y if label in ['P', 'N']]

    search_args = dict(popsize=10,
                       early_stopping_rounds=10,
                       time_limit=15,
                       n_estimators=10)
    evo = EvoMSA(evodag_args=search_args,
                 models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
                 n_jobs=1)
    evo = evo.fit([X, polar_texts], [y, polar_labels])

    hy = evo.predict(X)
    assert len(hy) == 1000

    accuracy = (np.array(y) == hy).mean()
    print(accuracy, hy)
    print(evo.predict_proba(X))
    assert accuracy > 0.8
Exemple #7
0
def test_EvoMSA_identity():
    """Use ``EvoMSA.model.Identity`` as ``evodag_class`` and check consistency.

    The fitted ``_evodag_model`` must be an ``Identity`` instance, and the
    labels from ``predict`` must equal the labels decoded from the arg-max
    of ``predict_proba``.
    """
    from EvoMSA.model import Identity
    import numpy as np

    texts, labels = get_data()
    settings = dict(
        evodag_args=dict(popsize=10,
                         early_stopping_rounds=10,
                         n_estimators=3),
        models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
        TR=False,
        evodag_class="EvoMSA.model.Identity",
        n_jobs=2,
    )
    model = EvoMSA(**settings).fit(texts, labels)
    assert isinstance(model._evodag_model, Identity)

    predicted = model.predict(texts)
    probabilities = model.predict_proba(texts)
    # Map the most probable column of each row back to its original label.
    best_column = probabilities.argmax(axis=1)
    decoded = model._le.inverse_transform(best_column)
    print(predicted, decoded)
    assert np.all(predicted == decoded)
Exemple #8
0
def test_EvoMSA_evodag_class():
    """Use ``NearestCentroid`` as ``evodag_class`` and check consistency.

    The fitted ``_evodag_model`` must be a ``NearestCentroid`` instance, and
    the labels from ``predict`` must equal the labels decoded from the
    arg-max of ``predict_proba``.
    """
    from sklearn.neighbors import NearestCentroid
    import numpy as np

    texts, labels = get_data()
    settings = dict(
        evodag_args=dict(popsize=10,
                         early_stopping_rounds=10,
                         n_estimators=3),
        models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
        evodag_class="sklearn.neighbors.NearestCentroid",
        TR=False,
        n_jobs=2,
    )
    model = EvoMSA(**settings).fit(texts, labels)
    assert isinstance(model._evodag_model, NearestCentroid)

    predicted = model.predict(texts)
    probabilities = model.predict_proba(texts)
    # Map the most probable column of each row back to its original label.
    best_column = probabilities.argmax(axis=1)
    decoded = model._le.inverse_transform(best_column)
    print(predicted, decoded)
    assert np.all(predicted == decoded)
Exemple #9
0
def test_EvoMSA_regression():
    """Fit EvoMSA with ``classifier=False`` on numeric targets.

    Labels are encoded with ``LabelEncoderWrapper`` and shifted by -1.5 to
    obtain the targets. Both ``decision_function`` and ``predict`` must
    return a one-dimensional result with one entry per input.
    """
    from EvoMSA.base import LabelEncoderWrapper
    from EvoMSA.utils import download

    X, y = get_data()
    # Inputs are passed as dictionaries with a single "text" key.
    X = [{"text": text} for text in X]
    encoder = LabelEncoderWrapper().fit(y)
    y = encoder.transform(y) - 1.5

    stacked_args = dict(popsize=10,
                        early_stopping_rounds=10,
                        time_limit=5,
                        n_estimators=2)
    evo = EvoMSA(stacked_method_args=stacked_args,
                 classifier=False,
                 models=[[download("emo_Es.tm"), 'EvoMSA.model.Identity']],
                 TR=False,
                 n_jobs=1).fit(X, y)
    assert evo

    scores = evo.decision_function(X)
    print(scores.shape, scores.ndim)
    assert scores.shape[0] == len(X)
    assert scores.ndim == 1

    predictions = evo.predict(X)
    assert predictions.shape[0] == len(X)
    assert predictions.ndim == 1