Ejemplo n.º 1
0
def test_TextModel():
    """A default-constructed EvoMSA builds exactly one b4msa ``TextModel``.

    After ``model`` is called on the corpus, ``_textModel`` must be a list
    holding a single ``TextModel`` instance.
    """
    from b4msa.textmodel import TextModel

    corpus, _labels = get_data()
    stack = EvoMSA()
    stack.model(corpus)
    text_models = stack._textModel
    assert isinstance(text_models, list)
    assert isinstance(text_models[0], TextModel)
    assert len(text_models) == 1
Ejemplo n.º 2
0
def test_tm_njobs():
    """``n_jobs`` and ``tm_n_jobs`` keep their own values after fit/predict."""
    texts, labels = get_data()
    settings = dict(tm_n_jobs=2,
                    n_jobs=1,
                    TH=True,
                    lang="es",
                    stacked_method="sklearn.svm.LinearSVC")
    evo = EvoMSA(**settings).fit(texts, labels)
    evo.predict(texts)
    assert evo.n_jobs == 1
    assert evo.tm_n_jobs == 2
Ejemplo n.º 3
0
def test_vector_space():
    """The first vector space has one row per input text."""
    texts, _labels = get_data()
    stacking_args = dict(popsize=10,
                         early_stopping_rounds=10,
                         n_estimators=3)
    base_models = [['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']]
    evo = EvoMSA(stacked_method_args=stacking_args, models=base_models)
    evo.model(texts)
    expected_rows = len(texts)
    spaces = evo.vector_space(texts)
    assert spaces[0].shape[0] == expected_rows
Ejemplo n.º 4
0
def test_EvoMSA_predict_proba():
    """``predict_proba`` yields 1000 rows whose values all lie in [0, 1]."""
    texts, labels = get_data()
    stacking_args = dict(popsize=100,
                         early_stopping_rounds=100,
                         time_limit=5,
                         n_estimators=5)
    evo = EvoMSA(stacked_method_args=stacking_args, n_jobs=2)
    evo = evo.fit(texts, labels)
    proba = evo.predict_proba(texts)
    assert len(proba) == 1000
    assert proba.min() >= 0 and proba.max() <= 1
Ejemplo n.º 5
0
def test_model_instance():
    """An already fitted text model passed in ``models`` is kept as given."""
    from microtc.textmodel import TextModel

    texts, labels = get_data()
    fitted_tm = TextModel().fit(texts)
    linear_svc = "sklearn.svm.LinearSVC"
    evo = EvoMSA(models=[[fitted_tm, linear_svc]],
                 stacked_method=linear_svc,
                 lang="es",
                 TR=False,
                 tm_n_jobs=1,
                 n_jobs=1).fit(texts, labels)
    first_pair = evo.models[0]
    assert first_pair[0] == fitted_tm
Ejemplo n.º 6
0
def test_EvoMSA_model():
    """One extra model pair gives two models and two text models."""
    texts, _labels = get_data()
    stacking_args = dict(popsize=10,
                         early_stopping_rounds=10,
                         n_estimators=3)
    extra_models = [['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']]
    model = EvoMSA(stacked_method_args=stacking_args,
                   models=extra_models,
                   n_jobs=2)
    assert len(model.models) == 2
    model.model(texts)
    assert len(model._textModel) == 2
    print(model._textModel)
Ejemplo n.º 7
0
def test_EvoMSA_cpu_count():
    """``n_jobs=-1`` resolves to the value reported by ``cpu_count()``."""
    from multiprocessing import cpu_count
    from EvoMSA.base import EvoMSA

    # The data set is loaded as in the sibling tests even though it is unused.
    _texts, _labels = get_data()
    stacking_args = dict(popsize=10,
                         early_stopping_rounds=10,
                         n_estimators=3)
    model = EvoMSA(stacked_method_args=stacking_args, TR=False, n_jobs=-1)
    print(model.n_jobs, cpu_count())
    assert model.n_jobs == cpu_count()
Ejemplo n.º 8
0
def test_EvoMSA_param_HA():
    """With ``TR=False`` and ``HA=True`` each language gets exactly one model."""
    from EvoMSA.base import EvoMSA

    _texts, _labels = get_data()
    for language in ('ar', 'en', 'es'):
        # A fresh argument dict per instance, exactly as in a literal call.
        stacking_args = dict(popsize=10,
                             early_stopping_rounds=10,
                             n_estimators=3)
        model = EvoMSA(stacked_method_args=stacking_args,
                       TR=False,
                       lang=language,
                       HA=True,
                       n_jobs=2)
        assert len(model.models) == 1
Ejemplo n.º 9
0
def test_EvoMSA_multinomial():
    """Fitting a Corpus/Multinomial pair gives a ``Multinomial`` first model."""
    from EvoMSA.model import Multinomial

    texts, labels = get_data()
    stacking_args = dict(popsize=10,
                         early_stopping_rounds=10,
                         time_limit=5,
                         n_estimators=5)
    pairs = [['EvoMSA.model.Corpus', 'EvoMSA.model.Multinomial']]
    evo = EvoMSA(stacked_method_args=stacking_args,
                 models=pairs,
                 TR=False,
                 n_jobs=1)
    evo = evo.fit(texts, labels)
    assert evo
    first_classifier = evo._svc_models[0]
    assert isinstance(first_classifier, Multinomial)
Ejemplo n.º 10
0
def test_evomsa_wrapper():
    """A saved EvoMSA model can be used as a base model of another EvoMSA.

    A first model is fitted and saved to ``tmp.evomsa``; a second EvoMSA is
    then fitted using that file together with ``EvoMSA.model.Identity``.

    The temporary file is removed in a ``finally`` clause so that a failing
    assertion or a failing second ``fit`` does not leave it on disk.
    """
    import os
    from microtc.utils import save_model
    from EvoMSA.base import EvoMSA
    from test_base import get_data

    model_path = 'tmp.evomsa'
    X, y = get_data()
    model = EvoMSA(stacked_method="sklearn.naive_bayes.GaussianNB",
                   n_jobs=2).fit(X, y)
    try:
        save_model(model, model_path)
        assert os.path.isfile(model_path)
        evo = EvoMSA(models=[[model_path, "EvoMSA.model.Identity"]],
                     stacked_method="sklearn.naive_bayes.GaussianNB",
                     n_jobs=2).fit(X, y)
        assert evo
    finally:
        # Only unlink when the file exists, so a failure inside save_model
        # is not masked by a FileNotFoundError raised during cleanup.
        if os.path.isfile(model_path):
            os.unlink(model_path)
Ejemplo n.º 11
0
def test_EvoMSA_predict():
    """Training-set predictions have 1000 entries and accuracy above 0.8."""
    import numpy as np

    texts, labels = get_data()
    stacking_args = dict(popsize=10,
                         early_stopping_rounds=10,
                         time_limit=15,
                         n_estimators=10)
    pairs = [['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']]
    evo = EvoMSA(stacked_method_args=stacking_args,
                 models=pairs,
                 n_jobs=1).fit(texts, labels)
    predicted = evo.predict(texts)
    assert len(predicted) == 1000
    accuracy = (np.array(labels) == predicted).mean()
    print(accuracy, predicted)
    print(evo.predict_proba(texts))
    assert accuracy > 0.8
Ejemplo n.º 12
0
def test_EvoMSA_evodag_class():
    """``stacked_method`` given as a dotted name is instantiated and used.

    The stacked model must be a ``NearestCentroid`` and ``predict`` must
    agree with the arg-max of ``predict_proba`` decoded through ``_le``.
    """
    import numpy as np
    from sklearn.neighbors import NearestCentroid

    texts, labels = get_data()
    pairs = [['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']]
    model = EvoMSA(models=pairs,
                   stacked_method="sklearn.neighbors.NearestCentroid",
                   TR=False,
                   n_jobs=2)
    model = model.fit(texts, labels)
    assert isinstance(model._evodag_model, NearestCentroid)
    predicted = model.predict(texts)
    proba = model.predict_proba(texts)
    best_columns = proba.argmax(axis=1)
    decoded = model._le.inverse_transform(best_columns)
    print(predicted, decoded)
    assert np.all(predicted == decoded)
Ejemplo n.º 13
0
def test_binary_labels_json():
    """Predictions for binary labels can be stringified and dumped as JSON."""
    import json

    texts, labels = get_data()
    # Collapse the four original classes into a 0/1 problem.
    to_binary = dict(NONE=0, N=0, NEU=0, P=1)
    binary_labels = [to_binary[tag] for tag in labels]
    stacking_args = dict(popsize=10,
                         early_stopping_rounds=10,
                         time_limit=5,
                         n_estimators=5)
    evo = EvoMSA(stacked_method_args=stacking_args,
                 n_jobs=2).fit(texts, binary_labels)
    predictions = evo.predict(texts)
    for klass in predictions:
        print(type(klass), str(klass))
        encoded = json.dumps(dict(klass=str(klass)))
    # Show the JSON produced for the last prediction.
    print(encoded)
Ejemplo n.º 14
0
def test_EvoMSA_kfold_decision_function():
    """``kfold_decision_function`` rows are plain lists with four entries."""
    from sklearn.preprocessing import LabelEncoder

    texts, labels = get_data()
    encoder = LabelEncoder().fit(labels)
    encoded = encoder.transform(labels)
    stacking_args = dict(popsize=10,
                         early_stopping_rounds=10,
                         n_estimators=3)
    pairs = [['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']]
    evo = EvoMSA(stacked_method_args=stacking_args, models=pairs)
    evo.model(texts)
    spaces = evo.vector_space(texts)
    # Use the second model pair: its classifier and its vector space.
    classifier = evo.models[1][1]
    decisions = evo.kfold_decision_function(classifier, spaces[1], encoded)
    first_row = decisions[0]
    assert len(first_row) == 4
    assert isinstance(first_row, list)
Ejemplo n.º 15
0
def test_EvoMSA_param_TH():
    """``TH=True`` selects the ThumbsUpDown class that matches ``lang``."""
    from EvoMSA.base import EvoMSA
    from EvoMSA.model import ThumbsUpDownAr, ThumbsUpDownEn, ThumbsUpDownEs

    _texts, _labels = get_data()
    expected_by_language = (('ar', ThumbsUpDownAr),
                            ('en', ThumbsUpDownEn),
                            ('es', ThumbsUpDownEs))
    for language, expected_class in expected_by_language:
        stacking_args = dict(popsize=10,
                             early_stopping_rounds=10,
                             n_estimators=3)
        model = EvoMSA(stacked_method_args=stacking_args,
                       TR=False,
                       lang=language,
                       TH=True,
                       n_jobs=2)
        assert len(model.models) == 1
        assert model.models[0][0] == expected_class
Ejemplo n.º 16
0
def test_EvoMSA_identity():
    """``EvoMSA.model.Identity`` works as the stacked method.

    The stacked model must be an ``Identity`` instance and ``predict`` must
    agree with the arg-max of ``predict_proba`` decoded through ``_le``.
    """
    import numpy as np
    from EvoMSA.model import Identity

    texts, labels = get_data()
    stacking_args = dict(popsize=10,
                         early_stopping_rounds=10,
                         n_estimators=3)
    pairs = [['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']]
    model = EvoMSA(stacked_method_args=stacking_args,
                   models=pairs,
                   TR=False,
                   stacked_method="EvoMSA.model.Identity",
                   n_jobs=2)
    model = model.fit(texts, labels)
    assert isinstance(model._evodag_model, Identity)
    predicted = model.predict(texts)
    proba = model.predict_proba(texts)
    decoded = model._le.inverse_transform(proba.argmax(axis=1))
    print(predicted, decoded)
    assert np.all(predicted == decoded)
Ejemplo n.º 17
0
def test_EvoMSA_param_TR():
    """``TR=False`` gives no models; leaving it out gives a b4msa TextModel."""
    from b4msa.textmodel import TextModel
    from EvoMSA.base import EvoMSA

    _texts, _labels = get_data()

    without_tr = EvoMSA(stacked_method_args=dict(popsize=10,
                                                 early_stopping_rounds=10,
                                                 n_estimators=3),
                        TR=False,
                        n_jobs=2)
    assert len(without_tr.models) == 0

    with_default = EvoMSA(stacked_method_args=dict(popsize=10,
                                                   early_stopping_rounds=10,
                                                   n_estimators=3),
                          n_jobs=2)
    assert len(with_default.models) == 1
    print(with_default.models[0])
    assert with_default.models[0][0] == TextModel
Ejemplo n.º 18
0
def test_EvoMSA_fit_svm():
    """``fit_svm`` trains one classifier per model pair, in order.

    With one extra Corpus/Bernoulli pair, the fitted models must be a
    ``LinearSVC`` followed by a ``Bernoulli``.
    """
    from sklearn.preprocessing import LabelEncoder
    from sklearn.svm import LinearSVC
    from EvoMSA.model import Bernoulli

    texts, labels = get_data()
    stacking_args = dict(popsize=10,
                         early_stopping_rounds=10,
                         n_estimators=3)
    pairs = [['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']]
    model = EvoMSA(stacked_method_args=stacking_args,
                   models=pairs,
                   n_jobs=2)
    encoder = LabelEncoder().fit(labels)
    encoded = encoder.transform(labels)
    model.model(texts)
    spaces = model.vector_space(texts)
    model.fit_svm(spaces, encoded)
    print(model._svc_models)
    assert len(model._svc_models) == 2
    expected_classes = [LinearSVC, Bernoulli]
    for fitted, expected in zip(model._svc_models, expected_classes):
        assert isinstance(fitted, expected)
Ejemplo n.º 19
0
def test_cache():
    """Exercise EvoMSA's ``cache`` option with a text model stored on disk.

    The test checks that cache file names are derived from the MD5 digest of
    the model path, that ``first_stage`` and ``fit`` leave the expected files
    on disk, and that ``predict`` accepts its own ``cache`` prefix.
    """
    import hashlib

    def func(data, output):
        """Fit a b4msa ``TextModel`` on the tweets in *data*; save to *output*."""
        from b4msa.textmodel import TextModel
        from microtc.utils import tweet_iterator, save_model

        tm = TextModel().fit(list(tweet_iterator(data)))
        save_model(tm, output)

    # NOTE(review): StoreDelete is defined outside this block; presumably it
    # runs func(TWEETS, "textmodel_cache.tm") on entry and deletes the file on
    # exit, exposing the path as ``_output`` -- confirm against its definition.
    with StoreDelete(func, TWEETS, "textmodel_cache.tm") as sd:
        cache = os.path.join("tm", "train.json")
        evo = EvoMSA(models=[[sd._output, "sklearn.svm.LinearSVC"]],
                     cache=cache)
        # Constructing EvoMSA with a cache prefix must create its directory.
        assert os.path.isdir("tm")
        # Expected cache file: "<cache prefix>-<md5 hex digest of model path>".
        output = hashlib.md5(sd._output.encode()).hexdigest()
        output = cache + "-%s" % output
        print(evo.cache.textModels)
        # Index 1 is the model passed in ``models``; presumably index 0 belongs
        # to the default text model (TR is not disabled here) -- TODO confirm.
        assert evo.cache.textModels[1] == output
        X, y = get_data()
        evo.first_stage(X, y)
        # first_stage must have written the cached file.
        assert os.path.isfile(output)
        # Materialise the cache paths now; they are checked after the second
        # EvoMSA instance below has been fitted with the same cache prefix.
        ML = list(evo.cache.ml_train())
        ML_K = list(evo.cache.ml_kfold())
        evo = EvoMSA(models=[[sd._output, "sklearn.svm.LinearSVC"]],
                     stacked_method_args=dict(popsize=10,
                                              early_stopping_rounds=10,
                                              n_estimators=3),
                     cache=cache).fit(X, y, test_set=X[:30])
        hy = evo.predict(X[:10])
        print(len(hy), hy)
        assert len(hy) == 10
        for k in ML:
            print(k)
            assert os.path.isfile(k)
        for k in ML_K:
            print(k)
            assert os.path.isfile(k)
        # Predict with a different cache prefix; the expected file reuses the
        # digest suffix computed above (the text after the single "-").
        cache = os.path.join("tm", "test")
        evo.predict(X, cache=cache)
        output = cache + '-' + output.split("-")[1]
        print(output)
        assert os.path.isfile(output)
Ejemplo n.º 20
0
def test_EvoMSA_regression():
    """With ``classifier=False`` the outputs are one-dimensional arrays.

    Both ``decision_function`` and ``predict`` must return one value per
    input document.
    """
    from EvoMSA.base import LabelEncoderWrapper
    from EvoMSA.utils import download

    texts, labels = get_data()
    documents = [dict(text=text) for text in texts]
    encoder = LabelEncoderWrapper().fit(labels)
    # Shift the encoded labels to obtain a real-valued target.
    targets = encoder.transform(labels) - 1.5
    stacking_args = dict(popsize=10,
                         early_stopping_rounds=10,
                         time_limit=5,
                         n_estimators=2)
    evo = EvoMSA(stacked_method_args=stacking_args,
                 classifier=False,
                 models=[[download("emo_Es.tm"), 'EvoMSA.model.Identity']],
                 TR=False,
                 n_jobs=1).fit(documents, targets)
    assert evo
    decision = evo.decision_function(documents)
    print(decision.shape, decision.ndim)
    assert decision.shape[0] == len(documents) and decision.ndim == 1
    predicted = evo.predict(documents)
    assert predicted.shape[0] == len(documents) and predicted.ndim == 1
Ejemplo n.º 21
0
def test_EvoMSA_fit():
    """Fit EvoMSA and check the model types survive a save/load round trip.

    After fitting, the second base classifier must be a ``Bernoulli`` and the
    stacked model an ``EvoDAGE``; the same must hold for the model loaded
    back from ``test.evomodel``.

    The file is removed in a ``finally`` clause so that a failure while
    loading or in the post-load assertions does not leave it on disk.
    """
    import os
    from EvoMSA.model import Bernoulli
    from EvoDAG.model import EvoDAGE
    from microtc.utils import load_model, save_model

    model_path = 'test.evomodel'
    X, y = get_data()
    print('iniciando')
    evo = EvoMSA(stacked_method_args=dict(popsize=10,
                                          early_stopping_rounds=10,
                                          time_limit=5,
                                          n_estimators=5),
                 models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']],
                 n_jobs=1).fit(X, y)
    print("Termine fit")
    assert evo
    assert isinstance(evo._svc_models[1], Bernoulli)
    assert isinstance(evo._evodag_model, EvoDAGE)
    try:
        save_model(evo, model_path)
        print("Guarde modelo")
        evo = load_model(model_path)
        print("Cargue modelo")
        assert isinstance(evo._svc_models[1], Bernoulli)
        assert isinstance(evo._evodag_model, EvoDAGE)
    finally:
        # Only unlink when the file exists, so a failure inside save_model
        # is not masked by a FileNotFoundError raised during cleanup.
        if os.path.isfile(model_path):
            os.unlink(model_path)
Ejemplo n.º 22
0
def test_lazy_loading():
    """After a second cached fit, the text model is still the path on disk.

    Two identically configured EvoMSA instances are fitted with the same
    cache prefix; for the second one ``_textModel[0]`` must equal the path of
    the stored text model.
    """
    def func(data, output):
        """Fit a b4msa ``TextModel`` on the tweets in *data*; save to *output*."""
        from b4msa.textmodel import TextModel
        from microtc.utils import tweet_iterator, save_model

        tweets = list(tweet_iterator(data))
        save_model(TextModel().fit(tweets), output)

    with StoreDelete(func, TWEETS, "textmodel_cache.tm") as sd:
        cache = os.path.join("tm", "train.json")
        X, y = get_data()
        # Fit twice with the same configuration and cache prefix; only the
        # second fitted instance is inspected below.
        for _attempt in range(2):
            evo = EvoMSA(models=[[sd._output, "sklearn.svm.LinearSVC"]],
                         cache=cache,
                         TR=False,
                         stacked_method="sklearn.naive_bayes.GaussianNB")
            evo = evo.fit(X, y)
        first_text_model = evo._textModel[0]
        print(first_text_model, sd._output)
        assert first_text_model == sd._output
        # Bare attribute access kept from the original; the test only requires
        # that it does not raise.
        evo.stacked_method
Ejemplo n.º 23
0
def test_EvoMSA_param_HA():
    """With ``TR=False`` and ``Aggress=True`` each language gets one model."""
    from EvoMSA.base import EvoMSA

    _texts, _labels = get_data()
    for language in ('ar', 'en', 'es'):
        options = dict(TR=False, lang=language, Aggress=True)
        model = EvoMSA(**options)
        assert len(model.models) == 1