Exemple #1
0
def test_EvoMSA_predict_proba():
    """Fit on two datasets (all data and its P/N subset) and check predict_proba.

    The output must have 1000 entries and every value must lie in [0, 1].
    """
    X, y = get_data()
    search_args = {'popsize': 100,
                   'early_stopping_rounds': 100,
                   'time_limit': 5,
                   'n_estimators': 5}
    evo = EvoMSA(evodag_args=search_args, n_jobs=2)
    # Second dataset: only the examples labelled 'P' or 'N'.
    polar_X = [text for text, label in zip(X, y) if label in ['P', 'N']]
    polar_y = [label for label in y if label in ['P', 'N']]
    evo = evo.fit([X, polar_X], [y, polar_y])
    hy = evo.predict_proba(X)
    assert len(hy) == 1000
    assert hy.min() >= 0
    assert hy.max() <= 1
Exemple #2
0
def test_EvoMSA_evodag_args():
    """Fit with explicit ``evodag_args`` and check the shape of ``transform``.

    With ``n_estimators=5`` the test expects 1000 rows of 5 values each.
    """
    X, y = get_data()
    search_args = {'popsize': 10,
                   'early_stopping_rounds': 10,
                   'time_limit': 5,
                   'n_estimators': 5}
    evo = EvoMSA(evodag_args=search_args, n_jobs=2)
    # Second dataset: only the examples labelled 'P' or 'N'.
    polar_X = [text for text, label in zip(X, y) if label in ['P', 'N']]
    polar_y = [label for label in y if label in ['P', 'N']]
    evo = evo.fit([X, polar_X], [y, polar_y])
    assert evo
    D = evo.transform(X, y)
    first_row = D[0]
    assert len(first_row) == 5
    assert len(D) == 1000
Exemple #3
0
def test_sklearn_kfold():
    """Check that every training fold from ``sklearn_kfold`` has all 3 classes."""
    import numpy as np
    settings = dict(tm_n_jobs=2,
                    n_jobs=1,
                    TH=True,
                    lang="es",
                    n_splits=3,
                    stacked_method="sklearn.svm.LinearSVC")
    evo = EvoMSA(**settings)
    labels = np.array([0, 1, 1, 1, 2, 2, 2])
    folds = evo.sklearn_kfold(None, labels, labels)
    # Each fold is a 6-tuple; only the train/test index entries are used here.
    for _, _, _, train_idx, test_idx, _ in folds:
        print(train_idx, test_idx)
        classes_in_train = np.unique(labels[train_idx])
        assert classes_in_train.shape[0] == 3
Exemple #4
0
def test_EvoMSA_predict():
    """Fit a Corpus/Bernoulli model and require > 0.8 accuracy on the training data."""
    import numpy as np
    X, y = get_data()
    stacking_args = {'popsize': 10,
                     'early_stopping_rounds': 10,
                     'time_limit': 15,
                     'n_estimators': 10}
    evo = EvoMSA(stacked_method_args=stacking_args,
                 models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']],
                 n_jobs=1)
    evo = evo.fit(X, y)
    hy = evo.predict(X)
    assert len(hy) == 1000
    accuracy = (np.array(y) == hy).mean()
    print(accuracy, hy)
    print(evo.predict_proba(X))
    assert accuracy > 0.8
Exemple #5
0
def test_evomsa_wrapper():
    """Save a fitted EvoMSA to disk and reuse the file as a model of another EvoMSA.

    The temporary file ``tmp.evomsa`` is removed even when the second fit or
    one of the assertions fails, so a failing run does not leave it behind.
    """
    import os
    from microtc.utils import save_model
    from EvoMSA.base import EvoMSA
    from test_base import get_data
    X, y = get_data()
    fname = 'tmp.evomsa'
    model = EvoMSA(stacked_method="sklearn.naive_bayes.GaussianNB",
                   n_jobs=2).fit(X, y)
    try:
        save_model(model, fname)
        assert os.path.isfile(fname)
        evo = EvoMSA(models=[[fname, "EvoMSA.model.Identity"]],
                     stacked_method="sklearn.naive_bayes.GaussianNB",
                     n_jobs=2).fit(X, y)
        assert evo
    finally:
        # save_model may have failed before creating the file.
        if os.path.isfile(fname):
            os.unlink(fname)
Exemple #6
0
def test_EvoMSA_evodag_class():
    """Use NearestCentroid as ``stacked_method`` and compare predict with predict_proba.

    The class with the highest probability must equal the predicted class.
    """
    from sklearn.neighbors import NearestCentroid
    import numpy as np
    X, y = get_data()
    model = EvoMSA(models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernoulli']],
                   stacked_method="sklearn.neighbors.NearestCentroid",
                   TR=False,
                   n_jobs=2)
    model = model.fit(X, y)
    assert isinstance(model._evodag_model, NearestCentroid)
    predicted = model.predict(X)
    proba = model.predict_proba(X)
    best_column = proba.argmax(axis=1)
    from_proba = model._le.inverse_transform(best_column)
    print(predicted, from_proba)
    assert np.all(predicted == from_proba)
Exemple #7
0
def test_binary_labels_json():
    """Fit on 0/1 labels and check each prediction can be dumped to JSON via ``str``."""
    import json
    X, y = get_data()
    # Only 'P' maps to 1; every other label maps to 0.
    to_binary = {'NONE': 0, 'N': 0, 'NEU': 0, 'P': 1}
    y = [to_binary[label] for label in y]
    search_args = {'popsize': 10,
                   'early_stopping_rounds': 10,
                   'time_limit': 5,
                   'n_estimators': 5}
    evo = EvoMSA(evodag_args=search_args, n_jobs=2)
    evo = evo.fit(X, y)
    hy = evo.predict(X)
    for pred in hy:
        print(type(pred), str(pred))
        encoded = json.dumps({'klass': str(pred)})
    # Prints the JSON of the last prediction.
    print(encoded)
Exemple #8
0
def test_EvoMSA_predict():
    """Fit on two datasets with a Corpus/Bernulli model and require > 0.8 accuracy.

    The second dataset is the subset of examples labelled 'P' or 'N'.
    """
    import numpy as np
    X, y = get_data()
    search_args = {'popsize': 10,
                   'early_stopping_rounds': 10,
                   'time_limit': 15,
                   'n_estimators': 10}
    evo = EvoMSA(evodag_args=search_args,
                 models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
                 n_jobs=1)
    polar_X = [text for text, label in zip(X, y) if label in ['P', 'N']]
    polar_y = [label for label in y if label in ['P', 'N']]
    evo = evo.fit([X, polar_X], [y, polar_y])
    hy = evo.predict(X)
    assert len(hy) == 1000
    accuracy = (np.array(y) == hy).mean()
    print(accuracy, hy)
    print(evo.predict_proba(X))
    assert accuracy > 0.8
Exemple #9
0
def test_EvoMSA_param_HA():
    """Create the ``.evoha`` spaces and check ``HA=True`` selects an existing file.

    For each language an HA space is written under ``<dirname>/models``; an
    EvoMSA built with ``HA=True`` must then list exactly one model whose first
    entry is a file on disk.
    """
    from EvoMSA.model import ThumbsUpDownAr, ThumbsUpDownEn, ThumbsUpDownEs
    from EvoMSA.model import HA
    from EvoMSA.base import EvoMSA
    from b4msa.lang_dependency import get_lang
    import os
    X, y = get_data()
    models_dir = os.path.join(get_dirname(), 'models')
    if not os.path.isdir(models_dir):
        os.mkdir(models_dir)
    languages = ['ar', 'en', 'es']
    for lang in languages:
        lang_name = get_lang(lang)
        target = os.path.join(models_dir, "%s.evoha" % lang_name)
        HA.create_space(TWEETS, target)
    thumbs_classes = [ThumbsUpDownAr, ThumbsUpDownEn, ThumbsUpDownEs]
    for _cls, lang in zip(thumbs_classes, languages):
        search_args = {'popsize': 10,
                       'early_stopping_rounds': 10,
                       'n_estimators': 3}
        model = EvoMSA(evodag_args=search_args,
                       TR=False,
                       lang=lang,
                       HA=True,
                       n_jobs=2)
        assert len(model.models) == 1
        ha_path = model.models[0][0]
        print(ha_path)
        assert os.path.isfile(ha_path)
Exemple #10
0
def test_EvoMSA_param_TR():
    """Check the effect of ``TR`` on the list of models.

    ``TR=False`` must give no models; leaving ``TR`` unset must give one model
    whose first entry is b4msa's ``TextModel``.
    """
    from EvoMSA.base import EvoMSA
    from b4msa.textmodel import TextModel
    X, y = get_data()

    without_tr = EvoMSA(evodag_args={'popsize': 10,
                                     'early_stopping_rounds': 10,
                                     'n_estimators': 3},
                        TR=False,
                        n_jobs=2)
    assert len(without_tr.models) == 0

    default_tr = EvoMSA(evodag_args={'popsize': 10,
                                     'early_stopping_rounds': 10,
                                     'n_estimators': 3},
                        n_jobs=2)
    assert len(default_tr.models) == 1
    first_model = default_tr.models[0]
    print(first_model)
    assert first_model[0] == TextModel
Exemple #11
0
def test_EvoMSA_identity():
    """Use ``Identity`` as ``evodag_class`` and compare predict with predict_proba.

    The class with the highest probability must equal the predicted class.
    """
    from EvoMSA.model import Identity
    import numpy as np
    X, y = get_data()
    search_args = {'popsize': 10,
                   'early_stopping_rounds': 10,
                   'n_estimators': 3}
    model = EvoMSA(evodag_args=search_args,
                   models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
                   TR=False,
                   evodag_class="EvoMSA.model.Identity",
                   n_jobs=2)
    model = model.fit(X, y)
    assert isinstance(model._evodag_model, Identity)
    predicted = model.predict(X)
    proba = model.predict_proba(X)
    best_column = proba.argmax(axis=1)
    from_proba = model._le.inverse_transform(best_column)
    print(predicted, from_proba)
    assert np.all(predicted == from_proba)
Exemple #12
0
def test_EvoMSA_evodag_class():
    """Use NearestCentroid as ``evodag_class`` and compare predict with predict_proba.

    The class with the highest probability must equal the predicted class.
    """
    from sklearn.neighbors import NearestCentroid
    import numpy as np
    X, y = get_data()
    search_args = {'popsize': 10,
                   'early_stopping_rounds': 10,
                   'n_estimators': 3}
    model = EvoMSA(evodag_args=search_args,
                   models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
                   evodag_class="sklearn.neighbors.NearestCentroid",
                   TR=False,
                   n_jobs=2)
    model = model.fit(X, y)
    assert isinstance(model._evodag_model, NearestCentroid)
    predicted = model.predict(X)
    proba = model.predict_proba(X)
    best_column = proba.argmax(axis=1)
    from_proba = model._le.inverse_transform(best_column)
    print(predicted, from_proba)
    assert np.all(predicted == from_proba)
Exemple #13
0
def test_EvoMSA_fit_svm():
    """Check ``fit_svm`` builds one first-stage model per configured model.

    Two models are expected, in order: a ``LinearSVC`` and a ``Bernulli``.
    """
    from sklearn.preprocessing import LabelEncoder
    X, y = get_data()
    from sklearn.svm import LinearSVC
    from EvoMSA.model import Bernulli
    search_args = {'popsize': 10,
                   'early_stopping_rounds': 10,
                   'n_estimators': 3}
    model = EvoMSA(evodag_args=search_args,
                   models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
                   n_jobs=2)
    # fit_svm is given integer-encoded labels.
    encoder = LabelEncoder().fit(y)
    y = encoder.transform(y)
    model.fit_svm(X, y)
    print(model._svc_models)
    assert len(model._svc_models) == 2
    expected_types = [LinearSVC, Bernulli]
    for fitted, expected in zip(model._svc_models, expected_types):
        assert isinstance(fitted, expected)
Exemple #14
0
def test_model_instance():
    """Pass an already fitted TextModel instance in ``models`` and check it is kept."""
    from microtc.textmodel import TextModel
    X, y = get_data()
    tm = TextModel().fit(X)
    settings = dict(tm_n_jobs=1,
                    n_jobs=1,
                    TR=False,
                    lang="es",
                    models=[[tm, "sklearn.svm.LinearSVC"]],
                    stacked_method="sklearn.svm.LinearSVC")
    evo = EvoMSA(**settings)
    evo = evo.fit(X, y)
    stored_text_model = evo.models[0][0]
    assert stored_text_model == tm
Exemple #15
0
def test_EvoMSA_cpu_count():
    """Check that ``n_jobs=-1`` is stored as the machine's CPU count."""
    from EvoMSA.base import EvoMSA
    from multiprocessing import cpu_count
    X, y = get_data()
    search_args = {'popsize': 10,
                   'early_stopping_rounds': 10,
                   'n_estimators': 3}
    model = EvoMSA(evodag_args=search_args, TR=False, n_jobs=-1)
    print(model.n_jobs, cpu_count())
    assert model.n_jobs == cpu_count()
Exemple #16
0
def test_EvoMSA_multinomial():
    """Fit with a Corpus/Multinomial model and check the first-stage model's type."""
    from EvoMSA.model import Multinomial
    X, y = get_data()
    search_args = {'popsize': 10,
                   'early_stopping_rounds': 10,
                   'time_limit': 5,
                   'n_estimators': 5}
    evo = EvoMSA(evodag_args=search_args,
                 models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Multinomial']],
                 TR=False,
                 n_jobs=1)
    evo = evo.fit(X, y)
    assert evo
    first_stage = evo._svc_models[0]
    assert isinstance(first_stage, Multinomial)
Exemple #17
0
def test_EvoMSA_regression():
    """Fit with ``classifier=False`` and check the outputs are 1-D, one value per input.

    Both ``decision_function`` and ``predict`` are checked.
    """
    from EvoMSA.base import LabelEncoderWrapper
    from EvoMSA.utils import download
    X, y = get_data()
    X = [{'text': text} for text in X]
    # Turn the labels into a numeric target: encoded label minus 1.5.
    encoder = LabelEncoderWrapper().fit(y)
    y = encoder.transform(y) - 1.5
    stacking_args = {'popsize': 10,
                     'early_stopping_rounds': 10,
                     'time_limit': 5,
                     'n_estimators': 2}
    evo = EvoMSA(stacked_method_args=stacking_args,
                 classifier=False,
                 models=[[download("emo_Es.tm"), 'EvoMSA.model.Identity']],
                 TR=False,
                 n_jobs=1)
    evo = evo.fit(X, y)
    assert evo
    scores = evo.decision_function(X)
    print(scores.shape, scores.ndim)
    assert scores.shape[0] == len(X) and scores.ndim == 1
    predictions = evo.predict(X)
    assert predictions.shape[0] == len(X) and predictions.ndim == 1
Exemple #18
0
def test_EvoMSA_param_HA():
    """Check ``HA=True`` (with ``TR=False``) yields exactly one model per language."""
    from EvoMSA.base import EvoMSA
    X, y = get_data()
    for lang in ('ar', 'en', 'es'):
        stacking_args = {'popsize': 10,
                         'early_stopping_rounds': 10,
                         'n_estimators': 3}
        model = EvoMSA(stacked_method_args=stacking_args,
                       TR=False,
                       lang=lang,
                       HA=True,
                       n_jobs=2)
        n_models = len(model.models)
        assert n_models == 1
Exemple #19
0
def test_cache():
    """Exercise EvoMSA's ``cache`` argument with a text model stored on disk.

    The test checks that constructing EvoMSA with a cache prefix creates the
    ``tm`` directory, that the cached text-model file name is the prefix
    followed by ``-`` and the MD5 hex digest of the model path, and that the
    files listed by ``cache.ml_train()`` / ``cache.ml_kfold()`` exist after
    fitting. It finally checks that ``predict`` with its own ``cache`` prefix
    writes a file using the same digest.
    """
    import hashlib

    def func(data, output):
        """Fit a TextModel on the tweets read from ``data`` and save it to ``output``."""
        from b4msa.textmodel import TextModel
        from microtc.utils import tweet_iterator, save_model

        tm = TextModel().fit(list(tweet_iterator(data)))
        save_model(tm, output)

    # NOTE(review): StoreDelete is defined elsewhere; presumably it calls func
    # to create the file and removes it on exit -- confirm.
    with StoreDelete(func, TWEETS, "textmodel_cache.tm") as sd:
        cache = os.path.join("tm", "train.json")
        evo = EvoMSA(models=[[sd._output, "sklearn.svm.LinearSVC"]],
                     cache=cache)
        assert os.path.isdir("tm")
        # Expected cache file name: "<cache>-<md5 of the text-model path>".
        output = hashlib.md5(sd._output.encode()).hexdigest()
        output = cache + "-%s" % output
        print(evo.cache.textModels)
        # Index 1 is the user-supplied model in this configuration.
        assert evo.cache.textModels[1] == output
        X, y = get_data()
        evo.first_stage(X, y)
        assert os.path.isfile(output)
        # Keep the cache file names from the first instance; they are checked
        # on disk after the second instance has been fitted.
        ML = list(evo.cache.ml_train())
        ML_K = list(evo.cache.ml_kfold())
        evo = EvoMSA(models=[[sd._output, "sklearn.svm.LinearSVC"]],
                     stacked_method_args=dict(popsize=10,
                                              early_stopping_rounds=10,
                                              n_estimators=3),
                     cache=cache).fit(X, y, test_set=X[:30])
        hy = evo.predict(X[:10])
        print(len(hy), hy)
        assert len(hy) == 10
        for k in ML:
            print(k)
            assert os.path.isfile(k)
        for k in ML_K:
            print(k)
            assert os.path.isfile(k)
        # Predict with a different cache prefix; the file written must reuse
        # the digest part of the training cache name.
        cache = os.path.join("tm", "test")
        evo.predict(X, cache=cache)
        output = cache + '-' + output.split("-")[1]
        print(output)
        assert os.path.isfile(output)
Exemple #20
0
def test_EvoMSA_empty_string():
    """Fit on data that includes an empty text labelled 'NONE'."""
    from EvoMSA.model import Multinomial
    X, y = get_data()
    # Extra training example: empty text with label 'NONE'.
    X.append("")
    y.append("NONE")
    stacking_args = {'popsize': 10,
                     'early_stopping_rounds': 10,
                     'time_limit': 5,
                     'n_estimators': 5}
    evo = EvoMSA(stacked_method_args=stacking_args,
                 models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Multinomial']],
                 TR=False,
                 n_jobs=1)
    evo = evo.fit(X, y)
    assert evo
    first_stage = evo._svc_models[0]
    assert isinstance(first_stage, Multinomial)
Exemple #21
0
def test_EvoMSA_exogenous_model():
    """Attach a fitted EvoMSA as ``exogenous_model`` and check ``transform`` width.

    The transformed matrix is expected to have 8 columns.
    """
    X, y = get_data()
    exogenous = EvoMSA(evodag_args={'popsize': 10,
                                    'early_stopping_rounds': 10},
                       n_jobs=2)
    exogenous = exogenous.fit(X, y)
    search_args = {'popsize': 10,
                   'early_stopping_rounds': 10,
                   'time_limit': 5,
                   'n_estimators': 5}
    evo = EvoMSA(evodag_args=search_args, n_jobs=2)
    evo.exogenous_model = exogenous
    evo.fit(X, y)
    features = evo.transform(X)
    assert features.shape[1] == 8
Exemple #22
0
def test_lazy_loading():
    """Fit twice with the same cache and check the text model stays a path.

    After the second fit, ``_textModel[0]`` must still equal the path of the
    stored model rather than a loaded object.
    """
    def func(data, output):
        """Fit a TextModel on the tweets read from ``data`` and save it to ``output``."""
        from b4msa.textmodel import TextModel
        from microtc.utils import tweet_iterator, save_model

        tweets = list(tweet_iterator(data))
        tm = TextModel().fit(tweets)
        save_model(tm, output)

    with StoreDelete(func, TWEETS, "textmodel_cache.tm") as sd:
        cache = os.path.join("tm", "train.json")
        X, y = get_data()
        # The second pass fits an identical configuration over the same cache.
        for _ in range(2):
            evo = EvoMSA(models=[[sd._output, "sklearn.svm.LinearSVC"]],
                         cache=cache,
                         TR=False,
                         stacked_method="sklearn.naive_bayes.GaussianNB")
            evo = evo.fit(X, y)
        print(evo._textModel[0], sd._output)
        assert evo._textModel[0] == sd._output
        # Accessed only for its effect; the value is not used.
        evo.stacked_method
Exemple #23
0
def test_EvoMSA_param_Emo():
    """Check ``Emo=True`` selects the language's EmoSpace class as the only model."""
    from EvoMSA.model import EmoSpaceEs, EmoSpaceEn, EmoSpaceAr
    from EvoMSA.base import EvoMSA

    X, y = get_data()
    expected_by_lang = (('ar', EmoSpaceAr),
                        ('en', EmoSpaceEn),
                        ('es', EmoSpaceEs))
    for lang, expected in expected_by_lang:
        search_args = {'popsize': 10,
                       'early_stopping_rounds': 10,
                       'n_estimators': 3}
        model = EvoMSA(evodag_args=search_args,
                       TR=False,
                       lang=lang,
                       Emo=True,
                       n_jobs=2)
        assert len(model.models) == 1
        assert model.models[0][0] == expected
Exemple #24
0
def test_EvoMSA_param_TH():
    """Check ``TH=True`` selects the language's ThumbsUpDown class as the only model."""
    from EvoMSA.model import ThumbsUpDownAr, ThumbsUpDownEn, ThumbsUpDownEs
    from EvoMSA.base import EvoMSA

    X, y = get_data()
    expected_by_lang = (('ar', ThumbsUpDownAr),
                        ('en', ThumbsUpDownEn),
                        ('es', ThumbsUpDownEs))
    for lang, expected in expected_by_lang:
        search_args = {'popsize': 10,
                       'early_stopping_rounds': 10,
                       'n_estimators': 3}
        model = EvoMSA(evodag_args=search_args,
                       TR=False,
                       lang=lang,
                       TH=True,
                       n_jobs=2)
        assert len(model.models) == 1
        assert model.models[0][0] == expected
Exemple #25
0
    def _fit(self, X, y, cache):
        """Build a new EvoMSA from this object's stored settings and fit it.

        The instance is created with ``TR=self._TR``, ``models=self.model``,
        the given ``cache`` and every keyword stored in ``self._kwargs``.

        :param X: Training set - independent variables
        :type X: list
        :param y: Training set - dependent variable
        :type y: list or np.array
        :param cache: Value forwarded as EvoMSA's ``cache`` argument
        :returns: The result of calling ``fit(X, y)`` on the new EvoMSA
        """

        evo = EvoMSA(TR=self._TR,
                     models=self.model,
                     cache=cache,
                     **self._kwargs)
        return evo.fit(X, y)
Exemple #26
0
def test_TextModel():
    """Check ``EvoMSA.model`` stores one TextModel per dataset in ``_textModel``.

    A single dataset must give a one-element list; two datasets must give two.
    """
    from b4msa.textmodel import TextModel
    X, y = get_data()
    evo = EvoMSA()

    # One dataset.
    evo.model(X)
    assert isinstance(evo._textModel, list)
    assert isinstance(evo._textModel[0], TextModel)
    assert len(evo._textModel) == 1

    # Two datasets.
    evo.model([X, X])
    assert isinstance(evo._textModel, list)
    assert len(evo._textModel) == 2
    for text_model in evo._textModel:
        assert isinstance(text_model, TextModel)
Exemple #27
0
def test_EvoMSA_kfold_decision_function():
    """Check ``kfold_decision_function`` returns lists with 4 values per example."""
    from sklearn.preprocessing import LabelEncoder
    X, y = get_data()
    encoder = LabelEncoder().fit(y)
    y = encoder.transform(y)
    search_args = {'popsize': 10,
                   'early_stopping_rounds': 10,
                   'n_estimators': 3}
    evo = EvoMSA(evodag_args=search_args,
                 models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']])
    evo.model(X)
    spaces = evo.vector_space(X)
    # Index 1 is the user-supplied Corpus/Bernulli pair.
    classifier = evo.models[1][1]
    D = evo.kfold_decision_function(classifier, spaces[1], y)
    first_row = D[0]
    assert len(first_row) == 4
    assert isinstance(first_row, list)
Exemple #28
0
def test_EvoMSA_fit():
    """Fit EvoMSA, then check the model types survive a save/load round trip.

    The second first-stage model must be a ``Bernulli`` and the stacking model
    an ``EvoDAGE``, both before and after saving and reloading. The file
    ``test.evomodel`` is removed even when an assertion fails, so a failing
    run does not leave it behind.
    """
    import os
    from EvoMSA.model import Bernulli
    from EvoDAG.model import EvoDAGE
    from microtc.utils import load_model, save_model
    X, y = get_data()
    print('iniciando')
    evo = EvoMSA(evodag_args=dict(popsize=10,
                                  early_stopping_rounds=10,
                                  time_limit=5,
                                  n_estimators=5),
                 models=[['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']],
                 n_jobs=1).fit(X, y)
    print("Termine fit")
    assert evo
    assert isinstance(evo._svc_models[1], Bernulli)
    assert isinstance(evo._evodag_model, EvoDAGE)
    fname = 'test.evomodel'
    try:
        save_model(evo, fname)
        print("Guarde modelo")
        evo = load_model(fname)
        print("Cargue modelo")
        assert isinstance(evo._svc_models[1], Bernulli)
        assert isinstance(evo._evodag_model, EvoDAGE)
    finally:
        # save_model may have failed before creating the file.
        if os.path.isfile(fname):
            os.unlink(fname)
Exemple #29
0
def test_EvoMSA_param_HA():
    """Check ``Aggress=True`` (with ``TR=False``) yields exactly one model per language."""
    from EvoMSA.base import EvoMSA
    X, y = get_data()
    for lang in ('ar', 'en', 'es'):
        model = EvoMSA(TR=False, lang=lang, Aggress=True)
        n_models = len(model.models)
        assert n_models == 1