def test_max_lines():
    """Run train() and then predict() with the ``--max-lines`` option.

    Both entry points are driven through ``sys.argv``.  The model is written
    to ``t.model`` and the predictions to ``t.json``; both files are removed
    once the prediction step has finished.
    """
    evodag_kw = '--evodag-kw={"popsize": 10, "early_stopping_rounds": 10, "time_limit": 5, "n_estimators": 5}'
    sys.argv = ['EvoMSA', evodag_kw, '-ot.model', '-n2', TWEETS]
    train()

    # Predict with the freshly trained model, passing --max-lines 500.
    sys.argv = ['EvoMSA', '-mt.model', '--max-lines', '500', '-ot.json',
                TWEETS]
    predict()

    for generated in ('t.model', 't.json'):
        os.unlink(generated)
def test_decision_function():
    """Check the size of the output produced with ``--decision-function``.

    A model is trained on TWEETS (passed twice on the command line), then
    predict() is run with ``--decision-function``.  The first record written
    to ``t1.json`` must carry a ``decision_function`` entry of length 4.
    """
    train_args = [
        'EvoMSA',
        '--evodag-kw={"popsize": 10, "early_stopping_rounds": 10, "time_limit": 5, "n_estimators": 5}',
        '-ot.model',
        '-n2',
        TWEETS,
        TWEETS,
    ]
    sys.argv = train_args
    train(output=True)

    sys.argv = ['EvoMSA', '--decision-function', '-mt.model', '-ot1.json',
                TWEETS]
    predict()

    # Read every record so that a missing key anywhere in the file fails.
    decisions = []
    for record in tweet_iterator('t1.json'):
        decisions.append(record['decision_function'])
    first = decisions[0]
    assert len(first) == 4

    os.unlink('t1.json')
    os.unlink('t.model')
def test_raw_outputs():
    """Check the size of the output produced with ``--raw-outputs``.

    After training with the ``stacked_method_args`` configuration below,
    predict() is run with ``--raw-outputs`` and the first record's
    ``decision_function`` must contain ``30 * 4`` values.
    """
    kw = '--kw={"stacked_method_args": {"popsize": 10, "early_stopping_rounds": 10, "time_limit": 60, "n_estimators": 30}}'
    sys.argv = ['EvoMSA', kw, '-ot.model', '-n2', TWEETS]
    train(output=True)

    sys.argv = ['EvoMSA', '--raw-outputs', '-mt.model', '-ot1.json', TWEETS]
    predict()

    raw = [record['decision_function']
           for record in tweet_iterator('t1.json')]
    # NOTE(review): 30 presumably mirrors "n_estimators" above -- confirm.
    expected_len = 30 * 4
    assert len(raw[0]) == expected_len

    for generated in ('t1.json', 't.model'):
        os.unlink(generated)
def test_predict():
    """Train on TWEETS, predict the same file and check the accuracy.

    The predicted ``klass`` values written to ``t1.json`` are compared with
    the ``klass`` values in TWEETS; the fraction of matches must be greater
    than 0.8 and at most 1.
    """
    import numpy as np

    evodag_kw = '--evodag-kw={"popsize": 10, "early_stopping_rounds": 10, "time_limit": 5, "n_estimators": 5}'
    sys.argv = ['EvoMSA', evodag_kw, '-ot.model', '-n2', TWEETS, TWEETS]
    train(output=True)

    sys.argv = ['EvoMSA', '-mt.model', '-ot1.json', TWEETS]
    predict()

    predicted = np.array([rec['klass'] for rec in tweet_iterator('t1.json')])
    # Touch 'decision_function' on every record: a missing key raises
    # KeyError, so this acts as an implicit presence check.
    for rec in tweet_iterator('t1.json'):
        rec['decision_function']
    expected = np.array([rec['klass'] for rec in tweet_iterator(TWEETS)])

    acc = (expected == predicted).mean()
    print(acc)
    assert 0.8 < acc <= 1

    os.unlink('t1.json')
    os.unlink('t.model')
def test_predict_numbers():
    """Train on integer-encoded labels and run predict() on TWEETS.

    The ``klass`` field of every TWEETS record is replaced by the integer
    produced by ``LabelEncoder`` and the records are written, one JSON
    document per line, to ``ex.json``.  A model is trained on that file and
    then used to predict TWEETS.

    The files generated along the way (``ex.json``, ``t.model`` and
    ``t1.json``) are removed when the test ends, whether it passes or
    fails, so they do not leak into the working directory.
    """
    import json
    import os
    from sklearn.preprocessing import LabelEncoder

    d = list(tweet_iterator(TWEETS))
    labels = [x['klass'] for x in d]
    y = LabelEncoder().fit(labels).transform(labels).tolist()
    try:
        with open('ex.json', 'w') as fpt:
            for x, y0 in zip(d, y):
                x['klass'] = y0
                fpt.write(json.dumps(x) + '\n')
        sys.argv = ['EvoMSA', '--kw={"stacked_method_args": {"popsize": 10, "early_stopping_rounds": 10, "time_limit": 5, "n_estimators": 5}, "models": [["EvoMSA.model.Corpus", "EvoMSA.model.Bernoulli"]], "TR": false}',
                    '-ot.model', '-n1', 'ex.json']
        train(output=True)
        sys.argv = ['EvoMSA', '-mt.model', '-ot1.json', TWEETS]
        predict()
    finally:
        # Only remove what was actually created, so that a failure in
        # train()/predict() is not masked by a FileNotFoundError here.
        for fname in ('t1.json', 't.model', 'ex.json'):
            if os.path.isfile(fname):
                os.unlink(fname)
def test_predict_NearestCentroid():
    """Train with ``sklearn.neighbors.NearestCentroid`` and run predict().

    The ``klass`` field of every TWEETS record is replaced by the integer
    produced by ``LabelEncoder`` and the records are written, one JSON
    document per line, to ``ex.json``.  A model configured with
    ``"evodag_class": "sklearn.neighbors.NearestCentroid"`` is trained on
    that file and then used to predict TWEETS.

    The files generated along the way (``ex.json``, ``t.model`` and
    ``t1.json``) are removed when the test ends, whether it passes or
    fails, so they do not leak into the working directory.
    """
    import json
    import os
    from sklearn.preprocessing import LabelEncoder

    d = list(tweet_iterator(TWEETS))
    labels = [x['klass'] for x in d]
    y = LabelEncoder().fit(labels).transform(labels).tolist()
    try:
        with open('ex.json', 'w') as fpt:
            for x, y0 in zip(d, y):
                x['klass'] = y0
                fpt.write(json.dumps(x) + '\n')
        # NOTE(review): "EvoMSA.model.Bernulli" is spelled differently from
        # the "Bernoulli" used by test_predict_numbers -- confirm which name
        # the EvoMSA.model module actually exports before changing it.
        sys.argv = ['EvoMSA', '--kw={"evodag_class": "sklearn.neighbors.NearestCentroid", "TR": false, "models": [["EvoMSA.model.Corpus", "EvoMSA.model.Bernulli"]]}',
                    '-ot.model', '-n1', 'ex.json']
        train(output=True)
        sys.argv = ['EvoMSA', '-mt.model', '-ot1.json', TWEETS]
        predict()
    finally:
        # Only remove what was actually created, so that a failure in
        # train()/predict() is not masked by a FileNotFoundError here.
        for fname in ('t1.json', 't.model', 'ex.json'):
            if os.path.isfile(fname):
                os.unlink(fname)
def test_performance_public_set():
    """Run performance() over two groups of prediction files.

    Four models (seeds 0-3) are trained and used to predict TWEETS; a model
    or prediction file that already exists on disk is reused instead of
    being regenerated.  For each score name, performance() is called with
    the predictions of seeds 0-1 and 2-3 separated by ``'-'`` and the
    resulting object's ``_p`` attribute must have length 2.
    """
    import os
    from EvoMSA.command_line import performance

    seeds = range(4)

    for seed in seeds:
        model_file = 't-%s.model' % seed
        if not os.path.isfile(model_file):
            sys.argv = ['EvoMSA',
                        '--evodag-kw={"popsize": 10, "early_stopping_rounds": 10, "time_limit": 5, "n_estimators": 3}',
                        '--kw={"seed": %s}' % seed,
                        '-o' + model_file,
                        '-n1',
                        TWEETS]
            train(output=True)

    for seed in seeds:
        predict_file = 't-%s.predict' % seed
        if not os.path.isfile(predict_file):
            sys.argv = ['EvoMSA', '-mt-%s.model' % seed, '-o' + predict_file,
                        TWEETS]
            predict()

    first_group = ['t-%s.predict' % k for k in range(2)]
    second_group = ['t-%s.predict' % k for k in range(2, 4)]
    for score in ('macroF1', 'macroRecall', 'macroPrecision', 'accuracy'):
        header = ['EvoMSA', '--score', score, '-n2', '-y', TWEETS]
        sys.argv = header + first_group + ['-'] + second_group
        m = performance(output=True)
        assert len(m._p) == 2