def test_bernoulli():
    """Fit a Bernoulli model on the tweet data and check its decision values.

    The test expects ``decision_function`` to return a 1000 x 4 array whose
    entries all lie in the closed interval [-1, 1].
    """
    import numpy as np
    from EvoMSA.model import Corpus, Bernoulli
    from sklearn.preprocessing import LabelEncoder

    # The corpus is built from the same texts that are later transformed.
    corpus = Corpus([tweet['text'] for tweet in tweet_iterator(TWEETS)])
    features = corpus.transform(
        [tweet['text'] for tweet in tweet_iterator(TWEETS)]
    )

    # Encode the 'klass' labels before fitting.
    raw_labels = [tweet['klass'] for tweet in tweet_iterator(TWEETS)]
    encoder = LabelEncoder()
    encoder.fit(raw_labels)
    labels = encoder.transform(raw_labels)

    model = Bernoulli()
    model.fit(features, labels)
    scores = model.decision_function(features)

    assert scores.shape[0] == 1000 and scores.shape[1] == 4
    assert np.all((scores >= -1) & (scores <= 1))
def test_multinomial():
    """Fit a Multinomial model on the tweet data and check its decision values.

    Each text is indexed through the corpus (``c[text]``) and the resulting
    list is converted with ``Corpus.tonp`` before fitting. The test expects
    ``decision_function`` to return a 1000 x 4 array whose entries all lie
    in the closed interval [-1, 1].
    """
    import numpy as np
    from EvoMSA.model import Corpus, Multinomial
    from sklearn.preprocessing import LabelEncoder

    c = Corpus([x['text'] for x in tweet_iterator(TWEETS)])
    X = c.tonp([c[x['text']] for x in tweet_iterator(TWEETS)])

    y = [x['klass'] for x in tweet_iterator(TWEETS)]
    le = LabelEncoder().fit(y)
    y = le.transform(y)

    b = Multinomial()
    b.fit(X, y)
    pr = b.decision_function(X)

    # The fitted model is expected to expose ``num_terms``; checking it
    # explicitly keeps that requirement without printing the score matrix.
    assert hasattr(b, 'num_terms')
    assert pr.shape[0] == 1000 and pr.shape[1] == 4
    assert np.all((pr <= 1) & (pr >= -1))
def test_OutputClassifier():
    """Check the CSV files produced by ``OutputClassifier(output='xx')``.

    The test expects ``xx_train.csv`` to exist after ``fit`` and
    ``xx_test.csv`` to exist after ``decision_function``, with the latter
    holding one line per row of the returned decision values.

    Both files are removed when the test finishes, whether it passes or
    fails, so a failing run does not leave artifacts in the working
    directory.
    """
    from EvoMSA.model import Corpus, OutputClassifier
    from sklearn.preprocessing import LabelEncoder

    train_file = 'xx_train.csv'
    test_file = 'xx_test.csv'

    c = Corpus([x['text'] for x in tweet_iterator(TWEETS)])
    X = c.transform([x['text'] for x in tweet_iterator(TWEETS)])
    y = [x['klass'] for x in tweet_iterator(TWEETS)]
    le = LabelEncoder().fit(y)
    y = le.transform(y)

    b = OutputClassifier(output='xx')
    assert b._output == 'xx'
    try:
        b.fit(X, y)
        assert os.path.isfile(train_file)
        pr = b.decision_function(X)
        assert os.path.isfile(test_file)
        # Close the handle deterministically instead of relying on GC.
        with open(test_file) as fpt:
            num_lines = sum(1 for _ in fpt)
        assert num_lines == pr.shape[0]
    finally:
        # Either file may be missing if an earlier step failed.
        for fname in (train_file, test_file):
            if os.path.isfile(fname):
                os.unlink(fname)
def test_corpus():
    """Check indexing a Corpus with a short text.

    Indexing with ``'hola hola mundo'`` is expected to yield three items,
    the first two of which compare equal (presumably one entry per word,
    so the repeated word maps to the same value -- confirm against Corpus).
    """
    from EvoMSA.model import Corpus

    texts = [tweet['text'] for tweet in tweet_iterator(TWEETS)]
    corpus = Corpus(texts)

    encoded = corpus['hola hola mundo']
    assert len(encoded) == 3
    assert encoded[0] == encoded[1]