예제 #1
0
def test_EvoMSA_lang_missing():
    """Check that ``base.EvoMSA(Emo=True)`` raises ``AssertionError``.

    Judging by the test name, the assertion is expected because no ``lang``
    argument is supplied -- confirm against the EvoMSA constructor.
    """
    from EvoMSA import base

    raised = False
    try:
        base.EvoMSA(Emo=True)
    except AssertionError:
        # This is the outcome the test is looking for.
        raised = True
    # Fails when the constructor returned without complaining.
    assert raised
예제 #2
0
 def main(self):
     """Fit an EvoMSA model on the first training file and save it.

     Reads the tweets of ``self.data.training_set[0]``, uses the field named
     by ``self._klass`` as the label, builds ``base.EvoMSA`` from the JSON
     options in ``self.data.kwargs`` and stores the fitted model in
     ``self.data.output_file``.
     """
     training_file = self.data.training_set[0]
     D, Y = [], []
     for tweet in tweet_iterator(training_file):
         D.append(tweet)
         Y.append(tweet[self._klass])

     # A test set that names an existing file is loaded into memory; any
     # other value (including None) is handed to ``fit`` untouched.
     test_set = self.data.test_set
     if test_set is not None and os.path.isfile(test_set):
         test_set = list(tweet_iterator(test_set))

     kwargs = {'n_jobs': self.data.n_jobs}
     extra = self.data.kwargs
     if extra is not None:
         kwargs.update(json.loads(extra))

     # Only the default stacked method (EvoDAGE) is given a tmpdir.
     default_method = "EvoDAG.model.EvoDAGE"
     if kwargs.get("stacked_method", default_method) == default_method:
         evo_kwargs = {'tmpdir': self.data.output_file + '_dir'}
     else:
         evo_kwargs = {}
     # User supplied stacked-method arguments are merged on top and removed
     # from kwargs so the keyword is not passed twice below.
     if "stacked_method_args" in kwargs:
         evo_kwargs.update(kwargs.pop("stacked_method_args"))

     evo = base.EvoMSA(stacked_method_args=evo_kwargs, **kwargs)
     evo.fit(D, Y, test_set=test_set)
     save_model(evo, self.data.output_file)
예제 #3
0
 def main(self):
     """Run the requested command-line action.

     ``self.data.transform`` and ``self.data.fitness`` delegate to
     ``self.transform()`` / ``self.fitness()``.  Otherwise, when
     ``self.data.b4msa_df`` is set, an EvoMSA instance is fitted with
     ``fit_svm`` on every training file and the vectors returned by
     ``evo.transform`` are written as JSON lines under the key ``vec``:

     * without a test set, the first training file's vectors go to
       ``self.data.output_file``;
     * with a test set, ``self.data.output_file`` is used as a directory
       holding ``train.json`` and ``test.json``.
     """
     def write_vectors(path, source, vectors):
         # Re-read ``source`` and emit each tweet extended with its vector.
         with open(path, 'w') as fpt:
             for tweet, vec in zip(tweet_iterator(source), vectors):
                 tweet.update(dict(vec=vec.tolist()))
                 fpt.write(json.dumps(tweet) + '\n')

     if self.data.transform:
         return self.transform()
     if self.data.fitness:
         return self.fitness()
     if not self.data.b4msa_df:
         return

     fnames = self.data.training_set
     if not isinstance(fnames, list):
         fnames = [fnames]
     # One list of tweets and one list of labels per training file.
     D, Y = [], []
     for fname in fnames:
         tweets, labels = [], []
         for tweet in tweet_iterator(fname):
             tweets.append(tweet)
             labels.append(tweet[self._klass])
         D.append(tweets)
         Y.append(labels)

     self._logger.info('Reading test_set %s' % self.data.test_set)
     if self.data.test_set is None:
         test_set = None
     else:
         test_set = list(tweet_iterator(self.data.test_set))

     kwargs = {'n_jobs': self.data.n_jobs}
     if self.data.kwargs is not None:
         kwargs.update(json.loads(self.data.kwargs))
     b4msa_kwargs = {}
     if self.data.b4msa_kwargs is not None:
         b4msa_kwargs.update(json.loads(self.data.b4msa_kwargs))

     evo = base.EvoMSA(b4msa_args=b4msa_kwargs, **kwargs)
     evo.fit_svm(D, Y)

     output = self.data.output_file
     if self.data.test_set is None:
         # Only the first training file is transformed and written.
         write_vectors(output, fnames[0], evo.transform(D[0]))
         return

     if not os.path.isdir(output):
         os.mkdir(output)
     write_vectors(os.path.join(output, 'train.json'),
                   fnames[0], evo.transform(D[0]))
     write_vectors(os.path.join(output, 'test.json'),
                   self.data.test_set, evo.transform(test_set))
예제 #4
0
 def main(self):
     """Fit an EvoMSA model on the training files and save it.

     Every file in ``self.data.training_set`` contributes one list of tweets
     and one list of labels (the field named by ``self._klass``).  Options
     for EvoMSA, EvoDAG and B4MSA are taken from the JSON strings in
     ``self.data.kwargs``, ``self.data.evo_kwargs`` and
     ``self.data.b4msa_kwargs``.  The fitted model is stored in
     ``self.data.output_file``.
     """
     fnames = self.data.training_set
     if not isinstance(fnames, list):
         fnames = [fnames]
     D, Y = [], []
     for fname in fnames:
         tweets, labels = [], []
         for tweet in tweet_iterator(fname):
             tweets.append(tweet)
             labels.append(tweet[self._klass])
         D.append(tweets)
         Y.append(labels)

     # A test set that names an existing file is loaded into memory; any
     # other value (including None) is handed to ``fit`` untouched.
     test_set = self.data.test_set
     if test_set is not None and os.path.isfile(test_set):
         test_set = list(tweet_iterator(test_set))

     kwargs = {'n_jobs': self.data.n_jobs}
     if self.data.kwargs is not None:
         kwargs.update(json.loads(self.data.kwargs))

     # Defaults first; user supplied JSON overrides them.
     evo_kwargs = {'tmpdir': self.data.output_file + '_dir',
                   'fitness_function': 'macro-F1'}
     if self.data.evo_kwargs is not None:
         evo_kwargs.update(json.loads(self.data.evo_kwargs))

     b4msa_kwargs = {}
     if self.data.b4msa_kwargs is not None:
         b4msa_kwargs.update(json.loads(self.data.b4msa_kwargs))

     evo = base.EvoMSA(b4msa_args=b4msa_kwargs,
                       evodag_args=evo_kwargs,
                       **kwargs)
     evo.exogenous = self._exogenous
     exogenous_paths = self.data.exogenous_model
     if exogenous_paths is not None:
         evo.exogenous_model = [self.load_model(path)
                                for path in exogenous_paths]
     evo.fit(D, Y, test_set=test_set)
     # Reset ``exogenous`` before the model is saved.
     evo.exogenous = None
     save_model(evo, self.data.output_file)
예제 #5
0
파일: 08.py 프로젝트: INGEOTEC/NLP-Course
from sklearn.metrics import recall_score
from sklearn.model_selection import KFold
from EvoMSA import base
from microtc.utils import tweet_iterator
from sklearn.preprocessing import LabelEncoder
import numpy as np


# Load the training tweets and collect their 'klass' labels.
D = list(tweet_iterator("semeval2017_En_train.json"))
y = np.array([tweet['klass'] for tweet in D])

# Cross-validation: hy receives the prediction made for each tweet while
# it was part of the held-out fold.
kf = KFold(shuffle=True)
hy = np.empty_like(y)
for train, test in kf.split(D):
    m = base.EvoMSA(Emo=True,
                    stacked_method="sklearn.naive_bayes.GaussianNB",
                    lang="en")
    m = m.fit([D[i] for i in train],
              [D[i]['klass'] for i in train])
    hy[test] = m.predict([D[i] for i in test])

# Recall with average=None (one score per class).  The value is neither
# stored nor printed, so it is only visible in an interactive session.
recall_score(y, hy, average=None)