def test_EvoMSA_lang_missing():
    """Check that ``base.EvoMSA(Emo=True)`` raises ``AssertionError``.

    The constructor is called with ``Emo=True`` and no ``lang`` argument;
    the test passes only when that call raises ``AssertionError``.
    """
    from EvoMSA import base

    try:
        base.EvoMSA(Emo=True)
    except AssertionError:
        # Expected outcome: the constructor rejected the arguments.
        pass
    else:
        # The constructor succeeded, which this test treats as a failure.
        assert False
def main(self):
    """Fit an EvoMSA model on the first training file and save it.

    Tweets are read from ``self.data.training_set[0]``; the label of each
    tweet is taken from the key ``self._klass``.  ``self.data.test_set`` is
    loaded with ``tweet_iterator`` when it names an existing file, otherwise
    it is forwarded unchanged (``None`` included).  Constructor keyword
    arguments start from ``n_jobs`` and are overridden by the JSON object in
    ``self.data.kwargs`` when that is given.  The fitted model is written
    with ``save_model`` to ``self.data.output_file``.
    """
    training_files = self.data.training_set
    first_file = training_files[0]

    # Collect the tweets and their labels in a single pass over the file.
    D, Y = [], []
    for tweet in tweet_iterator(first_file):
        D.append(tweet)
        Y.append(tweet[self._klass])

    # A test set that is a path to a file is read into memory; anything
    # else (including None) is handed to ``fit`` as is.
    test_set = self.data.test_set
    if test_set is not None and os.path.isfile(test_set):
        test_set = list(tweet_iterator(test_set))

    kwargs = {"n_jobs": self.data.n_jobs}
    user_kwargs = self.data.kwargs
    if user_kwargs is not None:
        kwargs.update(json.loads(user_kwargs))

    # A tmpdir is only supplied when the stacked method is EvoDAGE, which is
    # also the value assumed when no stacked method was requested.
    evodage = "EvoDAG.model.EvoDAGE"
    if kwargs.get("stacked_method", evodage) == evodage:
        stacked_args = {"tmpdir": self.data.output_file + '_dir'}
    else:
        stacked_args = {}

    # User-provided stacked-method arguments override the defaults and are
    # removed from kwargs so they are not passed to the constructor twice.
    if "stacked_method_args" in kwargs:
        stacked_args.update(kwargs.pop("stacked_method_args"))

    evo = base.EvoMSA(stacked_method_args=stacked_args, **kwargs)
    evo.fit(D, Y, test_set=test_set)
    save_model(evo, self.data.output_file)
def main(self):
    """Dispatch to transform/fitness, or dump ``evo.transform`` vectors.

    When ``self.data.transform`` or ``self.data.fitness`` is set, the
    corresponding method is called and its result returned.  Otherwise, and
    only when ``self.data.b4msa_df`` is truthy, an ``EvoMSA`` instance is
    built, ``fit_svm`` is run on every training file, and the output of
    ``evo.transform`` is written as one JSON document per line with the
    vector stored under the ``vec`` key:

    * without a test set, the vectors for the first training file are
      written to ``self.data.output_file``;
    * with a test set, ``self.data.output_file`` is used as a directory
      (created when missing) receiving ``train.json`` and ``test.json``.
    """
    if self.data.transform:
        return self.transform()
    if self.data.fitness:
        return self.fitness()
    if not self.data.b4msa_df:
        return

    fnames = self.data.training_set
    if not isinstance(fnames, list):
        fnames = [fnames]

    # One list of tweets and one list of labels per training file.
    D, Y = [], []
    for fname in fnames:
        docs, labels = [], []
        for tweet in tweet_iterator(fname):
            docs.append(tweet)
            labels.append(tweet[self._klass])
        D.append(docs)
        Y.append(labels)

    self._logger.info('Reading test_set %s' % self.data.test_set)
    test_path = self.data.test_set
    test_set = None if test_path is None else list(tweet_iterator(test_path))

    def _with_overrides(defaults, raw_json):
        # Overlay the JSON object in raw_json (when given) on defaults.
        if raw_json is not None:
            defaults.update(json.loads(raw_json))
        return defaults

    kwargs = _with_overrides({"n_jobs": self.data.n_jobs}, self.data.kwargs)
    b4msa_kwargs = _with_overrides({}, self.data.b4msa_kwargs)

    evo = base.EvoMSA(b4msa_args=b4msa_kwargs, **kwargs)
    evo.fit_svm(D, Y)

    def _dump(path, source, docs):
        # Transform docs, then re-read source so that each written record
        # is a freshly loaded tweet extended with its vector.
        vectors = evo.transform(docs)
        with open(path, 'w') as fpt:
            for tweet, vec in zip(tweet_iterator(source), vectors):
                tweet.update(vec=vec.tolist())
                fpt.write(json.dumps(tweet) + '\n')

    output = self.data.output_file
    if self.data.test_set is None:
        _dump(output, fnames[0], D[0])
        return

    if not os.path.isdir(output):
        os.mkdir(output)
    _dump(os.path.join(output, 'train.json'), fnames[0], D[0])
    _dump(os.path.join(output, 'test.json'), self.data.test_set, test_set)
def main(self):
    """Fit an EvoMSA model on every training file and save it.

    Each entry of ``self.data.training_set`` (a single value is wrapped in a
    list) contributes one list of tweets and one list of labels, the label
    being read from the key ``self._klass``.  ``self.data.test_set`` is
    loaded with ``tweet_iterator`` when it names an existing file, otherwise
    it is forwarded unchanged.  The three keyword-argument dictionaries
    (constructor, ``evodag_args``, ``b4msa_args``) start from defaults and
    are overridden by the JSON objects in ``self.data.kwargs``,
    ``self.data.evo_kwargs`` and ``self.data.b4msa_kwargs`` respectively.
    The fitted model is written with ``save_model`` to
    ``self.data.output_file``.
    """
    fnames = self.data.training_set
    if not isinstance(fnames, list):
        fnames = [fnames]

    D, Y = [], []
    for fname in fnames:
        docs, labels = [], []
        for tweet in tweet_iterator(fname):
            docs.append(tweet)
            labels.append(tweet[self._klass])
        D.append(docs)
        Y.append(labels)

    # A test set that is a path to a file is read into memory; anything
    # else (including None) is handed to ``fit`` as is.
    test_set = self.data.test_set
    if test_set is not None and os.path.isfile(test_set):
        test_set = list(tweet_iterator(test_set))

    def _with_overrides(defaults, raw_json):
        # Overlay the JSON object in raw_json (when given) on defaults.
        if raw_json is not None:
            defaults.update(json.loads(raw_json))
        return defaults

    kwargs = _with_overrides({"n_jobs": self.data.n_jobs}, self.data.kwargs)
    evo_kwargs = _with_overrides(
        {
            "tmpdir": self.data.output_file + '_dir',
            "fitness_function": 'macro-F1',
        },
        self.data.evo_kwargs,
    )
    b4msa_kwargs = _with_overrides({}, self.data.b4msa_kwargs)

    evo = base.EvoMSA(b4msa_args=b4msa_kwargs, evodag_args=evo_kwargs,
                      **kwargs)
    evo.exogenous = self._exogenous
    exogenous_models = self.data.exogenous_model
    if exogenous_models is not None:
        evo.exogenous_model = [self.load_model(x) for x in exogenous_models]
    evo.fit(D, Y, test_set=test_set)
    # The exogenous attribute is cleared before the model is saved.
    evo.exogenous = None
    save_model(evo, self.data.output_file)
from sklearn.metrics import recall_score
from sklearn.model_selection import KFold
from EvoMSA import base
from microtc.utils import tweet_iterator
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Load every tweet of the training file and collect the 'klass' labels.
D = list(tweet_iterator("semeval2017_En_train.json"))
y = np.array([tweet['klass'] for tweet in D])

# Shuffled K-fold split; hy receives the prediction made for each tweet
# while it is part of the held-out fold.
kf = KFold(shuffle=True)
hy = np.empty_like(y)
for train, test in kf.split(D):
    train_docs = [D[i] for i in train]
    train_labels = [D[i]['klass'] for i in train]
    test_docs = [D[i] for i in test]
    ensemble = base.EvoMSA(Emo=True,
                           stacked_method="sklearn.naive_bayes.GaussianNB",
                           lang="en")
    m = ensemble.fit(train_docs, train_labels)
    hy[test] = m.predict(test_docs)

# Per-class recall over the out-of-fold predictions.
recall_score(y, hy, average=None)