def test_negative_selection_false():
    """Check when ``random_selection`` is asked for a negative selection.

    With ``negative_selection=False`` the custom population must have been
    called with ``negative=True`` (it leaves the ``_llamo`` mark); with
    ``negative_selection=True`` the mark must never be created.
    """
    from EvoDAG import EvoDAG
    from EvoDAG.population import SteadyState
    import numpy as np

    class P(SteadyState):
        def random_selection(self, negative=False):
            # Record that a negative selection was requested.
            if negative:
                self._llamo = True
            return np.random.randint(self.popsize)

    Xt = X.copy()
    y = cl.copy()
    shared = dict(seed=11, popsize=10, orthogonal_selection=True,
                  population_class=P, classifier=False,
                  early_stopping_rounds=10)

    m = EvoDAG.init(negative_selection=False, **shared).fit(Xt, y)
    assert not m._negative_selection
    assert not m._p._negative_selection
    assert m._p._llamo

    m = EvoDAG.init(negative_selection=True, **shared).fit(Xt, y)
    try:
        m._p._llamo
    except AttributeError:
        pass
    else:
        assert False
def test_init_evodag():
    """Fit a default ``EvoDAG`` on the module data and require accuracy > 0.9."""
    from EvoDAG.model import EvoDAG
    model = EvoDAG().fit(X, cl)
    hy = model.predict(X)
    accuracy = (cl == hy).mean()
    print(accuracy, cl, hy)
    assert accuracy > 0.9
    default_nargs()
def store_model(self, kw):
    """Train a single model or an ensemble and pickle it to the model file.

    With ``ensemble_size == 1`` one ``EvoDAG`` is fitted (using
    ``self.data.seed`` when it is non-negative; note this writes ``seed``
    into *kw*).  Otherwise models are trained with consecutive seeds until
    ``ensemble_size`` of them have ``size >= self.data.min_size``; smaller
    models are discarded and replaced using the following seeds.

    The model, ``self.word2id`` and ``self.label2id`` are then dumped, in
    that order, into the gzip file returned by ``self.get_model_file()``.

    :param kw: keyword arguments forwarded to ``EvoDAG``.
    """
    if self.data.ensemble_size == 1:
        if self.data.seed >= 0:
            kw['seed'] = self.data.seed
        self.evo = EvoDAG(**kw).fit(self.X, self.y, test_set=self.Xtest)
        self.model = self.evo.model()
    else:
        min_size = self.data.min_size
        esize = self.data.ensemble_size
        init = self.data.seed
        end = init + esize
        evo = []
        while len(evo) < esize:
            args = [(x, kw, self.X, self.y, self.Xtest)
                    for x in range(init, end)]
            if self.data.cpu_cores == 1:
                models = [init_evodag(x) for x in tqdm(args, total=len(args))]
            else:
                p = Pool(self.data.cpu_cores, maxtasksperchild=1)
                try:
                    models = list(tqdm(p.imap_unordered(init_evodag, args),
                                       total=len(args)))
                except BaseException:
                    # Do not leave worker processes behind when a task fails.
                    p.terminate()
                    raise
                else:
                    p.close()
                finally:
                    p.join()
            evo.extend(x for x in models if x.size >= min_size)
            # Next round trains only the missing models, with fresh seeds.
            init = end
            end = init + (esize - len(evo))
        self.model = Ensemble(evo)
    model_file = self.get_model_file()
    with gzip.open(model_file, 'w') as fpt:
        pickle.dump(self.model, fpt)
        pickle.dump(self.word2id, fpt)
        pickle.dump(self.label2id, fpt)
def test_model_hist():
    """Compare the output variables of the traced node with the ``Model`` copy.

    Each original variable must equal the model's one, either directly or
    through the model's ``_map`` translation.
    """
    from EvoDAG import EvoDAG
    from EvoDAG.base import Model

    def as_list(value):
        return value if isinstance(value, list) else [value]

    y = cl.copy()
    gp = EvoDAG(generations=np.inf, multiple_outputs=True,
                tournament_size=2, early_stopping_rounds=-1,
                seed=1, popsize=30)
    gp = gp.fit(X[:-10], y[:-10], test_set=X[-10:])
    hist = gp.population.hist
    trace = gp.trace(gp.population.estopping)
    # Read the original variables before the Model is built from hist.
    a = as_list(hist[trace[-1]].variable)
    m = Model(trace, hist)
    b = as_list(m._hist[-1].variable)
    print([(node, node.height) for node in m._hist])
    print((m._map, a, b))
    for v1, v2 in zip(a, b):
        expected = m._map[v1] if v1 in m._map else v1
        assert expected == v2
def test_share_inputs():
    """A fitted classifier created with ``share_inputs=True`` keeps the flag."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    options = dict(classifier=True, multiple_outputs=True,
                   popsize=5, share_inputs=True)
    gp = EvoDAG(**options)
    gp.fit(X, labels)
    assert gp._share_inputs
def test_multiple_outputs_error_rate_ts():
    """The ``'ER'`` fitness of an offspring is minus its training error rate.

    The error rate is recomputed by hand on the elements selected by
    ``gp._mask_ts`` and compared with ``-a.fitness``.
    """
    from EvoDAG import EvoDAG
    from EvoDAG.node import Add, Min, Max
    y = cl.copy()
    gp = EvoDAG(generations=np.inf, tournament_size=2,
                function_set=[Add, Min, Max], early_stopping_rounds=100,
                time_limit=0.9, multiple_outputs=True,
                fitness_function='ER', seed=0, popsize=100)
    gp.X = X[:-1]
    gp.nclasses(y[:-1])
    gp.y = y[:-1]
    gp.create_population()
    a = gp.random_offspring()
    hys = SparseArray.argmax(a.hy)
    hy = np.array(hys.full_array())
    # ``np.bool`` was removed from NumPy; the builtin ``bool`` is equivalent.
    mask = np.array(gp._mask_ts.full_array()).astype(bool)
    error_rate = (y[:-1][mask] != hy[mask]).mean()
    print(-a.fitness, error_rate)
    assert_almost_equals(-a.fitness, error_rate)
def rs_evodag(args_X_y):
    """Evaluate one parameter configuration with seeds 0, 1 and 2.

    :param args_X_y: tuple ``(args, X, y)``.
    :returns: ``(fit, args)`` where ``fit`` holds the three ``fitness_vs``
        values and ``args`` gains a ``'_time'`` entry with the elapsed seconds.
    """
    args, X, y = args_X_y
    search = RandomParameterSearch
    started = time.time()
    fit = []
    for seed in range(3):
        params = search.process_params(args)
        model = EvoDAG(seed=seed, **params).fit(X, y).model()
        fit.append(model.fitness_vs)
    args['_time'] = time.time() - started
    gc.collect()
    return fit, args
def test_min_class():
    """``min_class`` of the bagging fitness must be 2 for the sliced data."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    options = dict(generations=np.inf, tournament_size=2,
                   early_stopping_rounds=100, time_limit=0.9,
                   multiple_outputs=True, seed=0, popsize=100)
    gp = EvoDAG(**options)
    gp.y = labels[:-1]
    gp.X = X[:-1]
    assert gp._bagging_fitness.min_class == 2
def test_transform_to_mo():
    """``transform_to_mo`` must produce one column per distinct label."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    options = dict(generations=np.inf, tournament_size=2,
                   early_stopping_rounds=100, time_limit=0.9,
                   multiple_outputs=True, seed=0, popsize=10000)
    gp = EvoDAG(**options)
    gp.nclasses(labels)
    distinct = np.unique(labels)
    transformed = gp._bagging_fitness.transform_to_mo(labels)
    assert distinct.shape[0] == transformed.shape[1]
def test_inputs_func_argument_regression():
    """``create_population`` must instantiate the given input function.

    ``Error`` raises ``RuntimeError`` on construction, so the population
    creation is expected to fail with that exception.
    """
    from EvoDAG import EvoDAG

    class Error:
        nargs = 2
        min_nargs = 2
        classification = True
        regression = True

        def __init__(self, *args, **kwargs):
            raise RuntimeError('aqui')

    y = cl.copy()
    y[y == 0] = -1
    y[y > -1] = 1
    gp = EvoDAG(classifier=False, multiple_outputs=False, pr_variable=0,
                input_functions=[Error], popsize=5, share_inputs=True)
    gp.X = X
    gp.nclasses(y)
    gp.y = y
    try:
        gp.create_population()
    except RuntimeError:
        pass
    else:
        assert False
def test_two_instances():
    """Regression fit where only the last two targets are positive."""
    from EvoDAG import EvoDAG
    y = cl.copy()
    y[:-2] = -1
    y[-2:] = 1
    # Keep the regression functions that have a truthy ``nargs``.
    function_set = []
    for func in EvoDAG()._function_set:
        if func.regression and func.nargs:
            function_set.append(func)
    options = dict(generations=np.inf, tournament_size=2, classifier=False,
                   function_set=function_set, early_stopping_rounds=-1,
                   seed=0, popsize=10)
    gp = EvoDAG(**options).fit(X, y, test_set=X)
    assert gp
def test_classification_mo2():
    """Fit on targets already given as a list of ``SparseArray`` columns.

    The labels are first converted with ``transform_to_mo``; the fitted
    model's decision function must then return three outputs.
    """
    from EvoDAG import EvoDAG
    options = dict(generations=np.inf, tournament_size=2,
                   early_stopping_rounds=10, time_limit=0.9,
                   multiple_outputs=True, all_inputs=True,
                   remove_raw_inputs=False, seed=0, popsize=10000)
    y = cl.copy()
    gp = EvoDAG(**options)
    gp.X = X
    gp.nclasses(y)
    y = gp._bagging_fitness.transform_to_mo(y)
    y = [SparseArray.fromlist(column) for column in y.T]
    gp = EvoDAG(**options).fit(X, y)
    m = gp.model()
    print([(node, node._variable, node.height) for node in m._hist])
    assert len(m.decision_function(gp.X)) == 3
def test_g_recall():
    """Check the ``'g_recall'`` fitness against a NumPy re-computation.

    The expected score is the product of the per-class recalls minus one,
    computed on the ``_mask_ts`` indices for ``fitness`` and on the elements
    where the mask is zero for ``fitness_vs``.
    """
    from EvoDAG import EvoDAG
    gp = EvoDAG(generations=np.inf, tournament_size=2,
                early_stopping_rounds=100, time_limit=0.9,
                multiple_outputs=True, seed=0, popsize=500)
    gp.y = cl.copy()
    gp.X = X
    gp.create_population()
    off = gp.random_offspring()
    nclasses = gp._bagging_fitness.nclasses

    def expected_score(selector):
        predicted = np.array(SparseArray.argmax(off.hy).full_array())[selector]
        truth = np.array(gp._y_klass.full_array())[selector]
        recall = np.array([(predicted[truth == k] == k).mean()
                           for k in range(nclasses)])
        return np.prod(recall) - 1

    score = expected_score(np.array(gp._mask_ts.index))
    gp._fitness_function = 'g_recall'
    gp._bagging_fitness.set_fitness(off)
    assert_almost_equals(score, off.fitness)

    score = expected_score(np.array(gp._mask_ts.full_array()) == 0)
    assert_almost_equals(score, off.fitness_vs)
def test_process_params():
    """Parameters sampled by the random search must reach ``EvoDAG``.

    Values stored as objects with a ``__name__`` are compared by name.
    """
    from EvoDAG.utils import RandomParameterSearch
    from EvoDAG import EvoDAG
    rs = RandomParameterSearch(npoints=1)
    points = [point for point in rs]
    args = points[0]
    evo = EvoDAG(**rs.process_params(args))
    params = evo.get_params()
    for key, value in args.items():
        if key not in params:
            continue
        current = params[key]
        print(value, current)
        if hasattr(current, '__name__'):
            assert value == current.__name__
        else:
            assert value == current
def test_multiple_outputs2():
    """A multiple-output fit yields a ``Model`` and keeps three targets."""
    from EvoDAG import EvoDAG
    from EvoDAG.model import Model
    labels = cl.copy()
    options = dict(generations=np.inf, tournament_size=2,
                   early_stopping_rounds=100, time_limit=0.9,
                   multiple_outputs=True, seed=0, popsize=10000)
    gp = EvoDAG(**options).fit(X, labels, test_set=X)
    m = gp.model()
    assert isinstance(m, Model)
    assert len(gp.y) == 3
def rs_evodag(args_X_y):
    """Evaluate one parameter configuration with seeds 0, 1 and 2.

    A seed whose training raises ``RuntimeError`` scores ``-np.inf``.

    :param args_X_y: tuple ``(args, X, y)``.
    :returns: ``(fit, args)`` where ``fit`` holds the three scores and
        ``args`` gains a ``'_time'`` entry with the elapsed seconds.
    """
    args, X, y = args_X_y
    search = RandomParameterSearch
    started = time.time()
    fit = []
    for seed in range(3):
        try:
            params = search.process_params(args)
            score = EvoDAG(seed=seed, **params).fit(X, y).model().fitness_vs
        except RuntimeError:
            score = -np.inf
        fit.append(score)
    args['_time'] = time.time() - started
    gc.collect()
    return fit, args
def test_a_precision():
    """Check ``Score.a_precision`` and the ``'a_precision'`` fitness.

    The mean per-class precision is recomputed with NumPy on the
    ``_mask_ts`` indices (training) and on the elements where the mask is
    zero (validation).
    """
    from EvoDAG.cython_utils import Score
    from EvoDAG import EvoDAG
    gp = EvoDAG(generations=np.inf, tournament_size=2,
                early_stopping_rounds=100, time_limit=0.9,
                multiple_outputs=True, seed=0, popsize=500)
    gp.y = cl.copy()
    gp.X = X
    gp.create_population()
    off = gp.random_offspring()
    nclasses = gp._bagging_fitness.nclasses

    def mean_precision(selector):
        predicted = np.array(SparseArray.argmax(off.hy).full_array())[selector]
        truth = np.array(gp._y_klass.full_array())[selector]
        per_class = np.array([(truth[predicted == k] == k).mean()
                              for k in range(nclasses)])
        return np.mean(per_class)

    expected = mean_precision(np.array(gp._mask_ts.index))
    f1 = Score(nclasses)
    mf1, mf1_v = f1.a_precision(gp._y_klass, SparseArray.argmax(off.hy),
                                gp._mask_ts.index)
    assert_almost_equals(expected, mf1)

    gp._fitness_function = 'a_precision'
    gp._bagging_fitness.set_fitness(off)
    assert_almost_equals(mf1 - 1, off.fitness)

    expected = mean_precision(np.array(gp._mask_ts.full_array()) == 0)
    assert_almost_equals(expected - 1, off.fitness_vs)
def test_multiple_outputs_predict():
    """Predictions of a multiple-output model only contain known labels."""
    from EvoDAG import EvoDAG
    y = cl.copy()
    options = dict(generations=np.inf, tournament_size=2,
                   multiple_outputs=True, early_stopping_rounds=-1,
                   seed=0, popsize=10)
    gp = EvoDAG(**options).fit(X[:-10], y[:-10], test_set=X[-10:])
    m = gp.model()
    assert m.multiple_outputs
    hy = m.predict(X)
    known = np.unique(y)
    for label in np.unique(hy):
        assert label in known
def test_multiple_outputs_decision_function():
    """The decision function returns three outputs, all of them finite."""
    from EvoDAG import EvoDAG
    y = cl.copy()
    options = dict(generations=np.inf, tournament_size=2,
                   multiple_outputs=True, early_stopping_rounds=-1,
                   seed=0, popsize=10)
    gp = EvoDAG(**options).fit(X[:-10], y[:-10], test_set=X[-10:])
    m = gp.model()
    assert m.multiple_outputs
    outputs = m.decision_function(X)
    assert len(outputs) == 3
    for output in outputs:
        assert output.isfinite()
def test_gp_population_full():
    """``create_random_ind_full`` with a scripted sequence of nodes.

    ``Population2`` hands out ``Sin`` then ``Add`` as functions and the
    variables 0 then 2 as terminals, so the resulting individual is fully
    determined and can be checked position by position.
    """
    Add.nargs = 2
    Mul.nargs = 2
    from EvoDAG.gp import Population
    from EvoDAG import EvoDAG
    fs = EvoDAG()._function_set

    class Population2(Population):
        def __init__(self, *args, **kwargs):
            super(Population2, self).__init__(*args, **kwargs)
            # Both lists are consumed from the end with ``pop``.
            self._funcs = [Add, Sin]
            self._terms = [2, 0]

        def random_function(self):
            func = self._funcs.pop()
            if func.nargs == 1:
                return func(0, weight=1)
            return func(range(func.nargs), weight=np.ones(func.nargs))

        def random_terminal(self):
            return Variable(self._terms.pop(), 1)

    pop = Population2(fs, nterminals=3)
    nodes = pop.create_random_ind_full(depth=2)
    assert len(pop._funcs) == 0
    assert len(pop._terms) == 0
    assert isinstance(nodes[0], Sin)
    assert isinstance(nodes[1], Add)
    assert nodes[2].variable == 0
    assert nodes[3].variable == 2
    ind = Individual(nodes)
    print(X.shape, ind.individual)
    hy = ind.decision_function(X)
    assert hy.isfinite()
    default_nargs()
def test_init_evodag_extras():
    """Keyword arguments given to ``EvoDAG.init`` must reach the instance."""
    from EvoDAG import EvoDAG
    from test_command_line import default_nargs
    options = dict(seed=10, popsize=10, early_stopping_rounds=10)
    model = EvoDAG.init(**options).fit(X, cl)
    assert model.popsize == 10
    default_nargs()
def test_finite():
    """With ``_finite`` switched off the fit must still reach accuracy > 0.9."""
    from EvoDAG import EvoDAG
    evo = EvoDAG.init()
    evo._finite = False
    evo.fit(X, cl)
    predicted = evo.predict(X)
    accuracy = (predicted == cl).mean()
    assert accuracy > 0.9
def init_evodag(seed_args_X_y_test):
    """Train (or load from disk) the model for one seed.

    :param seed_args_X_y_test: tuple
        ``(seed, args, X, y, test, dirname)``.  When ``dirname`` is not
        ``None`` the model is cached in ``<dirname>/<seed>.evodag``.
    :returns: the object unpickled from the cache file when it exists and
        can be read, otherwise the freshly trained ``EvoDAG(...).model()``.
    """
    seed, args, X, y, test, dirname = seed_args_X_y_test
    cache = None
    if dirname is not None:
        cache = os.path.join(dirname, '%s.evodag' % seed)
        if os.path.isfile(cache):
            with gzip.open(cache) as fpt:
                try:
                    return pickle.load(fpt)
                except Exception:
                    # Unreadable cache: fall through and train again.
                    pass
    model = EvoDAG(seed=seed, **args).fit(X, y, test_set=test).model()
    gc.collect()
    if cache is not None:
        with gzip.open(cache, 'w') as fpt:
            pickle.dump(model, fpt)
    return model
def test_popsize_nvar():
    """``popsize='nvar'`` means number of inputs plus input functions."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    gp = EvoDAG.init(popsize='nvar', time_limit=5)
    print(X.shape)
    gp.fit(X, labels)
    default_nargs()
    expected = X.shape[1] + len(gp._input_functions)
    assert gp.population._popsize == expected
def test_model_nvar():
    """The model remembers the number of inputs and rejects fewer columns."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    gp = EvoDAG(classifier=True, multiple_outputs=True,
                popsize=5, share_inputs=True)
    gp.fit(X, labels)
    assert gp._share_inputs
    m = gp.model()
    print(X.shape)
    assert m.nvar == X.shape[1]
    try:
        m.predict(X[:, :3])
    except RuntimeError:
        pass
    else:
        assert False
def test_add_repeated_args():
    """An offspring never uses more arguments than there are inputs.

    ``nargs`` of ``Add``, ``Min`` and ``Max`` is raised to 10 in turn; the
    offspring's ``_variable`` must still be no longer than ``X.shape[1]``.
    """
    from EvoDAG import EvoDAG
    from EvoDAG.node import Add, Min, Max
    y = cl.copy()
    for ff in [Add, Min, Max]:
        ff.nargs = 10
        try:
            gp = EvoDAG(generations=np.inf, tournament_size=2,
                        early_stopping_rounds=100, time_limit=0.9,
                        classifier=False, all_inputs=True,
                        function_set=[ff], pr_variable=1,
                        seed=0, popsize=10000)
            gp.X = X
            gp.y = y
            gp.create_population()
            print(gp.population.population)
            node = gp.random_offspring()
            print(node, node._variable, X.shape)
            assert len(node._variable) <= X.shape[1]
        finally:
            # ``nargs`` is a class attribute: reset it even when the check
            # fails so the override does not leak into other tests.
            ff.nargs = 2
def test_finite():
    """With ``_finite`` switched off the model must reach accuracy > 0.9."""
    from EvoDAG import EvoDAG
    evo = EvoDAG.init()
    evo._finite = False
    evo.fit(X, cl)
    m = evo.model()
    predicted = m.predict(X)
    outputs = [out.full_array() for out in m.decision_function(np.array(X))]
    print((predicted == cl).mean(), outputs)
    assert (predicted == cl).mean() > 0.9
def test_X_list():
    """``fit`` accepts the inputs as a plain list of lists."""
    from EvoDAG import EvoDAG
    from test_command_line import default_nargs
    options = dict(seed=10, popsize=10, early_stopping_rounds=10)
    model = EvoDAG.init(**options).fit(X.tolist(), cl)
    assert model.popsize == 10
    default_nargs()
    print(X.shape, len(model.X))
    assert len(model.X) == 4
def test_SteadyState_generation():
    """Three replacements with ``popsize=2`` leave the generation at 2."""
    from EvoDAG import EvoDAG
    y = cl.copy()
    y[y != 1] = -1
    options = dict(population_class='SteadyState', all_inputs=True,
                   classifier=False, early_stopping_rounds=1, popsize=2)
    gp = EvoDAG(**options)
    gp.X = X
    gp.y = y
    gp.create_population()
    for _ in range(3):
        offspring = gp.random_offspring()
        gp.replace(offspring)
    assert gp.population.generation == 2
def process_params(a):
    """Translate a flat parameter dict into ``EvoDAG`` keyword arguments.

    Keys that name a class of ``EvoDAG()._function_set`` select that
    function: a non-boolean value is stored as the class' ``nargs`` and any
    truthy value adds the class to ``function_set``.  Every other key is
    copied unchanged.

    :param a: mapping of parameter name to value.
    :returns: dict of keyword arguments including ``'function_set'``, listed
        in the order of ``EvoDAG()._function_set``.
    """
    from EvoDAG import EvoDAG
    by_name = dict((func.__name__, func) for func in EvoDAG()._function_set)
    selected = []
    args = {}
    for key, value in a.items():
        if key not in by_name:
            args[key] = value
            continue
        func = by_name[key]
        if not isinstance(value, bool):
            func.nargs = value
        if value:
            selected.append(func)
    args['function_set'] = [func for func in EvoDAG()._function_set
                            if func in selected]
    return args
def test_macro_F1():
    """Check ``Score.a_F1`` and the ``'macro-F1'`` fitness with NumPy.

    Precision, recall and the mean F1 are recomputed on the ``_mask_ts``
    indices (training) and on the elements where the mask is zero
    (validation); non-finite per-class F1 values count as 0.
    """
    from EvoDAG.cython_utils import Score
    from EvoDAG import EvoDAG
    gp = EvoDAG(generations=np.inf, tournament_size=2,
                early_stopping_rounds=100, time_limit=0.9,
                multiple_outputs=True, seed=2, popsize=1000)
    gp.y = cl.copy()
    gp.X = X
    gp.create_population()
    off = gp.random_offspring()
    nclasses = gp._bagging_fitness.nclasses

    def precision_recall(selector):
        predicted = np.array(SparseArray.argmax(off.hy).full_array())[selector]
        truth = np.array(gp._y_klass.full_array())[selector]
        prec = np.array([(truth[predicted == k] == k).mean()
                         for k in range(nclasses)])
        rec = np.array([(predicted[truth == k] == k).mean()
                        for k in range(nclasses)])
        return prec, rec

    def mean_f1(prec, rec):
        scores = (2 * prec * rec) / (prec + rec)
        scores[~np.isfinite(scores)] = 0
        return np.mean(scores)

    precision, recall = precision_recall(np.array(gp._mask_ts.index))
    print(precision, recall)
    f1 = Score(nclasses)
    mf1, mf1_v = f1.a_F1(gp._y_klass, SparseArray.argmax(off.hy),
                         gp._mask_ts.index)
    for expected, got in zip(precision, f1.precision):
        if np.isfinite(expected):
            assert_almost_equals(expected, got)
    for expected, got in zip(recall, f1.recall):
        if np.isfinite(expected):
            assert_almost_equals(expected, got)
    assert_almost_equals(mean_f1(precision, recall), mf1)
    print(f1.precision, f1.recall, mf1, mf1_v)

    gp._fitness_function = 'macro-F1'
    gp._bagging_fitness.set_fitness(off)
    assert_almost_equals(off.fitness, mf1 - 1)
    assert_almost_equals(off.fitness_vs, mf1_v - 1)

    validation = np.array(gp._mask_ts.full_array()) == 0
    precision, recall = precision_recall(validation)
    assert_almost_equals(mean_f1(precision, recall) - 1, off.fitness_vs)
def test_process_params():
    """Parameters sampled by the random search must reach ``EvoDAG``.

    ``generations`` is expected to be ``np.inf``; list values are compared
    element-wise by ``__name__`` and objects with a ``__name__`` by name.
    """
    from EvoDAG.utils import RandomParameterSearch
    from EvoDAG import EvoDAG
    rs = RandomParameterSearch(npoints=1)
    points = [point for point in rs]
    args = points[0]
    evo = EvoDAG(**rs.process_params(args))
    params = evo.get_params()
    for key, value in args.items():
        if key not in params:
            continue
        if key == 'generations':
            value = np.inf
        current = params[key]
        print(key, value, current)
        if isinstance(value, list):
            for name, obj in zip(value, current):
                assert name == obj.__name__
        elif hasattr(current, '__name__'):
            assert value == current.__name__
        else:
            assert value == current
def test_all_init_popsize():
    """``init_popsize`` is ``len(X)`` with ``all_inputs``, else ``popsize``."""
    from EvoDAG import EvoDAG
    y = cl.copy()
    y[y != 1] = -1

    def build(**extra):
        gp = EvoDAG(population_class='Generational',
                    early_stopping_rounds=1, popsize=2, **extra)
        gp.X = X
        gp.y = y
        gp.create_population()
        return gp

    gp = build(all_inputs=True)
    assert gp.init_popsize == len(gp.X)
    gp = build()
    assert gp.init_popsize == gp.popsize
def test_SteadyState_generation():
    """Three replacements with ``popsize=2`` leave the generation at 2."""
    from EvoDAG import EvoDAG
    y = cl.copy()
    y[y != 1] = -1
    options = dict(population_class='SteadyState', all_inputs=True,
                   early_stopping_rounds=1, popsize=2)
    gp = EvoDAG(**options)
    gp.X = X
    gp.y = y
    gp.create_population()
    for _ in range(3):
        offspring = gp.random_offspring()
        gp.replace(offspring)
    assert gp.population.generation == 2
def test_all_inputs2():
    """Generational population: ``len(X)`` individuals first, then ``popsize``."""
    from EvoDAG import EvoDAG
    y = cl.copy()
    y[y != 1] = -1
    gp = EvoDAG(population_class='Generational', all_inputs=True, popsize=3)
    gp.X = X
    gp.y = y
    gp.create_population()
    print(len(gp.population.population), len(gp.X))
    assert len(gp.population.population) == len(gp.X)
    for _ in range(gp.popsize):
        offspring = gp.random_offspring()
        gp.replace(offspring)
    assert len(gp.population.population) == gp.popsize
def test_all_inputs():
    """With ``all_inputs`` the population starts short and grows to 10."""
    from EvoDAG import EvoDAG
    y = cl.copy()
    y[y != 1] = -1
    for population_class in ('Generational', 'SteadyState'):
        gp = EvoDAG(population_class=population_class,
                    all_inputs=True, popsize=10)
        gp.X = X
        gp.y = y
        gp.create_population()
        assert len(gp.population.population) < 10
        start = gp.population.popsize
        stop = gp.population._popsize
        for _ in range(start, stop):
            offspring = gp.random_offspring()
            gp.replace(offspring)
        assert len(gp.population.population) == 10
def test_clean():
    """Nodes still needed keep their ``hy`` after ten replacements.

    Every history node that is the early-stopping individual or belongs to
    the current population must have a non-``None`` ``hy``.
    """
    from EvoDAG import EvoDAG
    y = cl.copy()
    y[y != 1] = -1
    for population_class in ('Generational', 'SteadyState'):
        gp = EvoDAG(population_class=population_class, popsize=5)
        gp.X = X
        gp.y = y
        gp.create_population()
        for _ in range(10):
            offspring = gp.random_offspring()
            gp.replace(offspring)
        members = gp.population.population
        best = gp.population.estopping
        for node in gp.population._hist:
            print(node == best, node in members, node, '-'*10, node.fitness)
            if node == best or node in members:
                assert node.hy is not None
        assert gp.population.estopping.hy is not None
def store_model(self, kw):
    """Train a single model or an ensemble and pickle it to the model file.

    With ``ensemble_size == 1`` one ``EvoDAG`` is fitted; otherwise one
    model per seed in ``[seed, seed + ensemble_size)`` is trained, serially
    or in a process pool of ``cpu_cores`` workers, and wrapped in an
    ``Ensemble``.  The model, ``self.word2id`` and ``self.label2id`` are
    then dumped, in that order, into the gzip file returned by
    ``self.get_model_file()``.

    :param kw: keyword arguments forwarded to ``EvoDAG``.
    """
    if self.data.ensemble_size == 1:
        self.evo = EvoDAG(**kw).fit(self.X, self.y, test_set=self.Xtest)
        self.model = self.evo.model()
    else:
        seed = self.data.seed
        esize = self.data.ensemble_size
        args = [(x, kw, self.X, self.y, self.Xtest)
                for x in range(seed, seed + esize)]
        if self.data.cpu_cores == 1:
            evo = [init_evodag(x) for x in tqdm(args, total=len(args))]
        else:
            p = Pool(self.data.cpu_cores, maxtasksperchild=1)
            try:
                evo = list(tqdm(p.imap_unordered(init_evodag, args),
                                total=len(args)))
            except BaseException:
                # Do not leave worker processes behind when a task fails.
                p.terminate()
                raise
            else:
                p.close()
            finally:
                p.join()
        self.model = Ensemble(evo)
    model_file = self.get_model_file()
    with gzip.open(model_file, 'w') as fpt:
        pickle.dump(self.model, fpt)
        pickle.dump(self.word2id, fpt)
        pickle.dump(self.label2id, fpt)
def test_generational_generation():
    """Offspring accumulate in ``_inner`` and replace the population at once.

    After ``popsize - 1`` replacements ``_inner`` holds them all; the next
    one empties ``_inner`` and the population equals the offspring in order.
    """
    from EvoDAG.population import Generational
    from EvoDAG import EvoDAG
    gp = EvoDAG(population_class='Generational', popsize=10)
    gp.X = X
    y = cl.copy()
    y[y != 1] = -1
    gp.y = y
    gp.create_population()
    assert isinstance(gp.population, Generational)

    produced = []

    def breed():
        offspring = gp.random_offspring()
        produced.append(offspring)
        gp.replace(offspring)

    for _ in range(gp.popsize - 1):
        breed()
    assert len(gp.population._inner) == (gp.popsize - 1)
    breed()
    assert len(gp.population._inner) == 0
    for current, expected in zip(gp.population.population, produced):
        assert current == expected
def init_evodag(seed_args_X_y_test):
    """Train one ``EvoDAG`` and return its model.

    :param seed_args_X_y_test: tuple ``(seed, args, X, y, test)``; ``args``
        are extra ``EvoDAG`` keyword arguments and ``test`` is passed to
        ``fit`` as ``test_set``.
    :returns: the result of ``model()`` on the fitted instance.
    """
    seed, args, X, y, test = seed_args_X_y_test
    model = EvoDAG(seed=seed, **args).fit(X, y, test_set=test).model()
    gc.collect()
    return model
def test_models_fitness_vs():
    """The model's ``fitness_vs`` is the median over its inner models."""
    from EvoDAG import EvoDAG
    evo = EvoDAG(popsize=10, early_stopping_rounds=2).fit(X, cl)
    inner_scores = [inner.fitness_vs for inner in evo.model().models]
    assert evo.model().fitness_vs == np.median(inner_scores)
def test_random_generations():
    """``random_generations`` reaches the population and is honoured.

    The custom class ``P`` raises from ``random_selection``, so producing
    and replacing an offspring with it must fail with ``RuntimeError``; the
    built-in populations must reach generation 2 after three replacements.
    """
    from EvoDAG import EvoDAG
    from EvoDAG.population import SteadyState

    class P(SteadyState):
        def random_selection(self, negative=False):
            raise RuntimeError('!')

    y = cl.copy()
    y[y != 1] = -1
    for pop in ['SteadyState', 'Generational', P]:
        gp = EvoDAG(population_class=pop, all_inputs=True,
                    random_generations=1, early_stopping_rounds=1,
                    popsize=2)
        gp.X = X
        gp.y = y
        gp.create_population()
        print(gp.population._random_generations)
        assert gp.population._random_generations == 1
        if pop == P:
            try:
                offspring = gp.random_offspring()
                gp.replace(offspring)
            except RuntimeError:
                pass
            else:
                assert False
            continue
        for _ in range(3):
            gp.replace(gp.random_offspring())
        assert gp.population.generation == 2
class CommandLine(object):
    """Helpers shared by the EvoDAG command-line tools.

    The methods register ``argparse`` options on ``self.parser``, read
    CSV/JSON data sets and store fitted models.  ``self.parser``,
    ``self.word2id`` and ``self.label2id`` are used but never created in
    this class -- presumably subclasses set them up (TODO confirm).
    """

    def version(self):
        """Register the ``--version`` flag."""
        pa = self.parser.add_argument
        pa('--version',
           action='version', version='EvoDAG %s' % evodag.__version__)

    def output_file(self):
        """Register ``-o/--output-file``."""
        self.parser.add_argument('-o', '--output-file',
                                 help='File to store the test set',
                                 dest='output_file',
                                 default=None,
                                 type=str)

    def ensemble(self):
        """Register ``-n/--ensemble-size`` (default 1)."""
        self.parser.add_argument('-n', '--ensemble-size',
                                 help='Ensemble size',
                                 dest='ensemble_size',
                                 default=1,
                                 type=int)

    def cores(self):
        """Register ``-u/--cpu-cores`` (default 1)."""
        self.parser.add_argument('-u', '--cpu-cores',
                                 help='Number of cores',
                                 dest='cpu_cores',
                                 default=1,
                                 type=int)

    def test_set(self):
        """Register ``-t/--test_set``."""
        cdn = 'File containing the test set on csv.'
        self.parser.add_argument('-t', '--test_set',
                                 default=None, type=str,
                                 help=cdn)

    def init_params(self):
        """Register the task selector and the training options."""
        pa = self.parser.add_argument
        g = self.parser.add_mutually_exclusive_group(required=True)
        g.add_argument('-C', '--classifier', dest='classifier',
                       help='The task is classification (default)',
                       default=True,
                       action="store_true")
        g.add_argument('-R', '--regressor', dest='regressor',
                       help='The task is regression',
                       action="store_true")
        pa('-e', '--early_stopping_rounds', dest='early_stopping_rounds',
           type=int,
           help='Early stopping rounds')
        pa('-p', '--popsize', dest='popsize',
           type=int, help='Population size')
        pa('-s', '--seed', dest='seed',
           default=0,
           type=int, help='Seed')
        pa('-j', '--json', dest='json',
           action="store_true",
           help='Whether the inputs are in json format',
           default=False)
        pa('--evolution', dest='population_class',
           help="Type of evolution (SteadyState|Generational)",
           type=str)
        pa('--all-inputs', dest='all_inputs',
           help="The initial population has all the available inputs ",
           action="store_true")
        pa('--time-limit', dest='time_limit',
           help='Time limit in seconds', type=int)
        pa('--random-generations', dest='random_generations',
           help='Number of random generations', type=int)

    def training_set(self):
        """Register the optional positional ``training_set`` argument."""
        cdn = 'File containing the training set on csv.'
        self.parser.add_argument('training_set',
                                 nargs='?',
                                 default=None,
                                 help=cdn)

    def parse_args(self):
        """Parse the command line into ``self.data`` and run ``main``.

        Selecting the regressor switches ``self.data.classifier`` off.
        """
        self.data = self.parser.parse_args()
        if getattr(self.data, 'regressor', False):
            self.data.classifier = False
        self.main()

    def convert(self, x):
        """Return *x* as a float, or its integer id in ``self.word2id``.

        Unknown non-numeric values get the next free id.
        """
        try:
            return float(x)
        except ValueError:
            if x not in self.word2id:
                self.word2id[x] = len(self.word2id)
            return self.word2id[x]

    def convert_label(self, x):
        """Return *x* as a float, or its integer id in ``self.label2id``.

        Unknown non-numeric labels get the next free id.
        """
        try:
            return float(x)
        except ValueError:
            if x not in self.label2id:
                self.label2id[x] = len(self.label2id)
            return self.label2id[x]

    def read_data(self, fname):
        """Read a CSV file into a list of rows of strings.

        Lines are stripped of surrounding whitespace and empty lines are
        skipped; fields are split on ``','`` without any quoting rules.
        """
        rows = []
        with open(fname, 'r') as fpt:
            for line in fpt:
                line = line.strip()
                if line:
                    rows.append(line.split(','))
        return rows

    @staticmethod
    def _num_terms(a):
        """Return the number of input variables described by record *a*.

        An explicit ``'num_terms'`` entry wins; otherwise it is the number
        of keys, not counting ``'klass'``.
        """
        if 'num_terms' in a:
            return a['num_terms']
        num_terms = len(a)
        if 'klass' in a:
            num_terms -= 1
        return num_terms

    def read_data_json(self, fname):
        """Read a file with one JSON object per line (optionally gzipped).

        Integer-like keys are variable indices; ``'klass'`` or ``'y'`` holds
        the label.  The number of variables is taken from the first record.

        :returns: ``(X, y)`` where ``X`` is a list with one ``SparseArray``
            per variable and ``y`` is an array of labels, or ``None`` when
            the file has no labels.  An empty file yields ``([], None)``.
        """
        import json
        X = None
        y = []
        if fname.endswith('.gz'):
            with gzip.open(fname, 'rb') as fpt:
                lines = fpt.readlines()
        else:
            with open(fname, 'r') as fpt:
                lines = fpt.readlines()
        for row, d in enumerate(lines):
            try:
                # Lines are bytes when they come from the gzip file.
                a = json.loads(str(d, encoding='utf-8'))
            except TypeError:
                a = json.loads(d)
            if X is None:
                X = [list() for _ in range(self._num_terms(a))]
            for k, v in a.items():
                try:
                    k = int(k)
                    X[k].append((row, self.convert(v)))
                except ValueError:
                    if k in ('klass', 'y'):
                        y.append(self.convert_label(v))
        if X is None:
            # Empty file: there is no record to size the variables from.
            X = []
        num_rows = len(lines)
        X = [SparseArray.init_index_data([i[0] for i in x],
                                         [i[1] for i in x],
                                         num_rows)
             for x in X]
        y = np.array(y) if y else None
        return X, y

    def read_training_set(self):
        """Load ``self.data.training_set`` into ``self.X`` and ``self.y``.

        :returns: ``True`` when data was read, ``None`` when no training
            set was given.
        """
        if self.data.training_set is None:
            return
        if self.data.json:
            self.X, self.y = self.read_data_json(self.data.training_set)
            return True
        X = []
        y = []
        # The last column of every CSV row is the label.
        for row in self.read_data(self.data.training_set):
            X.append([self.convert(i) for i in row[:-1]])
            y.append(self.convert_label(row[-1]))
        self.X = np.array(X)
        self.y = np.array(y)
        return True

    def read_test_set(self):
        """Load ``self.data.test_set`` into ``self.Xtest``.

        :returns: ``True`` when data was read, ``False`` when no test set
            was given.
        """
        if self.data.test_set is None:
            return False
        if self.data.json:
            self.Xtest, _ = self.read_data_json(self.data.test_set)
            return True
        rows = self.read_data(self.data.test_set)
        self.Xtest = np.array([[self.convert(i) for i in row]
                               for row in rows])
        return True

    def get_model_file(self):
        """Return the model file name, deriving it when it is unset.

        The default is the training-set name up to its first ``'.'`` plus
        ``'.evodag.gz'``.
        """
        if self.data.model_file is None:
            # NOTE(review): splitting on the first '.' also truncates paths
            # such as './train.csv' -- confirm whether that is intended.
            a = self.data.training_set.split('.')[0]
            self.data.model_file = a + '.evodag.gz'
        return self.data.model_file

    def store_model(self, kw):
        """Train a single model or an ensemble and pickle it.

        With ``ensemble_size == 1`` one ``EvoDAG`` is fitted; otherwise one
        model per seed in ``[seed, seed + ensemble_size)`` is trained,
        serially or in a process pool of ``cpu_cores`` workers, and wrapped
        in an ``Ensemble``.  The model, ``self.word2id`` and
        ``self.label2id`` are dumped, in that order, into the gzip file
        returned by ``get_model_file``.

        :param kw: keyword arguments forwarded to ``EvoDAG``.
        """
        if self.data.ensemble_size == 1:
            self.evo = EvoDAG(**kw).fit(self.X, self.y,
                                        test_set=self.Xtest)
            self.model = self.evo.model()
        else:
            seed = self.data.seed
            esize = self.data.ensemble_size
            args = [(x, kw, self.X, self.y, self.Xtest)
                    for x in range(seed, seed + esize)]
            if self.data.cpu_cores == 1:
                evo = [init_evodag(x) for x in tqdm(args, total=len(args))]
            else:
                p = Pool(self.data.cpu_cores, maxtasksperchild=1)
                try:
                    evo = list(tqdm(p.imap_unordered(init_evodag, args),
                                    total=len(args)))
                except BaseException:
                    # Do not leave workers behind when a task fails.
                    p.terminate()
                    raise
                else:
                    p.close()
                finally:
                    p.join()
            self.model = Ensemble(evo)
        model_file = self.get_model_file()
        with gzip.open(model_file, 'w') as fpt:
            pickle.dump(self.model, fpt)
            pickle.dump(self.word2id, fpt)
            pickle.dump(self.label2id, fpt)

    def get_output_file(self):
        """Return the output file name, defaulting to
        ``<test_set>.evodag.csv``."""
        if self.data.output_file is None:
            self.data.output_file = self.data.test_set + '.evodag.csv'
        return self.data.output_file

    def id2label(self, x):
        """Map predicted ids back to the original labels.

        *x* is returned unchanged for regression or when no label was ever
        encoded in ``self.label2id``.
        """
        if not self.data.classifier:
            return x
        if len(self.label2id) == 0:
            return x
        i2w = {index: label for label, index in self.label2id.items()}
        return [i2w[int(i)] for i in x]

    def main(self):
        """Entry point invoked by ``parse_args``; does nothing here."""
        pass