Ejemplo n.º 1
0
def test_negative_selection_false():
    """Check when the population is asked for a negative selection.

    A ``SteadyState`` subclass sets ``_llamo`` whenever ``random_selection``
    is called with ``negative=True``.  The test expects that mark to be
    present after fitting with ``negative_selection=False`` and to be
    missing after fitting with ``negative_selection=True``.
    """
    from EvoDAG import EvoDAG
    from EvoDAG.population import SteadyState
    import numpy as np

    class P(SteadyState):
        def random_selection(self, negative=False):
            # Leave a trace only for the negative variant of the call.
            if negative:
                self._llamo = True
            return np.random.randint(self.popsize)

    Xt = X.copy()
    y = cl.copy()
    # Both runs share every argument except ``negative_selection``.
    shared = dict(seed=11,
                  popsize=10,
                  orthogonal_selection=True,
                  population_class=P,
                  classifier=False,
                  early_stopping_rounds=10)

    model = EvoDAG.init(negative_selection=False, **shared).fit(Xt, y)
    assert not model._negative_selection
    assert not model._p._negative_selection
    assert model._p._llamo

    model = EvoDAG.init(negative_selection=True, **shared).fit(Xt, y)
    try:
        model._p._llamo
    except AttributeError:
        # The attribute was never created: expected outcome.
        pass
    else:
        assert False
Ejemplo n.º 2
0
def test_init_evodag():
    """Fit a default ``EvoDAG`` on (X, cl) and require > 0.9 agreement."""
    from EvoDAG.model import EvoDAG
    model = EvoDAG().fit(X, cl)
    predicted = model.predict(X)
    agreement = (cl == predicted).mean()
    print(agreement, cl, predicted)
    assert agreement > 0.9
    default_nargs()
Ejemplo n.º 3
0
 def store_model(self, kw):
     """Fit a single model or an ensemble and pickle it to the model file.

     With ``ensemble_size == 1`` one ``EvoDAG`` is fitted (``kw['seed']`` is
     overwritten with ``self.data.seed`` when that seed is non-negative;
     note this mutates the caller's dict).  Otherwise models are fitted
     for consecutive seeds, in batches, until ``ensemble_size`` of them
     have ``size >= self.data.min_size``.

     The resulting model, ``self.word2id`` and ``self.label2id`` are
     written, in that order, to the gzip file given by
     ``self.get_model_file()``.

     :param kw: keyword arguments forwarded to ``EvoDAG``.
     """
     if self.data.ensemble_size == 1:
         if self.data.seed >= 0:
             kw['seed'] = self.data.seed
         self.evo = EvoDAG(**kw).fit(self.X, self.y, test_set=self.Xtest)
         self.model = self.evo.model()
     else:
         min_size = self.data.min_size
         esize = self.data.ensemble_size
         init = self.data.seed
         end = init + esize
         evo = []
         while len(evo) < esize:
             args = [(x, kw, self.X, self.y, self.Xtest)
                     for x in range(init, end)]
             if self.data.cpu_cores == 1:
                 batch = [init_evodag(x)
                          for x in tqdm(args, total=len(args))]
             else:
                 p = Pool(self.data.cpu_cores, maxtasksperchild=1)
                 try:
                     batch = list(tqdm(p.imap_unordered(init_evodag, args),
                                       total=len(args)))
                 finally:
                     # Close the pool even when a worker raised.
                     p.close()
             # Keep only the models that are large enough.
             evo.extend(x for x in batch if x.size >= min_size)
             # Next batch: fresh seeds, one per model still missing.
             init = end
             end = init + (esize - len(evo))
         self.model = Ensemble(evo)
     model_file = self.get_model_file()
     with gzip.open(model_file, 'w') as fpt:
         pickle.dump(self.model, fpt)
         pickle.dump(self.word2id, fpt)
         pickle.dump(self.label2id, fpt)
Ejemplo n.º 4
0
def test_model_hist():
    """Compare the variables of the traced node with those kept by Model.

    The variables of the last node of the trace must equal the variables
    of the last node in ``Model._hist``, after translating through
    ``Model._map`` whenever a variable appears in that map.
    """
    from EvoDAG import EvoDAG
    from EvoDAG.base import Model

    def as_list(value):
        # A node may expose a single variable or a list of them.
        return value if isinstance(value, list) else [value]

    y = cl.copy()
    params = dict(generations=np.inf,
                  multiple_outputs=True,
                  tournament_size=2,
                  early_stopping_rounds=-1,
                  seed=1,
                  popsize=30)
    gp = EvoDAG(**params).fit(X[:-10], y[:-10], test_set=X[-10:])
    hist = gp.population.hist
    trace = gp.trace(gp.population.estopping)
    original_vars = as_list(hist[trace[-1]].variable)
    m = Model(trace, hist)
    model_vars = as_list(m._hist[-1].variable)
    print([(x, x.height) for x in m._hist])
    print((m._map, original_vars, model_vars))
    for before, after in zip(original_vars, model_vars):
        expected = m._map[before] if before in m._map else before
        assert expected == after
Ejemplo n.º 5
0
def test_share_inputs():
    """``share_inputs=True`` must still be set after fitting."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    options = dict(classifier=True,
                   multiple_outputs=True,
                   popsize=5,
                   share_inputs=True)
    gp = EvoDAG(**options)
    gp.fit(X, labels)
    assert gp._share_inputs
Ejemplo n.º 6
0
def test_multiple_outputs_error_rate_ts():
    """Check the 'ER' fitness against a hand-computed error rate.

    A random offspring is created on all samples but the last one; its
    negated fitness must equal the fraction of samples selected by
    ``gp._mask_ts`` whose arg-max output differs from the label.
    """
    from EvoDAG import EvoDAG
    from EvoDAG.node import Add, Min, Max
    y = cl.copy()
    gp = EvoDAG(generations=np.inf,
                tournament_size=2,
                function_set=[Add, Min, Max],
                early_stopping_rounds=100,
                time_limit=0.9,
                multiple_outputs=True,
                fitness_function='ER',
                seed=0,
                popsize=100)
    gp.X = X[:-1]
    gp.nclasses(y[:-1])
    gp.y = y[:-1]
    gp.create_population()
    a = gp.random_offspring()
    hys = SparseArray.argmax(a.hy)
    hy = np.array(hys.full_array())
    # Use the builtin ``bool``: the ``np.bool`` alias no longer exists in
    # current NumPy releases and raises AttributeError there.
    mask = np.array(gp._mask_ts.full_array()).astype(bool)
    error_rate = (y[:-1][mask] != hy[mask]).mean()
    print(-a.fitness, error_rate)
    assert_almost_equals(-a.fitness, error_rate)
Ejemplo n.º 7
0
def rs_evodag(args_X_y):
    """Fit EvoDAG with seeds 0, 1 and 2 for one parameter configuration.

    :param args_X_y: tuple ``(args, X, y)``; ``args`` is processed with
        ``RandomParameterSearch.process_params`` before each fit.
    :return: ``(fit, args)`` where ``fit`` lists ``fitness_vs`` of the three
        models and ``args`` gained a ``'_time'`` entry with the elapsed
        seconds.
    """
    args, X, y = args_X_y
    rs = RandomParameterSearch
    started = time.time()
    fit = [
        EvoDAG(seed=seed,
               **rs.process_params(args)).fit(X, y).model().fitness_vs
        for seed in range(3)
    ]
    args['_time'] = time.time() - started
    gc.collect()
    return fit, args
Ejemplo n.º 8
0
def test_min_class():
    """``min_class`` must be 2 once y and X (last sample dropped) are set."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    settings = dict(generations=np.inf,
                    tournament_size=2,
                    early_stopping_rounds=100,
                    time_limit=0.9,
                    multiple_outputs=True,
                    seed=0,
                    popsize=100)
    gp = EvoDAG(**settings)
    gp.y = labels[:-1]
    gp.X = X[:-1]
    assert gp._bagging_fitness.min_class == 2
Ejemplo n.º 9
0
def test_transform_to_mo():
    """``transform_to_mo`` must yield one column per distinct label."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    settings = dict(generations=np.inf,
                    tournament_size=2,
                    early_stopping_rounds=100,
                    time_limit=0.9,
                    multiple_outputs=True,
                    seed=0,
                    popsize=10000)
    gp = EvoDAG(**settings)
    gp.nclasses(labels)
    distinct = np.unique(labels)
    encoded = gp._bagging_fitness.transform_to_mo(labels)
    assert distinct.shape[0] == encoded.shape[1]
Ejemplo n.º 10
0
def test_inputs_func_argument_regression():
    """``input_functions`` must be instantiated by ``create_population``.

    The only input function given raises ``RuntimeError`` from its
    constructor, so creating the population is expected to raise it.
    """
    from EvoDAG import EvoDAG

    class Error:
        nargs = 2
        min_nargs = 2
        classification = True
        regression = True

        def __init__(self, *args, **kwargs):
            raise RuntimeError('aqui')

    # Turn the labels into a -1 / 1 target.
    y = cl.copy()
    y[y == 0] = -1
    y[y > -1] = 1
    options = dict(classifier=False,
                   multiple_outputs=False,
                   pr_variable=0,
                   input_functions=[Error],
                   popsize=5,
                   share_inputs=True)
    gp = EvoDAG(**options)
    gp.X = X
    gp.nclasses(y)
    gp.y = y
    try:
        gp.create_population()
    except RuntimeError:
        pass
    else:
        assert False
Ejemplo n.º 11
0
def test_two_instances():
    """Fit a regressor when only the last two samples have target 1."""
    from EvoDAG import EvoDAG
    target = cl.copy()
    target[:-2] = -1
    target[-2:] = 1
    # Keep the default functions flagged for regression with truthy nargs.
    function_set = []
    for func in EvoDAG()._function_set:
        if func.regression and func.nargs:
            function_set.append(func)
    options = dict(generations=np.inf,
                   tournament_size=2,
                   classifier=False,
                   function_set=function_set,
                   early_stopping_rounds=-1,
                   seed=0,
                   popsize=10)
    gp = EvoDAG(**options).fit(X, target, test_set=X)
    assert gp
Ejemplo n.º 12
0
def test_classification_mo2():
    """Fit on targets already given as one SparseArray per output.

    The first ``EvoDAG`` is only used to turn the labels into the
    multiple-output representation; a second one, with the same
    parameters, is fitted on it and its model must return three outputs.
    """
    from EvoDAG import EvoDAG
    y = cl.copy()
    # Identical configuration for the helper instance and the fitted one.
    params = dict(generations=np.inf,
                  tournament_size=2,
                  early_stopping_rounds=10,
                  time_limit=0.9,
                  multiple_outputs=True,
                  all_inputs=True,
                  remove_raw_inputs=False,
                  seed=0,
                  popsize=10000)
    gp = EvoDAG(**params)
    gp.X = X
    gp.nclasses(y)
    outputs = gp._bagging_fitness.transform_to_mo(y)
    outputs = [SparseArray.fromlist(column) for column in outputs.T]
    gp = EvoDAG(**params).fit(X, outputs)
    m = gp.model()
    print([(x, x._variable, x.height) for x in m._hist])
    assert len(m.decision_function(gp.X)) == 3
Ejemplo n.º 13
0
def test_g_recall():
    """Check the 'g_recall' fitness against a hand-computed value.

    The expected score is the product of the per-class recalls minus one,
    computed on the samples indexed by ``gp._mask_ts`` for ``fitness`` and
    on the samples where that mask is zero for ``fitness_vs``.
    """
    from EvoDAG import EvoDAG

    def recall_product_minus_one(truth, pred, nclasses):
        recall = np.array([(pred[truth == k] == k).mean()
                           for k in range(nclasses)])
        return np.prod(recall) - 1

    labels = cl.copy()
    gp = EvoDAG(generations=np.inf,
                tournament_size=2,
                early_stopping_rounds=100,
                time_limit=0.9,
                multiple_outputs=True,
                seed=0,
                popsize=500)
    gp.y = labels
    gp.X = X
    gp.create_population()
    off = gp.random_offspring()

    # Samples indexed by the mask.
    predicted = SparseArray.argmax(off.hy)
    selected = np.array(gp._mask_ts.index)
    truth = np.array(gp._y_klass.full_array())[selected]
    predicted = np.array(predicted.full_array())[selected]
    nclasses = gp._bagging_fitness.nclasses
    score = recall_product_minus_one(truth, predicted, nclasses)
    gp._fitness_function = 'g_recall'
    gp._bagging_fitness.set_fitness(off)
    assert_almost_equals(score, off.fitness)

    # Samples where the mask is zero.
    selected = np.array(gp._mask_ts.full_array()) == 0
    truth = np.array(gp._y_klass.full_array())[selected]
    predicted = SparseArray.argmax(off.hy)
    predicted = np.array(predicted.full_array())[selected]
    score = recall_product_minus_one(truth, predicted, nclasses)
    assert_almost_equals(score, off.fitness_vs)
Ejemplo n.º 14
0
def test_process_params():
    """Parameters sampled by the search must reach ``EvoDAG.get_params``.

    Values exposing ``__name__`` in the estimator are compared by that
    name; everything else is compared directly.
    """
    from EvoDAG.utils import RandomParameterSearch
    from EvoDAG import EvoDAG
    rs = RandomParameterSearch(npoints=1)
    args = [point for point in rs][0]
    evo = EvoDAG(**rs.process_params(args))
    params = evo.get_params()
    for key, requested in args.items():
        if key not in params:
            continue
        stored = params[key]
        print(requested, stored)
        expected = getattr(stored, '__name__', stored)
        assert requested == expected
Ejemplo n.º 15
0
def test_multiple_outputs2():
    """A multiple-output fit returns a ``Model`` and keeps three targets."""
    from EvoDAG import EvoDAG
    from EvoDAG.model import Model
    labels = cl.copy()
    settings = dict(generations=np.inf,
                    tournament_size=2,
                    early_stopping_rounds=100,
                    time_limit=0.9,
                    multiple_outputs=True,
                    seed=0,
                    popsize=10000)
    gp = EvoDAG(**settings).fit(X, labels, test_set=X)
    model = gp.model()
    assert isinstance(model, Model)
    assert len(gp.y) == 3
Ejemplo n.º 16
0
def rs_evodag(args_X_y):
    """Fit EvoDAG with seeds 0, 1 and 2, tolerating ``RuntimeError``.

    :param args_X_y: tuple ``(args, X, y)``; ``args`` is processed with
        ``RandomParameterSearch.process_params`` before each fit.
    :return: ``(fit, args)`` where ``fit`` holds ``fitness_vs`` per seed
        (``-np.inf`` for a run that raised ``RuntimeError``) and ``args``
        gained a ``'_time'`` entry with the elapsed seconds.
    """
    args, X, y = args_X_y
    rs = RandomParameterSearch

    def score(seed):
        # A failed run is scored with the worst possible fitness.
        try:
            evo = EvoDAG(seed=seed, **rs.process_params(args)).fit(X, y)
            return evo.model().fitness_vs
        except RuntimeError:
            return -np.inf

    started = time.time()
    fit = [score(seed) for seed in range(3)]
    args['_time'] = time.time() - started
    gc.collect()
    return fit, args
Ejemplo n.º 17
0
def test_a_precision():
    """Check ``Score.a_precision`` and the 'a_precision' fitness.

    The mean of the per-class precisions is computed by hand on the
    samples indexed by ``gp._mask_ts`` (compared with ``Score`` and with
    ``fitness``) and on the samples where the mask is zero (compared
    with ``fitness_vs``).
    """
    from EvoDAG.cython_utils import Score
    from EvoDAG import EvoDAG

    def per_class_precision(truth, pred, nclasses):
        return np.array([(truth[pred == k] == k).mean()
                         for k in range(nclasses)])

    labels = cl.copy()
    gp = EvoDAG(generations=np.inf,
                tournament_size=2,
                early_stopping_rounds=100,
                time_limit=0.9,
                multiple_outputs=True,
                seed=0,
                popsize=500)
    gp.y = labels
    gp.X = X
    gp.create_population()
    off = gp.random_offspring()

    # Samples indexed by the mask.
    predicted = SparseArray.argmax(off.hy)
    selected = np.array(gp._mask_ts.index)
    truth = np.array(gp._y_klass.full_array())[selected]
    predicted = np.array(predicted.full_array())[selected]
    nclasses = gp._bagging_fitness.nclasses
    precision = per_class_precision(truth, predicted, nclasses)
    scorer = Score(nclasses)
    mean_p, mean_p_v = scorer.a_precision(gp._y_klass,
                                          SparseArray.argmax(off.hy),
                                          gp._mask_ts.index)
    assert_almost_equals(np.mean(precision), mean_p)
    gp._fitness_function = 'a_precision'
    gp._bagging_fitness.set_fitness(off)
    assert_almost_equals(mean_p - 1, off.fitness)

    # Samples where the mask is zero.
    selected = np.array(gp._mask_ts.full_array()) == 0
    truth = np.array(gp._y_klass.full_array())[selected]
    predicted = SparseArray.argmax(off.hy)
    predicted = np.array(predicted.full_array())[selected]
    precision = per_class_precision(truth, predicted, nclasses)
    assert_almost_equals(np.mean(precision) - 1, off.fitness_vs)
Ejemplo n.º 18
0
def test_multiple_outputs_predict():
    """Every predicted value must be one of the labels present in ``cl``."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    settings = dict(generations=np.inf,
                    tournament_size=2,
                    multiple_outputs=True,
                    early_stopping_rounds=-1,
                    seed=0,
                    popsize=10)
    # The last ten samples are held out as the test set.
    gp = EvoDAG(**settings).fit(X[:-10], labels[:-10], test_set=X[-10:])
    model = gp.model()
    assert model.multiple_outputs
    predicted = model.predict(X)
    known = np.unique(labels)
    for value in np.unique(predicted):
        assert value in known
Ejemplo n.º 19
0
def test_multiple_outputs_decision_function():
    """The decision function returns three outputs, all of them finite."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    settings = dict(generations=np.inf,
                    tournament_size=2,
                    multiple_outputs=True,
                    early_stopping_rounds=-1,
                    seed=0,
                    popsize=10)
    # The last ten samples are held out as the test set.
    gp = EvoDAG(**settings).fit(X[:-10], labels[:-10], test_set=X[-10:])
    model = gp.model()
    assert model.multiple_outputs
    outputs = model.decision_function(X)
    assert len(outputs) == 3
    for output in outputs:
        assert output.isfinite()
Ejemplo n.º 20
0
def test_gp_population_full():
    """Build a depth-2 individual with the "full" method.

    ``Population2`` hands out functions and terminals from fixed lists
    (popping from the end), so the layout of the generated individual
    can be asserted exactly.
    """
    Add.nargs = 2
    Mul.nargs = 2
    from EvoDAG.gp import Population
    from EvoDAG import EvoDAG
    function_set = EvoDAG()._function_set

    class Population2(Population):
        def __init__(self, *args, **kwargs):
            super(Population2, self).__init__(*args, **kwargs)
            self._funcs = [Add, Sin]
            self._terms = [2, 0]

        def random_function(self):
            func = self._funcs.pop()
            if func.nargs == 1:
                return func(0, weight=1)
            return func(range(func.nargs), weight=np.ones(func.nargs))

        def random_terminal(self):
            return Variable(self._terms.pop(), 1)

    population = Population2(function_set, nterminals=3)
    nodes = population.create_random_ind_full(depth=2)
    # Both scripted lists must have been consumed completely.
    assert len(population._funcs) == 0
    assert len(population._terms) == 0
    assert isinstance(nodes[0], Sin)
    assert isinstance(nodes[1], Add)
    assert nodes[2].variable == 0
    assert nodes[3].variable == 2
    individual = Individual(nodes)
    print(X.shape, individual.individual)
    output = individual.decision_function(X)
    assert output.isfinite()
    default_nargs()
Ejemplo n.º 21
0
def test_init_evodag_extras():
    """Extra keywords given to ``EvoDAG.init`` must reach the instance."""
    from EvoDAG import EvoDAG
    from test_command_line import default_nargs
    extras = dict(seed=10, popsize=10, early_stopping_rounds=10)
    model = EvoDAG.init(**extras).fit(X, cl)
    assert model.popsize == 10
    default_nargs()
Ejemplo n.º 22
0
def test_finite():
    """With ``_finite`` switched off the fit must still agree > 0.9."""
    from EvoDAG import EvoDAG
    estimator = EvoDAG.init()
    estimator._finite = False
    estimator.fit(X, cl)
    predicted = estimator.predict(X)
    agreement = (predicted == cl).mean()
    assert agreement > 0.9
Ejemplo n.º 23
0
def init_evodag(seed_args_X_y_test):
    """Fit one EvoDAG model, optionally cached on disk.

    :param seed_args_X_y_test: tuple ``(seed, args, X, y, test, dirname)``.
        When ``dirname`` is not ``None`` the model is looked up in, and
        later written to, ``<dirname>/<seed>.evodag`` (gzip + pickle).
    :return: the unpickled cached model when it can be read, otherwise the
        freshly fitted model.
    """
    seed, args, X, y, test, dirname = seed_args_X_y_test
    use_cache = dirname is not None
    if use_cache:
        output = os.path.join(dirname, '%s.evodag' % seed)
        if os.path.isfile(output):
            with gzip.open(output) as fpt:
                try:
                    return pickle.load(fpt)
                except Exception:
                    # The cached file could not be loaded; fit again below.
                    pass
    model = EvoDAG(seed=seed, **args).fit(X, y, test_set=test).model()
    gc.collect()
    if use_cache:
        with gzip.open(output, 'w') as fpt:
            pickle.dump(model, fpt)
    return model
Ejemplo n.º 24
0
def test_popsize_nvar():
    """``popsize='nvar'``: columns of X plus the number of input functions."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    gp = EvoDAG.init(popsize='nvar', time_limit=5)
    print(X.shape)
    gp.fit(X, labels)
    default_nargs()
    expected = X.shape[1] + len(gp._input_functions)
    assert gp.population._popsize == expected
Ejemplo n.º 25
0
def test_model_nvar():
    """The model records the number of inputs and rejects fewer columns."""
    from EvoDAG import EvoDAG
    labels = cl.copy()
    options = dict(classifier=True,
                   multiple_outputs=True,
                   popsize=5,
                   share_inputs=True)
    gp = EvoDAG(**options)
    gp.fit(X, labels)
    assert gp._share_inputs
    model = gp.model()
    print(X.shape)
    assert model.nvar == X.shape[1]
    # Predicting with only three columns is expected to raise.
    try:
        model.predict(X[:, :3])
    except RuntimeError:
        pass
    else:
        assert False
Ejemplo n.º 26
0
def test_add_repeated_args():
    """An offspring must not use more variables than X has columns.

    Each of ``Add``, ``Min`` and ``Max`` is temporarily given ``nargs = 10``
    and used as the only function of the set.  ``nargs`` is a class
    attribute, so it is set back to 2 in a ``finally`` clause: a failing
    assertion must not leave the modified value behind for other tests.
    """
    from EvoDAG import EvoDAG
    from EvoDAG.node import Add, Min, Max
    y = cl.copy()
    for ff in [Add, Min, Max]:
        ff.nargs = 10
        try:
            gp = EvoDAG(
                generations=np.inf,
                tournament_size=2,
                early_stopping_rounds=100,
                time_limit=0.9,
                classifier=False,
                all_inputs=True,
                function_set=[ff],
                pr_variable=1,
                seed=0,
                popsize=10000)
            gp.X = X
            gp.y = y
            gp.create_population()
            print(gp.population.population)
            node = gp.random_offspring()
            print(node, node._variable, X.shape)
            assert len(node._variable) <= X.shape[1]
        finally:
            ff.nargs = 2
Ejemplo n.º 27
0
def test_finite():
    """With ``_finite`` switched off the model must still agree > 0.9."""
    from EvoDAG import EvoDAG
    estimator = EvoDAG.init()
    estimator._finite = False
    estimator.fit(X, cl)
    model = estimator.model()
    predicted = model.predict(X)
    outputs = [x.full_array() for x in model.decision_function(np.array(X))]
    print((predicted == cl).mean(), outputs)
    assert (predicted == cl).mean() > 0.9
Ejemplo n.º 28
0
def test_X_list():
    """Fitting accepts X as a nested list; ``m.X`` then has four entries."""
    from EvoDAG import EvoDAG
    from test_command_line import default_nargs
    extras = dict(seed=10, popsize=10, early_stopping_rounds=10)
    model = EvoDAG.init(**extras).fit(X.tolist(), cl)
    assert model.popsize == 10
    default_nargs()
    stored = len(model.X)
    print(X.shape, stored)
    assert stored == 4
Ejemplo n.º 29
0
def test_SteadyState_generation():
    """After three replacements with ``popsize=2`` the generation is 2."""
    from EvoDAG import EvoDAG
    target = cl.copy()
    target[target != 1] = -1
    options = dict(population_class='SteadyState',
                   all_inputs=True,
                   classifier=False,
                   early_stopping_rounds=1,
                   popsize=2)
    gp = EvoDAG(**options)
    gp.X = X
    gp.y = target
    gp.create_population()
    for _ in range(3):
        offspring = gp.random_offspring()
        gp.replace(offspring)
    assert gp.population.generation == 2
Ejemplo n.º 30
0
 def process_params(a):
     """Translate sampled parameters into ``EvoDAG`` keyword arguments.

     Keys naming a function of the default function set are not passed
     through: a non-boolean value is stored as that function's ``nargs``
     (a class attribute, so the change is global) and a truthy value
     selects the function.  Every other key is copied unchanged.  The
     selected functions are returned under ``'function_set'``, ordered
     as in the default function set.

     :param a: mapping with the sampled parameters.
     :return: dict of keyword arguments; ``'function_set'`` is only
         present when ``a`` is not empty.
     """
     from EvoDAG import EvoDAG
     fs_class = {x.__name__: x for x in EvoDAG()._function_set}
     function_set = []
     args = {}
     for k, v in a.items():
         if k in fs_class:
             if not isinstance(v, bool):
                 fs_class[k].nargs = v
             if v:
                 function_set.append(fs_class[k])
         else:
             args[k] = v
     # Build the final list once instead of once per key.  The guard keeps
     # the previous result for an empty ``a``: no 'function_set' entry.
     if a:
         args['function_set'] = [x for x in EvoDAG()._function_set
                                 if x in function_set]
     return args
Ejemplo n.º 31
0
def test_macro_F1():
    """Check ``Score.a_F1`` and the 'macro-F1' fitness by hand.

    Per-class precision and recall are computed with NumPy, compared with
    the values kept by ``Score`` and combined into the mean F1, where
    non-finite per-class values count as 0.  The same is done on the
    samples where ``gp._mask_ts`` is zero to check ``fitness_vs``.
    """
    from EvoDAG.cython_utils import Score
    from EvoDAG import EvoDAG

    def precision_recall(truth, pred, nclasses):
        precision = np.array([(truth[pred == k] == k).mean()
                              for k in range(nclasses)])
        recall = np.array([(pred[truth == k] == k).mean()
                           for k in range(nclasses)])
        return precision, recall

    def mean_f1(precision, recall):
        per_class = (2 * precision * recall) / (precision + recall)
        per_class[~np.isfinite(per_class)] = 0
        return np.mean(per_class)

    def check_finite(expected, observed):
        # Non-finite hand-computed entries are skipped.
        for mine, theirs in zip(expected, observed):
            if np.isfinite(mine):
                assert_almost_equals(mine, theirs)

    labels = cl.copy()
    gp = EvoDAG(generations=np.inf,
                tournament_size=2,
                early_stopping_rounds=100,
                time_limit=0.9,
                multiple_outputs=True,
                seed=2,
                popsize=1000)
    gp.y = labels
    gp.X = X
    gp.create_population()
    off = gp.random_offspring()

    # Samples indexed by the mask.
    predicted = SparseArray.argmax(off.hy)
    selected = np.array(gp._mask_ts.index)
    truth = np.array(gp._y_klass.full_array())[selected]
    predicted = np.array(predicted.full_array())[selected]
    nclasses = gp._bagging_fitness.nclasses
    precision, recall = precision_recall(truth, predicted, nclasses)
    print(precision, recall)
    f1 = Score(nclasses)
    mf1, mf1_v = f1.a_F1(gp._y_klass, SparseArray.argmax(off.hy),
                         gp._mask_ts.index)
    check_finite(precision, f1.precision)
    check_finite(recall, f1.recall)
    assert_almost_equals(mean_f1(precision, recall), mf1)
    print(f1.precision, f1.recall, mf1, mf1_v)
    gp._fitness_function = 'macro-F1'
    gp._bagging_fitness.set_fitness(off)
    assert_almost_equals(off.fitness, mf1 - 1)
    assert_almost_equals(off.fitness_vs, mf1_v - 1)

    # Samples where the mask is zero.
    selected = np.array(gp._mask_ts.full_array()) == 0
    truth = np.array(gp._y_klass.full_array())[selected]
    predicted = SparseArray.argmax(off.hy)
    predicted = np.array(predicted.full_array())[selected]
    precision, recall = precision_recall(truth, predicted, nclasses)
    assert_almost_equals(mean_f1(precision, recall) - 1, off.fitness_vs)
Ejemplo n.º 32
0
def test_process_params():
    """Parameters sampled by the search must reach ``EvoDAG.get_params``.

    ``generations`` is always expected to be ``np.inf``; list values are
    compared element-wise against the ``__name__`` of the stored items;
    stored values exposing ``__name__`` are compared by that name.
    """
    from EvoDAG.utils import RandomParameterSearch
    from EvoDAG import EvoDAG
    rs = RandomParameterSearch(npoints=1)
    args = [point for point in rs][0]
    evo = EvoDAG(**rs.process_params(args))
    params = evo.get_params()
    for key, requested in args.items():
        if key not in params:
            continue
        if key == 'generations':
            requested = np.inf
        print(key, requested, params[key])
        if isinstance(requested, list):
            for name, stored in zip(requested, params[key]):
                assert name == stored.__name__
        elif hasattr(params[key], '__name__'):
            assert requested == params[key].__name__
        else:
            assert requested == params[key]
Ejemplo n.º 33
0
def test_all_init_popsize():
    """``init_popsize`` is ``len(X)`` with ``all_inputs``, else ``popsize``."""
    from EvoDAG import EvoDAG
    target = cl.copy()
    target[target != 1] = -1

    def populated(**extra):
        # Same setup for both cases; only the extra keywords differ.
        gp = EvoDAG(population_class='Generational',
                    early_stopping_rounds=1,
                    popsize=2,
                    **extra)
        gp.X = X
        gp.y = target
        gp.create_population()
        return gp

    gp = populated(all_inputs=True)
    assert gp.init_popsize == len(gp.X)
    gp = populated()
    assert gp.init_popsize == gp.popsize
Ejemplo n.º 34
0
def test_SteadyState_generation():
    """After three replacements with ``popsize=2`` the generation is 2."""
    from EvoDAG import EvoDAG
    target = cl.copy()
    target[target != 1] = -1
    options = dict(population_class='SteadyState',
                   all_inputs=True,
                   early_stopping_rounds=1,
                   popsize=2)
    gp = EvoDAG(**options)
    gp.X = X
    gp.y = target
    gp.create_population()
    for _ in range(3):
        offspring = gp.random_offspring()
        gp.replace(offspring)
    assert gp.population.generation == 2
Ejemplo n.º 35
0
def test_all_inputs2():
    """With ``all_inputs`` the population starts at ``len(X)`` members.

    After ``popsize`` replacements it must hold exactly ``popsize``.
    """
    from EvoDAG import EvoDAG
    target = cl.copy()
    target[target != 1] = -1
    gp = EvoDAG(population_class='Generational',
                all_inputs=True,
                popsize=3)
    gp.X = X
    gp.y = target
    gp.create_population()
    initial_size = len(gp.population.population)
    print(initial_size, len(gp.X))
    assert initial_size == len(gp.X)
    for _ in range(gp.popsize):
        offspring = gp.random_offspring()
        gp.replace(offspring)
    assert len(gp.population.population) == gp.popsize
Ejemplo n.º 36
0
def test_all_inputs():
    """With ``all_inputs`` the population starts below 10 and grows to 10."""
    from EvoDAG import EvoDAG
    target = cl.copy()
    target[target != 1] = -1
    for population_class in ['Generational', 'SteadyState']:
        gp = EvoDAG(population_class=population_class,
                    all_inputs=True,
                    popsize=10)
        gp.X = X
        gp.y = target
        gp.create_population()
        assert len(gp.population.population) < 10
        # One replacement per step from ``popsize`` up to ``_popsize``.
        for _ in range(gp.population.popsize,
                       gp.population._popsize):
            offspring = gp.random_offspring()
            gp.replace(offspring)
        assert len(gp.population.population) == 10
Ejemplo n.º 37
0
def test_clean():
    """Live individuals and the early-stopping one must keep ``hy``.

    After ten replacements, every entry of the population history that is
    the early-stopping individual or is still in the population must have
    a non-``None`` ``hy``.
    """
    from EvoDAG import EvoDAG
    target = cl.copy()
    target[target != 1] = -1
    for population_class in ['Generational', 'SteadyState']:
        gp = EvoDAG(population_class=population_class,
                    popsize=5)
        gp.X = X
        gp.y = target
        gp.create_population()
        for _ in range(10):
            offspring = gp.random_offspring()
            gp.replace(offspring)
        alive = gp.population.population
        best = gp.population.estopping
        for node in gp.population._hist:
            print(node == best, node in alive, node, '-'*10, node.fitness)
            if node == best or node in alive:
                assert node.hy is not None
        assert gp.population.estopping.hy is not None
Ejemplo n.º 38
0
 def store_model(self, kw):
     """Fit a single model or an ensemble and pickle it to the model file.

     With ``ensemble_size == 1`` one ``EvoDAG`` is fitted.  Otherwise one
     model is fitted per seed in ``[seed, seed + ensemble_size)``, in a
     process pool when ``cpu_cores`` is not 1, and wrapped in an
     ``Ensemble``.

     The resulting model, ``self.word2id`` and ``self.label2id`` are
     written, in that order, to the gzip file given by
     ``self.get_model_file()``.

     :param kw: keyword arguments forwarded to ``EvoDAG``.
     """
     if self.data.ensemble_size == 1:
         self.evo = EvoDAG(**kw).fit(self.X, self.y, test_set=self.Xtest)
         self.model = self.evo.model()
     else:
         seed = self.data.seed
         esize = self.data.ensemble_size
         args = [(x, kw, self.X, self.y, self.Xtest)
                 for x in range(seed, seed+esize)]
         if self.data.cpu_cores == 1:
             evo = [init_evodag(x) for x in tqdm(args, total=len(args))]
         else:
             p = Pool(self.data.cpu_cores, maxtasksperchild=1)
             try:
                 evo = list(tqdm(p.imap_unordered(init_evodag, args),
                                 total=len(args)))
             finally:
                 # Close the pool even when a worker raised.
                 p.close()
         self.model = Ensemble(evo)
     model_file = self.get_model_file()
     with gzip.open(model_file, 'w') as fpt:
         pickle.dump(self.model, fpt)
         pickle.dump(self.word2id, fpt)
         pickle.dump(self.label2id, fpt)
Ejemplo n.º 39
0
def test_generational_generation():
    """A ``Generational`` population swaps in offspring all at once.

    Offspring accumulate in ``_inner`` until ``popsize`` of them have been
    given; at that point ``_inner`` is empty and the population equals
    the offspring in the order they were produced.
    """
    from EvoDAG.population import Generational
    from EvoDAG import EvoDAG
    gp = EvoDAG(population_class='Generational',
                popsize=10)
    gp.X = X
    target = cl.copy()
    target[target != 1] = -1
    gp.y = target
    gp.create_population()
    assert isinstance(gp.population, Generational)
    produced = []
    # Fill the buffer up to one offspring short of a full generation.
    for _ in range(gp.popsize-1):
        offspring = gp.random_offspring()
        produced.append(offspring)
        gp.replace(offspring)
    assert len(gp.population._inner) == (gp.popsize - 1)
    # The offspring that completes the generation.
    offspring = gp.random_offspring()
    produced.append(offspring)
    gp.replace(offspring)
    assert len(gp.population._inner) == 0
    for member, expected in zip(gp.population.population, produced):
        assert member == expected
Ejemplo n.º 40
0
def init_evodag(seed_args_X_y_test):
    """Fit one EvoDAG and return its model.

    :param seed_args_X_y_test: tuple ``(seed, args, X, y, test)``; ``args``
        are keyword arguments for ``EvoDAG`` and ``test`` is passed as
        ``test_set`` to ``fit``.
    :return: the object returned by ``EvoDAG.model()``.
    """
    seed, args, X, y, test = seed_args_X_y_test
    model = EvoDAG(seed=seed, **args).fit(X, y, test_set=test).model()
    gc.collect()
    return model
Ejemplo n.º 41
0
def test_models_fitness_vs():
    """The model's ``fitness_vs`` is the median over its inner models."""
    from EvoDAG import EvoDAG
    evo = EvoDAG(popsize=10, early_stopping_rounds=2).fit(X, cl)
    inner_scores = []
    for inner in evo.model().models:
        inner_scores.append(inner.fitness_vs)
    expected = np.median(inner_scores)
    assert evo.model().fitness_vs == expected
Ejemplo n.º 42
0
def test_random_generations():
    """Check that ``random_generations`` reaches the population.

    For the built-in population classes three replacements must lead to
    generation 2.  For the subclass ``P``, whose ``random_selection``
    always raises, producing and replacing an offspring is expected to
    raise ``RuntimeError``.
    """
    from EvoDAG import EvoDAG
    from EvoDAG.population import SteadyState

    class P(SteadyState):
        def random_selection(self, negative=False):
            raise RuntimeError('!')

    target = cl.copy()
    target[target != 1] = -1
    for population_class in ['SteadyState', 'Generational', P]:
        gp = EvoDAG(population_class=population_class,
                    all_inputs=True, random_generations=1,
                    early_stopping_rounds=1, popsize=2)
        gp.X = X
        gp.y = target
        gp.create_population()
        print(gp.population._random_generations)
        assert gp.population._random_generations == 1
        if population_class is not P:
            for _ in range(3):
                gp.replace(gp.random_offspring())
            assert gp.population.generation == 2
            continue
        try:
            offspring = gp.random_offspring()
            gp.replace(offspring)
        except RuntimeError:
            pass
        else:
            assert False
Ejemplo n.º 43
0
class CommandLine(object):
    """Shared building blocks of the EvoDAG command-line tools.

    The methods register options on ``self.parser``, read data sets in
    CSV or JSON format and store fitted models.  ``self.parser``,
    ``self.word2id`` and ``self.label2id`` are used but never assigned
    in this class, so they must be provided before these methods run.
    """

    def version(self):
        """Register ``--version``, which reports ``evodag.__version__``."""
        pa = self.parser.add_argument
        pa('--version',
           action='version', version='EvoDAG %s' % evodag.__version__)

    def output_file(self):
        """Register ``-o/--output-file`` (``output_file``, default None)."""
        self.parser.add_argument('-o', '--output-file',
                                 help='File to store the test set',
                                 dest='output_file',
                                 default=None,
                                 type=str)

    def ensemble(self):
        """Register ``-n/--ensemble-size`` (``ensemble_size``, default 1)."""
        self.parser.add_argument('-n', '--ensemble-size',
                                 help='Ensemble size',
                                 dest='ensemble_size',
                                 default=1,
                                 type=int)

    def cores(self):
        """Register ``-u/--cpu-cores`` (``cpu_cores``, default 1)."""
        self.parser.add_argument('-u', '--cpu-cores',
                                 help='Number of cores',
                                 dest='cpu_cores',
                                 default=1,
                                 type=int)

    def test_set(self):
        """Register ``-t/--test_set``, the CSV file with the test set."""
        cdn = 'File containing the test set on csv.'
        self.parser.add_argument('-t', '--test_set',
                                 default=None, type=str,
                                 help=cdn)

    def init_params(self):
        """Register the options that configure the EvoDAG estimator.

        ``-C`` and ``-R`` form a required, mutually exclusive group;
        ``parse_args`` turns ``-R`` into ``classifier = False``.
        """
        pa = self.parser.add_argument
        g = self.parser.add_mutually_exclusive_group(required=True)
        g.add_argument('-C', '--classifier', dest='classifier',
                       help='The task is classification (default)',
                       default=True,
                       action="store_true")
        g.add_argument('-R', '--regressor', dest='regressor',
                       help='The task is regression',
                       action="store_true")
        pa('-e', '--early_stopping_rounds', dest='early_stopping_rounds',
           type=int,
           help='Early stopping rounds')
        pa('-p', '--popsize', dest='popsize',
           type=int, help='Population size')
        pa('-s', '--seed', dest='seed',
           default=0,
           type=int, help='Seed')
        pa('-j', '--json', dest='json',
           action="store_true",
           help='Whether the inputs are in json format',
           default=False)
        pa('--evolution', dest='population_class',
           help="Type of evolution (SteadyState|Generational)",
           type=str)
        pa('--all-inputs', dest='all_inputs',
           help="The initial population has all the available inputs ",
           action="store_true")
        pa('--time-limit', dest='time_limit',
           help='Time limit in seconds', type=int)
        pa('--random-generations', dest='random_generations',
           help='Number of random generations', type=int)

    def training_set(self):
        """Register the optional positional ``training_set`` argument."""
        cdn = 'File containing the training set on csv.'
        self.parser.add_argument('training_set',
                                 nargs='?',
                                 default=None,
                                 help=cdn)

    def parse_args(self):
        """Parse the command line into ``self.data`` and call ``main``."""
        self.data = self.parser.parse_args()
        # ``-R`` only sets ``regressor``; reflect it on ``classifier``.
        if hasattr(self.data, 'regressor') and self.data.regressor:
            self.data.classifier = False
        self.main()

    @staticmethod
    def _lookup_id(table, key):
        """Return the id of ``key`` in ``table``, assigning the next one."""
        if key not in table:
            table[key] = len(table)
        return table[key]

    def convert(self, x):
        """Return ``x`` as a float or, failing that, its ``word2id`` id.

        Unknown words are added to ``self.word2id`` with the next id.
        """
        try:
            return float(x)
        except ValueError:
            return self._lookup_id(self.word2id, x)

    def convert_label(self, x):
        """Return ``x`` as a float or, failing that, its ``label2id`` id.

        Unknown labels are added to ``self.label2id`` with the next id.
        """
        try:
            return float(x)
        except ValueError:
            return self._lookup_id(self.label2id, x)

    def read_data(self, fname):
        """Read a comma-separated file.

        :param fname: path of the file.
        :return: one list of string fields per non-blank line.
        """
        with open(fname, 'r') as fpt:
            lines = fpt.readlines()
        rows = []
        for line in lines:
            stripped = line.strip()
            if len(stripped):
                rows.append(stripped.split(','))
        return rows

    @staticmethod
    def _num_terms(a):
        """Return the number of input terms described by record ``a``.

        ``a['num_terms']`` is used when present; otherwise it is the number
        of keys, not counting ``'klass'``.
        """
        if 'num_terms' in a:
            num_terms = a['num_terms']
        else:
            num_terms = len(a)
            if 'klass' in a:
                num_terms -= 1
        return num_terms

    def read_data_json(self, fname):
        """Read a data set with one JSON object per line.

        Integer-like keys are treated as variable indexes; ``'klass'`` and
        ``'y'`` hold the target.  Files ending in ``.gz`` are read with
        gzip.  The number of variables is taken from the first record.

        :param fname: path of the file.
        :return: ``(X, y)``; ``X`` is a list with one ``SparseArray`` per
            variable and ``y`` is a NumPy array, or ``None`` when no
            target was found.
        """
        import json
        X = None
        y = []
        if fname.endswith('.gz'):
            with gzip.open(fname, 'rb') as fpt:
                lines = fpt.readlines()
        else:
            with open(fname, 'r') as fpt:
                lines = fpt.readlines()
        for row, d in enumerate(lines):
            try:
                # Lines read in binary mode must be decoded first.
                a = json.loads(str(d, encoding='utf-8'))
            except TypeError:
                a = json.loads(d)
            if X is None:
                X = [list() for i in range(self._num_terms(a))]
            for k, v in a.items():
                try:
                    k = int(k)
                    X[k].append((row, self.convert(v)))
                except ValueError:
                    if k == 'klass' or k == 'y':
                        y.append(self.convert_label(v))
        num_rows = len(lines)
        X = [SparseArray.init_index_data([i[0] for i in x],
                                         [i[1] for i in x],
                                         num_rows) for x in X]
        if len(y) == 0:
            y = None
        else:
            y = np.array(y)
        return X, y

    def read_training_set(self):
        """Load the training set into ``self.X`` and ``self.y``.

        In CSV format the last column is the target.

        :return: ``None`` when no training set was given, else ``True``.
        """
        if self.data.training_set is None:
            return
        if not self.data.json:
            d = self.read_data(self.data.training_set)
            X = []
            y = []
            for x in d:
                X.append([self.convert(i) for i in x[:-1]])
                y.append(self.convert_label(x[-1]))
            self.X = np.array(X)
            self.y = np.array(y)
            return True
        else:
            X, y = self.read_data_json(self.data.training_set)
            self.X = X
            self.y = y
            return True

    def read_test_set(self):
        """Load the test set into ``self.Xtest``.

        :return: ``False`` when no test set was given, else ``True``.
        """
        if self.data.test_set is None:
            return False
        if not self.data.json:
            X = self.read_data(self.data.test_set)
            self.Xtest = np.array([[self.convert(i) for i in x] for x in X])
            return True
        else:
            X, _ = self.read_data_json(self.data.test_set)
            self.Xtest = X
            return True

    def get_model_file(self):
        """Return the model file name, deriving it when not given.

        The default is the training-set path, with its file name cut at
        the first dot, plus ``.evodag.gz``.  Only the file name is cut:
        dots in the directory part (``./data/train.csv``) are kept.
        """
        if self.data.model_file is None:
            import os
            dirname, basename = os.path.split(self.data.training_set)
            stem = os.path.join(dirname, basename.split('.')[0])
            self.data.model_file = stem + '.evodag.gz'
        return self.data.model_file

    def store_model(self, kw):
        """Fit a single model or an ensemble and pickle it.

        With ``ensemble_size == 1`` one ``EvoDAG`` is fitted.  Otherwise
        one model is fitted per seed in ``[seed, seed + ensemble_size)``,
        in a process pool when ``cpu_cores`` is not 1, and wrapped in an
        ``Ensemble``.  The model, ``self.word2id`` and ``self.label2id``
        are written, in that order, to ``self.get_model_file()``.

        :param kw: keyword arguments forwarded to ``EvoDAG``.
        """
        if self.data.ensemble_size == 1:
            self.evo = EvoDAG(**kw).fit(self.X, self.y, test_set=self.Xtest)
            self.model = self.evo.model()
        else:
            seed = self.data.seed
            esize = self.data.ensemble_size
            args = [(x, kw, self.X, self.y, self.Xtest)
                    for x in range(seed, seed+esize)]
            if self.data.cpu_cores == 1:
                evo = [init_evodag(x) for x in tqdm(args, total=len(args))]
            else:
                p = Pool(self.data.cpu_cores, maxtasksperchild=1)
                try:
                    evo = list(tqdm(p.imap_unordered(init_evodag, args),
                                    total=len(args)))
                finally:
                    # Close the pool even when a worker raised.
                    p.close()
            self.model = Ensemble(evo)
        model_file = self.get_model_file()
        with gzip.open(model_file, 'w') as fpt:
            pickle.dump(self.model, fpt)
            pickle.dump(self.word2id, fpt)
            pickle.dump(self.label2id, fpt)

    def get_output_file(self):
        """Return the output file, defaulting to ``<test_set>.evodag.csv``."""
        if self.data.output_file is None:
            self.data.output_file = self.data.test_set + '.evodag.csv'
        return self.data.output_file

    def id2label(self, x):
        """Map predicted ids back to the labels seen while reading.

        ``x`` is returned untouched for regression or when no label was
        ever mapped to an id.
        """
        if not self.data.classifier:
            return x
        if len(self.label2id) == 0:
            return x
        i2w = {index: label for label, index in self.label2id.items()}
        return [i2w[int(i)] for i in x]

    def main(self):
        """Entry point called by ``parse_args``; does nothing here."""
        pass