Esempio n. 1
0
File: gp.py Progetto: jmmcd/PODI
def semantic_geometric_mutate(t, ms=0.001, rt_size=3,
                              one_tree=False, rt_method="grow"):
    """Semantic geometric mutation as defined by Moraglio et al:

    tm = t + ms * (tr1 - tr2)

    where ms is the mutation step (make it small for local search),
    and tr1 and tr2 are randomly-generated trees.

    Set one_tree=True to use tm = t + ms * tr1. Make sure ms is
    symmetric about zero in that case.

    Set rt_method="grow" to use standard GP grow method. rt_size
    will give max depth. Use "bubble_down" to generate using
    bubble-down method. rt_size will give number of nodes.

    Returns the mutated tree as a nested prefix-form list,
    ['+', t, ['*', ms, ...]]. The parent t is embedded as-is, not
    copied.

    Raises ValueError if rt_method is neither "grow" nor
    "bubble_down".
    """

    # Validate once, before any tree is generated. Previously an
    # unknown rt_method silently fell back to bubble-down for tr1 and
    # was only rejected when tr2 was needed (never with one_tree=True).
    if rt_method not in ("grow", "bubble_down"):
        raise ValueError("Unknown rt_method " + repr(rt_method))

    def random_tree():
        # bubble_down returns a sequence whose first item is the tree.
        if rt_method == "grow":
            return grow(rt_size, random)
        return bd.bubble_down(rt_size, random)[0]

    tr1 = random_tree()
    if one_tree:
        return ['+', t, ['*', ms, tr1]]
    tr2 = random_tree()
    return ['+', t, ['*', ms, ['-', tr1, tr2]]]
Esempio n. 2
0
File: lid.py Progetto: jmmcd/PODI
import random
import bubble_down

# Module-level settings: a one-element list of names and a
# name -> integer mapping. Presumably the terminal (variable) names
# and the function arities read by the tree generator -- TODO confirm
# against bubble_down.
# NOTE: `vars` shadows the builtin of the same name; the name is kept
# because code outside this excerpt may refer to it.
vars = ["x"]
fns = dict(j=2)

class Lid:
    """Score an individual by how close its depth and terminal count
    are to fixed targets.

    The score is the sum of a depth part and a terminals part. The
    depth part equals weight_depth when the depth matches the target
    and shrinks linearly with the absolute depth error. The terminals
    part is only awarded when the depth target is hit exactly; it
    equals weight_terminals for an exact terminal count and shrinks
    linearly with the absolute error.
    """

    def __init__(self, target_depth, target_terminals=256, weight_depth=50, weight_terminals=50):
        """Store the targets and weights.

        Raises ValueError unless weight_depth + weight_terminals
        equals 100.
        """
        self.target_depth = float(target_depth)
        self.target_terminals = target_terminals
        self.weight_depth = weight_depth
        self.weight_terminals = float(weight_terminals)
        total_weight = weight_depth + self.weight_terminals
        if total_weight != 100.0:
            raise ValueError('Lid weight_depth + weight_terminals != 100', total_weight)

    def __call__(self, ind):
        """Return the score of ind, a (tree, nnodes, depth) triple.

        Only nnodes and depth are used; the tree itself is ignored.
        """
        _, nnodes, actual_depth = ind
        # Explicit floor division: plain `/` only truncates on
        # Python 2 and would give a fractional terminal count on
        # Python 3. The formula presumably assumes every function
        # node is binary -- TODO confirm.
        actual_terminals = 1 + nnodes // 2
        metric_depth = self.weight_depth * (
            1.0 - float(abs(self.target_depth - actual_depth))/self.target_depth)
        metric_terminals = 0
        # Terminals only count once the depth target is met exactly.
        if self.target_depth == actual_depth:
            metric_terminals = self.weight_terminals * (
                1.0 - float(abs(self.target_terminals - actual_terminals))/self.target_terminals)
        return metric_depth + metric_terminals

# Demo executed at import time: build a Lid with a target depth of 10
# and print its score for one individual from bubble_down. Lid
# unpacks that individual as (tree, nnodes, depth); the 25 is
# presumably the requested node count -- TODO confirm.
lid = Lid(target_depth=10)
print(
    lid(bubble_down.bubble_down(25, random))
)
Esempio n. 3
0
File: lid.py Progetto: squeakus/PODI
import bubble_down

# Module-level settings: a one-element list of names and a
# name -> integer mapping. Presumably the terminal (variable) names
# and the function arities read by the tree generator -- TODO confirm
# against bubble_down.
# NOTE: `vars` shadows the builtin of the same name; the name is kept
# because code outside this excerpt may refer to it.
vars = ["x"]
fns = dict(j=2)

class Lid:
    """Score an individual by how close its depth and terminal count
    are to fixed targets.

    The score is the sum of a depth part and a terminals part. The
    depth part equals weight_depth when the depth matches the target
    and shrinks linearly with the absolute depth error. The terminals
    part is only awarded when the depth target is hit exactly; it
    equals weight_terminals for an exact terminal count and shrinks
    linearly with the absolute error.
    """

    def __init__(self, target_depth, target_terminals=256, weight_depth=50, weight_terminals=50):
        """Store the targets and weights.

        Raises ValueError unless weight_depth + weight_terminals
        equals 100.
        """
        self.target_depth = float(target_depth)
        self.target_terminals = target_terminals
        self.weight_depth = weight_depth
        self.weight_terminals = float(weight_terminals)
        total_weight = weight_depth + self.weight_terminals
        if total_weight != 100.0:
            raise ValueError('Lid weight_depth + weight_terminals != 100', total_weight)

    def __call__(self, ind):
        """Return the score of ind, a (tree, nnodes, depth) triple.

        Only nnodes and depth are used; the tree itself is ignored.
        """
        _, nnodes, actual_depth = ind
        # Explicit floor division: plain `/` only truncates on
        # Python 2 and would give a fractional terminal count on
        # Python 3. The formula presumably assumes every function
        # node is binary -- TODO confirm.
        actual_terminals = 1 + nnodes // 2
        metric_depth = self.weight_depth * (
            1.0 - float(abs(self.target_depth - actual_depth))/self.target_depth)
        metric_terminals = 0
        # Terminals only count once the depth target is met exactly.
        if self.target_depth == actual_depth:
            metric_terminals = self.weight_terminals * (
                1.0 - float(abs(self.target_terminals - actual_terminals))/self.target_terminals)
        return metric_depth + metric_terminals

# Demo executed at import time: build a Lid with a target depth of 10
# and print its score for one individual from bubble_down. Lid
# unpacks that individual as (tree, nnodes, depth); the 25 is
# presumably the requested node count -- TODO confirm.
lid = Lid(target_depth=10)
print(
    lid(bubble_down.bubble_down(25))
)
Esempio n. 4
0
File: gp.py Progetto: jmmcd/PODI
def hillclimb(fitness_fn_key, mutation_type="optimal_ms",
              rt_method="grow", rt_size=3,
              ngens=200, popsize=1, init_popsize=1, print_every=10,
              returns_dir="/Users/jmmcd/Dropbox/GSGP-ideas-papers/finance/"):
    """Hill-climbing optimisation.

    Start from a randomly generated tree, then for ngens generations
    create popsize mutants of the current best tree and adopt a
    mutant whenever the fitness reported by fitness_fn.get_semantics
    is strictly lower. A progress line is printed whenever the
    generation number is a multiple of print_every, and the
    accumulated returns from the last such report are printed at the
    end.

    fitness_fn_key -- name passed to fitness.benchmarks. The same key
        with "_class" appended selects the extra measure reported as
        class_acc, and <returns_dir>/<fitness_fn_key>.txt supplies the
        raw returns.
    mutation_type -- "GSGP-optimal-ms" (or its alias "optimal_ms",
        the default), "GSGP", "GSGP-one-tree" or "GP".
    rt_method, rt_size -- random-tree generator ("grow" or
        "bubble_down") and its size argument.
    ngens -- number of generations.
    popsize -- mutants created per generation.
    init_popsize -- random trees created per initialisation attempt.
    print_every -- reporting interval, in generations.
    returns_dir -- directory containing the raw-returns text files.

    Raises ValueError for an unknown mutation_type or rt_method, or
    for init_popsize < 1.
    """
    import os  # local import: not guaranteed at file level

    # Fail fast on bad arguments, before any data is loaded or any
    # tree is evaluated.
    if mutation_type == "optimal_ms":
        # The default name is not one of the names dispatched on
        # below; treat it as an alias so that a default call works
        # instead of raising in the first generation.
        mutation_type = "GSGP-optimal-ms"
    if mutation_type not in ("GSGP-optimal-ms", "GSGP",
                             "GSGP-one-tree", "GP"):
        raise ValueError("Unknown mutation type " + str(mutation_type))
    if rt_method not in ("grow", "bubble_down"):
        raise ValueError("Unknown rt_method " + str(rt_method))
    if init_popsize < 1:
        # With no candidates the initialisation loop could never end.
        raise ValueError("init_popsize must be at least 1")

    fitness_fn = fitness.benchmarks(fitness_fn_key)
    extra_fitness_fn = fitness.benchmarks(fitness_fn_key + "_class")
    set_fns_leaves(fitness_fn.arity)
    evals = 0

    # Last 418 entries of the first row of the transposed data, i.e.
    # the first column when the file has several columns.
    raw_returns = np.genfromtxt(
        os.path.join(returns_dir, fitness_fn_key + ".txt")).T[0][-418:]

    print("#generation evaluations fitness_rmse fitness_rmse_test class_acc class_acc_test returns_50 sig_50 returns_100 sig_100 returns_end sig_end best_phenotype_length best_phenotype")
    # Generate an initial solution and make sure it doesn't return an
    # error because if it does, in GSGP that error will always be present.
    si_out = None
    # Same starting threshold as sys.maxint on Python 2; sys.maxsize
    # is the Python 3 equivalent.
    ft = float(getattr(sys, "maxint", sys.maxsize))
    while si_out is None:
        if rt_method == "grow":
            s = [grow(rt_size, random) for _ in range(init_popsize)]
        else:
            s = [bd.bubble_down(rt_size, random)[0]
                 for _ in range(init_popsize)]

        # NOTE(review): si_out reflects the last candidate evaluated,
        # while t is the best one; the two only coincide for
        # init_popsize == 1. Left as in the original.
        for si in s:
            # Evaluate child
            fnsi = make_fn(si)
            fsi, si_out = fitness_fn.get_semantics(fnsi)

            # Keep the child only if better
            if fsi < ft:
                t, ft, fnt = si, fsi, fnsi
        evals += init_popsize

    # Defined up front so the final dump also works when ngens == 0.
    returns = []

    for gen in range(ngens):

        # make a lot of new individuals by mutation
        if mutation_type == "GSGP-optimal-ms":
            # Mutate and differentiate to get the best possibility
            s = [semantic_geometric_mutate_differentiate(t, fitness_fn,
                                                         rt_size=rt_size,
                                                         rt_method=rt_method)
                 for _ in range(popsize)]

        elif mutation_type == "GSGP":
            # ms=0.001 as in Moraglio
            s = [semantic_geometric_mutate(t, 0.001,
                                           rt_size=rt_size,
                                           one_tree=False,
                                           rt_method=rt_method)
                 for _ in range(popsize)]

        elif mutation_type == "GSGP-one-tree":
            # mutation step size randomly chosen
            s = [semantic_geometric_mutate(t, np.random.normal(),
                                           rt_size=rt_size,
                                           one_tree=True,
                                           rt_method=rt_method)
                 for _ in range(popsize)]

        else:
            # "GP" (the only remaining validated name).
            # don't use rt_size since it's = 2. use 12, the default
            s = [subtree_mutate(t)
                 for _ in range(popsize)]

        # test the new individuals and keep only the single best
        for si in s:
            # Evaluate child
            fnsi = make_fn(si)
            fsi, si_out = fitness_fn.get_semantics(fnsi)

            # Keep the child only if better
            if fsi < ft:
                t, ft, fnt = si, fsi, fnsi

        test_rmse, yhat_test = fitness_fn.get_semantics(fnt, test=True)

        evals += popsize
        if gen % print_every == 0:
            length = iter_len(traverse(t))
            # This is horrible: if t is just a single variable eg x0,
            # then str(t) -> x0, instead of 'x0'. Hack around it.
            if isatom(t):
                str_t = "'" + t + "'"
            else:
                str_t = str(t)

            returns, sig_50, sig_100, sig_end = accum_returns(raw_returns, yhat_test)
            # Index 417 is the last of the 418 raw returns, presumably
            # matching the length of `returns` -- TODO confirm.
            print("%d %d %f %f %f %f %f %d %f %d %f %d %d : %s" % (
                    gen, evals,
                    ft, test_rmse,
                    extra_fitness_fn(fnt),
                    extra_fitness_fn.test(fnt),
                    returns[50],
                    sig_50,
                    returns[100],
                    sig_100,
                    returns[417],
                    sig_end,
                    length, str_t))

    # Single-argument print(...) behaves identically as a Python 2
    # statement and a Python 3 function call.
    print("ACCUMULATE RETURNS")
    for val in returns:
        print(val)
Esempio n. 5
0
File: gp.py Progetto: jmmcd/PODI
def semantic_geometric_mutate_differentiate(t, fitness_fn, rt_size=3,
                                            rt_method="grow"):
    """Semantic geometric mutation with an analytically chosen step:

    tm = t + ms * tr

    Here tr is a randomly generated tree and ms is the mutation step
    (possibly negative), obtained by differentiating the new error
    RMSE(y, t + ms * tr) with respect to ms. The operator therefore
    has to evaluate trees itself, which is why the fitness function
    is passed in.

    rt_method="grow" uses the standard GP grow method, with rt_size
    as the maximum depth. rt_method="bubble_down" uses the bubble-down
    method, with rt_size as the number of nodes. Any other value
    raises ValueError.

    Derivation. Minimising RMSE is equivalent to minimising the mean
    square error:

    MSE = mean((y - (t + ms*tr))**2)
        = mean(((y-t) - ms*tr)**2)
        = mean((y-t)**2 - 2*(y-t)*ms*tr + ms**2*tr**2)

    Differentiating with respect to ms:

    d(MSE)/d(ms) = mean(-2*(y-t)*tr + 2*ms*tr**2)
                 = -2*mean((y-t)*tr) + 2*ms*mean(tr**2)

    which is zero when

    2*mean((y-t)*tr) = 2*ms*mean(tr**2)

    so the optimum step is

    ms = mean((y-t)*tr) / mean(tr**2)

    Returns the mutated tree ['+', t, ['*', ms, tr]].
    """

    # Keep drawing random trees until one has semantics (not None)
    # that are not numerically all zeros: mean(tr**2) is the divisor
    # in the step formula.
    while True:
        if rt_method == "grow":
            tr = grow(rt_size, random)
        elif rt_method == "bubble_down":
            tr = bd.bubble_down(rt_size, random)[0]
        else:
            raise ValueError
        _fitness, tr_out = fitness_fn.get_semantics(make_fn(tr))
        if tr_out is None:
            continue
        mean_sq_tr = np.mean(tr_out**2)
        if not mean_sq_tr < 0.000001:
            break

    # The original author expects this evaluation to be cached.
    _fitness, t_out = fitness_fn.get_semantics(make_fn(t))
    y = fitness_fn.train_y

    # Optimum step from the derivation in the docstring.
    ms = np.mean((y - t_out) * tr_out) / mean_sq_tr

    # TODO if ms is close to zero, we could reject the step and try
    # again, for a kind of ad-hoc regularisation. The threshold could
    # be annealed during the run, perhaps. For now, just accept the
    # step regardless.

    return ['+', t, ['*', ms, tr]]