Example #1
def test_lexicase_shapes():
    """test_selection.py: lexicase selection returns correct shape"""
    few = FEW(seed_with_ml=False, population_size=257)
    few.term_set = [node('x', loc=0)]
    pop = few.init_pop()
    offspring, locs = few.lexicase(pop.individuals)
    assert len(offspring) == 257

    # smaller popsize than tournament size
    few = FEW(seed_with_ml=False, population_size=2)
    few.term_set = [node('x', loc=0)]
    pop = few.init_pop()
    offspring, locs = few.lexicase(pop.individuals)
    assert len(offspring) == 2
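
These snippets are excerpted from FEW's test suite, so their module-level imports do not appear here (the usage scripts in Examples #12 and #14 carry their own). Below is a minimal sketch of what the test snippets assume; FEW's internal helpers (node, Ind, is_valid_program, divs) come from the package's own modules, whose exact import paths are not visible in the excerpts and are therefore noted in a comment rather than guessed.

# Imports assumed by the test snippets in these examples.
import numpy as np
from sklearn.datasets import load_boston   # used by the Boston-housing tests

from few import FEW                        # the estimator exercised throughout

# node, Ind, is_valid_program and divs are FEW-internal names used below;
# their import paths are not shown in these excerpts, so they are not
# guessed here.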
Example #2
def test_epsilon_lexicase_shapes():
    """test_selection.py: epsilon lexicase selection returns correct shape"""
    np.random.seed(42)
    few = FEW(seed_with_ml=False, population_size=257, lex_size=False)
    few.term_set = [node('x', loc=0)]
    pop = few.init_pop()
    offspring = few.epsilon_lexicase(np.random.rand(257, 100), [])
    assert len(offspring) == 257

    # smaller popsize than tournament size
    few = FEW(seed_with_ml=False, population_size=2, lex_size=False)
    few.term_set = [node('x', loc=0)]
    pop = few.init_pop()
    offspring = few.epsilon_lexicase(np.random.rand(2, 100), [])
    assert len(offspring) == 2
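
For context, epsilon-lexicase selection picks each parent by shuffling the test cases and repeatedly discarding candidates whose error on the current case is more than an epsilon (commonly the median absolute deviation of that case's errors) above the best error, until one candidate or no cases remain. The function below is a minimal NumPy sketch of that scheme for an (individuals x cases) error matrix; it is illustrative only and is not FEW's epsilon_lexicase implementation.

import numpy as np

def epsilon_lexicase_sketch(error_matrix, num_selections, rng=None):
    """Return `num_selections` row indices chosen by epsilon-lexicase
    selection from an (n_individuals, n_cases) error matrix (lower is better)."""
    rng = np.random.default_rng(rng)
    n_individuals, n_cases = error_matrix.shape
    # per-case epsilon: median absolute deviation of that case's errors
    med = np.median(error_matrix, axis=0)
    eps = np.median(np.abs(error_matrix - med), axis=0)

    selected = []
    for _ in range(num_selections):
        candidates = np.arange(n_individuals)
        for case in rng.permutation(n_cases):
            errs = error_matrix[candidates, case]
            candidates = candidates[errs <= errs.min() + eps[case]]
            if len(candidates) == 1:
                break
        selected.append(rng.choice(candidates))
    return selected

# e.g. select 100 parents from a random 257 x 10 error matrix
assert len(epsilon_lexicase_sketch(np.random.rand(257, 10), 100)) == 100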
Example #3
def test_mutate_makes_valid_program():
    """test_variation.py: mutation makes valid programs """
    func_set = [
        node('+'),
        node('-'),
        node('*'),
        node('/'),
        node('sin'),
        node('cos'),
        node('exp'),
        node('log'),
        node('^2'),
        node('^3'),
        node('sqrt')
    ]
    # terminal set
    term_set = []
    # numbers represent column indices of features
    term_set = [node('x', loc=i) for i in np.arange(10)]
    term_set += [node('k', value=np.random.rand()) for i in np.arange(10)]
    # program
    p = [
        node('k', value=5),
        node('x', loc=6),
        node('/'),
        node('k', value=7),
        node('x', loc=8),
        node('*'),
        node('-')
    ]
    # test 1000 mutation events
    few = FEW()
    for i in np.arange(1000):
        few.mutate(p, func_set, term_set)
        assert is_valid_program(p)
Example #4
def test_cross_makes_valid_program():
    """test_variation.py: crossover makes valid programs """
    # np.random.seed(65)
    # p1 = (x1 + x2) * x3
    p1 = [
        node('x', loc=1),
        node('x', loc=2),
        node('+'),
        node('x', loc=3),
        node('*')
    ]
    # J = (x/z)-(n*b)
    p2 = [
        node('x', loc=1),
        node('x', loc=2),
        node('/'),
        node('k', value=3.7),
        node('x', loc=4),
        node('*'),
        node('-')
    ]
    # test 1000 crossover events
    few = FEW()
    for i in np.arange(1000):
        few.cross(p1, p2)
        assert is_valid_program(p1) and is_valid_program(p2)
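
The is_valid_program assertions used in these variation tests (and in the population test further down) boil down to a stack-depth check: every operator in the postfix stack must find enough operands, and exactly one value must remain at the end. Below is a minimal sketch of such a check, assuming each node exposes an arity attribute (0 for 'x'/'k' terminals, 1 for unary functions, 2 for binary operators); the attribute name is an assumption, not necessarily what FEW's node class uses.

def is_valid_program_sketch(stack):
    """Check that a postfix stack encodes one complete expression."""
    depth = 0
    for n in stack:
        arity = getattr(n, 'arity', 0)   # assumed attribute name
        if depth < arity:                # operator lacks operands
            return False
        depth = depth - arity + 1        # pop `arity` values, push one result
    return depth == 1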
Example #5
def test_out_shapes():
    """test_evaluation.py: program output is correct size """
    # load test data set
    boston = load_boston()
    # boston.data = boston.data[::10]
    # boston.target = boston.target[::10]
    n_features = boston.data.shape[1]
    # function set

    # terminal set
    term_set = []
    # numbers represent column indices of features
    for i in np.arange(n_features):
        term_set.append(node('x', loc=i))  # features
        # term_set.append(('k',0,np.random.rand())) # ephemeral random constants

    # initialize population
    pop_size = 5
    few = FEW(population_size=pop_size, seed_with_ml=False)
    few.term_set = term_set
    few.n_features = n_features
    pop = few.init_pop()

    pop.X = np.asarray(
        list(map(lambda I: few.out(I, boston.data), pop.individuals)))

    #pop.X = out(pop.individuals[0],boston.data,boston.target)
    print("pop.X.shape:", pop.X.shape)
    print("boston.target.shape", boston.target.shape)
    assert pop.X.shape == (pop_size, boston.target.shape[0])
Example #6
def test_separation():
    """test_evaluation: separation"""
    # perfect separation
    x = np.hstack((np.zeros(50), np.ones(50)))
    y = np.hstack((np.zeros(50), np.ones(50)))
    few = FEW()
    mean_separation = few.separation(x, y)
    sample_separation = few.separation(x, y, samples=True)
    print('mean_separation:', mean_separation)
    print('sample_separation', sample_separation)
    assert (mean_separation == 1)
    assert (np.mean(sample_separation) == mean_separation)

    # perfect separation
    x = np.hstack((np.ones(50), np.zeros(50)))
    y = np.hstack((np.zeros(50), np.ones(50)))

    mean_separation = few.separation(x, y)
    sample_separation = few.separation(x, y, samples=True)
    print('mean_separation:', mean_separation)
    print('sample_separation', sample_separation)
    assert (mean_separation == 1)
    assert (np.mean(sample_separation) == mean_separation)

    # half separation
    x = np.hstack((np.ones(25), np.zeros(25), np.ones(25), np.zeros(25)))
    y = np.hstack((np.zeros(50), np.ones(50)))

    mean_separation = few.separation(x, y)
    sample_separation = few.separation(x, y, samples=True)
    print('mean_separation:', mean_separation)
    print('sample_separation', sample_separation)
    assert (mean_separation == 0.25)
    assert (np.mean(sample_separation) == mean_separation)
Example #7
def test_calc_fitness_shape():
    """test_evaluation.py: calc_fitness correct shapes """
    # load test data set
    boston = load_boston()
    # boston.data = boston.data[::10]
    # boston.target = boston.target[::10]
    n_features = boston.data.shape[1]
    # terminal set
    term_set = []
    # numbers represent column indices of features
    for i in np.arange(n_features):
        term_set.append(node('x', loc=i))  # features
        # term_set.append(('k',0,np.random.rand())) # ephemeral random constants

    # initialize population
    pop_size = 5
    few = FEW(population_size=pop_size, seed_with_ml=False)
    few.term_set = term_set
    few.n_features = n_features
    pop = few.init_pop()

    pop.X = np.asarray(
        list(map(lambda I: few.out(I, boston.data), pop.individuals)))

    fitnesses = few.calc_fitness(pop.X, boston.target, 'mse', 'tournament')
    assert len(fitnesses) == len(pop.individuals)

    # test vectorized fitnesses
    vec_fitnesses = few.calc_fitness(pop.X, boston.target, 'mse', 'lexicase')
    fitmat = np.asarray(vec_fitnesses)
    print("fitmat.shape:", fitmat.shape)
    assert fitmat.shape == (len(pop.individuals), boston.target.shape[0])
Example #8
def test_lex_size():
    """test_selection.py: lex_size flag on/off"""

    few = FEW(seed_with_ml=False, population_size=257, lex_size=True)

    Fitness_mat = np.random.rand(257, 10)
    size_mat = np.random.randint(1, 100, size=257)

    locs = few.epsilon_lexicase(Fitness_mat,
                                size_mat,
                                num_selections=100,
                                survival=True)
    assert len(locs) == 100

    few = FEW(seed_with_ml=False, population_size=257, lex_size=False)

    Fitness_mat = np.random.rand(257, 10)
    size_mat = np.random.rand(257, 1)

    locs = few.epsilon_lexicase(Fitness_mat,
                                size_mat,
                                num_selections=100,
                                survival=True)
    assert len(locs) == 100
Example #9
def test_lexicase_survival_shapes():
    """test_selection.py: lexicase survival returns correct shape"""
    # func_set = [node('+'), node('-'), node('*'), node('/'), node('sin'),
    #                  node('cos'), node('exp'),node('log'), node('^2'),
    #                  node('^3'), node('sqrt')]
    # terminal set
    term_set = []
    n_features = 3
    # numbers represent column indices of features
    # for i in np.arange(n_features):
    #     term_set.append(node('x',loc=i)) # features
    term_set = [node('x', loc=i) for i in np.arange(n_features)]
    # term_set.append(('erc',0,np.random.rand())) # ephemeral random constants

    few = FEW(seed_with_ml=False, population_size=257)
    few.term_set = term_set
    pop = few.init_pop()

    for i in pop.individuals:
        i.fitness_vec = list(np.random.rand(10, 1))

    offspring, locs = few.lexicase(pop.individuals,
                                   num_selections=100,
                                   survival=True)
    assert len(offspring) == 100

    # smaller popsize than tournament size
    few = FEW(seed_with_ml=False, population_size=2)
    few.term_set = term_set
    pop = few.init_pop()
    for i in pop.individuals:
        i.fitness_vec = np.random.rand(10, 1)
    offspring, locs = few.lexicase(pop.individuals,
                                   num_selections=1,
                                   survival=True)
    assert len(offspring) == 1
Example #10
def test_pop_init():
    """test_population.py: population initialization makes valid trees """
    # define function set
    # function set
    # func_set = [('+',2),('-',2),('*',2),('/',2),('sin',1),('cos',1),('exp',1),('log',1)]
    # terminal set
    term_set = []
    n_features = 3
    # numbers represent column indices of features
    term_set = [node('x', loc=i) for i in np.arange(n_features)]
    # term_set.append(('erc',0,np.random.rand())) # ephemeral random constants
    few = FEW(seed_with_ml=False)
    few.term_set = term_set
    few.n_features = n_features
    pop = few.init_pop()

    for I in pop.individuals:
        assert is_valid_program(I.stack)
Example #11
def test_inertia():
    """test_evaluation.py: inertia works"""
    # perfect inertia
    x = np.hstack((np.zeros(50), np.ones(50)))
    y = np.hstack((np.zeros(50), np.ones(50)))
    few = FEW()
    mean_inertia = few.inertia(x, y)
    sample_inertia = few.inertia(x, y, samples=True)
    assert (mean_inertia == 0)
    assert (np.mean(sample_inertia) == mean_inertia)

    # half inertia
    x = np.hstack((np.ones(25), np.zeros(25), np.ones(25), np.zeros(25)))
    y = np.hstack((np.zeros(50), np.ones(50)))

    mean_inertia = few.inertia(x, y)
    sample_inertia = few.inertia(x, y, samples=True)
    print('mean_inertia:', mean_inertia)
    print('sample_inertia', sample_inertia)
    assert (mean_inertia == 0.25)
    assert (np.mean(sample_inertia) == mean_inertia)
Example #12
from few import FEW
import pandas as pd
from sklearn.model_selection import train_test_split

dataset = 'd_enc.txt'

input_data = pd.read_csv(dataset,sep=None,engine='python')

#generate train/test split
train_i, test_i = train_test_split(input_data.index, train_size=0.75, test_size=0.25)

# training data
X_train = input_data.loc[train_i].drop('label', axis=1).values
Y_train = input_data.loc[train_i, 'label'].values

#testing data
X_test = input_data.loc[test_i].drop('label', axis=1).values
Y_test = input_data.loc[test_i, 'label'].values

few = FEW(verbosity=1)
few.fit(X_train,Y_train)

print('\nTraining accuracy: {}'.format(few.score(X_train, Y_train)))
print('Holdout accuracy: {}'.format(few.score(X_test, Y_test)))
print('\nModel: {}'.format(few.print_model()))
Example #13
def test_out_is_correct():
    """test_evaluation.py: output matches known function outputs """

    boston = load_boston()
    n_features = boston.data.shape[1]
    X = boston.data
    Y = boston.target
    p1 = Ind()
    p2 = Ind()
    p3 = Ind()
    p4 = Ind()
    p5 = Ind()

    p1.stack = [
        node('x', loc=4),
        node('x', loc=5),
        node('-'),
        node('k', value=0.175),
        node('log'),
        node('-')
    ]

    p2.stack = [node('x', loc=7), node('x', loc=8), node('*')]

    p3.stack = [
        node('x', loc=0),
        node('exp'),
        node('x', loc=5),
        node('x', loc=7),
        node('*'),
        node('/')
    ]

    p4.stack = [node('x', loc=12), node('sin')]

    p5.stack = [
        node('k', value=178.3),
        node('x', loc=8),
        node('*'),
        node('x', loc=7),
        node('cos'),
        node('+')
    ]
    few = FEW()
    y1 = few.safe(np.log(0.175) - (X[:, 5] - X[:, 4]))
    y2 = few.safe(X[:, 7] * X[:, 8])
    y3 = few.safe(divs(X[:, 5] * X[:, 7], np.exp(X[:, 0])))
    y4 = few.safe(np.sin(X[:, 12]))
    y5 = few.safe(178.3 * X[:, 8] + np.cos(X[:, 7]))

    # y1,y2,y3,y4,y5 = safe(y1),safe(y2),safe(y3),safe(y4),safe(y5)
    few = FEW()
    assert np.array_equal(y1, few.out(p1, X))
    print("y1 passed")
    assert np.array_equal(y2, few.out(p2, X))
    print("y2 passed")
    assert np.array_equal(y3, few.out(p3, X))
    print("y3 passed")
    # print("y4:",y4,"y4hat:",few.out(p4,X,Y))
    assert np.array_equal(y4, few.out(p4, X))
    print("y4 passed")
    assert np.array_equal(y5, few.out(p5, X))
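
few.out evaluates a postfix stack column-wise over the feature matrix X. The sketch below reproduces that idea for the node types used in these examples; note that, to match the expected outputs above (e.g. log(0.175) - (X[:, 5] - X[:, 4]) for p1), the value popped first must be treated as the left operand of a binary operator. The attribute names (name, loc, value) and the protected-division form are assumptions for illustration; this is not FEW's implementation.

import numpy as np

def out_sketch(stack, X):
    """Evaluate a postfix program over X (rows = samples); assumes each node
    carries .name plus .loc (features) or .value (constants)."""
    binary = {
        '+': lambda a, b: a + b,
        '-': lambda a, b: a - b,
        '*': lambda a, b: a * b,
        # stand-in for FEW's protected division helper (divs)
        '/': lambda a, b: np.divide(a, b, out=np.zeros_like(a), where=b != 0),
    }
    unary = {
        'sin': np.sin, 'cos': np.cos, 'exp': np.exp,
        'log': np.log, 'sqrt': np.sqrt,          # FEW likely protects these
        '^2': lambda a: a ** 2, '^3': lambda a: a ** 3,
    }
    vals = []
    for n in stack:
        if n.name == 'x':
            vals.append(X[:, n.loc])
        elif n.name == 'k':
            vals.append(np.full(X.shape[0], float(n.value)))
        elif n.name in unary:
            vals.append(unary[n.name](vals.pop()))
        else:
            left, right = vals.pop(), vals.pop()   # first popped = left operand
            vals.append(binary[n.name](left, right))
    return vals[0]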
Example #14
from few import FEW
import pandas as pd
from sklearn.model_selection import train_test_split

dataset = 'data/d_enc.txt'

input_data = pd.read_csv(dataset, sep=None, engine='python')

#generate train/test split
train_i, test_i = train_test_split(input_data.index,
                                   train_size=0.75,
                                   test_size=0.25,
                                   random_state=10)

# training data
X_train = input_data.loc[train_i].drop('label', axis=1).values
Y_train = input_data.loc[train_i, 'label'].values

#testing data
X_test = input_data.loc[test_i].drop('label', axis=1).values
Y_test = input_data.loc[test_i, 'label'].values

few = FEW(random_state=10, verbosity=1)
few.fit(X_train, Y_train)

print('\nTraining accuracy: {}'.format(few.score(X_train, Y_train)))
print('Holdout accuracy: {}'.format(few.score(X_test, Y_test)))
print('\nModel: {}'.format(few.print_model()))