def runTest(self):
    from dcgpy import symbolic_regression, generate_koza_quintic, kernel_set_double
    import pygmo as pg
    X, Y = generate_koza_quintic()
    udp = symbolic_regression(points=X,
                              labels=Y,
                              rows=1,
                              cols=20,
                              levels_back=21,
                              arity=2,
                              kernels=kernel_set_double(["sum", "diff"])(),
                              n_eph=2,
                              multi_objective=False,
                              parallel_batches=0)
    prob = pg.problem(udp)
    pop = pg.population(prob, 10)
    udp.pretty(pop.champion_x)
    udp.prettier(pop.champion_x)
    # Unconstrained
    self.assertEqual(prob.get_nc(), 0)
    self.assertEqual(prob.get_nic(), 0)
    # Single objective
    self.assertEqual(prob.get_nobj(), 1)
    # Dimensions
    self.assertEqual(prob.get_nix(), 20 * (2 + 1) + 1)
    self.assertEqual(prob.get_nx(), 2 + prob.get_nix())
    # Has gradient and hessians
    self.assertEqual(prob.has_gradient(), True)
    self.assertEqual(prob.has_hessians(), True)
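# For reference, the chromosome layout behind the dimension assertions above
# (a sketch, following the CGP encoding used by dcgpy): each of the `cols`
# nodes carries one kernel gene plus `arity` connection genes, one extra
# integer gene selects the output, and the `n_eph` ephemeral constants form
# the continuous part of the decision vector.
cols, arity, n_eph, n_out = 20, 2, 2, 1
nix = cols * (arity + 1) + n_out  # integer dimension: 61, matching get_nix()
nx = n_eph + nix                  # full dimension: 63, matching get_nx()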
def runTest(self):
    from dcgpy import symbolic_regression, generate_koza_quintic, kernel_set_double, gd4cgp
    import pygmo as pg
    import pickle
    X, Y = generate_koza_quintic()
    # Interface for the UDP
    udp = symbolic_regression(points=X,
                              labels=Y,
                              rows=1,
                              cols=20,
                              levels_back=21,
                              arity=2,
                              kernels=kernel_set_double(["sum", "diff", "mul", "pdiv"])(),
                              n_eph=2,
                              multi_objective=False,
                              parallel_batches=0)
    prob = pg.problem(udp)
    pop = pg.population(prob, 10)
    # Interface for the UDA
    uda = gd4cgp(max_iter=10, lr=0.1, lr_min=1e-6)
    algo = pg.algorithm(uda)
    algo.set_verbosity(0)
    # Testing some evolutions
    pop = algo.evolve(pop)
    # In parallel
    archi = pg.archipelago(prob=prob, algo=algo, n=16, pop_size=4)
    archi.evolve()
    archi.wait_check()
    # Pickling.
    self.assertEqual(repr(algo), repr(pickle.loads(pickle.dumps(algo))))
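# Once wait_check() returns, the island champions can be inspected. A usage
# sketch (get_champions_f / get_champions_x are standard pygmo archipelago
# methods; `archi` and `udp` refer to the objects built in the test above):
def best_champion(archi, udp):
    # Pick the island whose champion has the lowest loss and return its model.
    champions_f = archi.get_champions_f()  # one fitness vector per island
    champions_x = archi.get_champions_x()  # one chromosome per island
    best = min(range(len(champions_f)), key=lambda i: champions_f[i][0])
    return udp.prettier(champions_x[best])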
def run(dataset_train, dataset_test, cols, gen):
    import dcgpy
    import pygmo as pg
    import numpy as np
    ss = dcgpy.kernel_set_double([
        "sum", "diff", "mul", "pdiv", "sin", "cos", "tanh", "log", "exp", "psqrt"
    ])
    # The last column of each dataset holds the labels; the rest are the points.
    Xtrain, ytrain = dataset_train[:, :-1], dataset_train[:, -1]
    Xtest, ytest = dataset_test[:, :-1], dataset_test[:, -1]
    udp = dcgpy.symbolic_regression(points=Xtrain,
                                    labels=ytrain[:, np.newaxis],
                                    kernels=ss(),
                                    rows=1,
                                    cols=cols,
                                    levels_back=21,
                                    arity=2,
                                    n_eph=3,
                                    multi_objective=False,
                                    parallel_batches=0)
    uda = dcgpy.es4cgp(gen=gen)
    algo = pg.algorithm(uda)
    pop = pg.population(udp, 4)
    pop = algo.evolve(pop)
    # Train and test RMSE of the evolved champion (RMSE helper sketched below).
    return (RMSE(udp.predict(Xtrain, pop.champion_x), ytrain),
            RMSE(udp.predict(Xtest, pop.champion_x), ytest))
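# `run` above assumes an RMSE helper defined elsewhere in the script; a
# minimal sketch, assuming predictions and labels are numpy arrays:
import numpy as np

def RMSE(y_pred, y_true):
    # Root mean squared error; ravel() tolerates (N,) vs (N, 1) shapes.
    return np.sqrt(np.mean((np.ravel(y_pred) - np.ravel(y_true)) ** 2))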
def encode_ffnn(inputs, outputs, layers_size, kernels, levels_back):
    """
    encode_ffnn(inputs, outputs, layers_size, kernels, levels_back)

    Encodes a feed-forward neural network as a dCGPANN expression. While there
    are infinitely many ways to perform such an encoding, this function
    generates one of them.

    Args:
        inputs (``int``): number of inputs
        outputs (``int``): number of outputs
        layers_size (``List[int]``): sizes of the hidden neural layers
        kernels (``List[str]``): list containing the nonlinearity name for each
            hidden neural layer plus the one for the output layer
        levels_back (``int``): number of levels-back in the cartesian program

    Returns:
        An ``expression_ann_double`` encoding the requested network.

    Raises:
        ValueError: if the kernel list contains unknown kernel names or if
            layers_size and kernels are malformed
    """
    from dcgpy import expression_ann_double, kernel_set_double
    if len(kernels) != len(layers_size) + 1:
        raise ValueError(
            "The size of layers_size must be one less than the size of kernels "
            "(as kernels also includes the output nonlinearity)")
    # We create the list of possible kernels (deduplicated, in a fixed order).
    unique_kernels = list(set(kernels))
    kernel_list = kernel_set_double(unique_kernels)()
    # We compute the cartesian substrate able to host the network.
    arity = [inputs] + layers_size
    extended_layer_size = layers_size + [outputs]
    cols = len(extended_layer_size)
    rows = max(extended_layer_size)
    # We create a dCGPANN expression that is able to encode the FFNN.
    retval = expression_ann_double(inputs, outputs, rows, cols, levels_back,
                                   arity, kernel_list)
    # We hand-write the chromosome.
    start_prev_col = [0] + [inputs + c * rows for c in range(cols)]
    x = retval.get()
    for c in range(cols):
        kernel_id = unique_kernels.index(kernels[c])
        for r in range(extended_layer_size[c]):
            node_id = inputs + c * rows + r
            g_idx = retval.get_gene_idx()[node_id]
            x[g_idx] = kernel_id
            for conn in range(arity[c]):
                x[g_idx + conn + 1] = start_prev_col[c] + conn
    # And the output genes, connected to the last column.
    for i in range(outputs):
        x[-outputs + i] = start_prev_col[c + 1] + i
    retval.set(x)
    return retval
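# A usage sketch for encode_ffnn ("tanh" and "sig" are assumed to be valid
# kernel names in the installed dcgpy build): a 3-input, 2-output network
# with hidden layers of 10 and 5 neurons.
net = encode_ffnn(inputs=3, outputs=2, layers_size=[10, 5],
                  kernels=["tanh", "tanh", "sig"], levels_back=3)
print(net([0.1, -0.2, 0.3]))  # evaluate the encoded network on one point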
def main():
    # Some necessary imports.
    import dcgpy
    import pygmo as pg
    # Sympy is nice to have for basic symbolic manipulation.
    from sympy import init_printing
    from sympy.parsing.sympy_parser import parse_expr
    init_printing()
    # Fundamental for plotting.
    from matplotlib import pyplot as plt

    # We load our data from some available ones shipped with dcgpy.
    # In this particular case we use the problem chwirut2 from
    # (https://www.itl.nist.gov/div898/strd/nls/data/chwirut2.shtml)
    X, Y = dcgpy.generate_chwirut2()

    # And we plot them to visualize the problem.
    _ = plt.plot(X, Y, '.')
    _ = plt.title('54 measurements')
    _ = plt.xlabel('metal distance')
    _ = plt.ylabel('ultrasonic response')

    # We define our kernel set, that is the mathematical operators we want our
    # final model to possibly contain. What to choose here is left to the
    # competence and knowledge of the user. A list of kernels shipped with
    # dcgpy can be found in the online docs. Users can also define their own
    # kernels (see the corresponding tutorial).
    ss = dcgpy.kernel_set_double(["sum", "diff", "mul", "pdiv"])

    # We instantiate the symbolic regression optimization problem (note: many
    # important options are not specified here and are thus set to their
    # default values).
    udp = dcgpy.symbolic_regression(points=X, labels=Y, kernels=ss())
    print(udp)

    # We instantiate the evolutionary strategy we want to use to search for models.
    uda = dcgpy.es4cgp(gen=10000, max_mut=2)

    prob = pg.problem(udp)
    algo = pg.algorithm(uda)
    # Note that the screen output will happen on the terminal, not on your
    # Jupyter notebook. It can be recovered afterwards from the log.
    algo.set_verbosity(10)
    pop = pg.population(prob, 4)
    pop = algo.evolve(pop)

    # Let's have a look at the symbolic representation of our model (using sympy).
    parse_expr(udp.prettier(pop.champion_x))

    # And let's see what our model actually predicts on the inputs.
    Y_pred = udp.predict(X, pop.champion_x)

    # Let's compare to the data.
    _ = plt.plot(X, Y_pred, 'r.')
    _ = plt.plot(X, Y, '.', alpha=0.2)
    _ = plt.title('54 measurements')
    _ = plt.xlabel('metal distance')
    _ = plt.ylabel('ultrasonic response')

    # Here we get the log of the latest call to evolve.
    log = algo.extract(dcgpy.es4cgp).get_log()
    gen = [it[0] for it in log]
    loss = [it[2] for it in log]

    # And here we plot, for example, the generations against the best loss.
    _ = plt.semilogy(gen, loss)
    _ = plt.title('last call to evolve')
    _ = plt.xlabel('generation')
    _ = plt.ylabel('loss')
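# Entry-point guard (not in the original tutorial snippet) so the script can
# be run standalone:
if __name__ == "__main__":
    main()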