from math import sqrt


def err_func(outs, inps, dims, gene, nr_of_pars, op_table):
    """
    Example of an error function, without parameters or anything fancy.
    Just the L2-norm (RMSE) of the residuals, to be minimized.
    """
    # The dims argument is immediately overwritten by the dimension of the
    # data; it (and nr_of_pars) is presumably only in the signature to match
    # the expected error-function interface.
    dims = len(inps[0])
    cgp = CGP(dims, op_table, gene)
    n = len(inps)
    assert len(outs) == n
    s = 0.0
    for i in range(n):
        tmp = outs[i] - cgp.eval(inps[i])
        s += tmp * tmp
    s /= float(n)
    return sqrt(s)
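
# A minimal usage sketch of err_func. The CGP class and create_random_gene
# are from this codebase; the Operation("+") / Operation("*") entries and the
# toy data (f(x) = x0 + x1) are assumptions for illustration only.
if __name__ == "__main__":
    from random import random

    op_table = [Operation("+"), Operation("*")]  # assumed operation names
    inps = [[random(), random()] for _ in range(20)]
    outs = [x[0] + x[1] for x in inps]
    gene = create_random_gene(2, len(op_table), 10)
    rmse = err_func(outs, inps, 2, gene, 0, op_table)
    print("RMSE of a random gene:", rmse)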
Operation("log"), Operation("/") ] h = 1.0e-9 err = 0.0 counter = 0.0 for _ in range(10): nr_of_nodes = randint(1, 15) dims = randint(1, 4) gene = create_random_gene(dims, len(op_table), nr_of_nodes) cgp = CGP(dims, op_table, gene, nr_of_parameters=0) for _ in range(10): pnt = [gauss(0, 10) for _ in range(dims)] for d in range(dims): pnt_shift = list(pnt) pnt_shift[d] += h numerical_der = (cgp.eval(pnt_shift) - cgp.eval(pnt)) / h analytical_der = cgp.eval(pnt, derivative=True, der_dir=d) diff = analytical_der[1] - numerical_der err += diff * diff counter += 1.0 err = sqrt(err / counter) print("Test 2 error:", err) print("Both errors should be below 1.0e-5")
from math import sqrt
from random import random


def starting_point_approximation(func, nr_of_parameters, parameter_ranges,
                                 optimizer, max_iter=1000, multi_starts=2,
                                 nr_of_samples_per_parameter=25,
                                 nr_of_parameters_in_cgp=3, max_time=None,
                                 symbolic_der=None):
    assert nr_of_samples_per_parameter > 1
    assert nr_of_parameters >= 0

    if nr_of_parameters == 0:
        # Without parameters the function has a single, fixed root, so there
        # is nothing to approximate: just solve for that one value directly.
        print("There are no parameters in the input function, so there is no need for this program.")
        assert False

    # Make sure that the input data makes sense.
    assert len(parameter_ranges) == nr_of_parameters
    for tmp in parameter_ranges:
        assert len(tmp) == 2
        assert tmp[0] < tmp[1]

    # Calculate the total number of samples.
    nr_of_samples = nr_of_samples_per_parameter ** nr_of_parameters

    # Generate random parameter points in the given ranges.
    # TODO: this is stupid, change later! We really should sample on a nice
    # Cartesian grid.
    parameter_samples = [[random() * (r[1] - r[0]) + r[0] for r in parameter_ranges]
                         for _ in range(nr_of_samples)]

    # If no symbolic derivative is given, fall back on a forward difference in x.
    if symbolic_der is None:
        func_der = lambda x, a: (func([x[0] + 1.0e-11], a) - func([x[0]], a)) / 1.0e-11
    else:
        func_der = symbolic_der

    # Step 1
    # For each parameter point, find the x val that is the (or a) root.
    # Do this using Newton-Raphson.
    root_samples_and_errors = [root_finders(func, func_der, parameter_samples[i])
                               for i in range(nr_of_samples)]

    # Remove all points where the root search didn't converge.
    converge_thresh = 1.0e-8
    kept = [(rs, ps) for rs, ps in zip(root_samples_and_errors, parameter_samples)
            if rs[1] <= converge_thresh]
    root_samples_and_errors = [rs for rs, _ in kept]
    parameter_samples = [ps for _, ps in kept]

    root_samples = [tmp[0] for tmp in root_samples_and_errors]
    errors_samples = [tmp[1] for tmp in root_samples_and_errors]
    assert max(errors_samples) <= converge_thresh

    filtered_quota = 1.0 - len(root_samples) / float(nr_of_samples)
    print("How many were filtered:", 100.0 * filtered_quota, "%")
    if filtered_quota > 0.05:  # more than 5 % of the points failed to converge
        # TODO: Do something in this case
        assert False

    # Step 2
    # Run a symbolic regression to find a good approximation of the root as a
    # function of the parameters. This is used as a starting point.
    (cgp, best_err, parameters) = starting_point_approximation_symbolic_regression(
        root_samples, parameter_samples, optimizer, max_iter=max_iter,
        nr_of_parameters=nr_of_parameters_in_cgp, max_time=max_time)

    # Step 2 and a half
    # The symbolic regression (tries to) ignore all constant solutions, so the
    # constant fit (the mean of the root samples) has to be checked separately.
    mean = sum(root_samples) / float(len(root_samples))
    error_from_mean = sqrt(sum((r - mean) * (r - mean) for r in root_samples) / float(len(root_samples)))
    if error_from_mean < best_err:
        print("DOING THE CONST THING:", error_from_mean)
        # Create a new gene that represents a constant function: the output
        # node points at the single parameter node, which sits right after
        # the cgp.dims input nodes.
        new_gene = [0] * len(cgp.gene)
        new_gene[-1] = cgp.dims
        cgp = CGP(cgp.dims, cgp.op_table, new_gene, nr_of_parameters=1)
        parameters = [mean]
        for _ in range(10):
            assert cgp.eval([random() for _ in range(cgp.dims)], parameters=parameters) == mean
        best_err = error_from_mean
    return (cgp, best_err, parameters)
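
# root_finders is not defined in this excerpt. A minimal sketch of what it
# could look like, assuming it runs Newton-Raphson in x for a fixed parameter
# vector a and returns (root, residual_error); the signature, the random
# multi-start strategy and the iteration counts here are guesses for
# illustration, not the actual implementation.
def root_finders(func, func_der, a, starts=5, iters=50):
    from random import gauss
    best = (0.0, float("inf"))
    for _ in range(starts):
        x = gauss(0.0, 1.0)              # random starting point
        for _ in range(iters):
            d = func_der([x], a)
            if d == 0.0:                 # avoid division by zero
                break
            x -= func([x], a) / d        # Newton-Raphson step
        err = abs(func([x], a))          # residual at the candidate root
        if err < best[1]:
            best = (x, err)
    return best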