import random
import time
from argparse import Namespace

import numpy as np
from deap import base, creator, tools
from keras.datasets import mnist

# Project-local modules; import paths assumed from usage in this file.
import plotter
from autoencoder import AutoEncoder

# DEAP requires the fitness/individual types to be created once, up front.
# Minimizing, so the single fitness weight is negative.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)


def test_mnist():  # Gradient check using MNIST
    (train_x, _), (_, _) = mnist.load_data()
    train_x = train_x / 255  # Normalize pixel values to [0, 1]
    # plotter.plot_mnist(train_x, "original")  # Show original MNIST images
    num_img, img_dim, _ = train_x.shape  # Number of images and pixels per side
    num_features = 500
    # Flatten each image into one column to match the autoencoder input:
    # (img_dim * img_dim) x num_img
    mnist_in = np.reshape(train_x, (num_img, img_dim * img_dim)).T

    ga = Algorithm(x=mnist_in, num_features=num_features, debug=1, pop_size=20)
    w_out, best_cost, logs = ga.run()
    print(f"Average time/generation (sec): "
          f"{sum(logs['times']) / len(logs['times'])}")
    print(f"Total time to run GA (sec): {sum(logs['times'])}")

    ae = AutoEncoder(mnist_in, num_features, random_seed=1234, use_gpu=True)
    # The GA returns a flat individual; restore the (n x k) weight shape
    w_best = np.reshape(w_out, (img_dim * img_dim, num_features))
    z, _ = ae.psi(w_best)
    phi_w_img = ae.phi(w_best)  # Calculate phi(W)
    new_mnist = z @ phi_w_img  # Reconstruct the original images from Z and phi(W)
    # Undo the flattening so the images regain their original shape
    new_imgs = np.reshape(new_mnist.T, train_x.shape)
    plotter.plot_mnist(new_imgs, f"{num_features}_features_ga")  # Show reconstructions
    plotter.plot_loss(logs['min'], "MNIST_Gradient_Loss_Over_Generations")
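
# A quick, self-contained illustration (not part of the original pipeline) of
# the reshape convention used in test_mnist: each image is flattened into one
# column of the autoencoder input, and transposing back restores the stack.
def _demo_reshape_round_trip():
    imgs = np.arange(2 * 3 * 3).reshape(2, 3, 3)  # Two tiny 3x3 "images"
    cols = imgs.reshape(2, 9).T                   # (pixels x images) layout
    restored = cols.T.reshape(imgs.shape)         # Back to (2, 3, 3)
    assert np.array_equal(imgs, restored)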
def test_random():
    """Sanity test: increasing the number of features should reduce the
    least-squares error."""
    num_points = 100
    num_data_per_point = 55
    learning_rate = 0.5
    x_in = np.random.normal(size=(num_data_per_point, num_points))

    for num_features in [1, 5, 10, 15, 20, 40, 70]:
        ae = AutoEncoder(x_in, num_features, random_seed=1234)
        w_in = np.random.normal(size=(num_data_per_point, num_features))
        z_out, least_squares_test = ae.psi(w_in)
        print(f"(# features : least squares error) = "
              f"({num_features} : {least_squares_test})")

        print("Starting gradient descent...")
        loss_values = []  # Track loss values over epochs
        for epoch in range(1000):
            # Calculate Z, the least-squares error, and the gradient matrix
            z_grd, ls_grd, grd = ae.calc_g(w_in)
            w_in = w_in - (learning_rate * grd)  # Update W using the gradient
            loss_values.append(ls_grd)  # Log loss
            print(f"Epoch: {epoch}\t----------\tLoss: {ls_grd}")
        plotter.plot_loss(
            loss_values,
            f"Gradient Loss Over Epochs (test) (num_features: {num_features})")
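
# The tests above rely on an AutoEncoder class providing psi, phi, and calc_g.
# The real implementation lives in the project's autoencoder module; the class
# below is only a hypothetical linear least-squares stand-in, with shapes
# inferred from usage (x is n x N, w is n x k), so the tests can be exercised
# in isolation. The actual psi/phi definitions may differ.
class _LinearAutoEncoderSketch:
    def __init__(self, x, num_features, random_seed=None, use_gpu=False):
        # random_seed and use_gpu are accepted only for signature
        # compatibility; this sketch uses neither.
        self.x = np.asarray(x, dtype=float)  # (n x N) data matrix
        self.num_features = num_features

    def phi(self, w):
        # Latent code: project the data through the pseudo-inverse of W, (k x N)
        return np.linalg.pinv(w) @ self.x

    def psi(self, w):
        # Best linear decoder Z for the code phi(w), plus the squared error
        code = self.phi(w)                                     # (k x N)
        z_t, *_ = np.linalg.lstsq(code.T, self.x.T, rcond=None)
        z = z_t.T                                              # (n x k)
        cost = float(np.sum((self.x - z @ code) ** 2))
        return z, cost

    def calc_g(self, w, eps=1e-6):
        # Forward-difference numerical gradient of the cost w.r.t. W.
        # Slow (one psi call per entry of W) but dependency-free.
        z, cost = self.psi(w)
        grad = np.zeros_like(w)
        for idx in np.ndindex(w.shape):
            w_p = w.copy()
            w_p[idx] += eps
            _, cost_p = self.psi(w_p)
            grad[idx] = (cost_p - cost) / eps
        return z, cost, grad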
class Algorithm:
    def __init__(self, **args):
        args = Namespace(**args)
        self.toolbox = base.Toolbox()
        self.stats = tools.Statistics(key=lambda ind: ind.fitness.values[0])
        self.stats.register("avg", np.mean)
        self.stats.register("std", np.std)
        self.stats.register("min", np.min)
        self.stats.register("max", np.max)
        # A multiprocessing pool's map could be substituted here to
        # parallelize fitness evaluation.
        self.map_func = map

        if not hasattr(args, 'x'):
            raise ValueError(
                "variable 'x' must be given as a numpy array of shape (n x N)")
        x = args.x
        if not hasattr(args, 'num_features'):
            raise ValueError("variable 'num_features' must be given")
        num_features = args.num_features

        # Optional parameters fall back to defaults when not supplied
        args.mu = getattr(args, 'mu', 0.5)
        args.sigma = getattr(args, 'sigma', 0.5)
        args.alpha = getattr(args, 'alpha', 0.9)
        args.indpb = getattr(args, 'indpb', 0.1)
        args.tournsize = getattr(args, 'tournsize', 2)
        self.debug = getattr(args, 'debug', 0)
        self.pop_size = getattr(args, 'pop_size', 300)
        self.num_gen = getattr(args, 'number_generations', 100)
        self.cxpb = getattr(args, 'cxpb', 0.9)
        self.mutpb = getattr(args, 'mutpb', 0.1)

        self.ae = AutoEncoder(x, num_features, random_seed=1234, use_gpu=True)
        self.w_shape = (x.shape[0], num_features)

        # Define how individuals in the population are created
        self.toolbox.register("attr_x", np.random.normal, 0, 1)
        self.toolbox.register("individual", tools.initRepeat,
                              creator.Individual, self.toolbox.attr_x,
                              num_features * x.shape[0])
        self.toolbox.register("population", tools.initRepeat, list,
                              self.toolbox.individual)

        # Define how the population changes between generations
        self.toolbox.register("mate", tools.cxBlend, alpha=args.alpha)
        self.toolbox.register("mutate", tools.mutGaussian, mu=args.mu,
                              sigma=args.sigma, indpb=args.indpb)
        self.toolbox.register("select", tools.selTournament,
                              tournsize=args.tournsize)

    # Fitness evaluation (must return an iterable). Since we are minimizing,
    # a poor individual should map to a large positive value.
    # =====================================================================
    def _evaluate(self, individual):
        w = np.reshape(individual, self.w_shape)
        _, cost = self.ae.psi(w)
        return (cost, )

    # =====================================================================
    def run(self):
        """
        Run a genetic algorithm with the given evaluation function and input
        parameters. The main portion of this method follows the DEAP example
        at: https://deap.readthedocs.io/en/master/overview.html

        Parameters
        ----------
        None

        Returns
        -------
        best_individual : list
            The best individual found across all generations.
        fitness : float
            The best individual's fitness value.
        logbook : dict
            A dictionary of arrays for iterations, min, max, average, and
            std. dev. for each iteration.
""" pop = self.toolbox.population(n=self.pop_size) hof = tools.HallOfFame(25, similar=np.allclose) logbook = tools.Logbook() # Evaluate the entire population fitnesses = list(self.map_func(self._evaluate, pop)) for ind, fit in zip(pop, fitnesses): ind.fitness.values = fit ind.generation = 0 record = self.stats.compile(pop) if self.stats else {} logbook.record(gen=0, **record) times = [] for g in range(self.num_gen): start_time = time.time() # Select the next generation individuals (with replacement) offspring = self.toolbox.select(pop, len(pop)) # Clone the selected individuals (since selection only took references rather than values) offspring = list(map(self.toolbox.clone, offspring)) # Apply crossover and mutation on the offspring for child1, child2 in zip(offspring[::2], offspring[1::2]): if random.random() < self.cxpb: self.toolbox.mate(child1, child2) del child1.fitness.values del child2.fitness.values for mutant in offspring: if random.random() < self.mutpb: self.toolbox.mutate(mutant) del mutant.fitness.values # Evaluate the individuals with an invalid fitness invalid_ind = [ind for ind in offspring if not ind.fitness.valid] fitnesses = list(self.map_func(self._evaluate, invalid_ind)) if self.debug >= 2: print( "Generation %i has (min, max) fitness values: (%.3f, %.3f)" % (g, min(fitnesses)[0], max(fitnesses)[0])) elif self.debug == 1: plotter.print_progress_bar( g + 1, self.num_gen, suffix= f"Complete--(Gen: fitness): ({g + 1}, {min(fitnesses)[0]:.3f})" ) for ind, fit in zip(invalid_ind, fitnesses): ind.fitness.values = fit ind.generation = g + 1 # The population is entirely replaced by the offspring pop[:] = offspring hof.update(pop) record = self.stats.compile(pop) if self.stats else {} logbook.record(gen=g + 1, **record) times.append(time.time() - start_time) if self.debug >= 0: print("Problem results:") print( f"\tBest individual seen fitness value:\t\t{hof[0].fitness.values[0]:3f}" ) print( f"\tBest individual seen generation appeared in:\t{hof[0].generation}" ) gen, min_results, max_results, avg, std = logbook.select( "gen", "min", "max", "avg", "std") return hof[0], hof[0].fitness.values[0], { "iterations": gen, "min": min_results, "max": max_results, "avg": avg, "std": std, "times": times }