Example 1
import numpy as np
from keras.datasets import mnist  # Keras ships a loader for the MNIST dataset

# Local project modules; these import paths are assumptions for illustration
import plotter
from autoencoder import AutoEncoder
from genetic_algorithm import Algorithm


def test_mnist():
    # Fit the autoencoder weights to MNIST using the genetic algorithm
    (train_x, _), (_, _) = mnist.load_data()
    train_x = train_x / 255  # Normalize pixel values to [0, 1]
    # plotter.plot_mnist(train_x, "original")  # Show the original MNIST images

    num_img, img_dim, _ = train_x.shape  # Number of images and pixels per side of each square image
    num_features = 500
    # Reshape the images to match the autoencoder input
    mnist_in = np.reshape(train_x, (img_dim * img_dim, num_img))
    ga = Algorithm(x=mnist_in, num_features=num_features, debug=1, pop_size=20)
    w_out, best_cost, logs = ga.run()

    print(
        f"Average time/generation (sec): {sum(logs['times']) / len(logs['times'])}"
    )
    print(f"Total time to run GA (sec): {logs['times']}")

    ae = AutoEncoder(mnist_in, num_features, random_seed=1234, use_gpu=True)
    # The GA returns a flat individual; reshape it into the (pixels x features) weight matrix
    w_out = np.reshape(w_out, (img_dim * img_dim, num_features))
    z, _ = ae.psi(w_out)
    phi_w_img = ae.phi(w_out)  # Calculate phi(W)
    new_mnist = z @ phi_w_img  # Recreate the original images using Z and phi(W)
    new_imgs = np.reshape(new_mnist, train_x.shape)  # Reshape back to the original image shape
    plotter.plot_mnist(new_imgs, f"{num_features}_features_ga")  # Show the reconstructed images

    plotter.plot_loss(logs['min'], "MNIST_Gradient_Loss_Over_Generations")


def test_random():
    # Sanity check: increasing the number of features should reduce the least-squares error
    num_points = 100
    num_data_per_point = 55
    learning_rate = 0.5
    x_in = np.random.normal(size=(num_data_per_point, num_points))
    for num_features in [1, 5, 10, 15, 20, 40, 70]:
        ae = AutoEncoder(x_in, num_features, random_seed=1234)
        w_in = np.random.normal(size=(num_data_per_point, num_features))
        z_out, least_squares_test = ae.psi(w_in)
        print(
            f"(# features : least squares error) = ({num_features} : {least_squares_test})"
        )
        print("Starting gradient descent...")
        loss_values = []  # Keep track of loss values over epochs
        for epoch in range(1000):
            # Calculate Z, the least-squares error, and the gradient matrix
            z_grd, ls_grd, grd = ae.calc_g(w_in)
            w_in = w_in - (learning_rate * grd)  # Update W along the gradient
            loss_values.append(ls_grd)  # Log loss
            print(f"Epoch: {epoch}\t----------\tLoss: {ls_grd}")

        plotter.plot_loss(
            loss_values,
            f"Gradient Loss Over Epochs (test) (num_features: {num_features})")
Example 3
import random
import time
from argparse import Namespace

import numpy as np
from deap import base, creator, tools  # DEAP evolutionary computation framework

# Local project modules; these import paths are assumptions for illustration
import plotter
from autoencoder import AutoEncoder


class Algorithm:
    def __init__(self, **kwargs):
        # Wrap the keyword arguments in a Namespace for attribute-style access
        args = Namespace(**kwargs)

        self.toolbox = base.Toolbox()

        self.stats = tools.Statistics(key=lambda ind: ind.fitness.values[0])
        self.stats.register("avg", np.mean)
        self.stats.register("std", np.std)
        self.stats.register("min", np.min)
        self.stats.register("max", np.max)

        # Serial evaluation; a multiprocessing Pool's map could be swapped in here
        self.map_func = map

        if not hasattr(args, 'x'):
            raise ValueError(
                "variable 'x' must be given as a numpy array of shape (n x N)")
        x = args.x

        if not hasattr(args, 'num_features'):
            raise ValueError("variable 'num_features' must be given")
        num_features = args.num_features

        # Optional hyperparameters, falling back to defaults when not supplied
        args.mu = getattr(args, 'mu', 0.5)  # Mean for Gaussian mutation
        args.sigma = getattr(args, 'sigma', 0.5)  # Std. dev. for Gaussian mutation
        args.alpha = getattr(args, 'alpha', 0.9)  # Extent of blend crossover
        args.indpb = getattr(args, 'indpb', 0.1)  # Per-attribute mutation probability
        args.tournsize = getattr(args, 'tournsize', 2)  # Tournament size for selection
        self.debug = getattr(args, 'debug', 0)
        self.pop_size = getattr(args, 'pop_size', 300)
        self.num_gen = getattr(args, 'number_generations', 100)
        self.cxpb = getattr(args, 'cxpb', 0.9)  # Probability of mating two individuals
        self.mutpb = getattr(args, 'mutpb', 0.1)  # Probability of mutating an individual

        self.ae = AutoEncoder(x, num_features, random_seed=1234, use_gpu=True)
        self.w_shape = (x.shape[0], num_features)

        # Set up ways to define individuals in the population
        self.toolbox.register("attr_x", np.random.normal, 0, 1)
        self.toolbox.register("individual", tools.initRepeat,
                              creator.Individual, self.toolbox.attr_x,
                              num_features * x.shape[0])
        self.toolbox.register("population", tools.initRepeat, list,
                              self.toolbox.individual)

        # Set up ways to change population
        self.toolbox.register("mate", tools.cxBlend, alpha=args.alpha)
        self.toolbox.register("mutate",
                              tools.mutGaussian,
                              mu=args.mu,
                              sigma=args.sigma,
                              indpb=args.indpb)
        self.toolbox.register("select",
                              tools.selTournament,
                              tournsize=args.tournsize)

    # Fitness evaluation (DEAP requires an iterable return value).
    # The GA minimizes this function, so worse individuals must map to larger
    # positive values.
    # =====================================================================================
    def _evaluate(self, individual):
        # Individuals are flat lists; reshape into the (n x num_features) weight matrix
        w = np.reshape(individual, self.w_shape)
        _, cost = self.ae.psi(w)  # Least-squares reconstruction cost
        return (cost, )

    # =====================================================================================

    def run(self):
        """
        Run a genetic algorithm with the given evaluation function and input parameters.
        Main portion of code for this method found from Deap example at URL:
        https://deap.readthedocs.io/en/master/overview.html

        Parameters
        ----------
        None

        Returns
        -------
        best_individual: List
          The best individual found out of all iterations
        fitness: Float
          The best_individual's fitness value
        logbook : Dictionary
          A dictionary of arrays for iterations, min, max, average, and std. dev. for each iteration.

        """
        pop = self.toolbox.population(n=self.pop_size)
        hof = tools.HallOfFame(25, similar=np.allclose)
        logbook = tools.Logbook()

        # Evaluate the entire population
        fitnesses = list(self.map_func(self._evaluate, pop))
        for ind, fit in zip(pop, fitnesses):
            ind.fitness.values = fit
            ind.generation = 0

        record = self.stats.compile(pop) if self.stats else {}
        logbook.record(gen=0, **record)
        times = []

        for g in range(self.num_gen):
            start_time = time.time()
            # Select the next generation individuals (with replacement)
            offspring = self.toolbox.select(pop, len(pop))
            # Clone the selected individuals (since selection only took references rather than values)
            offspring = list(map(self.toolbox.clone, offspring))

            # Apply crossover and mutation on the offspring
            for child1, child2 in zip(offspring[::2], offspring[1::2]):
                if random.random() < self.cxpb:
                    self.toolbox.mate(child1, child2)
                    del child1.fitness.values
                    del child2.fitness.values

            for mutant in offspring:
                if random.random() < self.mutpb:
                    self.toolbox.mutate(mutant)
                    del mutant.fitness.values

            # Evaluate the individuals with an invalid fitness
            invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
            fitnesses = list(self.map_func(self._evaluate, invalid_ind))

            if self.debug >= 2 and fitnesses:  # Guard: the offspring may all be unchanged
                print(
                    "Generation %i has (min, max) fitness values: (%.3f, %.3f)"
                    % (g + 1, min(fitnesses)[0], max(fitnesses)[0]))
            elif self.debug == 1 and fitnesses:
                plotter.print_progress_bar(
                    g + 1,
                    self.num_gen,
                    suffix=f"Complete--(Gen: fitness): ({g + 1}, {min(fitnesses)[0]:.3f})")

            for ind, fit in zip(invalid_ind, fitnesses):
                ind.fitness.values = fit
                ind.generation = g + 1

            # The population is entirely replaced by the offspring
            pop[:] = offspring
            hof.update(pop)
            record = self.stats.compile(pop) if self.stats else {}
            logbook.record(gen=g + 1, **record)
            times.append(time.time() - start_time)

        if self.debug >= 0:
            print("Problem results:")
            print(
                f"\tBest individual seen fitness value:\t\t{hof[0].fitness.values[0]:.3f}"
            )
            print(
                f"\tGeneration the best individual appeared in:\t{hof[0].generation}"
            )

        gen, min_results, max_results, avg, std = logbook.select(
            "gen", "min", "max", "avg", "std")
        return hof[0], hof[0].fitness.values[0], {
            "iterations": gen,
            "min": min_results,
            "max": max_results,
            "avg": avg,
            "std": std,
            "times": times
        }
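
The constructor above references creator.Individual, so the DEAP creator classes must be registered before Algorithm is instantiated. The sketch below shows one way to do that; the single-objective minimizing fitness (weights=(-1.0,)) is an assumption consistent with _evaluate returning a single cost value:

from deap import base, creator
import numpy as np

# weights=(-1.0,) tells DEAP to minimize the single fitness value
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

x = np.random.normal(size=(55, 100))  # Toy (n x N) input matrix
ga = Algorithm(x=x, num_features=10, pop_size=50, number_generations=20, debug=1)
best_w, best_cost, logs = ga.run()
print(f"Best cost: {best_cost}, total GA time: {sum(logs['times']):.2f} sec")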