Beispiel #1
0
def main(config):
    """Run the evolutionary training loop for one agent; never returns.

    Creates the gym environment named by ``config.env``, a working agent and
    a second agent used only to replay genomes taken from the gene pool, then
    loops forever:

    * Unless ``config.best`` is set, play one episode with the working agent,
      report its reward to the gene pool and call ``update_agent``.
    * Replay the top-ranked genome from the pool: always when ``config.best``
      is set, otherwise with probability 0.1 when the pool returned any
      genomes. Outside ``config.best`` mode the replay score is reported back
      to the pool.

    Args:
        config: Parsed options. This function reads ``env``, ``num_hidden``,
            ``gene_weight_ratio``, ``freq_weight_ratio``, ``v_init``,
            ``clear_store``, ``num_best`` and ``best``; the whole object is
            also forwarded to ``run_episode`` and ``update_agent``.

    Raises:
        IndexError: If ``config.best`` is set but the gene pool returned no
            genomes, so there is nothing to replay.
    """
    environment = gym.make(config.env)
    state_shape = environment.observation_space.low.shape
    num_hidden = config.num_hidden
    num_actions = environment.action_space.n
    agent = Agent(MLP(state_shape[0], num_hidden, num_actions))
    best_agent = Agent(MLP(state_shape[0], num_hidden, num_actions))
    agent.randomize(config.gene_weight_ratio, config.freq_weight_ratio,
                    config.v_init)
    agent.update_model()
    print(agent.genome)
    genepool = GenePool()
    if config.clear_store:
        genepool.clear()
    num_episodes = 0
    while True:
        print('Starting episode {}'.format(num_episodes))
        best_genomes = genepool.top_n(config.num_best)
        if config.best:
            # The original condition `A and B or C` parsed as `(A and B) or C`
            # and fell through to `best_genomes[0]` on an empty pool. Keep the
            # exception type but say what actually went wrong.
            if not best_genomes:
                raise IndexError(
                    'gene pool is empty: no genome to replay in best mode')
            replay_best = True
        else:
            reward, steps = run_episode(agent, environment, config)
            print('Reward {} in {} steps'.format(reward, steps))
            genepool.report_score(agent.genome, reward)
            update_agent(agent, reward, best_genomes, config)
            # Occasionally re-score the current leader.
            replay_best = bool(best_genomes) and np.random.uniform() < 0.1
        if replay_best:
            # Always the top entry; entries are (genome, <second field>) pairs.
            best_genome, _ = best_genomes[0]
            best_agent.load_genome(best_genome)
            best_agent.update_model()
            best_reward, _ = run_episode(best_agent, environment, config)
            if not config.best:
                genepool.report_score(best_agent.genome, best_reward)

        num_episodes += 1
    def run(self):
        """Build the environment, model and agent, then exhibit or optimize.

        Every option is read from ``self.config``. The original mixed
        ``self.config`` with a bare module-level ``config``, which only works
        when such a global happens to exist in the running process.

        When ``self.config.exhibition`` is set, ``self.exhibition`` is called
        with the agent, environment and gene pool. Otherwise an ``Optimizer``
        is created and run with a callback that evaluates a single genome
        through ``self.run_episode``.

        Raises:
            KeyError: If ``self.config.model`` is neither ``'mlp'`` nor
                ``'cnn'``.
        """
        config = self.config
        genepool = GenePool(redis_params=config.redis_params)
        environment = gym.make(config.env)

        # NOTE(review): the input shape and base filter count are hard-coded
        # here rather than derived from the environment -- confirm they match
        # what run_episode feeds the model.
        state_shape = (1, 100, 100)
        num_hidden = config.num_hidden
        num_actions = environment.action_space.n
        base_filters = 16
        model_class = dict(mlp=MLP, cnn=CNN)[config.model]
        model = model_class(state_shape, base_filters, num_hidden, num_actions)
        agent = Agent(model)
        agent.randomize(config.gene_weight_ratio, config.freq_weight_ratio,
                        config.v_init)
        agent.update_model()
        print(agent.summary())

        if config.exhibition:
            self.exhibition(agent, environment, genepool)
        else:

            def evaluate_genome(genome):
                # Callback handed to the optimizer: score one genome with the
                # agent and environment built above.
                return self.run_episode(agent, genome, environment)

            worker = Optimizer(config, model, agent, genepool)
            worker.run(evaluate_genome)
Beispiel #3
0
def main(config):
    """Evolve a single generator agent indefinitely; never returns.

    Wraps the module-level ``netG`` in an ``Agent``, randomizes it from the
    ``gene_weight_ratio``, ``freq_weight_ratio`` and ``v_init`` options, and
    opens the gene pool stored under the key ``'g_genes_vggmse'`` (cleared
    first when ``config.clear_store`` is set). Each iteration runs one
    generator episode, prints the reward and passes it to ``update_agent``.

    Args:
        config: Parsed options; also forwarded to ``run_generator_episode``
            and ``update_agent``.
    """
    generator = Agent(netG, cuda=config.cuda)
    generator.randomize(
        config.gene_weight_ratio,
        config.freq_weight_ratio,
        config.v_init,
    )
    generator.update_model()

    gene_store = GenePool(key='g_genes_vggmse')
    if config.clear_store:
        gene_store.clear()

    episodes_done = 0
    while True:
        print('Starting generator episode')
        score = run_generator_episode(
            generator, vgg_features, dataloader, config)
        print('Reward {}'.format(score))
        update_agent(generator, score, gene_store, config)
        episodes_done = episodes_done + 1
if __name__ == '__main__':
    import argparse
    import multiprocessing

    # Options owned by this script, as (flag, add_argument keyword) pairs in
    # declaration order. The optimizer registers its own options afterwards.
    cli_options = (
        ('--env', dict(default='SpaceInvaders-v0')),
        ('--render', dict(action='store_true')),
        ('--num-hidden', dict(type=int, default=64)),
        ('--exhibition', dict(action='store_true')),
        ('--random-start', dict(type=int, default=30)),
        ('--model', dict(default='mlp')),
        ('--base-agent-id', dict(type=int, default=0)),
    )
    argparser = argparse.ArgumentParser()
    for flag, settings in cli_options:
        argparser.add_argument(flag, **settings)
    Optimizer.add_config_to_parser(argparser)
    config = argparser.parse_args()

    # clear_store, redis_params and num_agents are not declared above;
    # presumably Optimizer.add_config_to_parser adds them -- verify there.
    if config.clear_store:
        genepool = GenePool(redis_params=config.redis_params)
        genepool.clear()

    if not config.exhibition:
        # Training: one process per agent, all running main(); wait for all.
        workers = [
            multiprocessing.Process(target=main, args=(config, ))
            for _ in range(config.num_agents)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
    else:
        # Exhibition runs once, in this process.
        main(config)
Beispiel #5
0
    # Command-line options as (flag, add_argument keyword) pairs, registered
    # in this order.
    # NOTE(review): --v-sigma is parsed with list_of(float) but defaults to a
    # bare float, unlike --v-init whose default is a tuple -- confirm that
    # consumers accept a scalar.
    option_table = (
        ('--env', dict(default='Pong-ram-v0')),
        ('--min-genepool', dict(type=int, default=2)),
        ('--num-best', dict(type=int, default=20)),
        ('--render', dict(action='store_true')),
        ('--clear-store', dict(action='store_true')),
        ('--gene-weight-ratio', dict(type=float, default=0.005)),
        ('--freq-weight-ratio', dict(type=float, default=1.)),
        ('--i-sigma', dict(type=float, default=1.)),
        ('--v-sigma', dict(type=list_of(float), default=1.)),
        ('--v-init', dict(type=list_of(float), default=(-1., 1.))),
        ('--num-hidden', dict(type=int, default=32)),
        ('--best', dict(action='store_true')),
        ('--num-agents', dict(type=int, default=10)),
    )
    argparser = argparse.ArgumentParser()
    for flag, settings in option_table:
        argparser.add_argument(flag, **settings)
    config = argparser.parse_args()

    # The pool object is always created; it is emptied only on request.
    genepool = GenePool()
    if config.clear_store:
        genepool.clear()

    if not config.best:
        # Training: one process per agent, all running main(); wait for all.
        workers = [
            multiprocessing.Process(target=main, args=(config, ))
            for _ in range(config.num_agents)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
    else:
        # Best mode runs once, in this process.
        main(config)
Beispiel #6
0
def main(config):
    """Co-evolve a discriminator and a generator indefinitely; never returns.

    Four agents are created: a working and a "best" agent for each of the
    discriminator and the generator. All are randomized from the
    ``gene_weight_ratio``, ``freq_weight_ratio`` and ``v_init`` options. Each
    side has its own gene pool (keys ``'d_genes'`` and ``'g_genes'``), cleared
    first when ``config.clear_store`` is set.

    One iteration scores the working discriminator against the best
    generator, then the working generator against the best discriminator.
    After each, ``update_agent`` is called; if it returns any genomes, a
    randomly chosen one is loaded into that side's "best" agent, re-scored,
    and the score is reported to that side's pool. When the episode count is
    a multiple of ``config.save_every`` and ``config.render`` is set, a sample
    image from the best generator is saved.

    Args:
        config: Parsed options; also forwarded to the episode runners and to
            ``update_agent``.
    """

    def new_agent(model_factory):
        # Every agent gets its own freshly built model.
        return Agent(model_factory(), cuda=config.cuda)

    agent_d = new_agent(make_model_d)
    agent_g = new_agent(make_model_g)
    best_agent_d = new_agent(make_model_d)
    best_agent_g = new_agent(make_model_g)
    for contender in (agent_d, agent_g, best_agent_d, best_agent_g):
        contender.randomize(config.gene_weight_ratio,
                            config.freq_weight_ratio, config.v_init)
        contender.update_model()

    genepool_d = GenePool(key='d_genes')
    genepool_g = GenePool(key='g_genes')
    if config.clear_store:
        for pool in (genepool_d, genepool_g):
            pool.clear()

    def rescore_champion(champion, candidates, pool, play_episode):
        # Load a random candidate genome into `champion`, play one episode
        # and report the fresh score, so pool entries keep being re-scored.
        if not candidates:
            return
        champion.load_genome(random.choice(candidates)[0])
        champion.update_model()
        fresh_score = play_episode()
        pool.report_score(champion.genome, fresh_score)

    def best_d_episode():
        return run_discriminator_episode(best_agent_d, best_agent_g,
                                         dataloader, config)

    def best_g_episode():
        return run_generator_episode(best_agent_d, best_agent_g, dataloader,
                                     config)

    num_episodes = 0
    while True:
        print('Starting discriminator episode')
        score_d = run_discriminator_episode(agent_d, best_agent_g,
                                            dataloader, config)
        print('Reward {}'.format(score_d))
        candidates_d = update_agent(agent_d, score_d, genepool_d, config)
        rescore_champion(best_agent_d, candidates_d, genepool_d,
                         best_d_episode)

        print('Starting generator episode')
        score_g = run_generator_episode(best_agent_d, agent_g, dataloader,
                                        config)
        print('Reward {}'.format(score_g))
        candidates_g = update_agent(agent_g, score_g, genepool_g, config)
        rescore_champion(best_agent_g, candidates_g, genepool_g,
                         best_g_episode)

        num_episodes += 1
        if num_episodes % config.save_every == 0:
            if config.render:
                print('saving')
                fake = best_agent_g(fixed_noise)
                # NOTE(review): `opt` is not defined in this function and the
                # file name has no episode number, so each save targets the
                # same path -- confirm both are intended.
                sample_path = '{}/fake_samples_epoch_.png'.format(opt.outf)
                vutils.save_image(fake.data, sample_path, normalize=True)