Beispiel #1
0
def main(config):
    """Run the evolutionary episode loop for a single agent; never returns.

    Creates the gym environment named by ``config.env``, a working agent with
    a randomized genome, and a second agent used only to replay the
    top-ranked genome from the gene pool. Each loop iteration:

    * unless ``config.best`` is set, runs one episode with the working agent,
      reports its score to the gene pool and hands the result to
      ``update_agent``;
    * if the gene pool returned any genomes, replays the top one either with
      probability 0.1 or always when ``config.best`` is set. The replay score
      is reported back to the pool only when ``config.best`` is not set.

    Args:
        config: Parsed options. This function reads ``env``, ``num_hidden``,
            ``gene_weight_ratio``, ``freq_weight_ratio``, ``v_init``,
            ``clear_store``, ``num_best`` and ``best``.
    """
    import time

    environment = gym.make(config.env)
    state_shape = environment.observation_space.low.shape
    num_hidden = config.num_hidden
    num_actions = environment.action_space.n
    agent = Agent(MLP(state_shape[0], num_hidden, num_actions))
    best_agent = Agent(MLP(state_shape[0], num_hidden, num_actions))
    agent.randomize(config.gene_weight_ratio, config.freq_weight_ratio,
                    config.v_init)
    agent.update_model()
    print(agent.genome)
    genepool = GenePool()
    if config.clear_store:
        genepool.clear()
    num_episodes = 0
    while True:
        print('Starting episode {}'.format(num_episodes))
        best_genomes = genepool.top_n(config.num_best)
        if not config.best:
            reward, steps = run_episode(agent, environment, config)
            print('Reward {} in {} steps'.format(reward, steps))
            genepool.report_score(agent.genome, reward)
            update_agent(agent, reward, best_genomes, config)
        # The previous condition `a and b or c` parsed as `(a and b) or c`,
        # so with config.best set and an empty pool `best_genomes[0]` raised
        # IndexError. The replay is now always guarded by a non-empty pool.
        if best_genomes and (np.random.uniform() < 0.1 or config.best):
            best_genome, _ = best_genomes[0]  # random.choice(best_genomes)
            best_agent.load_genome(best_genome)
            best_agent.update_model()
            best_reward, _ = run_episode(best_agent, environment, config)
            if not config.best:
                genepool.report_score(best_agent.genome, best_reward)
        elif config.best:
            # Best-only mode with nothing to replay: pause briefly instead of
            # spinning on the gene pool, then query it again.
            print('No genomes available yet, waiting')
            time.sleep(1)

        num_episodes += 1
Beispiel #2
0
def main(config):
    """Alternate discriminator and generator episodes forever.

    Four agents are created: one learner and one "best" agent for each of the
    discriminator and generator models. All four get a randomized genome.
    Each loop iteration runs a discriminator episode (learner discriminator
    against the best generator) followed by a generator episode (learner
    generator against the best discriminator). After each episode the learner
    is passed to ``update_agent``; if that returns any genomes, one is picked
    at random, loaded into the matching best agent, re-evaluated and its score
    reported to the corresponding gene pool.

    Every ``config.save_every`` iterations, when ``config.render`` is set, a
    batch generated from ``fixed_noise`` is written as an image.

    Args:
        config: Parsed options. This function reads ``cuda``,
            ``gene_weight_ratio``, ``freq_weight_ratio``, ``v_init``,
            ``clear_store``, ``save_every`` and ``render``.
    """

    def promote(champion, candidates):
        # Load a randomly chosen candidate genome into the champion agent.
        champion.load_genome(random.choice(candidates)[0])
        champion.update_model()

    learner_d = Agent(make_model_d(), cuda=config.cuda)
    learner_g = Agent(make_model_g(), cuda=config.cuda)
    champion_d = Agent(make_model_d(), cuda=config.cuda)
    champion_g = Agent(make_model_g(), cuda=config.cuda)
    for member in (learner_d, learner_g, champion_d, champion_g):
        member.randomize(config.gene_weight_ratio,
                         config.freq_weight_ratio,
                         config.v_init)
        member.update_model()

    pool_d = GenePool(key='d_genes')
    pool_g = GenePool(key='g_genes')
    if config.clear_store:
        pool_d.clear()
        pool_g.clear()

    num_episodes = 0
    while True:
        # --- discriminator turn ---
        print('Starting discriminator episode')
        score_d = run_discriminator_episode(learner_d, champion_g,
                                            dataloader, config)
        print('Reward {}'.format(score_d))
        elite_d = update_agent(learner_d, score_d, pool_d, config)
        if elite_d:
            # Re-score the chosen genome so its pool entry stays current.
            promote(champion_d, elite_d)
            rescored_d = run_discriminator_episode(champion_d, champion_g,
                                                   dataloader, config)
            pool_d.report_score(champion_d.genome, rescored_d)

        # --- generator turn ---
        print('Starting generator episode')
        score_g = run_generator_episode(champion_d, learner_g,
                                        dataloader, config)
        print('Reward {}'.format(score_g))
        elite_g = update_agent(learner_g, score_g, pool_g, config)
        if elite_g:
            promote(champion_g, elite_g)
            rescored_g = run_generator_episode(champion_d, champion_g,
                                               dataloader, config)
            pool_g.report_score(champion_g.genome, rescored_g)

        num_episodes += 1
        if num_episodes % config.save_every != 0 or not config.render:
            continue

        print('saving')
        fake = champion_g(fixed_noise)
        # The target path only depends on opt.outf, so it is the same on
        # every save.
        target = '{}/fake_samples_epoch_.png'.format(opt.outf)
        vutils.save_image(fake.data, target, normalize=True)
Beispiel #3
0
def main(config):
    """Run generator episodes in an endless loop.

    Wraps ``netG`` in an agent with a randomized genome, then repeatedly runs
    a generator episode against ``vgg_features`` and passes the resulting
    reward, together with the gene pool, to ``update_agent``.

    Args:
        config: Parsed options. This function reads ``cuda``,
            ``gene_weight_ratio``, ``freq_weight_ratio``, ``v_init`` and
            ``clear_store``.
    """
    generator = Agent(netG, cuda=config.cuda)
    generator.randomize(
        config.gene_weight_ratio,
        config.freq_weight_ratio,
        config.v_init,
    )
    generator.update_model()

    pool = GenePool(key='g_genes_vggmse')
    if config.clear_store:
        pool.clear()

    episodes_done = 0
    while True:
        print('Starting generator episode')
        score = run_generator_episode(
            generator, vgg_features, dataloader, config)
        print('Reward {}'.format(score))
        update_agent(generator, score, pool, config)
        episodes_done += 1
if __name__ == '__main__':
    import argparse
    import multiprocessing

    # Script-specific command-line options, registered in this order; the
    # Optimizer contributes its own options afterwards.
    argparser = argparse.ArgumentParser()
    for flag, options in (
        ('--env', {'default': 'SpaceInvaders-v0'}),
        ('--render', {'action': 'store_true'}),
        ('--num-hidden', {'type': int, 'default': 64}),
        ('--exhibition', {'action': 'store_true'}),
        ('--random-start', {'type': int, 'default': 30}),
        ('--model', {'default': 'mlp'}),
        ('--base-agent-id', {'type': int, 'default': 0}),
    ):
        argparser.add_argument(flag, **options)
    Optimizer.add_config_to_parser(argparser)
    config = argparser.parse_args()

    # Optionally clear the gene pool before any agent starts.
    if config.clear_store:
        genepool = GenePool(redis_params=config.redis_params)
        genepool.clear()

    if not config.exhibition:
        # One process per agent, each running main(config); wait for all.
        workers = []
        for _ in range(config.num_agents):
            worker = multiprocessing.Process(target=main, args=(config, ))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
    else:
        # Exhibition mode runs a single agent in the current process.
        main(config)