def main(config):
    """Run the evolutionary training loop on a gym environment, forever.

    Builds two MLP-backed ``Agent`` objects sized from the environment's
    observation and action spaces: a working agent that is randomized and
    trained, and a second agent used only to replay the top-ranked genome.
    Each iteration fetches the top genomes from the ``GenePool``, runs one
    episode with the working agent, reports its score and calls
    ``update_agent``.  With probability 0.1 (or always when ``config.best``
    is set) the top genome is loaded into the second agent and run as well.

    When ``config.best`` is set the working agent is never run and no scores
    are reported; only the top genome is replayed.

    Args:
        config: Parsed options.  Reads ``env``, ``num_hidden``,
            ``gene_weight_ratio``, ``freq_weight_ratio``, ``v_init``,
            ``clear_store``, ``num_best`` and ``best``; it is also passed
            through to ``run_episode`` and ``update_agent``.

    Raises:
        IndexError: If ``config.best`` is set while the gene pool has no
            genomes to replay.

    The loop has no exit condition, so this function does not return.
    """
    environment = gym.make(config.env)
    state_shape = environment.observation_space.low.shape
    num_hidden = config.num_hidden
    num_actions = environment.action_space.n

    agent = Agent(MLP(state_shape[0], num_hidden, num_actions))
    best_agent = Agent(MLP(state_shape[0], num_hidden, num_actions))
    agent.randomize(config.gene_weight_ratio, config.freq_weight_ratio,
                    config.v_init)
    agent.update_model()
    print(agent.genome)

    genepool = GenePool()
    if config.clear_store:
        genepool.clear()

    num_episodes = 0
    while True:
        print('Starting episode {}'.format(num_episodes))
        best_genomes = genepool.top_n(config.num_best)

        if not config.best:
            reward, steps = run_episode(agent, environment, config)
            print('Reward {} in {} steps'.format(reward, steps))
            genepool.report_score(agent.genome, reward)
            update_agent(agent, reward, best_genomes, config)

        # The original condition was
        #   best_genomes and np.random.uniform() < 0.1 or config.best
        # which parses as (A and B) or C, so config.best skipped the
        # emptiness guard and best_genomes[0] failed with a bare IndexError.
        # Keep the exception type but say what actually went wrong.
        if config.best and not best_genomes:
            raise IndexError(
                'config.best is set but the gene pool returned no genomes')

        # np.random.uniform() is still drawn whenever the pool is non-empty,
        # exactly as before, so the random stream is unchanged.
        if best_genomes and (np.random.uniform() < 0.1 or config.best):
            # Always replay the top-ranked entry; a commented-out alternative
            # in the original picked one with random.choice(best_genomes).
            best_genome, _ = best_genomes[0]
            best_agent.load_genome(best_genome)
            best_agent.update_model()
            best_reward, _ = run_episode(best_agent, environment, config)
            if not config.best:
                genepool.report_score(best_agent.genome, best_reward)

        num_episodes += 1
def main(config):
    """Alternate discriminator and generator evolution episodes, forever.

    Four agents are created: a working and a "best" agent for each of the
    discriminator and generator models.  Every iteration runs a
    discriminator episode (working discriminator vs. best generator) and a
    generator episode (best discriminator vs. working generator), each
    followed by ``update_agent``.  Whenever ``update_agent`` returns a
    non-empty collection, one entry is picked at random, loaded into the
    matching "best" agent, re-run, and its score reported to that agent's
    gene pool.

    ``dataloader``, ``fixed_noise``, ``opt`` and ``vutils`` are not defined
    here and are taken from the enclosing module scope.

    Args:
        config: Parsed options.  Reads ``cuda``, ``gene_weight_ratio``,
            ``freq_weight_ratio``, ``v_init``, ``clear_store``,
            ``save_every`` and ``render``; also forwarded to the episode
            runners and to ``update_agent``.

    The loop has no exit condition, so this function does not return.
    """
    use_cuda = config.cuda

    # Build all four agents first, then randomize them in the same order.
    disc = Agent(make_model_d(), cuda=use_cuda)
    gen = Agent(make_model_g(), cuda=use_cuda)
    best_disc = Agent(make_model_d(), cuda=use_cuda)
    best_gen = Agent(make_model_g(), cuda=use_cuda)
    for member in (disc, gen, best_disc, best_gen):
        member.randomize(config.gene_weight_ratio, config.freq_weight_ratio,
                         config.v_init)
        member.update_model()

    disc_pool = GenePool(key='d_genes')
    gen_pool = GenePool(key='g_genes')
    if config.clear_store:
        for pool in (disc_pool, gen_pool):
            pool.clear()

    episodes_done = 0
    while True:
        # --- discriminator half of the iteration ---
        print('Starting discriminator episode')
        disc_reward = run_discriminator_episode(disc, best_gen, dataloader,
                                                config)
        print('Reward {}'.format(disc_reward))
        top_disc = update_agent(disc, disc_reward, disc_pool, config)
        if top_disc:
            # Re-score a randomly chosen top genome so its ranking stays
            # current ("make sure the best is still the best").
            chosen = random.choice(top_disc)
            best_disc.load_genome(chosen[0])
            best_disc.update_model()
            disc_reward = run_discriminator_episode(best_disc, best_gen,
                                                    dataloader, config)
            disc_pool.report_score(best_disc.genome, disc_reward)

        # --- generator half of the iteration ---
        print('Starting generator episode')
        gen_reward = run_generator_episode(best_disc, gen, dataloader, config)
        print('Reward {}'.format(gen_reward))
        top_gen = update_agent(gen, gen_reward, gen_pool, config)
        if top_gen:
            chosen = random.choice(top_gen)
            best_gen.load_genome(chosen[0])
            best_gen.update_model()
            gen_reward = run_generator_episode(best_disc, best_gen,
                                               dataloader, config)
            gen_pool.report_score(best_gen.genome, gen_reward)

        episodes_done += 1

        # Periodic sample dump.  Saving of real samples is commented out in
        # the original, so only the generated output is written.  The path
        # carries no episode number, so the same path is written each time.
        if episodes_done % config.save_every == 0 and config.render:
            print('saving')
            fake = best_gen(fixed_noise)
            vutils.save_image(fake.data,
                              '{}/fake_samples_epoch_.png'.format(opt.outf),
                              normalize=True)
def main(config):
    """Evolve a single generator agent in an endless episode loop.

    Wraps ``netG`` in an ``Agent``, randomizes it, and then repeatedly runs
    a generator episode and passes the resulting reward to ``update_agent``
    together with the ``'g_genes_vggmse'`` gene pool.

    ``netG``, ``vgg_features`` and ``dataloader`` are not defined here and
    are taken from the enclosing module scope.

    Args:
        config: Parsed options.  Reads ``cuda``, ``gene_weight_ratio``,
            ``freq_weight_ratio``, ``v_init`` and ``clear_store``; also
            forwarded to ``run_generator_episode`` and ``update_agent``.

    The loop has no exit condition, so this function does not return.
    """
    generator = Agent(netG, cuda=config.cuda)
    generator.randomize(config.gene_weight_ratio, config.freq_weight_ratio,
                        config.v_init)
    generator.update_model()

    pool = GenePool(key='g_genes_vggmse')
    if config.clear_store:
        pool.clear()

    # Counted but not otherwise used inside this function.
    episode_count = 0
    while True:
        print('Starting generator episode')
        score = run_generator_episode(generator, vgg_features, dataloader,
                                      config)
        print('Reward {}'.format(score))
        update_agent(generator, score, pool, config)
        episode_count += 1
if __name__ == '__main__':
    # Script entry point: parse options, optionally clear the gene store,
    # then run main() either once in this process (--exhibition) or in
    # config.num_agents parallel worker processes.
    import argparse
    import multiprocessing

    parser = argparse.ArgumentParser()
    # Options declared by this script, registered in their original order.
    for flag, options in (
            ('--env', {'default': 'SpaceInvaders-v0'}),
            ('--render', {'action': 'store_true'}),
            ('--num-hidden', {'type': int, 'default': 64}),
            ('--exhibition', {'action': 'store_true'}),
            ('--random-start', {'type': int, 'default': 30}),
            ('--model', {'default': 'mlp'}),
            ('--base-agent-id', {'type': int, 'default': 0}),
    ):
        parser.add_argument(flag, **options)
    # clear_store, redis_params and num_agents are read below but not
    # declared above; presumably this call registers them -- TODO confirm.
    Optimizer.add_config_to_parser(parser)
    config = parser.parse_args()

    # Clear the store once, before any agent process exists.
    # NOTE(review): if main() also clears when config.clear_store is set,
    # every worker would wipe the pool again after start-up -- confirm.
    if config.clear_store:
        genepool = GenePool(redis_params=config.redis_params)
        genepool.clear()

    if config.exhibition:
        # Single run in the current process.
        main(config)
    else:
        # One process per agent; start them all, then wait for all.
        workers = [
            multiprocessing.Process(target=main, args=(config,))
            for _ in range(config.num_agents)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()