def run(self, agent_id):
    """Worker entry point: build env/model/agent, then either run an
    exhibition or hand an episode-runner closure to an Optimizer worker.

    Fix: the original mixed bare ``config`` with ``self.config``
    (``gym.make(config.env)``, ``Optimizer(config, ...)``) — use
    ``self.config`` consistently so the method does not depend on an
    unrelated module-level global.
    """
    scoreboard = AgentScoreboard(self.config.num_agents,
                                 redis_params=self.config.redis_params)
    environment = gym.make(self.config.env)
    # Fixed preprocessed frame shape (1 channel, 100x100) — assumes an env
    # wrapper produces this; TODO confirm against the observation pipeline.
    state_shape = (1, 100, 100)
    num_hidden = self.config.num_hidden
    num_actions = environment.action_space.n
    base_filters = 16
    model_class = dict(mlp=MLP, cnn=CNN)[self.config.model]
    model = model_class(state_shape, base_filters, num_hidden, num_actions)
    agent = Agent(model)

    if self.config.exhibition:
        self.exhibition(agent, environment, scoreboard)
    else:
        def run_episode_with_args(agent, env):
            # Bind agent/env so the optimizer only has to supply a genome.
            def f(genome):
                return self.run_episode(agent, genome, env)
            return f

        worker = Optimizer(self.config, model, agent_id, scoreboard)
        worker.run(run_episode_with_args(agent, environment))
def run(self):
    """Worker entry point: build env/model, randomize the agent's genome,
    then either run an exhibition or hand an episode-runner closure to an
    Optimizer worker backed by the shared gene pool.

    Fix: the original mixed bare ``config`` with ``self.config``
    (``gym.make(config.env)``, ``agent.randomize(config....)``,
    ``Optimizer(config, ...)``) — use ``self.config`` consistently so the
    method does not depend on an unrelated module-level global.
    """
    genepool = GenePool(redis_params=self.config.redis_params)
    environment = gym.make(self.config.env)
    # Fixed preprocessed frame shape (1 channel, 100x100) — assumes an env
    # wrapper produces this; TODO confirm against the observation pipeline.
    state_shape = (1, 100, 100)
    num_hidden = self.config.num_hidden
    num_actions = environment.action_space.n
    base_filters = 16
    model_class = dict(mlp=MLP, cnn=CNN)[self.config.model]
    model = model_class(state_shape, base_filters, num_hidden, num_actions)

    agent = Agent(model)
    agent.randomize(self.config.gene_weight_ratio,
                    self.config.freq_weight_ratio,
                    self.config.v_init)
    agent.update_model()
    print(agent.summary())

    if self.config.exhibition:
        self.exhibition(agent, environment, genepool)
    else:
        def run_episode_with_args(agent, env):
            # Bind agent/env so the optimizer only has to supply a genome.
            def f(genome):
                return self.run_episode(agent, genome, env)
            return f

        worker = Optimizer(self.config, model, agent, genepool)
        worker.run(run_episode_with_args(agent, environment))
def main(config):
    """Endless generator-evolution loop scored by VGG-feature reward.

    Initializes a single generator agent with a random genome, then
    repeatedly runs an episode, reports the reward, and lets
    ``update_agent`` evolve the genome against the shared gene pool.
    Relies on module-level ``netG``, ``vgg_features`` and ``dataloader``.
    """
    gen_agent = Agent(netG, cuda=config.cuda)
    gen_agent.randomize(config.gene_weight_ratio,
                        config.freq_weight_ratio,
                        config.v_init)
    gen_agent.update_model()

    gene_store = GenePool(key='g_genes_vggmse')
    if config.clear_store:
        gene_store.clear()

    episode = 0
    while True:
        print('Starting generator episode')
        score = run_generator_episode(gen_agent, vgg_features, dataloader, config)
        print('Reward {}'.format(score))
        update_agent(gen_agent, score, gene_store, config)
        episode += 1
def main(config):
    """Evolve an MLP agent on a gym environment, tracking a best-agent elite.

    Each iteration optionally runs the evolving agent, then (sometimes, or
    always with ``--best``) re-evaluates the top stored genome so the gene
    pool's ranking stays honest.

    Fix: the elite-evaluation guard was
    ``if best_genomes and np.random.uniform() < 0.1 or config.best:``,
    which parses as ``(best_genomes and ...) or config.best`` — so with
    ``config.best`` set and an empty gene pool, ``best_genomes[0]`` raised
    IndexError. The guard now requires a non-empty pool in both modes.
    """
    environment = gym.make(config.env)
    state_shape = environment.observation_space.low.shape
    num_hidden = config.num_hidden
    num_actions = environment.action_space.n
    agent = Agent(MLP(state_shape[0], num_hidden, num_actions))
    best_agent = Agent(MLP(state_shape[0], num_hidden, num_actions))
    agent.randomize(config.gene_weight_ratio, config.freq_weight_ratio, config.v_init)
    agent.update_model()
    print(agent.genome)

    genepool = GenePool()
    if config.clear_store:
        genepool.clear()

    num_episodes = 0
    while True:
        print('Starting episode {}'.format(num_episodes))
        best_genomes = genepool.top_n(config.num_best)

        if not config.best:
            reward, steps = run_episode(agent, environment, config)
            print('Reward {} in {} steps'.format(reward, steps))
            genepool.report_score(agent.genome, reward)
            update_agent(agent, reward, best_genomes, config)

        # Re-evaluate the stored elite: always in --best mode, otherwise
        # with 10% probability — but only when the pool is non-empty.
        if best_genomes and (np.random.uniform() < 0.1 or config.best):
            best_genome, _ = best_genomes[0]  # random.choice(best_genomes)
            best_agent.load_genome(best_genome)
            best_agent.update_model()
            best_reward, steps = run_episode(best_agent, environment, config)
            if not config.best:
                genepool.report_score(best_agent.genome, best_reward)

        num_episodes += 1
def main(config):
    """Co-evolve a GAN discriminator/generator pair with alternating episodes.

    Each cycle evolves the discriminator against the frozen best generator,
    re-scores the discriminator elite, then does the mirror image for the
    generator. Periodically saves sample images from the best generator.
    Relies on module-level ``dataloader`` and ``fixed_noise``.

    Fixes: the save branch referenced ``opt.outf`` (an undefined argparse
    namespace left over from the DCGAN example) — it now uses ``config.outf``
    like the rest of the function; and the filename had a dangling
    ``epoch_`` with no placeholder, so every save overwrote the previous
    file — the episode number is now formatted into the name.
    """
    agent_d = Agent(make_model_d(), cuda=config.cuda)
    agent_g = Agent(make_model_g(), cuda=config.cuda)
    best_agent_d = Agent(make_model_d(), cuda=config.cuda)
    best_agent_g = Agent(make_model_g(), cuda=config.cuda)
    for agent in (agent_d, agent_g, best_agent_d, best_agent_g):
        agent.randomize(config.gene_weight_ratio, config.freq_weight_ratio, config.v_init)
        agent.update_model()

    genepool_d = GenePool(key='d_genes')
    genepool_g = GenePool(key='g_genes')
    if config.clear_store:
        genepool_d.clear()
        genepool_g.clear()

    num_episodes = 0
    while True:
        print('Starting discriminator episode')
        reward = run_discriminator_episode(agent_d, best_agent_g, dataloader, config)
        print('Reward {}'.format(reward))
        best_genomes_d = update_agent(agent_d, reward, genepool_d, config)
        if best_genomes_d:
            # Make sure the best is still the best: re-score a random elite.
            best_agent_d.load_genome(random.choice(best_genomes_d)[0])
            best_agent_d.update_model()
            reward = run_discriminator_episode(best_agent_d, best_agent_g, dataloader, config)
            genepool_d.report_score(best_agent_d.genome, reward)

        print('Starting generator episode')
        reward = run_generator_episode(best_agent_d, agent_g, dataloader, config)
        print('Reward {}'.format(reward))
        best_genomes_g = update_agent(agent_g, reward, genepool_g, config)
        if best_genomes_g:
            best_agent_g.load_genome(random.choice(best_genomes_g)[0])
            best_agent_g.update_model()
            reward = run_generator_episode(best_agent_d, best_agent_g, dataloader, config)
            genepool_g.report_score(best_agent_g.genome, reward)

        num_episodes += 1
        if num_episodes % config.save_every == 0 and config.render:
            print('saving')
            fake = best_agent_g(fixed_noise)
            vutils.save_image(
                fake.data,
                '{}/fake_samples_epoch_{}.png'.format(config.outf, num_episodes),
                normalize=True)