Code Example #1
    def run(self, agent_id):
        scoreboard = AgentScoreboard(self.config.num_agents,
                                     redis_params=self.config.redis_params)
        environment = gym.make(self.config.env)

        state_shape = (1, 100, 100)
        num_hidden = self.config.num_hidden
        num_actions = environment.action_space.n
        base_filters = 16
        model_class = dict(mlp=MLP, cnn=CNN)[self.config.model]
        model = model_class(state_shape, base_filters, num_hidden, num_actions)
        agent = Agent(model)

        if self.config.exhibition:
            self.exhibition(agent, environment, scoreboard)
        else:

            def run_episode_with_args(agent, env):
                def f(genome):
                    return self.run_episode(agent, genome, env)

                return f

            worker = Optimizer(self.config, model, agent_id, scoreboard)
            worker.run(run_episode_with_args(agent, environment))
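
The run(self, agent_id) method above is meant to be invoked once per worker, all of them sharing the Redis-backed AgentScoreboard sized by num_agents. A minimal launcher sketch, assuming a `job` object that exposes that run(agent_id) method (the launch helper itself is not part of pytorch-cns):

import multiprocessing as mp

def launch(job, num_agents):
    # `job` is assumed to be the instance exposing the run(agent_id) method
    # shown above; each process gets its own agent_id and reports to the
    # shared Redis-backed scoreboard.
    processes = [mp.Process(target=job.run, args=(agent_id,))
                 for agent_id in range(num_agents)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()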
Code Example #2
    def run(self):
        genepool = GenePool(redis_params=self.config.redis_params)
        environment = gym.make(self.config.env)

        state_shape = (1, 100, 100)
        num_hidden = self.config.num_hidden
        num_actions = environment.action_space.n
        base_filters = 16
        model_class = dict(mlp=MLP, cnn=CNN)[self.config.model]
        model = model_class(state_shape, base_filters, num_hidden, num_actions)
        agent = Agent(model)
        agent.randomize(self.config.gene_weight_ratio, self.config.freq_weight_ratio,
                        self.config.v_init)
        agent.update_model()
        print(agent.summary())

        if self.config.exhibition:
            self.exhibition(agent, environment, genepool)
        else:

            def run_episode_with_args(agent, env):
                def f(genome):
                    return self.run_episode(agent, genome, env)

                return f

            worker = Optimizer(self.config, model, agent, genepool)
            worker.run(run_episode_with_args(agent, environment))
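
Both examples above delegate the actual rollout to self.run_episode(agent, genome, env), which is not shown here. A hypothetical sketch of such an episode function, assuming the pre-0.26 Gym step API and that an Agent is callable on a batch of observations (as in Code Example #5 below):

import numpy as np
import torch

def run_episode(agent, genome, env, max_steps=10_000):
    # Hypothetical sketch, not the project's implementation: decode the
    # candidate genome into model weights, play one greedy episode, and
    # return the total reward as the genome's fitness.
    agent.load_genome(genome)
    agent.update_model()
    observation = env.reset()          # classic Gym API: reset() -> observation
    total_reward = 0.0
    for _ in range(max_steps):
        x = torch.from_numpy(np.asarray(observation, dtype=np.float32)).unsqueeze(0)
        action = int(agent(x).argmax())   # assumes Agent forwards to its wrapped model
        observation, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward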
Code Example #3
File: vggmse.py  Project: awentzonline/pytorch-cns
def main(config):
    agent_g = Agent(netG, cuda=config.cuda)
    agent_g.randomize(config.gene_weight_ratio, config.freq_weight_ratio,
                      config.v_init)
    agent_g.update_model()
    genepool_g = GenePool(key='g_genes_vggmse')
    if config.clear_store:
        genepool_g.clear()
    num_episodes = 0
    while True:
        print('Starting generator episode')
        reward = run_generator_episode(agent_g, vgg_features, dataloader,
                                       config)
        print('Reward {}'.format(reward))
        update_agent(agent_g, reward, genepool_g, config)
        num_episodes += 1
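
Code Example #3 (and the GAN example further below) reads batches from a module-level dataloader that is not shown. A plausible torchvision setup, where the dataset choice, image size, and batch size are assumptions:

import torch.utils.data as data
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Assumed data pipeline: 64x64 normalized RGB batches from CIFAR-10.
transform = transforms.Compose([
    transforms.Resize(64),
    transforms.CenterCrop(64),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
dataset = datasets.CIFAR10(root='./data', download=True, transform=transform)
dataloader = data.DataLoader(dataset, batch_size=64, shuffle=True, num_workers=2)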
Code Example #4
File: atari.py  Project: awentzonline/pytorch-cns
def main(config):
    environment = gym.make(config.env)
    state_shape = environment.observation_space.low.shape
    num_hidden = config.num_hidden
    num_actions = environment.action_space.n
    agent = Agent(MLP(state_shape[0], num_hidden, num_actions))
    best_agent = Agent(MLP(state_shape[0], num_hidden, num_actions))
    agent.randomize(config.gene_weight_ratio, config.freq_weight_ratio,
                    config.v_init)
    agent.update_model()
    print(agent.genome)
    genepool = GenePool()
    if config.clear_store:
        genepool.clear()
    num_episodes = 0
    while True:
        print('Starting episode {}'.format(num_episodes))
        best_genomes = genepool.top_n(config.num_best)
        if not config.best:
            reward, steps = run_episode(agent, environment, config)
            print('Reward {} in {} steps'.format(reward, steps))
            genepool.report_score(agent.genome, reward)
            update_agent(agent, reward, best_genomes, config)
        # occasionally (or always, when config.best is set) re-evaluate the current best genome
        if best_genomes and (config.best or np.random.uniform() < 0.1):
            best_genome, _ = best_genomes[0]  # random.choice(best_genomes)
            best_agent.load_genome(best_genome)
            best_agent.update_model()
            best_reward, steps = run_episode(best_agent, environment, config)
            if not config.best:
                genepool.report_score(best_agent.genome, best_reward)

        num_episodes += 1
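
main(config) reads a number of attributes off config. A minimal argparse front end covering the fields this example touches (the default values, including the Pong-ram-v0 environment, are assumptions):

import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', default='Pong-ram-v0')
    parser.add_argument('--num-hidden', type=int, default=64)
    parser.add_argument('--gene-weight-ratio', type=float, default=0.1)
    parser.add_argument('--freq-weight-ratio', type=float, default=0.1)
    parser.add_argument('--v-init', type=float, default=0.05)
    parser.add_argument('--num-best', type=int, default=5)
    parser.add_argument('--best', action='store_true',
                        help='only replay the current best genome')
    parser.add_argument('--clear-store', action='store_true',
                        help='wipe the Redis-backed GenePool before starting')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())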
Code Example #5
def main(config):
    agent_d = Agent(make_model_d(), cuda=config.cuda)
    agent_g = Agent(make_model_g(), cuda=config.cuda)
    best_agent_d = Agent(make_model_d(), cuda=config.cuda)
    best_agent_g = Agent(make_model_g(), cuda=config.cuda)
    for agent in (agent_d, agent_g, best_agent_d, best_agent_g):
        agent.randomize(config.gene_weight_ratio, config.freq_weight_ratio,
                        config.v_init)
        agent.update_model()
    genepool_d = GenePool(key='d_genes')
    genepool_g = GenePool(key='g_genes')
    if config.clear_store:
        genepool_d.clear()
        genepool_g.clear()
    num_episodes = 0
    while True:
        print('Starting discriminator episode')
        reward = run_discriminator_episode(agent_d, best_agent_g, dataloader,
                                           config)
        print('Reward {}'.format(reward))
        best_genomes_d = update_agent(agent_d, reward, genepool_d, config)
        if best_genomes_d:  # make sure the best is still the best
            best_agent_d.load_genome(random.choice(best_genomes_d)[0])
            best_agent_d.update_model()
            reward = run_discriminator_episode(best_agent_d, best_agent_g,
                                               dataloader, config)
            genepool_d.report_score(best_agent_d.genome, reward)
        print('Starting generator episode')
        reward = run_generator_episode(best_agent_d, agent_g, dataloader,
                                       config)
        print('Reward {}'.format(reward))
        best_genomes_g = update_agent(agent_g, reward, genepool_g, config)
        if best_genomes_g:
            best_agent_g.load_genome(random.choice(best_genomes_g)[0])
            best_agent_g.update_model()
            reward = run_generator_episode(best_agent_d, best_agent_g,
                                           dataloader, config)
            genepool_g.report_score(best_agent_g.genome, reward)
        num_episodes += 1
        if num_episodes % config.save_every == 0 and config.render:
            # vutils.save_image(real_cpu,
            #         '{}/real_samples.png'.format(opt.outf),
            #         normalize=True)
            print('saving')
            fake = best_agent_g(fixed_noise)
            vutils.save_image(fake.data,
                              '{}/fake_samples_epoch_{}.png'.format(config.outf, num_episodes),
                              normalize=True)
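
This example also depends on module-level make_model_d, make_model_g, and fixed_noise definitions that are not shown. A DCGAN-style sketch of what they might look like, where the layer widths, the 32x32 image size, and nz=100 are assumptions:

import torch
import torch.nn as nn

nz = 100                                  # assumed latent dimension
fixed_noise = torch.randn(64, nz, 1, 1)   # fixed latent batch used for saved samples

def make_model_g():
    # generator sketch: latent vector -> 32x32 RGB image
    return nn.Sequential(
        nn.ConvTranspose2d(nz, 256, 4, 1, 0, bias=False), nn.BatchNorm2d(256), nn.ReLU(True),
        nn.ConvTranspose2d(256, 128, 4, 2, 1, bias=False), nn.BatchNorm2d(128), nn.ReLU(True),
        nn.ConvTranspose2d(128, 64, 4, 2, 1, bias=False), nn.BatchNorm2d(64), nn.ReLU(True),
        nn.ConvTranspose2d(64, 3, 4, 2, 1, bias=False), nn.Tanh(),
    )

def make_model_d():
    # discriminator sketch: 32x32 RGB image -> single realness score
    return nn.Sequential(
        nn.Conv2d(3, 64, 4, 2, 1, bias=False), nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(64, 128, 4, 2, 1, bias=False), nn.BatchNorm2d(128), nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(128, 256, 4, 2, 1, bias=False), nn.BatchNorm2d(256), nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(256, 1, 4, 1, 0, bias=False), nn.Sigmoid(),
    )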