def main(params) -> None:
    env = make_env(params.env_name, params.policy_type, params.max_episode_steps, params.env_obs_space_name)
    env.set_file_name("{}_{}".format(params.gradients[0], params.env_name))

    simulation = Simulation(env, params.nb_trajs, params.update_threshold, params.nb_updates, params.batch_size,
                            params.print_interval)
    simulation.rescale_reward = lambda reward: reward / 10

    policy_loss_file, critic_loss_file = set_files(params.gradients[0], params.env_name)

    chrono = Chrono()

    for j in range(params.nb_repet):
        env.reinit()
        memory = ReplayBuffer()

        # Initialise the policy/actor
        policy = PolicyNet(params.lr_actor, params.init_alpha, params.lr_alpha, params.target_entropy_alpha)
        pw = PolicyWrapper(policy, params.policy_type, params.env_name, params.team_name, params.max_episode_steps)
        pw.duration_flag = False
        # Initialise the critics
        critic = DoubleQNet(params.lr_critic, params.gamma, params.tau)

        plot_policy(policy, env, True, params.env_name, params.study_name, '_ante_', j, plot=False)

        simulation.train(memory, pw, critic, policy_loss_file, critic_loss_file)

        plot_policy(policy, env, True, params.env_name, params.study_name, '_post_', j, plot=False)
        plot_critic(env, params.env_name, critic.q1, policy, params.study_name, '_q1_post_', j)
        plot_critic(env, params.env_name, critic.q2, policy, params.study_name, '_q2_post_', j)
        # save the two critics under distinct names so q2 does not overwrite q1
        critic.q1.save_model('data/critics/{}#{}#SAC{}_q1.pt'.format(params.env_name, params.team_name, str(j)))
        critic.q2.save_model('data/critics/{}#{}#SAC{}_q2.pt'.format(params.env_name, params.team_name, str(j)))

    simulation.env.close()
    chrono.stop()
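
A minimal sketch of the `params` namespace that main() expects, inferred from the attribute accesses above; every field name comes from the code, but the values (and the use of SimpleNamespace) are assumptions, not the project's actual argument parser.

# Hypothetical params object for main(); values are illustrative only.
from types import SimpleNamespace

params = SimpleNamespace(
    env_name='Pendulum-v0', env_obs_space_name=None,
    policy_type='squashedGaussian', max_episode_steps=200,
    gradients=['sac'], team_name='myteam', study_name='sac_study',
    nb_trajs=20, update_threshold=1000, nb_updates=1, batch_size=64,
    print_interval=10, nb_repet=5,
    lr_actor=3e-4, lr_critic=3e-4, lr_alpha=3e-4,
    init_alpha=0.2, target_entropy_alpha=-1.0, gamma=0.99, tau=0.005,
)
# main(params)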
Example #2
def study_cem(params, starting_pol=None) -> None:
    """
    Start a study of CEM algorithms
    :param params: the parameters of the study
    :param starting_pol: initial policy
    :return: nothing
    """
    assert params.policy_type in ['squashedGaussian', 'normal',
                                  'beta'], 'unsupported policy type'
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    if params.nb_trajs_cem is not None:
        params.nb_trajs = params.nb_trajs_cem
    simu = make_simu_from_params(params)
    for i in range(1):  # len(study); only the first gradient entry is used here
        simu.env.set_file_name('cem' + study[i] + '_' + simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 32, 64, 1)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 32, 64, 1)
            elif params.policy_type == "beta":
                policy = BetaPolicy(simu.obs_size, 32, 64, 1)
            if starting_pol is not None:
                policy.set_weights(starting_pol[j])
            pw = PolicyWrapper(policy, j, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            # plot_policy(policy, simu.env, True, simu.env_name, study[i], '_ante_', j, plot=False)
            simu.train_cem(pw, params, policy)
            # plot_policy(policy, simu.env, True, simu.env_name, study[i], '_post_', j, plot=False)
    chrono.stop()
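
For reference, the core loop that simu.train_cem drives looks roughly like this; a self-contained sketch of the cross-entropy method on a toy objective, not this codebase's implementation. All names in it are illustrative.

# Minimal cross-entropy method (CEM): sample a population from a Gaussian,
# keep the elites, refit the Gaussian on them.
import numpy as np

def cem(objective, dim, pop_size=50, elite_frac=0.2, iters=100, sigma0=1.0):
    mean, sigma = np.zeros(dim), np.full(dim, sigma0)
    n_elite = max(1, int(pop_size * elite_frac))
    for _ in range(iters):
        pop = mean + sigma * np.random.randn(pop_size, dim)  # candidate weights
        scores = np.array([objective(w) for w in pop])
        elite = pop[np.argsort(scores)[-n_elite:]]           # best candidates
        mean, sigma = elite.mean(axis=0), elite.std(axis=0)  # refit the sampler
    return mean

# Example: maximize -||w - 3||^2; the mean converges towards w = 3.
best = cem(lambda w: -np.sum((w - 3.0) ** 2), dim=5)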
def study_regress(params) -> None:
    assert params.policy_type in ['bernoulli', 'normal', 'squashedGaussian'
                                  ], 'unsupported policy type'
    chrono = Chrono()
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 24, 36, 1,
                                         params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 24, 36, 1,
                                      params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 24, 36, 1,
                                                params.lr_actor)
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_ante_',
                        j,
                        plot=False)

            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36,
                                            1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)

            regress(simu, policy, params.policy_type, 250, params.render)
            simu.train(pw, params, policy, critic, policy_loss_file,
                       critic_loss_file, study[i])
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_post_',
                        j,
                        plot=False)
            plot_critic(simu, critic, policy, study[i], '_post_', j)
            critic.save_model('data/critics/' + params.env_name + '#' +
                              params.team_name + '#' + study[i] + str(j) +
                              '.pt')
    chrono.stop()
    def download(self,
                 site_id,
                 fields,
                 date_beg,
                 date_end,
                 filename,
                 show_chrono=True):

        url = (
            self.base_url + "?" + "UrlTemplate=" + "%26columns={" +
            ','.join(fields) + "}" + "%26sort={-" + fields[0] + "}"
            #+ "%26segment=100068961" # Ce champ apparaissait pour les requêtes CA.fr
            + "%26space={s:" + str(site_id) + "}"
            #+ "%26period={R:{D:'-1'}}" # La veille
            + "%26period={D:{start:'" + date_beg + "',end:'" + date_end +
            "'}}" + "%26max-results=%23maxresult%23" +
            "%26page-num=%23pagenum%23" + "%26sep=dot" +
            "&UserId={}".format(self.cfg['user_id']) + "&UserLanguageId=1" +
            "&Filename=mycsv" + "&Space={\"sites\":[\"" + str(site_id) +
            "\"],\"group\":\"\"}")

        headers = {
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding":
            "gzip, deflate, br",
            "Accept-Language":
            "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7",
            "Connection":
            "keep-alive",
            "Cookie":
            ("atuserid={}; ".format(self.fj(self.cfg['cookie']['atuserid'])) +
             "AnalyticsSuiteVersion=2; " + "atidvisitor={}; ".format(
                 self.fj(self.cfg['cookie']['atidvisitor'])) +
             "ATToken=Token=" + cookie.FIND_COOKIE() + ";"),
            "Host":
            "exportsproxy.atinternet-solutions.com",
            "Referer":
            "https://apps.atinternet-solutions.com/DataQuery/Designer/",
            "Upgrade-Insecure-Requests":
            "1",
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"  # Yes! on imite vraiment tout!
        }

        #print (cookie.FIND_COOKIE())
        #print (headers)

        if show_chrono:
            chn = Chrono().start()
        r = requests.get(url, headers=headers)
        if show_chrono:
            chn.stop()  # typical time: 36s to 1min12s

        with open(filename, 'wt') as fout:
            fout.write(r.text)
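
A hypothetical call of download(); the owning class, site id, field names and dates below are placeholders for illustration, not values from the original project.

# exporter = SomeAtInternetExporter(cfg)  # whatever class owns download()
# exporter.download(site_id=123456,
#                   fields=["m_visits", "m_pages"],
#                   date_beg="2018-01-01", date_end="2018-01-31",
#                   filename="export.csv")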
Example #5
def run_dyna_prog():
    # maze-like MDP definition (walls/height/width are needed by build_maze)
    walls = [7, 8, 9, 10, 21, 27, 30, 31, 32, 33, 45, 46, 47]
    height = 6
    width = 9
    m = build_maze(width, height, walls)
    # m = create_maze(10, 10, 0.2)
    m.render()
    # plot_convergence_vi_pi(m, False)
    #
    print("value iteration V")
    cpt = Chrono()
    q, _, nbIter, nbUd = value_iteration_v(m, render=0)
    print(nbIter, nbUd)
    print(len(q))
    cpt.stop()
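
The value_iteration_v call above is the codebase's own routine; as a reminder of the underlying algorithm, here is a self-contained tabular value iteration sketch on a generic (P, R, gamma) MDP, independent of the build_maze API.

# Minimal value iteration: back up state values until they stop changing.
import numpy as np

def value_iteration(P, R, gamma=0.95, eps=1e-6):
    """P: (S, A, S) transition probabilities; R: (S, A) rewards."""
    v = np.zeros(P.shape[0])
    while True:
        q = R + gamma * (P @ v)             # (S, A) action values
        v_new = q.max(axis=1)               # greedy backup
        if np.abs(v_new - v).max() < eps:
            return v_new, q.argmax(axis=1)  # values and greedy policy
        v = v_new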
Example #6
def study_cem(params):
    """
    Start a sum study of CEM
    :param params: the parameters of the study
    :return: the weights, rewards, populations, population scores and kept flags
             of the last repetition
    """

    assert params.policy_type in ['normal'], 'unsupported policy type'
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    simu.env.set_file_name(study[0] + '_' + simu.env_name)
    reward_file = None
    print("study : ", study)

    # unfix the layers
    params.fix_layers = False

    print("cem study") # cem study
    chrono_cem = Chrono()
    for j in range(params.nb_repet):
        simu.env.reinit()
        if params.policy_type=="normal":
            policy = NormalPolicy(simu.obs_size, 24, 36, 1)
        pw = PolicyWrapper(policy, params.policy_type, simu.env_name, j,params.team_name, params.max_episode_steps)
        all_weights,all_rewards,all_pops,all_pops_scores,is_kept=simu.train(pw, params, policy, False, reward_file, "", study[0], 0, True)
    chrono_cem.stop()
    return all_weights, all_rewards, all_pops, all_pops_scores, is_kept
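
The tuple returned here can seed the starting_pol argument of the study_cem(params, starting_pol=None) variant shown earlier; a sketch, assuming the policy shapes are compatible between the two runs.

# all_weights, all_rewards, all_pops, all_pops_scores, is_kept = study_cem(params)
# study_cem(params, starting_pol=all_weights)  # warm-start the other variant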
Example #7
            print(policy_file)
            env_name, algo, team_name = read_name(policy_file)

            # compute the scores once, then store them under the environment name
            scores = get_scores(args, folder, policy_file, env_name, algo, stats_path, hyperparams, n_evals)
            if env_name in self.score_dic:
                self.score_dic[env_name][scores.mean()] = [team_name, algo, scores.std()]
            else:
                self.score_dic[env_name] = {scores.mean(): [team_name, algo, scores.std()]}

    def display_hall_of_fame(self) -> None:
        """
        Display the hall of fame of all the evaluated policies
        :return: nothing
        """
        print("Hall of fame")
        for env, dico in self.score_dic.items():
            print("Environment :", env)
            for key, val in sorted(dico.items(), reverse=True):
                print("team: ", val[0], " \t \t algo:", val[1], " \t \t mean score: ", key, "std: ", val[2])


if __name__ == '__main__':
    directory = os.getcwd() + '/data/policies/'
    ev = Evaluator()
    c = Chrono()
    ev.load_policies(directory)
    ev.display_hall_of_fame()
    c.stop()
def study_pg(params) -> None:
    """
    Start a study of the policy gradient algorithms
    :param params: the parameters of the study
    :return: nothing
    """
    #### MODIF : added discrete
    assert params.policy_type in [
        'bernoulli', 'normal', 'squashedGaussian', 'discrete'
    ], 'unsupported policy type'
    ####
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 100, 200, 1,
                                         params.lr_actor)
            #### MODIF : added the discrete policy
            elif params.policy_type == "discrete":
                if isinstance(simu.env.action_space, gym.spaces.box.Box):
                    nb_actions = int(simu.env.action_space.high[0] -
                                     simu.env.action_space.low[0] + 1)
                    print(
                        "Error : environment action space is not discrete :" +
                        str(simu.env.action_space))
                else:
                    nb_actions = simu.env.action_space.n
                policy = DiscretePolicy(simu.obs_size, 24, 36, nb_actions,
                                        params.lr_actor)
            ####
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 100, 200, 1,
                                      params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 100, 200, 1,
                                                params.lr_actor)
            elif params.policy_type == "DDPG":
                policy = DDPG(simu.obs_size, 24, 36, 1, params.lr_actor)
            # policy = policy.cuda()
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_ante_',
                        j,
                        plot=False)

            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36,
                                            1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)

            simu.train(pw, params, policy, critic, policy_loss_file,
                       critic_loss_file, study[i])
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_post_',
                        j,
                        plot=False)
            if False:  # flip to True to plot the histograms after training
                if params.policy_type == "normal":
                    plot_normal_histograms(policy, j, simu.env_name)
                else:
                    plot_weight_histograms(policy, j, simu.env_name)
            plot_critic(simu, critic, policy, study[i], '_post_', j)
            critic.save_model('data/critics/' + params.env_name + '#' +
                              params.team_name + '#' + study[i] + str(j) +
                              '.pt')
    chrono.stop()
Example #9
	mydir = "/sdiles/ubuntu/sdiles"
	myext = ["nc"] 

	paths = []
	for file in lpw.getDirectoryContent(mydir):
		if lpw.checkExtention(myext, file):
			paths.append(file)


	for i in range(50, len(paths)):

		timer.start()
		md5 = getMD5(paths[i])
		timer.stop()

		print(i, ") md5 for ", pathLeaf(paths[i], ext=True), " computed in: ", timer.formatted())
		
		src = ncOpen(paths[i], mode='r')
		
		bb = ncg.getBoundingBox(src['lat'], src['lon'])
		z = ncg.getZoomLevel(src['lat'], src['lon'])

		ncs = NetcdfSlicer(src)

		timer.reset()

		timer.start()
		ndiles = ncs.createDiles("/sdiles/ubuntu/diles/" + md5 + "/" + pathLeaf(paths[i], ext=False), bb, int(z))
		timer.stop()