def main(params) -> None:
    """
    Run a SAC study: train the policy and the twin critics over several repetitions
    :param params: the parameters of the study
    :return: nothing
    """
    env = make_env(params.env_name, params.policy_type, params.max_episode_steps, params.env_obs_space_name)
    env.set_file_name("{}_{}".format(params.gradients[0], params.env_name))
    simulation = Simulation(env, params.nb_trajs, params.update_threshold, params.nb_updates, params.batch_size, params.print_interval)
    simulation.rescale_reward = lambda reward: reward / 10
    policy_loss_file, critic_loss_file = set_files(params.gradients[0], params.env_name)
    chrono = Chrono()
    for j in range(params.nb_repet):
        env.reinit()
        memory = ReplayBuffer()
        # Initialise the policy/actor
        policy = PolicyNet(params.lr_actor, params.init_alpha, params.lr_alpha, params.target_entropy_alpha)
        pw = PolicyWrapper(policy, params.policy_type, params.env_name, params.team_name, params.max_episode_steps)
        pw.duration_flag = False
        # Initialise the critics
        critic = DoubleQNet(params.lr_critic, params.gamma, params.tau)
        plot_policy(policy, env, True, params.env_name, params.study_name, '_ante_', j, plot=False)
        simulation.train(memory, pw, critic, policy_loss_file, critic_loss_file)
        plot_policy(policy, env, True, params.env_name, params.study_name, '_post_', j, plot=False)
        plot_critic(env, params.env_name, critic.q1, policy, params.study_name, '_q1_post_', j)
        plot_critic(env, params.env_name, critic.q2, policy, params.study_name, '_q2_post_', j)
        # Save each critic under its own filename so q2 does not overwrite q1
        critic.q1.save_model('data/critics/{}#{}#SAC_q1_{}.pt'.format(params.env_name, params.team_name, str(j)))
        critic.q2.save_model('data/critics/{}#{}#SAC_q2_{}.pt'.format(params.env_name, params.team_name, str(j)))
    simulation.env.close()
    chrono.stop()
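# A minimal, hypothetical entry point for main() above. The field names match
# the attributes that main() actually reads; the values below are illustrative
# assumptions, not the project's real defaults.
from argparse import Namespace

if __name__ == '__main__':
    params = Namespace(
        env_name='Pendulum-v0',  # assumed example environment
        policy_type='squashedGaussian',
        max_episode_steps=200,
        env_obs_space_name=None,
        gradients=['sum'],
        nb_trajs=20, update_threshold=1000, nb_updates=1,
        batch_size=64, print_interval=20, nb_repet=1,
        lr_actor=1e-3, lr_critic=1e-3, lr_alpha=1e-3,
        init_alpha=0.1, target_entropy_alpha=-1.0,
        gamma=0.99, tau=0.005,
        team_name='myteam', study_name='sac',
    )
    main(params)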
def study_cem(params, starting_pol=None) -> None:
    """
    Start a study of CEM algorithms
    :param params: the parameters of the study
    :param starting_pol: initial policy
    :return: nothing
    """
    assert params.policy_type in ['squashedGaussian', 'normal', 'beta'], 'unsupported policy type'
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    if params.nb_trajs_cem is not None:
        params.nb_trajs = params.nb_trajs_cem
    simu = make_simu_from_params(params)
    for i in range(1):  # len(study): only the first ("sum") variant is run here
        simu.env.set_file_name('cem' + study[i] + '_' + simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 32, 64, 1)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 32, 64, 1)
            elif params.policy_type == "beta":
                policy = BetaPolicy(simu.obs_size, 32, 64, 1)
            if starting_pol is not None:
                policy.set_weights(starting_pol[j])
            pw = PolicyWrapper(policy, j, params.policy_type, simu.env_name, params.team_name, params.max_episode_steps)
            # plot_policy(policy, simu.env, True, simu.env_name, study[i], '_ante_', j, plot=False)
            simu.train_cem(pw, params, policy)
            # plot_policy(policy, simu.env, True, simu.env_name, study[i], '_post_', j, plot=False)
    chrono.stop()
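# A minimal sketch of the cross-entropy-method loop that train_cem is assumed
# to run over the policy's flat weight vector. score_fn, the population size,
# the elite fraction and the noise settings are illustrative assumptions;
# only set_weights() appears in the code above, the rest is hypothetical.
import numpy as np

def cem_sketch(score_fn, mu, sigma=0.1, pop_size=20, elite_frac=0.2, nb_iters=50):
    n_elite = max(1, int(pop_size * elite_frac))
    for _ in range(nb_iters):
        pop = mu + sigma * np.random.randn(pop_size, mu.size)  # sample candidate weights
        scores = np.array([score_fn(w) for w in pop])          # e.g. one episode return each
        elites = pop[np.argsort(scores)[-n_elite:]]            # keep the best candidates
        mu = elites.mean(axis=0)                               # refit the sampling mean
        sigma = elites.std(axis=0) + 1e-3                      # refit the noise, with a floor
    return mu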
def study_regress(params) -> None:
    """
    Start a study where the policy is first fitted by regression, then trained
    :param params: the parameters of the study
    :return: nothing
    """
    assert params.policy_type in ['bernoulli', 'normal', 'squashedGaussian'], 'unsupported policy type'
    chrono = Chrono()
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name, params.team_name, params.max_episode_steps)
            plot_policy(policy, simu.env, True, simu.env_name, study[i], '_ante_', j, plot=False)
            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36, 1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)
            regress(simu, policy, params.policy_type, 250, params.render)
            simu.train(pw, params, policy, critic, policy_loss_file, critic_loss_file, study[i])
            plot_policy(policy, simu.env, True, simu.env_name, study[i], '_post_', j, plot=False)
            plot_critic(simu, critic, policy, study[i], '_post_', j)
            critic.save_model('data/critics/' + params.env_name + '#' + params.team_name + '#' + study[i] + str(j) + '.pt')
    chrono.stop()
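# A speculative sketch of the supervised warm-start that regress() is assumed
# to perform before RL training: fit the policy's action output to target
# actions by mean-squared-error regression. The data source, loss, learning
# rate and the assumption that the policy is a torch module are all guesses.
import torch
import torch.nn.functional as F

def regress_sketch(policy, states, target_actions, nb_epochs=250):
    optimizer = torch.optim.Adam(policy.parameters(), lr=1e-3)
    for _ in range(nb_epochs):
        pred = policy(states)  # assumed to return the mean action for each state
        loss = F.mse_loss(pred, target_actions)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()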
def download(self, site_id, fields, date_beg, date_end, filename, show_chrono=True):
    url = (
        self.base_url + "?"
        + "UrlTemplate="
        + "%26columns={" + ','.join(fields) + "}"
        + "%26sort={-" + fields[0] + "}"
        # + "%26segment=100068961"  # This field appeared for CA.fr requests
        + "%26space={s:" + str(site_id) + "}"
        # + "%26period={R:{D:'-1'}}"  # The day before
        + "%26period={D:{start:'" + date_beg + "',end:'" + date_end + "'}}"
        + "%26max-results=%23maxresult%23"
        + "%26page-num=%23pagenum%23"
        + "%26sep=dot"
        + "&UserId={}".format(self.cfg['user_id'])
        + "&UserLanguageId=1"
        + "&Filename=mycsv"
        + "&Space={\"sites\":[\"" + str(site_id) + "\"],\"group\":\"\"}")
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7",
        "Connection": "keep-alive",
        "Cookie": ("atuserid={}; ".format(self.fj(self.cfg['cookie']['atuserid']))
                   + "AnalyticsSuiteVersion=2; "
                   + "atidvisitor={}; ".format(self.fj(self.cfg['cookie']['atidvisitor']))
                   + "ATToken=Token=" + cookie.FIND_COOKIE() + ";"),
        "Host": "exportsproxy.atinternet-solutions.com",
        "Referer": "https://apps.atinternet-solutions.com/DataQuery/Designer/",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"  # Yes, we really imitate everything!
    }
    # print(cookie.FIND_COOKIE())
    # print(headers)
    if show_chrono:
        chn = Chrono().start()
    r = requests.get(url, headers=headers)
    if show_chrono:
        chn.stop()  # Duration: 36 s to 1 min 12 s
    with open(filename, 'wt') as fout:
        fout.write(r.text)
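# Hypothetical usage sketch for download() above. The client class name
# (AtiExporter), the site id and the metric names are illustrative assumptions;
# only the download() signature comes from the code itself.
exporter = AtiExporter()  # assumed to set self.base_url, self.cfg and self.fj
exporter.download(
    site_id=123456,
    fields=['m_visits', 'm_pages'],
    date_beg='2018-06-01',
    date_end='2018-06-30',
    filename='export.csv',
)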
def run_dyna_prog():
    # maze-like MDP definition
    walls = [7, 8, 9, 10, 21, 27, 30, 31, 32, 33, 45, 46, 47]
    height = 6
    width = 9
    m = build_maze(width, height, walls)
    # m = create_maze(10, 10, 0.2)  # alternative: random maze
    m.render()
    # plot_convergence_vi_pi(m, False)
    # print("value iteration V")
    cpt = Chrono()
    q, _, nbIter, nbUd = value_iteration_v(m, render=0)
    print(nbIter, nbUd)  # number of iterations and of value updates
    print(len(q))
    cpt.stop()
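# A minimal sketch of the Bellman backup that value_iteration_v is assumed to
# implement, written for a generic tabular MDP. The arrays P (transitions) and
# r (rewards) and their shapes are illustrative assumptions.
import numpy as np

def value_iteration_sketch(P, r, gamma=0.9, eps=1e-6):
    # P: (S, A, S) transition probabilities, r: (S, A) rewards
    nb_states, nb_actions, _ = P.shape
    v = np.zeros(nb_states)
    while True:
        # v(s) <- max_a [ r(s, a) + gamma * sum_s' P(s, a, s') v(s') ]
        q = r + gamma * (P @ v)  # shape (S, A)
        v_new = q.max(axis=1)
        if np.max(np.abs(v_new - v)) < eps:
            return v_new, q
        v = v_new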
def study_cem(params):
    """
    Start a "sum" study of CEM
    :param params: the parameters of the study
    :return: the weights, rewards, populations and population scores collected during training, and the flags indicating which candidates were kept
    """
    assert params.policy_type in ['normal'], 'unsupported policy type'
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    simu.env.set_file_name(study[0] + '_' + simu.env_name)
    reward_file = None
    print("study : ", study)
    # do not fix any layer during training
    params.fix_layers = False
    print("cem study")
    chrono_cem = Chrono()
    for j in range(params.nb_repet):
        simu.env.reinit()
        if params.policy_type == "normal":
            policy = NormalPolicy(simu.obs_size, 24, 36, 1)
        pw = PolicyWrapper(policy, params.policy_type, simu.env_name, j, params.team_name, params.max_episode_steps)
        all_weights, all_rewards, all_pops, all_pops_scores, is_kept = simu.train(pw, params, policy, False, reward_file, "", study[0], 0, True)
    cem_time = chrono_cem.stop()
    return all_weights, all_rewards, all_pops, all_pops_scores, is_kept
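# Hypothetical usage sketch for study_cem() above; params is assumed to be the
# same parameter object used by the other study functions.
all_weights, all_rewards, all_pops, all_pops_scores, is_kept = study_cem(params)
print(len(all_weights), "weight snapshots and", len(all_rewards), "reward entries collected")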
            print(policy_file)
            env_name, algo, team_name = read_name(policy_file)
            scores = get_scores(args, folder, policy_file, env_name, algo, stats_path, hyperparams, n_evals)
            if env_name not in self.score_dic:
                self.score_dic[env_name] = {}
            self.score_dic[env_name][scores.mean()] = [team_name, algo, scores.std()]

    def display_hall_of_fame(self) -> None:
        """
        Display the hall of fame of all the evaluated policies
        :return: nothing
        """
        print("Hall of fame")
        for env, dico in self.score_dic.items():
            print("Environment :", env)
            for key, val in sorted(dico.items(), reverse=True):
                print("team: ", val[0], " \t \t algo:", val[1], " \t \t mean score: ", key, "std: ", val[2])


if __name__ == '__main__':
    directory = os.getcwd() + '/data/policies/'
    ev = Evaluator()
    c = Chrono()
    ev.load_policies(directory)
    ev.display_hall_of_fame()
    c.stop()
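# A hypothetical reconstruction of read_name(), assuming policy files follow
# the 'env#team#algo' naming convention that save_model() uses elsewhere in
# this codebase; the real implementation may differ.
import os

def read_name_sketch(policy_file):
    stem = os.path.splitext(os.path.basename(policy_file))[0]
    env_name, team_name, algo = stem.split('#')[:3]
    return env_name, algo, team_name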
def study_pg(params) -> None:
    """
    Start a study of the policy gradient algorithms
    :param params: the parameters of the study
    :return: nothing
    """
    #### MODIF : added discrete
    assert params.policy_type in ['bernoulli', 'normal', 'squashedGaussian', 'discrete'], 'unsupported policy type'
    ####
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 100, 200, 1, params.lr_actor)
            #### MODIF : added the discrete policy
            elif params.policy_type == "discrete":
                if isinstance(simu.env.action_space, gym.spaces.box.Box):
                    # continuous action space: discretise it into integer steps
                    nb_actions = int(simu.env.action_space.high[0] - simu.env.action_space.low[0] + 1)
                    print("Warning: environment action space is not discrete: " + str(simu.env.action_space))
                else:
                    nb_actions = simu.env.action_space.n
                policy = DiscretePolicy(simu.obs_size, 24, 36, nb_actions, params.lr_actor)
            ####
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 100, 200, 1, params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 100, 200, 1, params.lr_actor)
            elif params.policy_type == "DDPG":  # unreachable given the assert above
                policy = DDPG(simu.obs_size, 24, 36, 1, params.lr_actor)
            # policy = policy.cuda()
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name, params.team_name, params.max_episode_steps)
            plot_policy(policy, simu.env, True, simu.env_name, study[i], '_ante_', j, plot=False)
            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36, 1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)
            simu.train(pw, params, policy, critic, policy_loss_file, critic_loss_file, study[i])
            plot_policy(policy, simu.env, True, simu.env_name, study[i], '_post_', j, plot=False)
            if False:  # histogram plotting disabled
                if params.policy_type == "normal":
                    plot_normal_histograms(policy, j, simu.env_name)
                else:
                    plot_weight_histograms(policy, j, simu.env_name)
            plot_critic(simu, critic, policy, study[i], '_post_', j)
            critic.save_model('data/critics/' + params.env_name + '#' + params.team_name + '#' + study[i] + str(j) + '.pt')
    chrono.stop()
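# A minimal sketch of the continuous-action critic interface assumed above:
# QNetworkContinuous takes the concatenated (observation, action) vector, so
# its input size is obs_size + act_size. Layer sizes match the call site; the
# real class may differ in structure and training details.
import torch
import torch.nn as nn

class QNetworkContinuousSketch(nn.Module):
    def __init__(self, input_size, h1, h2, output_size, lr):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, h1), nn.ReLU(),
            nn.Linear(h1, h2), nn.ReLU(),
            nn.Linear(h2, output_size),
        )
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

    def forward(self, obs, act):
        # Q(s, a): concatenate state and action along the feature dimension
        return self.net(torch.cat([obs, act], dim=-1))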
mydir = "/sdiles/ubuntu/sdiles"
myext = ["nc"]
paths = []
for file in lpw.getDirectoryContent(mydir):
    if lpw.checkExtention(myext, file):
        paths.append(file)

for i in range(50, len(paths)):  # start at index 50 (earlier files presumably already processed)
    timer.start()
    md5 = getMD5(paths[i])
    timer.stop()
    print(i, ") md5 for", pathLeaf(paths[i], ext=True), "computed in:", timer.formatted())
    src = ncOpen(paths[i], mode='r')
    bb = ncg.getBoundingBox(src['lat'], src['lon'])
    z = ncg.getZoomLevel(src['lat'], src['lon'])
    ncs = NetcdfSlicer(src)
    timer.reset()
    timer.start()
    ndiles = ncs.createDiles("/sdiles/ubuntu/diles/" + md5 + "/" + pathLeaf(paths[i], ext=False), bb, int(z))
    timer.stop()
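# A minimal sketch of the getMD5 helper used above, assuming it hashes the file
# contents in fixed-size chunks; the real helper may differ.
import hashlib

def getMD5_sketch(path, chunk_size=1 << 20):
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()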