def save_checkpoint(self):
    """Persist checkpoint information."""
    # the loss history
    utils.plot_scores(self.checkpoint_prefix + "_actor_loss.png",
                      self.actor_loss_episodes, label="loss")
    # network weights
    torch.save(self.actor.state_dict(), self.checkpoint_prefix + "_actor.pth")
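# A minimal sketch (not the project's actual utils module) of a plot_scores
# helper matching the file-saving signature used above: filename first, then
# the list of per-episode values, plus an optional label. Assumes matplotlib.
import matplotlib.pyplot as plt

def plot_scores(filename, values, label="score"):
    """Save a simple line plot of per-episode values to disk."""
    fig, ax = plt.subplots()
    ax.plot(range(len(values)), values, label=label)
    ax.set_xlabel("episode")
    ax.set_ylabel(label)
    ax.legend()
    fig.savefig(filename)
    plt.close(fig)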
def ubm(features, label_dict):
    runs_eer = []
    runs_hter = []
    for experiment_i in range(5):
        train_set, development_set, test_set, train_dev_set = utils.shuffle_split_data(
            features, label_dict)
        train_x, train_y, development_x, development_y, test_x, test_y, train_dev_x, train_dev_y = utils.get_datasets(
            train_set, development_set, test_set, train_dev_set)
        nb_of_components = 11
        nb_of_components_background = 15
        all_gmms = build_GMMs(train_set, nb_of_components, label_dict)
        all_ubms = build_UBMs(train_set, nb_of_components_background, label_dict)
        dist_matrix = compute_dist_matrix_with_ubm(development_x, all_gmms,
                                                   all_ubms, label_dict)
        cur_eers, cur_thresholds = compute_eer_client_threshold(
            dist_matrix, development_y, label_dict)
        runs_eer.append(np.mean(cur_eers))
        if experiment_i == 0:
            utils.plot_scores(dist_matrix, development_y, "Second Section",
                              "e2", label_dict)
            frr_list, far_list, threshold_list = compute_frr_far_list(
                dist_matrix, development_y, label_dict)
            utils.plot_far_frr(frr_list, far_list, threshold_list,
                               "Second Section", "e2")
            print(f"Client thresholds:{np.array(cur_thresholds)}")
        all_gmms = build_GMMs(train_dev_set, nb_of_components, label_dict)
        all_ubms = build_UBMs(train_dev_set, nb_of_components_background,
                              label_dict)
        dist_matrix = compute_dist_matrix_with_ubm(test_x, all_gmms, all_ubms,
                                                   label_dict)
        client_hters = []
        for i in range(len(label_dict)):
            cur_dm = dist_matrix[:, i]
            genuine_indexes = (test_y == i)
            client_threshold = cur_thresholds[i]
            cur_frr, cur_far = compute_frr_far_client(cur_dm, genuine_indexes,
                                                      client_threshold)
            client_hters.append((cur_frr + cur_far) / 2)
        cur_hter = np.mean(client_hters)
        runs_hter.append(cur_hter)
    print(f"EERs:{np.array(runs_eer)}, HTERs:{np.array(runs_hter)}")
    print(
        f"Average EER:{np.array(runs_eer).mean():.4f}, std:{np.array(runs_eer).std():.4f}"
    )
    print(
        f"Average HTER:{np.array(runs_hter).mean():.4f}, std:{np.array(runs_hter).std():.4f}"
    )
def gmm_global_threshold(features, label_dict):
    runs_eer = []
    runs_hter = []
    for experiment_i in range(5):
        train_set, development_set, test_set, train_dev_set = utils.shuffle_split_data(
            features, label_dict)
        train_x, train_y, development_x, development_y, test_x, test_y, train_dev_x, train_dev_y = utils.get_datasets(
            train_set, development_set, test_set, train_dev_set)
        nb_of_components = 11
        all_gmms = build_GMMs(train_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(development_x, all_gmms, label_dict)
        cur_eer, cur_threshold = compute_eer(dist_matrix, development_y,
                                             label_dict)
        runs_eer.append(cur_eer)
        if experiment_i == 0:
            utils.plot_scores(dist_matrix, development_y, "First Section",
                              "e1", label_dict)
            frr_list, far_list, threshold_list = compute_frr_far_list(
                dist_matrix, development_y, label_dict)
            utils.plot_far_frr(frr_list, far_list, threshold_list,
                               "First Section", "e1")
            print(f"Threshold:{cur_threshold}")
        all_gmms = build_GMMs(train_dev_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(test_x, all_gmms, label_dict)
        cur_frr, cur_far = compute_frr_far(dist_matrix, test_y, cur_threshold,
                                           label_dict)
        cur_hter = (cur_frr + cur_far) / 2
        runs_hter.append(cur_hter)
    print(f"EERs:{np.array(runs_eer)}, HTERs:{np.array(runs_hter)}")
    print(
        f"Average EER:{np.array(runs_eer).mean():.4f}, std:{np.array(runs_eer).std():.4f}"
    )
    print(
        f"Average HTER:{np.array(runs_hter).mean():.4f}, std:{np.array(runs_hter).std():.4f}"
    )
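# A hedged sketch (not the project's compute_frr_far) of how FRR, FAR and HTER
# can be derived from a score matrix at a single global threshold, assuming
# higher scores mean "more likely genuine". dist_matrix[n, i] is the score of
# sample n against client i, and y[n] is the true client index of sample n.
import numpy as np

def frr_far_hter(dist_matrix, y, threshold):
    genuine_mask = (y[:, None] == np.arange(dist_matrix.shape[1])[None, :])
    genuine_scores = dist_matrix[genuine_mask]
    impostor_scores = dist_matrix[~genuine_mask]
    frr = np.mean(genuine_scores < threshold)    # genuine trials rejected
    far = np.mean(impostor_scores >= threshold)  # impostor trials accepted
    return frr, far, (frr + far) / 2             # HTER is their average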
def train_agent():
    scores = []
    mean_scores = []
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGame()
    while True:
        # get old state
        state_old = agent.get_state(game)

        # get move
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state_old, final_move, reward, state_new, done)

        # remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            game.reset()
            agent.num_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()

            print(f'Game: {agent.num_games}, Score: {score}, Record: {record}')

            scores.append(score)
            total_score += score
            mean_score = total_score / agent.num_games
            mean_scores.append(mean_score)
            plot_scores(scores, mean_scores)
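# A minimal sketch of a live plotting helper matching the plot_scores(scores,
# mean_scores) call above (hypothetical -- the project's own helper may differ).
# Assumes matplotlib; it redraws the same figure after every finished game.
import matplotlib.pyplot as plt

plt.ion()  # interactive mode so the window refreshes without blocking

def plot_scores(scores, mean_scores):
    plt.clf()
    plt.title("Training...")
    plt.xlabel("Number of games")
    plt.ylabel("Score")
    plt.plot(scores, label="score")
    plt.plot(mean_scores, label="mean score")
    plt.legend()
    plt.pause(0.001)  # give the GUI event loop a chance to draw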
def train(
    n_episodes,
    max_t,
    env_fp,
    no_graphics,
    seed,
    save_every_nth,
    buffer_size,
    batch_size,
    gamma,
    tau,
    lr_actor,
    lr_critic,
    weight_decay,
    log,
):
    log.info("#### Initializing environment...")
    # init environment
    env = UnityEnvironment(file_name=env_fp, no_graphics=no_graphics)

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    log.info(f"Number of agents: {num_agents}")

    # size of each action
    action_size = brain.vector_action_space_size
    log.info(f"Size of each action: {action_size}")

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    log.info(
        f"There are {states.shape[0]} agents. Each observes a state with length: {state_size}"
    )
    log.info(f"The state for the first agent looks like: {states[0]}")

    agent = Agent(
        num_agents=len(env_info.agents),
        state_size=state_size,
        action_size=action_size,
        buffer_size=buffer_size,
        batch_size=batch_size,
        gamma=gamma,
        tau=tau,
        lr_actor=lr_actor,
        lr_critic=lr_critic,
        weight_decay=weight_decay,
        random_seed=seed,
    )

    log.info("#### Training...")
    scores_deque = deque(maxlen=100)
    scores = []
    for i_episode in range(1, n_episodes + 1):
        brain_name = env.brain_names[0]
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations
        agent.reset()
        score = np.zeros((len(env_info.agents), 1))
        for t in range(max_t):
            actions = agent.act(states)
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            rewards = np.array(rewards).reshape((next_states.shape[0], 1))
            dones = env_info.local_done
            dones = np.array(dones).reshape((next_states.shape[0], 1))
            agent.step(states, actions, rewards, next_states, dones)
            score += rewards
            states = next_states
            if np.any(dones):
                break
        scores_deque.append(np.mean(score))
        scores.append(np.mean(score))
        print(
            "Episode {}\tAverage Score: {:.2f}\tScore: {:.2f}".format(
                i_episode, np.mean(scores_deque), scores[-1]),
            end="\r",
        )
        if i_episode % 100 == 0:
            print("\rEpisode {}\tAverage Score: {:.2f}".format(
                i_episode, np.mean(scores_deque)))
        if i_episode % save_every_nth == 0:
            save_checkpoint(
                state={
                    "episode": i_episode,
                    "actor_state_dict": agent.actor_local.state_dict(),
                    "critic_state_dict": agent.critic_local.state_dict(),
                    "scores_deque": scores_deque,
                    "scores": scores,
                },
                filename="checkpoint.pth",
            )
            plot_scores(
                scores=scores,
                title=f"Avg score over {len(env_info.agents)} agents",
                fname="avg_scores.png",
                savefig=True,
            )
        if np.mean(scores_deque) >= 30:
            torch.save(agent.actor_local.state_dict(), "checkpoint_actor.pth")
            torch.save(agent.critic_local.state_dict(), "checkpoint_critic.pth")
            print(
                "\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}"
                .format(i_episode - 100, np.mean(scores_deque)))
            break
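# A minimal sketch of the save_checkpoint helper called above (an assumption --
# the project's own helper may store more fields). It simply serializes the
# snapshot dictionary (weights plus score history) to disk with torch.save.
import torch

def save_checkpoint(state, filename="checkpoint.pth"):
    """Persist a training snapshot so a run can be resumed or inspected later."""
    torch.save(state, filename)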
    return model


def get_minibatch_grad(model, X_train, y_train):
    xs, hs, errs = [], [], []

    for x, cls_idx in zip(X_train, y_train):
        h, y_pred = forward(x, model)

        y_true = np.zeros(n_class)
        y_true[int(cls_idx)] = 1.
        err = y_true - y_pred

        # Accumulate the information from the minibatch
        # x: input
        # h: hidden state
        # err: gradient of the output layer
        xs.append(x)
        hs.append(h)
        errs.append(err)

    # Backprop using the information gathered from the current minibatch
    return backward(model, np.array(xs), np.array(hs), np.array(errs))


model = make_network()
trained_model, accuracy_scores = SGD_Optimizer(model, X_train, y_train,
                                               minibatch_size)
utils.plot_scores(accuracy_scores)
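# A hedged sketch of the forward pass assumed above (the real forward/backward
# live elsewhere in the file): one hidden layer with ReLU followed by a softmax
# output, where model is a dict holding the weight matrices 'W1' and 'W2'.
import numpy as np

def forward(x, model):
    h = np.maximum(x @ model['W1'], 0)              # hidden activations (ReLU)
    logits = h @ model['W2']
    logits -= logits.max()                          # numerical stability
    probs = np.exp(logits) / np.exp(logits).sum()   # softmax class probabilities
    return h, probs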
batch_size = 50
num_epochs = 100
num_classes = 2
hidden_units = 100
hidden_units2 = 10
dimensions = 2

# PeaksData, SwissRollData, GMMData
X_train, y_train, X_test, y_test = utils.get_data('PeaksData')
X_train, y_train = shuffle(X_train, y_train)

# gradient and jacobian tests
grad_test_W(X_train, y_train)
grad_test_b(X_train, y_train)
jacobian_test_W(X_train, y_train)
jacobian_test_b(X_train, y_train)
grad_test_W_whole_network(X_train, y_train)
grad_test_b_whole_network(X_train, y_train)

model = models.MyNeuralNetwork()
model.add(layers.Linear(dimensions, hidden_units))
model.add(activations.ReLU())
model.add(layers.Softmax(hidden_units, 5))
optimizer = optimizers.SGD(model.parameters, lr=0.1)
losses, train_accuracy, test_accuracy = model.fit(X_train, y_train, X_test,
                                                  y_test, batch_size,
                                                  num_epochs, optimizer)

# plotting
utils.plot_scores(train_accuracy, test_accuracy)
def dqn_algorithm(agent,
                  env,
                  brain_name,
                  max_n_episodes=2000,
                  max_n_steps=1000,
                  epsilon_start=1.0,
                  epsilon_min=0.01,
                  epsilon_decay_rate=0.995):
    """Deep Q-Learning training loop.

    Parameters
    ----------
    max_n_episodes : int
        Maximum number of training episodes
    max_n_steps : int
        Maximum number of steps per episode
    epsilon_start : float
        Starting value of epsilon, for epsilon-greedy action selection
    epsilon_min : float
        Minimum value of epsilon
    epsilon_decay_rate : float
        Multiplicative factor (per episode) for decreasing epsilon
    """
    all_scores = []
    last_100_scores = deque(maxlen=100)
    last_100_scores_rolling_means = []
    epsilon = epsilon_start

    # loop through episodes
    is_game_over = False
    episode_count = 1
    while not is_game_over:

        # observe state and initialize score
        state = env.reset(train_mode=True)[brain_name].vector_observations[0]
        score = 0

        # loop through steps within each episode
        is_episode_over = False
        agent.t_step = 1
        while not is_episode_over:

            # pick action
            action = agent.act(state, epsilon)

            # observe updated environment, reward and next state
            updated_env = env.step(action)[brain_name]
            next_state = updated_env.vector_observations[0]
            reward = updated_env.rewards[0]
            is_episode_over = updated_env.local_done[0]

            # update next state and add reward from step to episode score
            agent.step(state, action, reward, next_state, is_episode_over)
            state = next_state
            score += reward

            # if episode is over or max_n_steps reached, end loop
            # otherwise, do one more step
            is_episode_over = is_episode_over or (agent.t_step >= max_n_steps)
            agent.t_step += 1

        # anneal epsilon
        epsilon = max(epsilon_min, epsilon_decay_rate * epsilon)

        # keep track of most recent score
        last_100_scores.append(score)
        all_scores.append(score)
        last_100_scores_mean = np.mean(last_100_scores)
        last_100_scores_rolling_means.append(last_100_scores_mean)

        plot_scores(all_scores, last_100_scores_rolling_means, episode_count,
                    agent.buffer_size, agent.batch_size, agent.gamma,
                    agent.tau, agent.lr, agent.update_every,
                    agent.qnetwork_local)

        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            episode_count, last_100_scores_mean), end="")

        completed_100_episodes = episode_count % 100 == 0
        if completed_100_episodes:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                episode_count, last_100_scores_mean))

        is_problem_solved = last_100_scores_mean >= 13.0
        if is_problem_solved:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                  .format(episode_count, last_100_scores_mean))
            torch.save(agent.qnetwork_local.state_dict(), 'weights.pth')

        # if problem solved or max_n_episodes reached, end loop
        # otherwise, play one more episode
        is_game_over = is_problem_solved or (episode_count >= max_n_episodes)
        episode_count += 1
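# A quick check (illustration only) of how fast the epsilon-greedy schedule
# above decays: with epsilon_start=1.0, epsilon_decay_rate=0.995 and
# epsilon_min=0.01, epsilon hits its floor after roughly 920 episodes.
import math

episodes_to_floor = math.ceil(math.log(0.01 / 1.0) / math.log(0.995))
print(episodes_to_floor)  # -> 919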
def main():
    args = parse_arguments(sys.argv[1:])
    print("Parameters:")
    for arg_ in args.sys_args:
        print(arg_)
    print()

    # read data
    # =========
    hapt_data = data.HAPT()
    hapt_data.load_all_data()
    hapt_data.aggregate_groups()

    exp_data = hapt_data.get_train_data()
    exp_labs = hapt_data.get_train_labels()
    exp_labels_map = hapt_data.get_labels_map()
    exp_centroids_num = len(hapt_data.get_labels_map())
    if args.data == "test":
        exp_data = hapt_data.get_test_data()
        exp_labs = hapt_data.get_test_labels()
        exp_centroids_num = len(hapt_data.get_labels_map())
    if args.aggregate:
        exp_labs = hapt_data.get_aggregated_train_labels()
        exp_labels_map = hapt_data.get_aggregated_labels_map()
        exp_centroids_num = len(hapt_data.get_aggregated_labels_map())
        if args.data == "test":
            exp_labs = hapt_data.get_aggregated_test_labels()

    # Show experiment data
    # ====================
    if args.showdata:
        utils.plot_clusters(exp_data, exp_labs, exp_labels_map, True)
        return

    # evolution
    # =========
    iterations_list, scores_list, populations_list, total_time_list, \
        log_dir_list, best_indiv_idx_list = [], [], [], [], [], []
    best_overall = (-1, 0, 0, 0)  # score, experiment, generation (iteration), individual
    for exp_i in range(args.repeat):
        iterations, scores, populations, total_time, log_dir, best_indiv_idx = evolution.run_SGA(
            args.iter_num,
            exp_data,
            exp_labs,
            args.pop_num,
            args.prob_cross,
            args.prob_mutation,
            exp_centroids_num,
            args.adapt_function,
            args.dist_measure,
            log_dir="logs",
            loggin_pref="exp {}/{}: ".format(exp_i + 1, args.repeat))
        cur_best_score = scores[best_indiv_idx[0], best_indiv_idx[1]]
        if best_overall[0] < cur_best_score:
            best_overall = (cur_best_score, exp_i, best_indiv_idx[0],
                            best_indiv_idx[1])
        iterations_list.append(iterations)
        scores_list.append(scores)
        populations_list.append(populations)
        total_time_list.append(total_time)
        log_dir_list.append(log_dir)
        best_indiv_idx_list.append(best_indiv_idx)
        # save plot
        plot_tuple = ("pop:" + str(args.pop_num),
                      "p_c:" + str(args.prob_cross),
                      "p_m:" + str(args.prob_mutation),
                      "data size:" + str(len(exp_labs)),
                      args.adapt_function, args.dist_measure)
        utils.plot_scores(iterations,
                          scores,
                          args.adapt_function,
                          plot_tuple,
                          to_file=True,
                          out_dir=log_dir)

    # visualize
    # =========
    if 1 < args.repeat:
        plot_tuple = ("pop:" + str(args.pop_num),
                      "p_c:" + str(args.prob_cross),
                      "p_m:" + str(args.prob_mutation),
                      "data size:" + str(len(exp_labs)),
                      args.adapt_function, args.dist_measure)
        utils.plot_avg_scores(iterations_list,
                              scores_list,
                              args.adapt_function,
                              best_indiv_idx_list,
                              plot_tuple,
                              to_file=True,
                              out_dirs=log_dir_list)
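# A minimal, hypothetical stand-in for utils.plot_scores as called above (the
# real helper takes more styling information from plot_tuple). It assumes
# scores has shape (generations, population size) and plots the best and mean
# fitness per generation, optionally saving the figure to out_dir.
import os
import matplotlib.pyplot as plt

def plot_scores(iterations, scores, adapt_function, plot_tuple,
                to_file=False, out_dir="logs"):
    best_per_gen = scores.max(axis=1)
    mean_per_gen = scores.mean(axis=1)
    plt.figure()
    plt.plot(iterations, best_per_gen, label="best")
    plt.plot(iterations, mean_per_gen, label="mean")
    plt.xlabel("generation")
    plt.ylabel(adapt_function)
    plt.title(", ".join(plot_tuple))
    plt.legend()
    if to_file:
        plt.savefig(os.path.join(out_dir, "scores.png"))
    else:
        plt.show()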
        reward = torch.tensor([reward], device=device)

        # Observe new state
        if not done:
            next_state = get_screen(env).to(device)
        else:
            next_state = None

        # Store the transition in memory
        memory.push(state, action, next_state, reward)

        # Move to the next state
        state = next_state

        # Perform one step of the optimization (on the prediction network)
        optimize_model(device, pred_net, target_net, optimizer, memory)

        steps += 1
        if steps == TARGET_UPDATE:
            # update the target net weights
            steps = 0
            target_net.load_state_dict(pred_net.state_dict())

    plot_scores(episode_rewards)

print('Done')
env.render()
env.close()
plt.ioff()
plt.show()
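# A hedged sketch of a get_screen helper like the one assumed above (the
# project's own helper may crop and resize differently): render the gym
# environment as an RGB array and convert it to a normalized CHW float tensor
# with a batch dimension. Assumes the classic gym render(mode='rgb_array') API.
import numpy as np
import torch

def get_screen(env):
    screen = env.render(mode='rgb_array')                     # HWC uint8 image
    screen = np.ascontiguousarray(screen, dtype=np.float32) / 255.0
    screen = torch.from_numpy(screen.transpose((2, 0, 1)))    # to CHW
    return screen.unsqueeze(0)                                # add batch dim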
def main():
    args = parse_arguments(sys.argv[1:])

    # read params
    # ===========
    # possible params:
    # iter_num, pop_num, centers_num, prob_cross, prob_mutation, data shape, labs shape,
    # adapt_function, dist_measure, log_dir, best score, best score (index), total_time
    exp_params = {}
    text_file = [f for f in os.listdir(args.path) if f.endswith(".txt")][0]
    with open(os.path.join(args.path, text_file), "r") as text_f:
        for line in text_f:
            line = line.replace("\t", "").strip().split(":")
            if len(line) == 2 and line[0] != "" and line[1] != "":
                if line[0] == "iter_num" or line[0] == "pop_num" or line[0] == "centers_num":
                    exp_params[line[0].replace(" ", "_")] = int(line[1])
                elif line[0] == "prob_cross" or line[0] == "prob_mutation" or line[0] == "best score":
                    exp_params[line[0].replace(" ", "_")] = float(line[1])
                elif line[0] == "data shape" or line[0] == "labs shape":
                    exp_params[line[0].replace(" ", "_")] = make_tuple(line[1])
                elif line[0] == "best score (index)":
                    # best score (index): generation 95, individual 99
                    line[1] = line[1].strip().split(",")
                    exp_params["best_index"] = (
                        int(line[1][0].strip().split(" ")[1]),
                        int(line[1][1].strip().split(" ")[1]))
                else:
                    exp_params[line[0].replace(" ", "_")] = line[1]
    print("\nexperiment parameters were:")
    for k, v in exp_params.items():
        print("{:20}: {}".format(k, v))

    # read results
    # ============
    generations = np.load(os.path.join(args.path, "generations.npy"))
    iterations = np.load(os.path.join(args.path, "iterations.npy"))
    scores = np.load(os.path.join(args.path, "scores.npy"))
    best_centers = generations[exp_params["best_index"][0],
                               exp_params["best_index"][1]]
    print("\nobtained results are:")
    print("generations (total num, pop size, centrs num, feats num): {}".format(
        generations.shape))
    print("iterations (iterations num, ): {}".format(iterations.shape))
    print("scores (total num, pop size): {}".format(scores.shape))
    print("generations total num, iterations num and scores total num must be equal!")
    print("generations pop size and scores pop size must be equal too!")

    plot_tuple = ("pop:" + str(exp_params["pop_num"]),
                  "p_c:" + str(exp_params["prob_cross"]),
                  "p_m:" + str(exp_params["prob_mutation"]),
                  "data size:" + str(len(exp_params["data_shape"])),
                  exp_params["adapt_function"],
                  exp_params["dist_measure"],
                  "best score:" + str(exp_params["best_score"])[:9] + " at " +
                  str(exp_params["best_index"]))
    utils.plot_scores(iterations,
                      scores,
                      exp_params["adapt_function"],
                      plot_tuple,
                      not args.nooutput,
                      out_dir=args.outdir)

    # read data
    # =========
    print("reading data...")
    hapt_data = data.HAPT()
    hapt_data.load_all_data()
    hapt_data.aggregate_groups()
    test_data = hapt_data.get_test_data()
    test_labs = hapt_data.get_test_labels()
    train_data = hapt_data.get_train_data()
    train_labs = hapt_data.get_train_labels()
    labs_map = hapt_data.get_labels_map()
    if exp_params["centers_num"] == 3:
        test_labs = hapt_data.get_aggregated_test_labels()
        train_labs = hapt_data.get_aggregated_train_labels()
        labs_map = hapt_data.get_aggregated_labels_map()
    centroids_num = len(labs_map)
    assert exp_params["centers_num"] == centroids_num

    # do clusterizations
    # ==================
    print("clustering...")
    labels_names = list(labs_map.values())

    # train data
    train_clust_labs = cluster.Centroids.cluster(
        train_data, best_centers, dist_func=exp_params["dist_measure"])
    train_clust_labs = cluster.Utils.adjust_labels(train_clust_labs, train_labs)
    train_silh = cluster.Evaluate.silhouette(train_data, train_clust_labs,
                                             exp_params["dist_measure"])
    train_silh_normalized = (train_silh + 1) / 2
    train_info_gain = cluster.Evaluate.information_gain(train_labs,
                                                        train_clust_labs)
    mapped_train_clust_labs = [labs_map[l] for l in train_clust_labs]
    mapped_train_labs = [labs_map[l] for l in train_labs]
    train_conf_mtx = confusion_matrix(mapped_train_labs,
                                      mapped_train_clust_labs,
                                      labels=labels_names)
    print("train set\tsilh: {:.6}, silh normalized: {:.6}, info gain: {:.6}".
          format(train_silh, train_silh_normalized, train_info_gain))

    # test data
    test_clust_labs = cluster.Centroids.cluster(
        test_data, best_centers, dist_func=exp_params["dist_measure"])
    test_clust_labs = cluster.Utils.adjust_labels(test_clust_labs, test_labs)
    test_silh = cluster.Evaluate.silhouette(test_data, test_clust_labs,
                                            exp_params["dist_measure"])
    test_silh_normalized = (test_silh + 1) / 2
    test_info_gain = cluster.Evaluate.information_gain(test_labs,
                                                       test_clust_labs)
    mapped_test_clust_labs = [labs_map[l] for l in test_clust_labs]
    mapped_test_labs = [labs_map[l] for l in test_labs]
    test_conf_mtx = confusion_matrix(mapped_test_labs,
                                     mapped_test_clust_labs,
                                     labels=labels_names)
    print("test set\tsilh: {:.6}, silh normalized: {:.6}, info gain: {:.6}".
          format(test_silh, test_silh_normalized, test_info_gain))
    # Show data
    # =========
    print("creating plots...")

    # clusters
    utils.plot_clusters(train_data,
                        train_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="train_orig_clusters")
    utils.plot_clusters(train_data,
                        train_clust_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="train_obtained_clusters")
    utils.plot_clusters(test_data,
                        test_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="test_orig_clusters")
    utils.plot_clusters(test_data,
                        test_clust_labs,
                        labs_map,
                        True,
                        out_dir=args.outdir,
                        filename="test_obtained_clusters")

    # confusion matrices
    utils.plot_confusion_matrix(
        train_conf_mtx,
        labels_names,
        normalize=False,
        title='Confusion matrix\ntrain set\n(silh: {:.6}, silh normalized: {:.6}, info gain: {:.6})'
        .format(train_silh, train_silh_normalized, train_info_gain),
        cmap=plt.cm.Blues,
        out_dir=args.outdir,
        filename="train_conf_matr_silh_info_gain")
    utils.plot_confusion_matrix(
        test_conf_mtx,
        labels_names,
        normalize=False,
        title='Confusion matrix\ntest set\n(silh: {:.6}, silh normalized: {:.6}, info gain: {:.6})'
        .format(test_silh, test_silh_normalized, test_info_gain),
        cmap=plt.cm.Blues,
        out_dir=args.outdir,
        filename="test_conf_matr_silh_info_gain")

    print("inference ended")
            states = next_states
            episode_reward += rewards
            if np.any(dones):
                break

        agent1_reward.append(episode_reward[0])
        agent2_reward.append(episode_reward[1])

        if i_episode % print_every == 0:
            avg_rewards = [
                np.mean(agent1_reward[-100:]),
                np.mean(agent2_reward[-100:])
            ]
            print("\rEpisode {} - \tAverage Score: {:.2f} {:.2f}".format(
                i_episode, avg_rewards[0], avg_rewards[1]), end="")
            torch.save(agent1.actor_local.state_dict(),
                       'agent1_actor_checkpoint.pth')
            torch.save(agent1.critic_local.state_dict(),
                       'agent1_critic_checkpoint.pth')
            torch.save(agent2.actor_local.state_dict(),
                       'agent2_actor_checkpoint.pth')
            torch.save(agent2.critic_local.state_dict(),
                       'agent2_critic_checkpoint.pth')

    return {'agent1_scores': agent1_reward, 'agent2_scores': agent2_reward}


scores = ddpg()
env.close()

plot_scores(scores['agent1_scores'])
plot_scores(scores['agent2_scores'])

max_scores = [
    max(scores['agent1_scores'][i], scores['agent2_scores'][i])
    for i in range(len(scores['agent1_scores']))
]
scores, best_model, best_score = tune_learning_rate(X_train, Y_train, X_test,
                                                    Y_test, best_model,
                                                    best_score, activation)
scores_3, best_model, best_score = tune_model(X_train, Y_train, X_test, Y_test,
                                              best_model, best_score,
                                              activation)

fig, ax = plt.subplots(2, 5, figsize=(30, 10))
fig.tight_layout(pad=5.0)
fig.subplots_adjust(left=0.062,
                    right=0.97,
                    bottom=0.148,
                    top=0.88,
                    wspace=0.34,
                    hspace=0.383)
plot_scores(scores, mapper, ax)
plot_scores_3(scores_3, mapper_3, ax)

print('''
#########################################
##                                     ##
##          BEST MODEL FOUND           ##
##      (HYPER PARAMETER TUNING)       ##
##                                     ##
#########################################
''')
pprint(best_model.get_params())
print('\nTrain Accuracy:\t{:0.3f}'.format(clf.score(X_train, Y_train)))
print('\nTest Accuracy:\t{:0.3f}\n\n'.format(clf.score(X_test, Y_test)))
print("Time elapsed = {} s\n".format(time.time() - start))
def save_checkpoint(agent, scores_episodes, scores_window):
    utils.plot_scores(args.checkpoint_prefix + "_reward_history_plot.png",
                      scores_episodes)
    utils.plot_scores(args.checkpoint_prefix + "_reward_plot.png",
                      scores_window)
    agent.save_checkpoint()
        agent.actions = agent.act(add_noise=True)
        agent.rewards, agent.next_states, agent.dones = env.step(agent.actions)
        agent.step()
        agent.states = agent.next_states

        scores.append(agent.scores.mean())
        scores_window.append(agent.scores.mean())

        if ep % print_every == 0:
            print('Episode %d, avg score: %.2f' % (ep, agent.scores.mean()))

        if np.mean(scores_window) >= 30:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                  .format(ep - 100, np.mean(scores_window)))
            torch.save(agent.actor.state_dict(),
                       'checkpoints/reacher_%s_actor_checkpoint.pth' % model)
            torch.save(agent.critic.state_dict(),
                       'checkpoints/reacher_%s_critic_checkpoint.pth' % model)

    env.close()
    return scores, agent


if __name__ == '__main__':
    model_name = 'DDPG'
    scores, agent = train_agent(300, model_name)
    plot_scores(scores, model_name)
def by_window(config):
    result = apply_algorithm(by_window_func, config)
    plot_scores(result, config)
# Let's explore the environment with random actions
#run_gym(env)

from agent import DQNAgent

state_size = env.observation_space.shape[0]
action_size = env.action_space.n

# Instantiate agent
agent = DQNAgent(
    state_size=state_size,
    action_size=action_size,
    # use_double=True,
    # use_dueling=True,
    # use_priority=True,
    use_noise=True,
    seed=42)

agent.summary()

# Let's watch an untrained agent
#run_gym(env, get_action=lambda state: agent.act(state))

scores = train_agent(agent, env)

plot_scores(scores, 'NoisyNets Deep Q-Network', polyfit_deg=6)

#agent.load_weights('prioritized_local_weights.pth')

run_gym(env, get_action=lambda state: agent.act(state), max_t=1000)
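# A hedged sketch of a plot_scores helper matching the call above (title plus a
# polyfit_deg trend line); the notebook's own helper may differ. Assumes
# matplotlib and numpy are available.
import numpy as np
import matplotlib.pyplot as plt

def plot_scores(scores, title, polyfit_deg=None):
    episodes = np.arange(len(scores))
    plt.plot(episodes, scores, alpha=0.6, label="score")
    if polyfit_deg is not None:
        # least-squares polynomial fit as a smooth trend over the raw scores
        coeffs = np.polyfit(episodes, scores, polyfit_deg)
        plt.plot(episodes, np.polyval(coeffs, episodes),
                 label=f"trend (deg {polyfit_deg})")
    plt.title(title)
    plt.xlabel("episode")
    plt.ylabel("score")
    plt.legend()
    plt.show()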