# NOTE(review): this chunk is a whitespace-mangled fragment — the enclosing
# epoch loop (which defines `bar`, `i`, `display_data`, `history`, `total`,
# `loss`) starts before this view, so original nesting cannot be recovered
# here with certainty. Left byte-identical; what it visibly does:
#   1. Averages the accumulated `loss` over `total` samples, records it in
#      `display_data["loss"]` and `history["loss"]`.
#   2. Runs a validation pass: iterates len(x_valid) // config.batch_size
#      batches from generate_batch(x_valid) via itertools.islice, counts
#      exact-match predictions from model.classify, and records accuracy in
#      `display_data["eval_acc"]` and `history["acc"]`.
#   3. Updates the progress-bar postfix and saves per-epoch weights
#      (model.save_weights(weights_dir, i) — `i` presumably the epoch index;
#      TODO confirm).
#   4. After training: saves the model, persists the activity-id/name mapping,
#      finds the best-accuracy epoch with np.argmax, prints a summary, and
#      writes an accuracy/loss plot to <model_dir>/results.png.
# TODO(review): restore line breaks/indentation from version control — this
# line is not valid Python as it stands.
total += 1 loss = loss / total display_data["loss"] = loss history["loss"].append(loss) total = 0 correct = 0 for x_v_batch, y_v_batch in itertools.islice(generate_batch(x_valid), None, len(x_valid) // config.batch_size): prediction = model.classify(x_v_batch) for j in range(len(prediction)): total += 1 if prediction[j] == y_v_batch[j]: correct += 1 acc = correct / total display_data["eval_acc"] = acc history["acc"].append(acc) bar.set_postfix(display_data) model.save_weights(weights_dir, i) model.save(model_dir) utils.save_class_ids(activity_dict, os.path.join(model_dir, ACTIVITY_ID_NAME_MAPPING_FILENAME)) index = np.argmax(history['acc']) comment = "Max accuracy {} after {} epoch".format(history['acc'][index], index) print(comment) plot_utils.create_plot(history['acc'], history['loss'], os.path.join(model_dir, "results.png"), True, comment)
# Tabular epsilon-greedy Q-learning driver on an n x n grid world.
# Reconstructed from a whitespace-mangled source line into conventional
# formatting. Bug fix: `alpha` was commented out (`# alpha = 0.1`) while
# still being passed to learn_Q(...), which would raise NameError on the
# first learning step — it is now defined.
gamma = 0.98        # discount factor
alpha = 0.1         # learning rate (was commented out in the original — NameError in learn_Q)
nr_episodes = 2_000
epsilon = 0.4       # exploration rate for the epsilon-greedy policy

# 1.2 policy $\pi$
# initializing policy to a random policy
# initializing Q to zero
# NOTE(review): `n` and `nr_actions` are defined earlier in the file — confirm.
pi = return_a_random_policy(n, nr_actions)
Q = np.zeros((n, n, nr_actions))

# 1.6 setting up the plot
ax = create_plot(n)
plt.ion()
interactive(True)
plt.cla()
ax.axis('off')

for episode_id in tqdm(range(nr_episodes)):
    terminated = False
    state = initialize_the_state(n)
    while not terminated:
        # Re-derive the epsilon-greedy policy from the current Q each step.
        pi = return_epsilon_greedy_pi(Q, epsilon)
        action_id = choose_an_action_based_on_pi(state, pi)
        new_state, reward, terminated = step(state, action_id, n)
        learn_Q(state, action_id, reward, new_state, Q, gamma, alpha)
        # `+ 0.0` presumably forces a fresh float/array copy of the state
        # rather than aliasing new_state — TODO confirm intent.
        state = new_state + 0.0
# NOTE(review): whitespace-mangled fragment; the inner `for counter, init_state`
# loop body is cut off at the end of this view (continues past
# `env.reset(init_state)`), so the code is left byte-identical. What it
# visibly does:
#   - Seeds NumPy's RNG for reproducibility, builds an N x N (N = 8) Maze env
#     with wall_length=2.
#   - Initializes pi via return_a_random_policy(..., epsilon=1000000) —
#     presumably a huge epsilon to make the policy effectively uniform-random;
#     TODO confirm.
#   - Allocates V_accumulate as an N x N zero array and sets up an interactive
#     matplotlib plot with the axes turned off.
#   - Enumerates every grid state via dstack_product(arange(N), arange(N)) and,
#     for each of 1_000 episodes, sweeps all states, resetting the env to each
#     init_state in turn (episode rollout continues beyond this view).
# TODO(review): restore line breaks/indentation from version control — this
# line is not valid Python as it stands.
np.random.seed(0) # 1.2 grid size along each direction N = 8 env = Maze(N=N, wall_length=2) # 1.2 policy $\pi$ # initializing policy to a random policy # initializing V to zero pi = return_a_random_policy(N, env.action_space.n, epsilon=1000000) V_accumulate = np.zeros((N, N)) # 1.6 setting up the plot ax = create_plot(N) plt.ion() interactive(True) plt.cla() ax.axis('off') nr_episodes = 1_000 gamma = 0.98 all_states = dstack_product(np.arange(N), np.arange(N)) for episode_id in tqdm(range(nr_episodes)): # a sweep over all the states in the system. for counter, init_state in enumerate(all_states): terminated = False env.reset(init_state)
def plot():
    """Render plot.html with the figures produced by create_plot().

    Each figure is exposed to the template as plot0, plot1, ... in order,
    matching the five hard-coded plot0..plot4 keywords of the original while
    also handling create_plot() returning a different number of figures
    (the original raised IndexError for fewer than five).
    """
    graphs = create_plot()
    # Build plot0..plotN-1 keyword arguments dynamically instead of
    # enumerating them by hand.
    context = {f"plot{i}": graph for i, graph in enumerate(graphs)}
    return render_template('plot.html', **context)