# Example #1 (continued from earlier in the file)
            total += 1

        loss = loss / total
        display_data["loss"] = loss
        history["loss"].append(loss)

        total = 0
        correct = 0
        for x_v_batch, y_v_batch in itertools.islice(generate_batch(x_valid), None, len(x_valid) // config.batch_size):
            prediction = model.classify(x_v_batch)
            for j in range(len(prediction)):
                total += 1
                if prediction[j] == y_v_batch[j]:
                    correct += 1

        acc = correct / total
        display_data["eval_acc"] = acc
        history["acc"].append(acc)
        bar.set_postfix(display_data)

        model.save_weights(weights_dir, i)

    model.save(model_dir)
    utils.save_class_ids(activity_dict, os.path.join(model_dir, ACTIVITY_ID_NAME_MAPPING_FILENAME))

    index = np.argmax(history['acc'])
    comment = "Max accuracy {} after {} epoch".format(history['acc'][index], index)
    print(comment)

    plot_utils.create_plot(history['acc'], history['loss'], os.path.join(model_dir, "results.png"), True, comment)
# --- Tabular Q-learning with an epsilon-greedy policy on an n x n grid ---
# NOTE(review): `n`, `nr_actions`, and the helpers (return_a_random_policy,
# return_epsilon_greedy_pi, choose_an_action_based_on_pi, step, learn_Q,
# initialize_the_state, create_plot) are defined elsewhere in this file.
gamma = 0.98  # discount factor for future rewards

# 1.1 learning hyperparameters
alpha = 0.1  # Q-update step size (learning rate)
nr_episodes = 2_000
epsilon = 0.4  # exploration rate of the epsilon-greedy policy

# 1.2 policy $\pi$
# initializing policy to a random policy
# initializing Q to zero

pi = return_a_random_policy(n, nr_actions)
Q = np.zeros((n, n, nr_actions))  # action-value table indexed (row, col, action)

# 1.6 setting up the plot
ax = create_plot(n)
plt.ion()
interactive(True)
plt.cla()
ax.axis('off')

for episode_id in tqdm(range(nr_episodes)):
    terminated = False
    state = initialize_the_state(n)
    while not terminated:
        # Re-derive the epsilon-greedy policy from the current Q estimates;
        # note this overwrites the initial random `pi` on the very first step.
        pi = return_epsilon_greedy_pi(Q, epsilon)
        action_id = choose_an_action_based_on_pi(state, pi)
        new_state, reward, terminated = step(state, action_id, n)
        # One TD update of Q in place for the observed transition.
        learn_Q(state, action_id, reward, new_state, Q, gamma, alpha)
        # `+ 0.0` presumably forces a copy of a NumPy state array so that
        # `state` cannot alias the environment's internal state -- TODO
        # confirm against step()/initialize_the_state().
        state = new_state + 0.0
# Example #3
# --- Per-state sweep over a Maze environment under a random policy ---
# NOTE(review): Maze, return_a_random_policy, create_plot, and dstack_product
# are defined elsewhere in this file.
np.random.seed(0)  # fixed seed for reproducible runs

# 1.2 grid size along each direction
N = 8

env = Maze(N=N, wall_length=2)

# 1.2 policy $\pi$
# initializing policy to a random policy
# (epsilon=1000000 presumably drives the policy fully random -- TODO confirm
# against return_a_random_policy)
# initializing V to zero
pi = return_a_random_policy(N, env.action_space.n, epsilon=1000000)

V_accumulate = np.zeros((N, N))  # running accumulator of state values

# 1.6 setting up the plot
ax = create_plot(N)
plt.ion()
interactive(True)
plt.cla()
ax.axis('off')

nr_episodes = 1_000
gamma = 0.98  # discount factor

# every (x, y) grid coordinate, used to start an episode from each state
all_states = dstack_product(np.arange(N), np.arange(N))

for episode_id in tqdm(range(nr_episodes)):
    # a sweep over all the states in the system.
    for counter, init_state in enumerate(all_states):
        terminated = False
        env.reset(init_state)
        # NOTE(review): the rollout that should follow (and consume
        # `terminated`) appears truncated in this excerpt.
# Example #4
def plot():
    """Render the plot page, passing the first five generated graphs.

    Returns the rendered 'plot.html' template with keyword arguments
    plot0..plot4 bound to the corresponding entries of create_plot().
    """
    figures = create_plot()
    # Build the plot0..plot4 keyword arguments the template expects.
    plot_kwargs = {f"plot{idx}": figures[idx] for idx in range(5)}
    return render_template('plot.html', **plot_kwargs)