Example #1
def plot_actions(dataset_path, qw, index, task, n_actions, save_path):

    # Load the saved dataset; entry [i, 1] holds the three per-sample values
    # plotted below under the "ETR" label
    dataset = utils.load_object(dataset_path)
    dataset = np.array(dataset)

    actions_etr = np.zeros((n_actions, 3))
    for i in range(n_actions):
        for j in range(3):
            actions_etr[i, j] = dataset[i, 1][j]

    actions_nn = np.zeros((n_actions, 3))

    # Rebuild the Q-network from the supplied weights (uses the module-level `layers`)
    q = MLPQFunction(task.state_dim,
                     task.action_space.n,
                     layers=layers,
                     initial_params=qw)

    task.starting_day_index = 0
    task.reset()

    actions_counter = 0

    # Roll the greedy policy through each day and record its per-action Q-values
    for di in range(task.n_days):

        task.starting_day_index = di
        s = task.reset()

        done = False
        while not done:
            a_list = q.value_actions(s)
            actions_nn[actions_counter, :] = a_list
            a = np.argmax(a_list)
            s, r, done, _ = task.step([a])

            done = done[0]

            actions_counter += 1
            if actions_counter >= n_actions:
                break

            percentage = actions_counter * 100 / n_actions
            if percentage % 10 == 0:
                print("Actions evaluation: {0:3d}%".format(int(percentage)))

        if actions_counter >= n_actions:
            break

    fig, ax = plt.subplots(3, sharex=True, figsize=(16, 9))

    for i in range(3):
        ax[i].plot(actions_etr[:10000, i], label="ETR")
        ax[i].plot(actions_nn[:10000, i], label="NN")
        ax[i].set_title("Action " + str(i - 1))
        ax[i].legend()

    plt.savefig(save_path + '.pdf', format='pdf')
    plt.close(fig)  # avoid accumulating open figures when called repeatedly
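A minimal usage sketch for this helper (not from the original repository): the dataset and output paths and the weight file are placeholders, `mdp` stands for the same trading task used in the other examples (exposing state_dim, action_space.n, n_days, reset() and step()), and the weights are assumed to have been saved with utils.save_object during training.

# Hypothetical call; every path and object below is an assumption
qw = utils.load_object("weights/transfer-2017")[-1]  # most recent saved weight vector
plot_actions("datasets/trading-2017", qw, 0, mdp, 10000, "plots/actions-2017")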
Example #2
def make_Q(weights, task):

    # task params
    state_dim = task.state_dim
    action_dim = 1
    n_actions = task.action_space.n

    return MLPQFunction(state_dim,
                        n_actions,
                        layers=layers,
                        initial_params=weights)
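A short sketch of how make_Q could be used to evaluate a stored weight vector; the weight path is a placeholder and `task` is assumed to expose state_dim, action_space.n and reset() as in the other examples. value_actions() is the per-action evaluation method already used in Example #1.

# Hypothetical usage; the weight path is an assumption
weights = utils.load_object("weights/best-q")
Q = make_Q(weights, task)
values = Q.value_actions(task.reset())  # Q-value of each discrete action in the initial state
greedy_action = int(np.argmax(values))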
Example #3
tasks_data = utils.load_object(tasks_file)

n_eval_episodes = 5

state_dim = temp_mdp.observation_space.shape[0]
action_dim = 1
n_actions = temp_mdp.N_DISCRETE_ACTIONS

# Create BellmanOperator
operator = MellowBellmanOperator(kappa, tau, xi, temp_mdp.gamma, state_dim,
                                 action_dim)
# Create Q Function
# Hidden-layer sizes: a second layer only if l2 > 0
layers = [l1]
if l2 > 0:
    layers.append(l2)
Q = MLPQFunction(state_dim, n_actions, layers=layers, activation=activation)


def run(seed=None):
    return learn(Q,
                 operator,
                 tasks_data,
                 demand,
                 min_env_flow,
                 max_iter=max_iter,
                 buffer_size=buffer_size,
                 batch_size=batch_size,
                 alpha_adam=alpha_adam,
                 alpha_sgd=alpha_sgd,
                 lambda_=lambda_,
                 n_weights=n_weights,
Example #4
def transfer(dataset_path, mdp, save_path, iterations, year, seed=0):

    np.random.seed(seed)

    data = utils.load_object(dataset_path)
    data = np.array(data)

    state_dim = mdp.state_dim
    n_actions = mdp.action_space.n
    mdp.starting_day_index = 0
    mdp.reset()
    day_length = len(mdp.prices[0])

    Q = MLPQFunction(state_dim, n_actions, layers=layers)
    Q.init_weights()

    # Adam optimizer state (first/second moment estimates and step counter)
    m_t = 0
    v_t = 0
    t = 0

    # Start an empty checkpoint list at save_path; intermediate weight vectors are appended below
    utils.save_object([], save_path)

    losses = [[], [], []]  # one training-loss curve per action (assumes n_actions == 3)

    for i in range(iterations):

        # sample time of day
        time = int(np.random.uniform(low=0, high=day_length))
        # take the sample at that time of day from every day, then draw a random batch
        datapoints = np.arange(0, len(data) - day_length, day_length)
        datapoints += time
        datapoints = data[datapoints]
        np.random.shuffle(datapoints)
        datapoints = datapoints[:batch_size]

        for a in range(n_actions):
            with torch.autograd.set_detect_anomaly(True):
                train_loss, grad = compute_gradient_single_action(
                    Q, datapoints, a)

            losses[a].append(train_loss)

            print(
                "Y: {0}, I: {1:5d}, Time: {2:4d}, A: {3:1d}, Grad: {4:8.6f}, Train Loss: {5:8.6f}"
                .format(year, i, time, a, np.linalg.norm(grad), train_loss))

            Q._w, t, m_t, v_t = utils.adam(Q._w,
                                           grad,
                                           t,
                                           m_t,
                                           v_t,
                                           alpha=alpha)

        # periodically checkpoint the weights and plot the greedy policy's action values
        if save_freq > 0 and i % save_freq == 0:
            past_Qs = utils.load_object(save_path)
            past_Qs.append(np.array(Q._w))
            utils.save_object(past_Qs, save_path)
            plot_actions(dataset_path, Q._w, i, mdp, n_actions_plot,
                         path + "/plot-" + year + "-" + str(i))

    print(
        "Model selected index: {0:4d}, Train Loss: [{1:8.6f}, {2:8.6f}, {3:8.6f}]"
        .format(i, losses[0][i], losses[1][i], losses[2][i]))

    return [mdp.get_info(), np.array(Q._w), losses]
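A hedged end-to-end call of transfer. All arguments are placeholders: `mdp` is assumed to be the trading task whose prices and action space the function reads, and the module-level globals it relies on (layers, batch_size, alpha, save_freq, n_actions_plot, path) are assumed to be defined as in the surrounding examples.

# Hypothetical call; every argument shown is a placeholder
info, final_weights, losses = transfer("datasets/trading-2017", mdp,
                                       "weights/transfer-2017",
                                       iterations=2000, year="2017", seed=0)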
Example #5
            for (d1, d2) in zip(doors, doors2)
        ])
        # print([(d1,d2) for (d1,d2) in zip(doors,doors2)])

eval_states = [np.array([0., 0.]) for _ in range(10)]

state_dim = mdps[0][0].state_dim
action_dim = 1
n_actions = mdps[0][0].action_space.n
K = n_basis**2

# Create BellmanOperator
operator = MellowBellmanOperator(kappa, tau, xi, mdps[0][0].gamma, K,
                                 action_dim)
# Create Q Function
Q = MLPQFunction(K, n_actions, layers=None)
# Create RBFs
rbf = build_features_gw_state(gw_size, n_basis, state_dim)


def run(mdp, seed=None):
    return learn(mdp,
                 Q,
                 operator,
                 max_iter=max_iter,
                 buffer_size=buffer_size,
                 batch_size=batch_size,
                 alpha=alpha,
                 train_freq=train_freq,
                 eval_freq=eval_freq,
                 eps_start=eps_start,
Example #6
n_eval_episodes = 5

state_dim = mdps[0].state_dim
action_dim = 1
n_actions = mdps[0].action_space.n

layers = [l1]
if l2 > 0:
    layers.append(l2)

if not dqn:
    # Create BellmanOperator
    operator = MellowBellmanOperator(kappa, tau, xi, mdps[0].gamma, state_dim,
                                     action_dim)
    # Create Q Function
    Q = MLPQFunction(state_dim, n_actions, layers=layers)
else:
    Q, operator = DQN(state_dim,
                      action_dim,
                      n_actions,
                      mdps[0].gamma,
                      layers=layers)


def run(mdp, seed=None):
    return learn(mdp,
                 Q,
                 operator,
                 max_iter=max_iter,
                 buffer_size=buffer_size,
                 batch_size=batch_size,