예제 #1
0
    epoch_loss = 0.0
    for iteration in range(total_num // batch_size):
        seed = np.random.choice(total_num, batch_size)
        x_batch = x_train[seed]
        y_batch = y_train[seed]

        loss = model.loss(x_batch, y_batch)
        epoch_loss += loss

        model.zero_grad()
        model.backward()
        model.update()

    epoch_loss /= batch_size

    pred = model.predict(x_train)
    target = np.argmax(y_train, axis=1)
    score = len(np.where(pred == target)[0]) / len(target)

    val_pred = model.predict(x_val)
    val_target = np.argmax(y_val, axis=1)
    val_score = len(np.where(val_pred == val_target)[0]) / len(val_target)

    acc_stack.append(score)
    val_acc_stack.append(val_score)
    loss_stack.append(epoch_loss)

    if (epoch % verbose) == 0:
        spend = round((time.time() - st), 3)
        print(
            f"epoch : {epoch} | loss : {epoch_loss} | train_score : {score} | validation score : {val_score} | time : {spend} seconds"
예제 #2
0
def main(args):

    # environment initialization
    env = BaseEnv(size=args.env_size)

    # embedding network initialization
    f_s_a = MLP(env.state_size + env.action_size, args.s_a_hidden_size,
                args.embedding_dim)
    f_s = MLP(env.state_size, args.s_hidden_size, args.embedding_dim)

    # buffer initialization
    replay_buffer_1 = ReplayBuffer(args, env)
    replay_buffer_2 = ReplayBuffer(args, env)
    goal_buffer = GoalBuffer()
    init_goal = tuple(np.random.randint(1, args.env_size[0] + 1, size=2))
    goal_buffer.store(init_goal)

    # agent initialization
    agent = Agent(env_size=args.env_size)

    # optimizer initialization
    s_a_optimizer = torch.optim.Adam(f_s_a.parameters(), lr=args.s_a_lr)
    s_optimizer = torch.optim.Adam(f_s.parameters(), lr=args.s_a_lr)

    log_loss = []

    for epoch in tqdm.tqdm(range(args.epoch_num)):
        start_position = np.random.randint(1, args.env_size[0] + 1, size=2)
        goal = goal_buffer.sample_batch_goal(size=1)[0]
        print("goal point is :{}".format(goal))
        g_feature = agent.get_state_feature(goal)
        ns, r, terminate = env.reset(size=args.env_size,
                                     start_pos=start_position)
        for step in range(args.max_step):
            s = ns
            s_feature = agent.get_state_feature(s)
            action, min_dist = agent.get_best_action(s_feature, f_s_a, f_s,
                                                     g_feature)
            ns, r, terminate = env.step(action)
            goal_buffer.store(ns)
            ns_feature = agent.get_state_feature(ns)
            vec_action = vectorize_action(action)
            # print(s_feature.shape, vec_action.shape)
            # store one step loss
            # s_a_pred = f_s_a.predict(np.concatenate((s_feature, vec_action), axis=0).reshape((1, -1)))
            # ns_pred = f_s.predict(np.array(ns_feature).reshape((1, -1)))
            e_1 = agent.get_dist(s_feature, vec_action, ns_feature, f_s_a,
                                 f_s)[0]
            replay_buffer_1.add([s_feature, vec_action, ns_feature, None, e_1])

            # store two step loss
            sub_g = goal_buffer.sample_batch_goal(size=1, with_weights=False)
            sub_g_feature = agent.get_states_feature(sub_g)[0]
            na, min_dist = agent.get_best_action(ns_feature, f_s_a, f_s,
                                                 sub_g_feature)

            dist_ns = agent.get_dist(ns_feature, vectorize_action(na),
                                     sub_g_feature, f_s_a, f_s)
            target = dist_ns + 1
            dist_s = agent.get_dist(s_feature, vec_action, sub_g_feature,
                                    f_s_a, f_s)
            e_2 = abs(dist_s - target)
            replay_buffer_2.add(
                [s_feature, vec_action, ns_feature, sub_g_feature, e_2])
            if terminate:
                break

        for step in range(args.random_step):
            s = ns
            s_feature = agent.get_state_feature(s)
            action = agent.get_random_action()
            ns, r, terminate = env.step(action)
            goal_buffer.store(ns)
            ns_feature = agent.get_state_feature(ns)
            vec_action = vectorize_action(action)
            # print(s_feature.shape, vec_action.shape)
            # store one step loss
            # s_a_pred = f_s_a.predict(np.concatenate((s_feature, vec_action), axis=0).reshape((1, -1)))
            # ns_pred = f_s.predict(np.array(ns_feature).reshape((1, -1)))
            e_1 = agent.get_dist(s_feature, vec_action, ns_feature, f_s_a,
                                 f_s)[0]
            replay_buffer_1.add([s_feature, vec_action, ns_feature, None, e_1])

            # store two step loss
            sub_g = goal_buffer.sample_batch_goal(size=1, with_weights=False)
            sub_g_feature = agent.get_states_feature(sub_g)[0]
            na, min_dist = agent.get_best_action(ns_feature, f_s_a, f_s,
                                                 sub_g_feature)

            dist_ns = agent.get_dist(ns_feature, vectorize_action(na),
                                     sub_g_feature, f_s_a, f_s)
            target = dist_ns + 1
            dist_s = agent.get_dist(s_feature, vec_action, sub_g_feature,
                                    f_s_a, f_s)
            e_2 = abs(dist_s - target)
            replay_buffer_2.add(
                [s_feature, vec_action, ns_feature, sub_g_feature, e_2])

        batch_1, _, index_1 = replay_buffer_1.get_batch_data()

        _, batch_2, index_2 = replay_buffer_2.get_batch_data()

        loss_1 = torch.mean(
            torch.norm((f_s_a(batch_1['sa']) - f_s(batch_1['ns'])), dim=1))

        na = agent.get_best_actions(batch_2['ns'], f_s_a, f_s, batch_2['g'])
        target = agent.get_dist(batch_2['ns'], na, batch_2['g'], f_s_a,
                                f_s) + 1
        pred = torch.norm((f_s_a(batch_2['sa']) - f_s(batch_2['g'])), dim=1)

        if (epoch + 1) % 100 == 0:
            print(pred - target)
            print(len(replay_buffer_1), len(replay_buffer_2))
            # goal_buffer.goal_visualize()

        loss_2 = torch.mean(torch.abs(pred - target))

        # if epoch >= 1500:
        #     args.reg_term = 0.5

        loss = (1 - args.reg_term) * loss_1 + args.reg_term * loss_2
        log_loss.append(loss)
        s_a_optimizer.zero_grad()
        s_optimizer.zero_grad()
        loss.backward()
        # nn.utils.clip_grad_norm_(f_s.parameters(), args.grad_clip)
        # nn.utils.clip_grad_norm_(f_s_a.parameters(), args.grad_clip)
        s_a_optimizer.step()
        s_optimizer.step()

        # Update replay buffer
        with torch.no_grad():
            e_update_1 = torch.norm(torch.FloatTensor(
                f_s_a.predict(batch_1['sa']) - f_s.predict(batch_1['ns'])),
                                    dim=1)

            na = agent.get_best_actions(batch_2['ns'], f_s_a, f_s,
                                        batch_2['g'])
            target = agent.get_dist(batch_2['ns'], na, batch_2['g'], f_s_a,
                                    f_s) + 1
            pred = torch.norm(torch.FloatTensor(
                f_s_a.predict(batch_2['sa']) - f_s.predict(batch_2['g'])),
                              dim=1)

            e_update_2 = torch.abs(pred - target)

            replay_buffer_1.update_error(index_1, e_update_1)
            replay_buffer_2.update_error(index_2, e_update_2)

        if (epoch + 1) % 100 == 0:
            print(
                "epoch number: {}/{}, total_loss: {}, loss_normal: {}, loss_update: {}"
                .format(epoch + 1, args.epoch_num, loss, loss_1, loss_2))
        # if epoch == args.epoch_num - 1:
        #     print(s_a_embed, s_embed)

        if (epoch + 1) % 1000 == 0 and loss < 1:
            save_model("./model.pt", f_s_a, f_s, args.epoch_num, loss,
                       s_a_optimizer, s_optimizer)
            print("Saving model at epoch: {}".format(epoch))

        if (epoch + 1) % 10000 == 0:
            plt.plot(range(epoch + 1), log_loss, color='red')
            plt.savefig("./{}.pdf".format(int((epoch + 1) / 10000)),
                        dpi=1200,
                        bbox_inches='tight')