Code example #1

All examples below assume the usual imports (`import torch`, `import torch.nn as nn`, `import numpy as np`, `import math`, `import matplotlib`, `import matplotlib.pyplot as plt`) along with the project-local `models` module and helpers such as `xor_data_generate`; hedged sketches of the undefined helpers follow the examples that use them.
def test_C(C, E):
    """Report the validation NLL loss of classifier C (with encoder E) on fresh XOR data."""
    X, Y = xor_data_generate(1000)
    X = X.cuda()
    Y = Y.cuda()
    with torch.no_grad():  # evaluation only; no gradients are needed
        T = C(E(X))

    for i in range(1000):
        print("Y_i:", Y[i])
        print("T_i:", T[i])
    criterion = nn.NLLLoss()
    loss = criterion(T, Y.view(-1))
    print("validation loss of C:", loss.detach().cpu().numpy())
Code example #2
def main2():
    torch.manual_seed(233)
    torch.cuda.set_device(0)
    args = get_args()
    config = Config(state_dim=args.hidden,
                    input_dim=args.input_dim,
                    hidden=args.hidden,
                    output_dim=args.num_classes,
                    epsilon=args.epsilon)
    checkpoint = torch.load("cog396test_main_episode_280.tr")
    C = models.SimpleNNClassifier(config)
    E = models.Shared_Encoder(config)
    C.load_state_dict(checkpoint['C_state_dict'])
    E.load_state_dict(checkpoint['E_state_dict'])
    C.cuda()
    E.cuda()

    X_eval, Y_eval = xor_data_generate(int(1e3))
    X_eval = X_eval.cuda()
    Y_eval = Y_eval.cuda()

    class_list = []
    x1_list = []
    x2_list = []
    colors = ['red', 'green']
    for i in range(int(1e3)):
        t = C(E(X_eval[i]))
        print("t:", t)
        # the class with the larger log-probability is the prediction
        predict_label = 0 if t[0][0] > t[0][1] else 1
        class_list.append(predict_label)
        print("prediction:", predict_label)
        print("real label:", Y_eval[i])
        x1 = float(X_eval[i][0].cpu())
        x2 = float(X_eval[i][1].cpu())
        # print("x1:", x1)
        # print("x2:", x2)
        x1_list.append(x1)
        x2_list.append(x2)

    # fig = plt.figure(figsize=(8, 8))
    plt.scatter(x1_list,
                x2_list,
                c=class_list,
                cmap=matplotlib.colors.ListedColormap(colors))
    plt.savefig("train_c_280.png")
Code example #3
def main3():
    X_eval, Y_eval = xor_data_generate(1000)
    afn_list = []
    x1_list = []
    x2_list = []
    for i in range(1000):
        x1 = float(X_eval[i][0])
        x2 = float(X_eval[i][1])
        y = Y_eval[i]
        afn = np.random.uniform(0, 0.1)  # small baseline affinity for every point
        if y == 0 and (x2 < x1 + 0.3):
            # distance from (x1, x2) to the line x2 = x1 - 0.3
            d = float(abs((x2 - x1 + 0.3) / math.sqrt(2)))
            afn += np.random.normal(1 - d, 1)  # higher expected affinity near the line
        x1_list.append(x1)
        x2_list.append(x2)
        afn_list.append(afn)

    plt.scatter(x1_list, x2_list, c=afn_list, cmap='Blues')
    plt.savefig("affn.png")
Code example #4
def Q_eval_vis():
    torch.manual_seed(233)
    torch.cuda.set_device(0)
    args = get_args()
    config = Config(state_dim=args.hidden,
                    input_dim=args.input_dim,
                    hidden=args.hidden,
                    output_dim=args.num_classes,
                    epsilon=args.epsilon)
    checkpoint = torch.load("cog396test_main_episode_280.tr")
    E = models.Shared_Encoder(config)
    E.load_state_dict(checkpoint['E_state_dict'])

    X_eval, Y_eval = xor_data_generate(int(1e3))
    X_eval = X_eval.cuda()
    Y_eval = Y_eval.cuda()

    Q = models.Simple_Q_Net(config)
    Q.load_state_dict(checkpoint['Q_state_dict'])

    E.cuda()
    Q.cuda()

    x1_list = []
    x2_list = []
    affs = []
    for i in range(1000):
        x_i = X_eval[i]
        s_i = E(x_i)
        q0, q1 = Q(s_i)  # state-action values for a=0 and a=1
        # affinity = advantage of selecting this point; convert to a CPU
        # float so matplotlib can use it as a color value
        affs.append(float((q1 - q0).detach().cpu()))

        x1 = float(X_eval[i][0].cpu())
        x2 = float(X_eval[i][1].cpu())
        x1_list.append(x1)
        x2_list.append(x2)

    plt.scatter(x1_list, x2_list, c=affs, cmap='Blues')
    plt.savefig("policy_eval_280.png")
Code example #5
def main():
    torch.manual_seed(233)
    torch.cuda.set_device(0)
    args = get_args()
    print("generating config")
    config = Config(
        state_dim=args.hidden,
        input_dim=args.input_dim,
        hidden=args.hidden,
        output_dim=args.num_classes,
        epsilon=args.epsilon
    )
    gamma = args.gamma
    reward_amplify = args.reward_amplify
    passive_drive = args.passive_drive
    memory = models.Memory(args.capacity)
    m = args.batch_size
    print("initializing networks")
    E = models.Shared_Encoder(config)
    Q = models.Simple_Q_Net(config)    # 2-dim x-or problem
    Q_t = models.Simple_Q_Net(config)
    Q_t.load_state_dict(Q.state_dict())     # let Q and Q_t be identical initially

    C = models.SimpleNNClassifier(config)

    episode_length = args.episode_length
    episode_number = args.episode_number

    print("initializing optimizers")
    optimizer_E = torch.optim.Adam(E.parameters(), lr=args.lr, betas=(0., 0.999))
    optimizer_C = torch.optim.Adam(C.parameters(), lr=args.lr, betas=(0., 0.999))
    optimizer_Q = torch.optim.Adam(Q.parameters(), lr=args.lr, betas=(0., 0.999))

    # enable gpu
    E.cuda()
    C.cuda()
    Q.cuda()
    Q_t.cuda()

    #test_C(C, E)
    loss_last = torch.tensor([0.]).cuda()  # torch.autograd.Variable is deprecated
    X_eval, Y_eval = xor_data_generate(args.eval_set_size)
    X_eval = X_eval.cuda()
    Y_eval = Y_eval.cuda()
    for i in range(episode_number):
        #X_eval, Y_eval = xor_data_generate(args.eval_set_size)
        #X_eval = X_eval.cuda()
        #Y_eval = Y_eval.cuda()
        X, Y = xor_data_generate(m)
        X = X.cuda()
        Y = Y.cuda()
        for t in range(episode_length):
            try:
                X, Y, loss_last, reward = train_step(E=E,
                                                     C=C,
                                                     Q=Q,
                                                     Q_t=Q_t,
                                                     X=X,
                                                     Y=Y,
                                                     eval_X=X_eval,
                                                     eval_Y=Y_eval,
                                                     gamma=gamma,
                                                     loss_last=loss_last,
                                                     memory=memory,
                                                     optimizer_C=optimizer_C,
                                                     optimizer_E=optimizer_E,
                                                     optimizer_Q=optimizer_Q,
                                                     reward_amplify=reward_amplify,
                                                     passive_drive=passive_drive)
                print("Episode %i step %i, loss=%f, reward=%f" % (
                    i, t, loss_last.detach().cpu().numpy(), reward.detach().cpu().numpy()))
            except Exception as e:
                print("Cannot train the model on this step, error:", e)

        # refresh the target network with a copy of Q's weights;
        # plain `Q_t = Q` would alias the two networks instead of copying
        Q_t.load_state_dict(Q.state_dict())
        if i % 20 == 0:
            test_C(C, E)
            state = {
                'E_state_dict': E.state_dict(),
                'E_optimizer': optimizer_E.state_dict(),
                'C_state_dict': C.state_dict(),
                'C_optimizer': optimizer_C.state_dict(),
                'Q_state_dict': Q.state_dict(),
                'Q_optimizer': optimizer_Q.state_dict(),

            }
            model_name = "cog396test_main_episode_" + str(i) + ".tr"
            torch.save(state, model_name)
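`models.Memory` is used as a replay buffer: `add_transition` stores one [s_t, a_t, r_t, s_{t+1}] tuple and `sampling` returns one stored transition. A minimal sketch, assuming a fixed capacity with oldest-first eviction and uniform sampling:

import random
from collections import deque

class Memory:
    """Hypothetical sketch: fixed-capacity replay buffer."""
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)  # evicts the oldest item when full

    def add_transition(self, transition):
        self.buffer.append(transition)

    def sampling(self):
        return random.choice(self.buffer)  # one uniformly sampled transition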
Code example #6
def simple_train_C():
    torch.manual_seed(233)
    torch.cuda.set_device(0)
    args = get_args()
    print("generating config")
    config = Config(state_dim=args.hidden,
                    input_dim=args.input_dim,
                    hidden=args.hidden,
                    output_dim=args.num_classes,
                    epsilon=args.epsilon)
    gamma = args.gamma
    memory = models.Memory(args.capacity)
    m = args.batch_size
    print("initializing networks")
    E = models.Shared_Encoder(config)
    C = models.SimpleNNClassifier(config)

    optimizer_E = torch.optim.Adam(E.parameters(),
                                   lr=args.lr,
                                   betas=(0., 0.999))
    optimizer_C = torch.optim.Adam(C.parameters(),
                                   lr=args.lr,
                                   betas=(0., 0.999))

    E.cuda()
    C.cuda()

    X, Y = xor_data_generate(30000)
    X = X.cuda()
    Y = Y.cuda()
    criterion = nn.NLLLoss()
    for i in range(30000):
        x = X[i]
        y = Y[i]
        t = C(E(x))

        loss = criterion(t, y.view(-1))

        # clear stale gradients before each backward pass
        optimizer_E.zero_grad()
        optimizer_C.zero_grad()
        loss.backward()

        optimizer_E.step()
        optimizer_C.step()

        if i % 1000 == 0:
            print("loss of step %i: %f" % (i, loss.detach().cpu().numpy()))

    X_eval, Y_eval = xor_data_generate(int(1e3))
    X_eval = X_eval.cuda()
    Y_eval = Y_eval.cuda()

    class_list = []
    x1_list = []
    x2_list = []
    colors = ['red', 'green']
    for i in range(int(1e3)):
        t = C(E(X_eval[i]))
        print("t:", t)
        # the class with the larger log-probability is the prediction
        predict_label = 0 if t[0][0] > t[0][1] else 1
        class_list.append(predict_label)
        print("prediction:", predict_label)
        print("real label:", Y_eval[i])
        x1 = float(X_eval[i][0].cpu())
        x2 = float(X_eval[i][1].cpu())
        #print("x1:", x1)
        #print("x2:", x2)
        x1_list.append(x1)
        x2_list.append(x2)

    #fig = plt.figure(figsize=(8, 8))
    plt.scatter(x1_list,
                x2_list,
                c=class_list,
                cmap=matplotlib.colors.ListedColormap(colors))
    plt.savefig("test_c.png")
Code example #7
def train_step(E, Q, Q_t, memory, X, Y, C, optimizer_C, optimizer_E, optimizer_Q, eval_X, eval_Y, loss_last, gamma,
               reward_amplify, passive_drive):
    """
    train process for each step, update Q-network and classification network C (and encoder E)
    :param E:
    :param Q:
    :param Q_t:
    :param memory:
    :param S:
    :param Y:
    :param C:
    :param optimizer_C:
    :param optimizer_Q:
    :param train_X:
    :param train_Y:
    :param eval_X:
    :param eval_Y:
    :param sample_size:
    :param loss_last:
    :param gamma:
    :param reward_amplify:
    :param passive_drive:
    :return:
    """

    # epsilon-greedy policy
    S = E(X)
    m = S.shape[0]
    action = []
    while len(action) == 0:
        rand1 = np.random.rand()
        if rand1 < Q.epsilon:
            # with probability epsilon, randomly select which data points are used (1/2-1/2 probability)
            n, p = 1, 0.5
            br = np.random.binomial(n, p, m)
            action = [i for i in range(m) if br[i] == 1]
        else:
            # with probability (1-epsilon), determine which data points are selected by computing their Q-values
            V = Q(S)    # m pairs of (Q(s, a0), Q(s,a1)) state-action values
            action = choose_action(V)

    # execute action (use selected data to train classifier C)
    S_sampled = S[action]
    Y_sampled = Y[action].view(-1)
    #print("length of S_sampled:", S_sampled.shape[0])
    T = C(S_sampled)
    criterion_C = nn.NLLLoss()
    #print("shape of T:", T.shape)
    #print("shape of Y_sampled:", Y_sampled.shape)
    C_loss = criterion_C(T, Y_sampled)
    # clear stale gradients before backpropagating the classification loss
    optimizer_C.zero_grad()
    optimizer_E.zero_grad()
    C_loss.backward(retain_graph=True)
    optimizer_C.step()
    optimizer_E.step()

    # get the step reward
    reward, loss = reward_computing(E, C, eval_X, eval_Y, loss_last, reward_amplify, passive_drive=passive_drive)

    # sample from training set to get the next batch of data,
    sampled_X, sampled_Y = xor_data_generate(m)
    sampled_X = sampled_X.cuda()
    sampled_Y = sampled_Y.cuda()

    # encode it to obtain s_(t+1)
    S_new = E(sampled_X)

    # store the m transition tuples into memory, using average reward reshaping:
    action = to_one_hot(action, m)
    for i in range(m):
        transition = [S[i], action[i], reward/m, S_new[i]]
        memory.add_transition(transition)

    # sample a random transition tuple from memory
    sampled_transition = memory.sampling()

    # perform temporal difference learning, compute y_j
    s_j = sampled_transition[0]
    a_j = sampled_transition[1]     # either 0 or 1
    r_j = sampled_transition[2]
    s_jp1 = sampled_transition[3]

    q0, q1 = Q_t(s_jp1)
    # the TD target uses the larger of the two target-network Q-values
    if q0.item() > q1.item():
        y_j = r_j + gamma * q0
    else:
        y_j = r_j + gamma * q1

    q0, q1 = Q(s_j)
    criterion_Q = nn.MSELoss()
    y_j = y_j.detach()  # treat the TD target as a constant
    if a_j == 0:
        Q_loss = criterion_Q(q0, y_j)
    else:
        Q_loss = criterion_Q(q1, y_j)

    optimizer_Q.zero_grad()
    Q_loss.backward(retain_graph=True)
    optimizer_Q.step()

    return sampled_X, sampled_Y, loss, reward
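`choose_action`, `to_one_hot`, and `reward_computing` are defined outside these listings. The sketches below match their call sites in `train_step` but are assumptions, not the original implementations; in particular, the reward formula (amplified decrease of the evaluation loss plus a constant `passive_drive`) is a guess:

import torch
import torch.nn as nn

def choose_action(V):
    """Hypothetical sketch: V = (q0, q1), each of shape (m,).

    Keep index i when Q(s_i, a=1) exceeds Q(s_i, a=0), i.e. when the
    Q-network prefers to use data point i for training."""
    q0, q1 = V
    return [i for i in range(q0.shape[0]) if q1[i].item() > q0[i].item()]

def to_one_hot(action, m):
    """Hypothetical sketch: length-m binary vector marking selected indices."""
    onehot = [0] * m
    for i in action:
        onehot[i] = 1
    return onehot

def reward_computing(E, C, eval_X, eval_Y, loss_last, reward_amplify,
                     passive_drive=0.):
    """Hypothetical sketch: reward the drop in evaluation loss."""
    with torch.no_grad():
        loss = nn.NLLLoss()(C(E(eval_X)), eval_Y.view(-1))
    reward = reward_amplify * (loss_last - loss) + passive_drive
    return reward, loss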