def test_C(C, E):
    """Print per-sample targets/outputs and the validation NLL loss of classifier C.

    :param C: classifier network; maps encoded states to log-probabilities
    :param E: encoder network; maps raw XOR inputs to states
    """
    X, Y = xor_data_generate(1000)
    X = X.cuda()
    Y = Y.cuda()
    # Evaluation only: no_grad stops autograd from building a graph over the
    # whole eval batch (the original ran inference with gradients enabled).
    with torch.no_grad():
        T = C(E(X))
        for i in range(1000):
            print("Y_i:", Y[i])
            print("T_i:", T[i])
        criterion = nn.NLLLoss()
        loss = criterion(T, Y.view(-1))
        print("validation loss of C:", loss.detach().cpu().numpy())
def main2():
    """Load the episode-280 checkpoint and scatter-plot C's predictions on XOR data.

    Saves the colored prediction plot to ``train_c_280.png``.
    """
    torch.manual_seed(233)
    torch.cuda.set_device(0)
    args = get_args()
    config = Config(state_dim=args.hidden, input_dim=args.input_dim,
                    hidden=args.hidden, output_dim=args.num_classes,
                    epsilon=args.epsilon)
    checkpoint = torch.load("cog396test_main_episode_280.tr")
    C = models.SimpleNNClassifier(config)
    E = models.Shared_Encoder(config)
    C.load_state_dict(checkpoint['C_state_dict'])
    E.load_state_dict(checkpoint['E_state_dict'])
    C.cuda()
    E.cuda()
    # Inference-only pass: switch to eval mode and disable autograd so the
    # 1000 per-sample forward passes don't accumulate graph memory
    # (both were missing in the original).
    C.eval()
    E.eval()
    X_eval, Y_eval = xor_data_generate(int(1e3))
    X_eval = X_eval.cuda()
    Y_eval = Y_eval.cuda()
    class_list = []
    x1_list = []
    x2_list = []
    colors = ['red', 'green']
    with torch.no_grad():
        for i in range(int(1e3)):
            t = C(E(X_eval[i]))
            print("t:", t)
            # Predicted class is the larger log-probability of the two outputs.
            if t[0][0] > t[0][1]:
                predict_label = 0
                class_list.append(0)
            else:
                predict_label = 1
                class_list.append(1)
            print("prediction:", predict_label)
            print("real label:", Y_eval[i])
            x1 = float(X_eval[i][0].cpu())
            x2 = float(X_eval[i][1].cpu())
            x1_list.append(x1)
            x2_list.append(x2)
    plt.scatter(x1_list, x2_list, c=class_list,
                cmap=matplotlib.colors.ListedColormap(colors))
    plt.savefig("train_c_280.png")
def main3():
    """Plot a synthetic 'affinity' score over XOR data as a blue-scale scatter.

    Saves the figure to ``affn.png``.
    """
    X_eval, Y_eval = xor_data_generate(1000)
    afn_list, x1_list, x2_list = [], [], []
    for i in range(1000):
        x1, x2 = float(X_eval[i][0]), float(X_eval[i][1])
        y = Y_eval[i]
        # Base affinity: a small uniform jitter.
        afn = np.random.uniform(0, 0.1)
        # Class-0 points below the shifted diagonal get a noisy boost that
        # grows as they approach the line.
        if y == 0 and x2 < x1 + 0.3:
            # NOTE(review): the boundary tested is x2 = x1 + 0.3, but the
            # distance term uses (x2 - x1 + 0.3), i.e. the line x2 = x1 - 0.3.
            # Verify the intended sign.
            d = float(abs((x2 - x1 + 0.3) / math.sqrt(2)))
            afn += np.random.normal(1 - d, 1)
        x1_list.append(x1)
        x2_list.append(x2)
        afn_list.append(afn)
    plt.scatter(x1_list, x2_list, c=afn_list, cmap='Blues')
    plt.savefig("affn.png")
def Q_eval_vis():
    """Load the episode-280 checkpoint and plot the per-point Q advantage (q1 - q0).

    Saves the figure to ``policy_eval_280.png``.
    """
    torch.manual_seed(233)
    torch.cuda.set_device(0)
    args = get_args()
    config = Config(state_dim=args.hidden, input_dim=args.input_dim,
                    hidden=args.hidden, output_dim=args.num_classes,
                    epsilon=args.epsilon)
    checkpoint = torch.load("cog396test_main_episode_280.tr")
    E = models.Shared_Encoder(config)
    E.load_state_dict(checkpoint['E_state_dict'])
    X_eval, Y_eval = xor_data_generate(int(1e3))
    X_eval = X_eval.cuda()
    Y_eval = Y_eval.cuda()
    Q = models.Simple_Q_Net(config)
    Q.load_state_dict(checkpoint['Q_state_dict'])
    E.cuda()
    Q.cuda()
    x1_list = []
    x2_list = []
    affs = []
    # Inference only: no_grad avoids building graphs. The advantage is moved
    # to the CPU as a plain float because the original appended CUDA tensors,
    # which plt.scatter cannot consume as color values.
    with torch.no_grad():
        for i in range(1000):
            x_i = X_eval[i]
            s_i = E(x_i)
            q0, q1 = Q(s_i)
            affs.append(float((q1 - q0).cpu()))
            x1_list.append(float(X_eval[i][0].cpu()))
            x2_list.append(float(X_eval[i][1].cpu()))
    plt.scatter(x1_list, x2_list, c=affs, cmap='Blues')
    plt.savefig("policy_eval_280.png")
def main():
    """Main RL training loop.

    Jointly trains encoder E, classifier C and Q-network Q with a target
    network Q_t that is re-synced after each episode. Every 20 episodes the
    classifier is evaluated and a checkpoint is written to
    ``cog396test_main_episode_<i>.tr``.
    """
    torch.manual_seed(233)
    torch.cuda.set_device(0)
    args = get_args()
    print("generating config")
    config = Config(
        state_dim=args.hidden,
        input_dim=args.input_dim,
        hidden=args.hidden,
        output_dim=args.num_classes,
        epsilon=args.epsilon
    )
    gamma = args.gamma
    reward_amplify = args.reward_amplify
    passive_drive = args.passive_drive
    memory = models.Memory(args.capacity)
    m = args.batch_size
    print("initializing networks")
    E = models.Shared_Encoder(config)
    Q = models.Simple_Q_Net(config)  # 2-dim x-or problem
    Q_t = models.Simple_Q_Net(config)
    Q_t.load_state_dict(Q.state_dict())  # let Q and Q_t be identical initially
    C = models.SimpleNNClassifier(config)
    episode_length = args.episode_length
    episode_number = args.episode_number
    print("initializing optimizers")
    optimizer_E = torch.optim.Adam(E.parameters(), lr=args.lr, betas=(0., 0.999))
    optimizer_C = torch.optim.Adam(C.parameters(), lr=args.lr, betas=(0., 0.999))
    optimizer_Q = torch.optim.Adam(Q.parameters(), lr=args.lr, betas=(0., 0.999))
    # enable gpu
    E.cuda()
    C.cuda()
    Q.cuda()
    Q_t.cuda()
    loss_last = Variable(torch.tensor([0.])).cuda()
    # Fixed evaluation set, reused across all episodes for a stable reward signal.
    X_eval, Y_eval = xor_data_generate(args.eval_set_size)
    X_eval = X_eval.cuda()
    Y_eval = Y_eval.cuda()
    for i in range(episode_number):
        X, Y = xor_data_generate(m)
        X = X.cuda()
        Y = Y.cuda()
        for t in range(episode_length):
            try:
                X, Y, loss_last, reward = train_step(
                    E=E, C=C, Q=Q, Q_t=Q_t, X=X, Y=Y,
                    eval_X=X_eval, eval_Y=Y_eval,
                    gamma=gamma, loss_last=loss_last, memory=memory,
                    optimizer_C=optimizer_C, optimizer_E=optimizer_E,
                    optimizer_Q=optimizer_Q,
                    reward_amplify=reward_amplify, passive_drive=passive_drive)
                print("Episode %i step %i, loss=%f, reward=%f" % (
                    i, t, loss_last.detach().cpu().numpy(),
                    reward.detach().cpu().numpy()))
            except Exception as e:
                # Best-effort: report and skip a failed step instead of
                # aborting the whole run.
                print("Cannot train the model on this step, error:", e)
        # Sync the target network by COPYING parameters. The original did
        # `Q_t = Q`, which aliases both names to the same module, so the TD
        # target tracked Q exactly and the target network had no stabilizing
        # effect from the second episode on.
        Q_t.load_state_dict(Q.state_dict())
        if i % 20 == 0:
            test_C(C, E)
            state = {
                'E_state_dict': E.state_dict(),
                'E_optimizer': optimizer_E.state_dict(),
                'C_state_dict': C.state_dict(),
                'C_optimizer': optimizer_C.state_dict(),
                'Q_state_dict': Q.state_dict(),
                'Q_optimizer': optimizer_Q.state_dict(),
            }
            model_name = "cog396test_main_episode_" + str(i) + ".tr"
            torch.save(state, model_name)
def simple_train_C():
    """Supervised baseline: train encoder E + classifier C on XOR one sample at a
    time, then scatter-plot predictions on a fresh evaluation set (``test_c.png``).
    """
    torch.manual_seed(233)
    torch.cuda.set_device(0)
    args = get_args()
    print("generating config")
    config = Config(state_dim=args.hidden, input_dim=args.input_dim,
                    hidden=args.hidden, output_dim=args.num_classes,
                    epsilon=args.epsilon)
    gamma = args.gamma
    memory = models.Memory(args.capacity)
    m = args.batch_size
    print("initializing networks")
    E = models.Shared_Encoder(config)
    C = models.SimpleNNClassifier(config)
    optimizer_E = torch.optim.Adam(E.parameters(), lr=args.lr, betas=(0., 0.999))
    optimizer_C = torch.optim.Adam(C.parameters(), lr=args.lr, betas=(0., 0.999))
    E.cuda()
    C.cuda()
    X, Y = xor_data_generate(30000)
    X = X.cuda()
    Y = Y.cuda()
    criterion = nn.NLLLoss()  # stateless; hoisted out of the training loop
    for i in range(30000):
        x = X[i]
        y = Y[i]
        t = C(E(x))
        loss = criterion(t, y.view(-1))
        # Reset gradients each step: the original never called zero_grad(),
        # so gradients accumulated across all 30000 updates.
        optimizer_E.zero_grad()
        optimizer_C.zero_grad()
        loss.backward()
        optimizer_E.step()
        optimizer_C.step()
        if i % 1000 == 0:
            print("loss of step %i: %f" % (i, loss.detach().cpu().numpy()))
    X_eval, Y_eval = xor_data_generate(int(1e3))
    X_eval = X_eval.cuda()
    Y_eval = Y_eval.cuda()
    class_list = []
    x1_list = []
    x2_list = []
    colors = ['red', 'green']
    for i in range(int(1e3)):
        t = C(E(X_eval[i]))
        print("t:", t)
        # Predicted class is the larger log-probability of the two outputs.
        if t[0][0] > t[0][1]:
            predict_label = 0
            class_list.append(0)
        else:
            predict_label = 1
            class_list.append(1)
        print("prediction:", predict_label)
        print("real label:", Y_eval[i])
        x1 = float(X_eval[i][0].cpu())
        x2 = float(X_eval[i][1].cpu())
        x1_list.append(x1)
        x2_list.append(x2)
    plt.scatter(x1_list, x2_list, c=class_list,
                cmap=matplotlib.colors.ListedColormap(colors))
    plt.savefig("test_c.png")
def train_step(E, Q, Q_t, memory, X, Y, C, optimizer_C, optimizer_E, optimizer_Q,
               eval_X, eval_Y, loss_last, gamma, reward_amplify, passive_drive):
    """One training step: select data via an epsilon-greedy policy over Q, update
    the classifier C (and encoder E) on the selected subset, then perform one
    TD-learning update of the Q-network from replay memory.

    :param E: shared encoder, maps inputs X to states S
    :param Q: online Q-network (exposes ``epsilon`` for the greedy policy)
    :param Q_t: target Q-network used to compute the TD target
    :param memory: replay memory (``add_transition`` / ``sampling``)
    :param X: current batch of inputs
    :param Y: current batch of labels
    :param C: classifier network trained on the selected subset
    :param optimizer_C: optimizer for C
    :param optimizer_E: optimizer for E
    :param optimizer_Q: optimizer for Q
    :param eval_X: fixed evaluation inputs for reward computation
    :param eval_Y: fixed evaluation labels for reward computation
    :param loss_last: evaluation loss from the previous step
    :param gamma: discount factor for the TD target
    :param reward_amplify: reward scaling passed to reward_computing
    :param passive_drive: flag passed to reward_computing
    :return: (next batch X, next batch Y, new evaluation loss, step reward)
    """
    # --- epsilon-greedy policy over which data points to train C on ---
    S = E(X)
    m = S.shape[0]
    action = []
    # Loop until at least one point is selected (the random branch can pick none).
    while len(action) == 0:
        rand1 = np.random.rand()
        if rand1 < Q.epsilon:
            # With probability epsilon, select each point independently
            # with probability 1/2.
            n, p = 1, 0.5
            br = np.random.binomial(n, p, m)
            action = [i for i in range(m) if br[i] == 1]
        else:
            # Otherwise select points by their Q-values:
            # m pairs of (Q(s, a0), Q(s, a1)) state-action values.
            V = Q(S)
            action = choose_action(V)
    # --- execute action: train classifier C (and encoder E) on the subset ---
    S_sampled = S[action]
    Y_sampled = Y[action].view(-1)
    T = C(S_sampled)
    criterion_C = nn.NLLLoss()
    C_loss = criterion_C(T, Y_sampled)
    # Reset gradients before backward: the original never called zero_grad(),
    # so C/E gradients accumulated across steps.
    optimizer_C.zero_grad()
    optimizer_E.zero_grad()
    C_loss.backward(retain_graph=True)
    optimizer_C.step()
    optimizer_E.step()
    # --- step reward from the fixed evaluation set ---
    reward, loss = reward_computing(E, C, eval_X, eval_Y, loss_last,
                                    reward_amplify, passive_drive=passive_drive)
    # Sample the next batch of training data and encode it to obtain s_(t+1).
    sampled_X, sampled_Y = xor_data_generate(m)
    sampled_X = sampled_X.cuda()
    sampled_Y = sampled_Y.cuda()
    S_new = E(sampled_X)
    # Store the m transition tuples, using average reward reshaping (reward/m).
    action = to_one_hot(action, m)
    for i in range(m):
        transition = [S[i], action[i], reward / m, S_new[i]]
        memory.add_transition(transition)
    # --- temporal-difference update of Q from one replayed transition ---
    sampled_transition = memory.sampling()
    s_j = sampled_transition[0]
    a_j = sampled_transition[1]  # either 0 or 1
    r_j = sampled_transition[2]
    s_jp1 = sampled_transition[3]
    # TD target uses the max target-network value at the next state.
    q0, q1 = Q_t(s_jp1)
    if q0.data > q1.data:
        y_j = r_j + gamma * q0
    else:
        y_j = r_j + gamma * q1
    q0, q1 = Q(s_j)
    criterion_Q = nn.MSELoss()
    y_j = y_j.detach()  # target must not propagate gradients into Q_t
    if a_j == 0:
        Q_loss = criterion_Q(q0, y_j)
    else:
        Q_loss = criterion_Q(q1, y_j)
    # Same fix as above: clear stale gradients before the Q update.
    optimizer_Q.zero_grad()
    Q_loss.backward(retain_graph=True)
    optimizer_Q.step()
    return sampled_X, sampled_Y, loss, reward