# Single evaluation pass: push each batch from the loader through the network
# in eval mode.  The thresholded TP/FP/IoU accuracy bookkeeping and the
# accuracy-vs-threshold plotting that used to live here are disabled; the
# accumulator lists are kept so that code can be re-enabled later.
accu_tp = []
accu_fp = []
accu_iou = []

for epoch in range(1):
    for i, data in enumerate(loader, 0):
        # Unpack the batch, scale pixels into [0, 1), cast targets to float.
        inputs, labels = data
        inputs = inputs.float() / 256
        labels = labels.float()

        # Inference only — no gradient step here.
        net.eval()
        predicts = net.forward(inputs)
# One pass over the training data: forward, loss, backprop, optimizer step,
# plus logging and a checkpoint on the first batch of each epoch.
for i, data in enumerate(loader, 0):
    batch_inputs, batch_labels = data
    # Scale pixel values into [0, 1), cast, and move everything to the GPU.
    batch_inputs = Variable((batch_inputs.float() / 256).cuda())
    batch_labels = Variable(batch_labels.float().cuda())

    # zero the parameter gradients, then forward + backward + optimize
    optimizer.zero_grad()
    net.train()
    outputs = net.forward(batch_inputs)
    loss, _ = net.loss_function_vec(outputs, batch_labels, 0.5)
    loss.backward()
    optimizer.step()

    # Log and checkpoint once per epoch (both original conditions were
    # identical, so the two bodies are merged here).
    if epoch % 1 == 0 and i == 0:
        print (datetime.datetime.now())
        print ('Epoch %g'%(epoch))
        print(loss.data.cpu().numpy())
        logger.scalar_summary('training loss', loss.data.cpu().numpy(), epoch)
        torch.save(net.state_dict(), SAVE_PATH)
# Model-based "imagination" rollout: repeatedly perturb the current action
# batch with exploration noise, query the learned forward-dynamics model for
# (delta-state, reward), and optionally store the predicted transitions.
# NOTE(review): t_s is never advanced to t_ns inside this loop, so every
# depth iteration predicts from the same start states (only the accumulated
# action noise varies) — confirm this is intended.
for _ in range(args.imagination_depth):
    # add noise to actions and predict
    t_a = (
        t_a + np.random.normal(0, max_action * args.expl_noise / 10,
                               (t_a.shape[0], t_a.shape[1]))).clip(
        -max_action, max_action)
    # Dynamics-model input is the concatenated (state, action) batch.
    fwd_input = np.hstack((t_s, t_a))
    fwd_input = apply_norm(
        fwd_input, fwd_norm[0])  # normalize the data before feeding in
    fwd_input = torch.tensor(fwd_input).float().to(device)
    fwd_output = forward_dynamics_model.forward(fwd_input)
    fwd_output = fwd_output.detach().cpu().numpy()
    fwd_output = unapply_norm(
        fwd_output, fwd_norm[1])  # unnormalize the output data
    # Output layout (per fwd_norm convention): columns [:-1] are the
    # predicted state delta, column [-1] is the predicted reward.
    t_ns = fwd_output[:, :
                      -1] + t_s  # predicted next state = predicted delta next state + current state
    t_r = fwd_output[:, -1]  # predicted reward
    # add to replay buffer
    # store predicted forward transition in buffer
    if args.model_based == "forward":
        # done flag is hard-coded False: imagined transitions never terminate.
        for k in range(t_s.shape[0]):
            fwd_model_replay_buffer.add(
                t_s[k], t_a[k], t_ns[k], t_r[k], False)
class Agent:
    """Deep Q-Network agent.

    Holds an online network and a periodically synced target network,
    explores with a linearly decaying epsilon-greedy policy, and learns
    from minibatches sampled out of a replay buffer.
    """

    def __init__(self, lr=0.003, input_dims=[4], env=None, gamma=0.99,
                 n_actions=2, epsilon_greedy_start=0.5,
                 epsilon_greedy_decay=0.0002, max_size=1000000,
                 layer1_size=64, layer2_size=64, batch_size=128,
                 writer=None):
        # NOTE(review): input_dims=[4] is a mutable default; it is only
        # forwarded here, but a tuple default would be safer if the API can
        # evolve (kept as-is for backward compatibility).
        self.env = env
        self.gamma = gamma
        self.memory = ReplayBuffer(max_size, input_dims, n_actions)
        self.batch_size = batch_size
        self.n_actions = n_actions
        self.epsilon_greedy_start = epsilon_greedy_start
        self.epsilon_greedy_decay = epsilon_greedy_decay
        self.net = Net(lr, input_dims, n_actions=n_actions,
                       fc1_dims=layer1_size, fc2_dims=layer2_size,
                       name='dqn')
        # Target network starts as an exact copy and is only refreshed
        # explicitly via target_update(); it is never trained directly.
        self.target_net = deepcopy(self.net)
        self.target_net.load_state_dict(self.net.state_dict())
        self.target_net.eval()
        self.criterion = F.smooth_l1_loss
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.net.to(self.device)
        self.target_net.to(self.device)
        self.writer = writer

    def choose_action(self, state, timestep):
        """Epsilon-greedy action selection.

        Epsilon decays linearly with timestep (it may go negative, which
        simply means pure greedy behavior from then on).
        """
        epsilon = self.epsilon_greedy_start - self.epsilon_greedy_decay * timestep
        if random.random() <= epsilon:
            return self.env.action_space.sample()
        state = torch.from_numpy(state).to(self.device, torch.float)
        action = self.net.forward(state).max(0)[1].item()
        return action

    def target_update(self):
        """Copy the online network's weights into the target network."""
        self.target_net.load_state_dict(self.net.state_dict())

    def model_update(self, timestep):
        """Sample a minibatch and take one gradient step on the TD error."""
        if len(self.memory) < self.batch_size:
            return
        states, actions, rewards, states_, terminals = self.memory.sample_buffer(
            self.batch_size)
        states = states.to(self.device)
        actions = actions.to(self.device)
        rewards = rewards.to(self.device)
        states_ = states_.to(self.device)
        terminals = terminals.to(self.device)

        # Q(s, a) for the actions actually taken.
        state_action_values = self.net.forward(states.to(torch.float))
        state_action_values = state_action_values.gather(
            1, actions[:, 0].unsqueeze(1).to(torch.long)).squeeze(1)

        with torch.no_grad():
            next_state_values = self.target_net(states_.to(
                torch.float)).max(1)[0].detach()
        # Bellman target: r + gamma * max_a' Q_target(s', a').
        # BUGFIX: only the bootstrap term is masked on terminal transitions.
        # The original multiplied the whole target (reward included) by
        # (1 - terminal), which wrongly zeroed out terminal rewards.
        expected_action_values = rewards + self.gamma * \
            next_state_values * (1 - terminals.to(torch.float))

        loss = self.criterion(state_action_values,
                              expected_action_values.to(torch.float))
        self.writer.add_scalar("loss", loss.item(), timestep)

        self.net.optimizer.zero_grad()
        loss.backward()
        # Clamp gradients element-wise for stability.
        for param in self.net.parameters():
            param.grad.data.clamp_(-1, 1)
        self.net.optimizer.step()

    def store_transition(self, state, action, reward, state_, done):
        # Delegates to the replay buffer; the misspelled method name is part
        # of ReplayBuffer's external API and cannot be fixed here.
        self.memory.store_transtions(state, action, reward, state_, done)
# Train on the full training set each epoch, evaluating on the test split
# periodically.  Hyperparameters come from the JSON file on the command line.
with open(args.param) as paramfile:
    param = json.load(paramfile)

model = Net(14 * 14)
optimizer = optim.Adam(model.parameters(), lr=param['learning_rate'])
loss_func = nn.CrossEntropyLoss()

test_losses = []
test_accuracys = []

# The training arrays do not change between epochs, so convert them from
# numpy once up front (the original re-converted them inside every epoch).
inputs = torch.from_numpy(X_train).float()
targets = torch.from_numpy(Y_train).long()

for epoch in range(1, int(param['num_epochs']) + 1):
    output = model.forward(inputs)
    loss = loss_func(output, targets.reshape(-1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # NOTE(review): with epoch starting at 1 this fires at epochs 9, 19, ...
    # — confirm whether `epoch % 10 == 0` was intended (kept as-is).
    if (epoch + 1) % 10 == 0:
        test_loss, test_output, test_targets = model.test(
            X_test, Y_test, loss_func)
        test_losses.append(test_loss)
        test_accuracys.append(calc_accuracy(test_output, test_targets))

# Final training-set accuracy, computed from the last epoch's predictions.
accuracy = calc_accuracy(output, targets)
# Data pipeline and a stubbed-out training loop: the `break`s stop after the
# first batch of the first epoch, so only a single forward pass runs.  The
# commented lines show the intended full training step.
dataset = Rand_num()
sampler = RandomSampler(dataset)
loader = DataLoader(dataset,
                    batch_size=1,
                    sampler=sampler,
                    shuffle=False,
                    num_workers=1,
                    drop_last=True)

net = Net()
# net.load_state_dict(torch.load(SAVE_PATH))
net.cuda()
optimizer = optim.Adam(net.parameters(), lr=0.0005)

for epoch in range(10000):
    for i, data in enumerate(loader, 0):
        net.zero_grad()
        video, labels = data
        print(video.size())
        print(labels.size())

        # Move the batch onto the GPU; pixel values scaled into [0, 1).
        labels = torch.squeeze(Variable(labels.long().cuda()))
        video = torch.squeeze(Variable((video.float() / 256).cuda()))

        net.train()
        outputs = net.forward(video)
        break
    break
    # loss = net.lossFunction(outputs, labels)
    # loss.backward()
    # optimizer.step()
    # if i == 0:
    #     torch.save(net.state_dict(), SAVE_PATH)
    #     print(loss)