def mrcnn_bbox_loss(target_bbox, target_class_ids, pred_bbox):
    """Loss for Mask R-CNN bounding box refinement.

    Only positive ROIs (class id > 0) contribute, and for each such ROI only
    the predicted deltas of its ground-truth class are compared against the
    target deltas.

    Args:
        target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
        target_class_ids: [batch, num_rois]. Integer class IDs.
        pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))]

    Returns:
        Scalar smooth-L1 loss tensor (zero when there are no positive ROIs).
    """
    # Reshape to merge batch and ROI dimensions for simplicity.
    target_class_ids = target_class_ids.contiguous().view(-1)
    target_bbox = target_bbox.contiguous().view(-1, 4)
    pred_bbox = pred_bbox.contiguous().view(-1, pred_bbox.size(2), 4)

    # Only positive ROIs contribute to the loss, and only the deltas of the
    # right class of each ROI.
    positive_roi_ix = torch.nonzero(target_class_ids > 0).view(-1)
    if positive_roi_ix.numel() == 0:
        # No positives: return a zero still connected to the graph.
        return pred_bbox.sum() * 0.0

    positive_class_ids = target_class_ids[positive_roi_ix].long()

    # Gather the deltas (predicted and true) that contribute to the loss.
    # The original computed smooth_l1 over the full [N, num_classes, 4]
    # prediction against a [N, 4] target, which relied on broadcasting and
    # mixed in every class and every negative ROI; select the per-class
    # deltas of the positive ROIs instead.
    picked_pred = pred_bbox[positive_roi_ix, positive_class_ids]
    picked_target = target_bbox[positive_roi_ix]

    # `reduction='mean'` replaces the deprecated `size_average=True`.
    return F.smooth_l1_loss(picked_pred, picked_target, reduction='mean')
def train_batch(param):
    """Run one DQN training step on a batch sampled from replay memory.

    Uses module-level globals: ``memory``, ``default_states_preprocessor``,
    ``device``, ``target_dqn``, ``dqn`` and ``optimizer``.

    Args:
        param: dict of hyper-parameters; reads 'batch_size' and 'GAMMA'.

    Returns:
        float: Huber (smooth-L1) loss of the step, or 0 when the replay
        memory does not yet hold a full batch.
    """
    if len(memory) < param['batch_size']:
        return 0
    batch = memory.sample(param['batch_size'])
    batch_states = default_states_preprocessor([m.state for m in batch])
    batch_next_states = default_states_preprocessor([m.next_state for m in batch])
    batch_ended = torch.tensor([m.ended for m in batch])
    batch_rewards = torch.tensor([m.reward for m in batch]).to(device)
    batch_actions = torch.tensor([m.action for m in batch]).to(device)

    ## Calculate expected reward: r + gamma * max_a Q_target(s', a),
    ## with V(s') = 0 for terminal states. No gradients through the target.
    with torch.set_grad_enabled(False):
        # `~mask` replaces the deprecated `1 - ByteTensor(...)` idiom.
        not_ended_batch = ~batch_ended.to(device=device, dtype=torch.bool)
        next_states_non_final = batch_next_states[not_ended_batch]
        next_state_values = torch.zeros(param['batch_size']).to(device)
        reward_hat = target_dqn(next_states_non_final)
        next_state_values[not_ended_batch] = reward_hat.max(1)[0]
        expected_state_action_values = next_state_values * param['GAMMA'] + batch_rewards

    # Predict value function: Q(s, a) for the actions actually taken.
    yhat = dqn(batch_states)
    state_action_values = yhat.gather(1, batch_actions.unsqueeze(1)).squeeze()

    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)
    optimizer.zero_grad()
    loss.backward()
    # Clip *gradients*, not weights. The original clamped `param.data`
    # (the weights) and its loop variable shadowed the hyper-parameter
    # dict argument `param`.
    for p in dqn.parameters():
        p.grad.data.clamp_(-1, 1)
    optimizer.step()
    return float(loss.data.cpu().numpy())
def forward(self, loc_preds, loc_targets, cls_preds, cls_targets):
    """Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).

    Args:
        loc_preds: (tensor) predicted locations, sized [N, #anchors, 4].
        loc_targets: (tensor) encoded target locations, sized [N, #anchors, 4].
        cls_preds: (tensor) predicted class confidences, sized [N, #anchors, #classes].
        cls_targets: (tensor) encoded target labels, sized [N, #anchors].

    Returns:
        loss = (SmoothL1Loss(loc_preds, loc_targets)
                + CrossEntropyLoss(cls_preds, cls_targets)) / num_pos
    """
    pos = cls_targets > 0  # [N,#anchors]; 0 = background, < 0 = ignore
    batch_size = pos.size(0)
    num_pos = pos.sum().item()

    # ===============================================================
    # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
    # ===============================================================
    mask = pos.unsqueeze(2).expand_as(loc_preds)  # [N,#anchors,4]
    # `reduction='sum'` replaces the deprecated `size_average=False`.
    loc_loss = F.smooth_l1_loss(loc_preds[mask], loc_targets[mask], reduction='sum')

    # ===============================================================
    # cls_loss = CrossEntropyLoss(cls_preds, cls_targets)
    # ===============================================================
    # `reduction='none'` replaces the deprecated `reduce=False`.
    cls_loss = F.cross_entropy(cls_preds.view(-1, self.num_classes),
                               cls_targets.view(-1), reduction='none')  # [N*#anchors,]
    cls_loss = cls_loss.view(batch_size, -1)
    cls_loss[cls_targets < 0] = 0  # set ignored anchors' loss to 0
    neg = self._hard_negative_mining(cls_loss, pos)  # [N,#anchors]
    cls_loss = cls_loss[pos | neg].sum()

    print('loc_loss: {} | cls_loss: {}'.format(loc_loss.item() / num_pos,
                                               cls_loss.item() / num_pos))
    loss = (loc_loss + cls_loss) / num_pos
    return loss
def backward(self):
    """Compute the actor-critic loss over the stored rollout, backprop it,
    and reset the rollout state.

    Consumes self.rewards / self.discounts / self.outputs collected during
    the forward passes; each outputs[i] carries .value and .log_action.
    Written against the legacy (pre-0.4) Variable API.
    """
    # # calculate step returns in reverse order
    #rewards = torch.stack(self.rewards, dim=0)
    rewards = self.rewards
    # One return per step except the last; the final value estimate seeds
    # the bootstrap recursion below.
    returns = torch.Tensor(len(rewards) - 1, *self.outputs[-1].value.data.size())
    step_return = self.outputs[-1].value.data.cpu()
    for i in range(len(rewards) - 2, -1, -1):
        # step_return = discounts[i] * step_return + rewards[i] (in place);
        # the slice assignment copies the current value into the buffer.
        step_return.mul_(self.discounts[i]).add_(rewards[i])
        returns[i] = step_return
    if USE_CUDA:
        returns = returns.cuda()
    # # calculate losses
    policy_loss = 0
    value_loss = 0
    steps = len(self.outputs) - 1
    for i in range(steps):
        # Advantage is built from .data, so the policy gradient does not
        # flow back through the value head.
        advantage = Variable(returns[i] - self.outputs[i].value.data)
        policy_loss += -self.outputs[i].log_action * advantage
        value_loss += F.smooth_l1_loss(self.outputs[i].value, Variable(returns[i]))
    # Small L2 penalty over all parameters.
    weights_l2 = 0
    for param in self.parameters():
        weights_l2 += param.norm(2)
    loss = policy_loss.mean() / steps + value_loss / steps + 0.00001 * weights_l2
    loss.backward()
    # reset rollout state for the next episode
    self.reset()
def optimize_model():
    """One DQN optimization step over a replay-memory batch.

    Legacy (pre-0.4) PyTorch: Variable/volatile are used where modern code
    would use torch.no_grad(). Relies on module-level globals: memory,
    Transition, model, optimizer, ByteTensor, FloatTensor, BATCH_SIZE, GAMMA.
    """
    global last_sync
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose a list of Transitions into a Transition of lists.
    batch = Transition(*zip(*transitions))
    # Mask of transitions whose next state is non-terminal.
    non_final_mask = ByteTensor(tuple(map(lambda s: s is not None, batch.next_state)))
    # volatile=True: no autograd graph is built for the target computation.
    non_final_next_states = Variable(torch.cat([s for s in batch.next_state if s is not None]), volatile=True)
    state_batch = Variable(torch.cat(batch.state))
    action_batch = Variable(torch.cat(batch.action))
    reward_batch = Variable(torch.cat(batch.reward))
    # Q(s_t, a) for the actions actually taken.
    state_action_values = model(state_batch).gather(1, action_batch)
    # V(s_{t+1}); stays 0 for terminal states.
    next_state_values = Variable(torch.zeros(BATCH_SIZE)).type(FloatTensor)
    next_state_values[non_final_mask] = model(non_final_next_states).max(1)[0]
    # next_state_values.volatile = False
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch
    # Huber loss between predicted and expected Q-values.
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)
    optimizer.zero_grad()
    loss.backward()
    # Element-wise gradient clipping for stability.
    for param in model.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
def forward(self, loc_preds, loc_targets, cls_preds, cls_targets):
    '''Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).

    Args:
        loc_preds: (tensor) predicted locations, sized [batch_size, #anchors, 4].
        loc_targets: (tensor) encoded target locations, sized [batch_size, #anchors, 4].
        cls_preds: (tensor) predicted class confidences, sized [batch_size, #anchors, #classes].
        cls_targets: (tensor) encoded target labels, sized [batch_size, #anchors].

    loss: (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + FocalLoss(cls_preds, cls_targets).

    NOTE(review): written against the legacy (pre-0.4) API (`.data[0]`,
    `size_average`); will need porting for modern PyTorch.
    '''
    batch_size, num_boxes = cls_targets.size()
    pos = cls_targets > 0  # [N,#anchors]; 0 = background, -1 = ignore
    num_pos = pos.data.long().sum()
    ################################################################
    # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
    ################################################################
    mask = pos.unsqueeze(2).expand_as(loc_preds)       # [N,#anchors,4]
    masked_loc_preds = loc_preds[mask].view(-1,4)      # [#pos,4]
    masked_loc_targets = loc_targets[mask].view(-1,4)  # [#pos,4]
    loc_loss = F.smooth_l1_loss(masked_loc_preds, masked_loc_targets, size_average=False)
    ################################################################
    # cls_loss = FocalLoss(cls_preds, cls_targets)
    ################################################################
    pos_neg = cls_targets > -1  # exclude ignored anchors
    mask = pos_neg.unsqueeze(2).expand_as(cls_preds)
    masked_cls_preds = cls_preds[mask].view(-1,self.num_classes)
    cls_loss = self.focal_loss_alt(masked_cls_preds, cls_targets[pos_neg])
    # Per-positive averages, printed for monitoring only.
    print('loc_loss: %.3f | cls_loss: %.3f' % (loc_loss.data[0]/num_pos, cls_loss.data[0]/num_pos), end=' | ')
    loss = (loc_loss+cls_loss)/num_pos
    return loss
def reply(self):
    """Experience-replay step: sample a batch, build the TD target with the
    same network, and take one optimizer step on the Huber loss.
    """
    if len(self.memory) < BATCH_SIZE:
        return
    sampled = self.memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*sampled))

    # Byte mask marking transitions whose successor state exists.
    has_next = torch.ByteTensor(tuple(s is not None for s in batch.next_state))

    states = torch.cat(batch.state)
    actions = torch.cat(batch.action)
    rewards = torch.cat(batch.reward)
    successors = torch.cat([s for s in batch.next_state if s is not None])

    # Evaluation mode while computing predictions and targets.
    self.model.eval()
    predicted_q = torch.squeeze(self.model(states).gather(1, actions))
    future_q = torch.zeros(BATCH_SIZE).type(torch.FloatTensor)
    future_q[has_next] = self.model(successors).data.max(1)[0]
    target_q = rewards + GAMMA * future_q

    # Back to training mode for the gradient step.
    self.model.train()
    loss = F.smooth_l1_loss(predicted_q, target_q)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
def ohem_detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):
    """Online Hard Example Mining (OHEM) detection loss.

    Keeps the `num_hard` highest-loss ROIs for the classification loss and
    uses all positive ROIs for the box-regression loss.

    Args:
        cls_score: [R, num_classes] classification scores.
        rois_label: [R] ground-truth class per ROI (0 = background).
        bbox_pred: [R, 4] predicted box deltas.
        rois_target: [R, 4] target box deltas.
        rois_inside_ws, rois_outside_ws: unused here; kept for interface
            parity with the non-OHEM loss.

    Returns:
        (loss_cls, loss_box)
    """
    def log_sum_exp(x):
        # Numerically stable log(sum(exp(x))) over dim 1.
        x_max = x.data.max()
        return torch.log(torch.sum(torch.exp(x - x_max), dim=1, keepdim=True)) + x_max

    num_hard = cfg.TRAIN.BATCH_SIZE * self.batch_size
    pos_idx = rois_label > 0
    num_pos = pos_idx.int().sum()

    # classification loss
    num_classes = cls_score.size(1)
    weight = cls_score.data.new(num_classes).fill_(1.)
    # Down-weight the background class by the positive fraction
    # (legacy `.data[0]` scalar access).
    weight[0] = num_pos.data[0] / num_hard

    # Detached copies: the ranking must not contribute gradients.
    conf_p = cls_score.detach()
    conf_t = rois_label.detach()

    # rank ROIs by their cross-entropy loss
    loss_c = log_sum_exp(conf_p) - conf_p.gather(1, conf_t.view(-1,1))
    # Inflate positive losses so every positive sample is always kept.
    loss_c[pos_idx] = 100.
    _, topk_idx = torch.topk(loss_c.view(-1), num_hard)
    loss_cls = F.cross_entropy(cls_score[topk_idx], rois_label[topk_idx], weight=weight)

    # bounding box regression L1 loss (positives only)
    pos_idx = pos_idx.unsqueeze(1).expand_as(bbox_pred)
    loc_p = bbox_pred[pos_idx].view(-1, 4)
    loc_t = rois_target[pos_idx].view(-1, 4)
    loss_box = F.smooth_l1_loss(loc_p, loc_t)

    return loss_cls, loss_box
def accumulate_gradient(self, batch_sz, states, actions, rewards, next_states, mask):
    """ Compute the temporal difference error.
        td_error = (r + gamma * max Q(s_,a)) - Q(s,a)

    Gradients are only accumulated here; the optimizer step happens
    elsewhere. Legacy (pre-0.4) PyTorch: Variable/volatile mark the target
    computation as graph-free.
    """
    states = Variable(states)
    actions = Variable(actions)
    rewards = Variable(rewards)
    next_states = Variable(next_states, volatile=True)

    # Compute Q(s, a) for the taken actions
    q_values = self.policy(states)
    q_values = q_values.gather(1, actions.unsqueeze(1))

    # Compute max_a Q_target(s_, a)
    q_target_values = None
    if next_states.is_cuda:
        q_target_values = Variable(torch.zeros(batch_sz).cuda())
    else:
        q_target_values = Variable(torch.zeros(batch_sz))

    # Bootstrap for non-terminal states only; terminal entries stay 0
    q_target_values[mask] = self.target_policy(next_states).max(1)[0][mask]

    # Clear the volatile flag so it doesn't propagate into the Huber loss
    q_target_values.volatile = False
    expected_q_values = (q_target_values * self.gamma) + rewards

    # Compute Huber loss
    loss = F.smooth_l1_loss(q_values, expected_q_values)

    # Accumulate gradients
    loss.backward()
def rpn_bbox_loss(target_bbox, rpn_match, rpn_bbox, config): """Return the RPN bounding box loss graph. config: the model config object. target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))]. Uses 0 padding to fill in unsed bbox deltas. rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, -1=negative, 0=neutral anchor. rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))] """ # Positive anchors contribute to the loss, but negative and # neutral anchors (match value of 0 or -1) don't. indices = torch.eq(rpn_match, 1) rpn_bbox = torch.masked_select(rpn_bbox, indices) batch_counts = torch.sum(indices.float(), dim=1) outputs = [] for i in range(config.IMAGES_PER_GPU): # print(batch_counts[i].cpu().data.numpy()[0]) outputs.append(target_bbox[i, torch.arange(int(batch_counts[i].cpu().data.numpy()[0])).type(torch.cuda.LongTensor)]) target_bbox = torch.cat(outputs, dim=0) loss = F.smooth_l1_loss(rpn_bbox, target_bbox, size_average=True) return loss
def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
    """One temporal-difference update step for the Q-network.

    Args:
        batch_state: [batch, state_dim] current states.
        batch_next_state: [batch, state_dim] successor states.
        batch_reward: [batch] immediate rewards.
        batch_action: [batch] integer indices of the actions taken.
    """
    # Q(s, a) for the actions actually taken.
    outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
    # max_a Q(s', a), detached so the target is treated as a constant.
    next_outputs = self.model(batch_next_state).detach().max(1)[0]
    target = self.gamma * next_outputs + batch_reward
    td_loss = F.smooth_l1_loss(outputs, target)
    self.optimizer.zero_grad()
    # `retain_variables` was removed from PyTorch (its successor is
    # `retain_graph`); a plain backward() suffices here because the graph
    # is not reused after this call.
    td_loss.backward()
    self.optimizer.step()
def optim_fn(reward, value, next_value, log_prob):
    """One actor-critic update for a single transition.

    The bootstrapped TD target r + gamma * V(s') drives both the critic
    (smooth-L1) and, via the detached TD error, the actor term
    -log_prob * delta. Uses module-level `gamma` and `optimizer`.
    """
    td_target = reward + gamma * next_value
    td_error = td_target - value
    # .data detaches, so the actor term does not backprop into the critic.
    actor_loss = -log_prob * td_error.data
    critic_loss = F.smooth_l1_loss(value, td_target.data)
    total = actor_loss + critic_loss
    optimizer.zero_grad()
    total.backward()
    optimizer.step()
def forward(self, predictions, priors, targets): loc_data, conf_data, _ = predictions # 预测的框以及类别概率(bs,-1,4) (bs,-1,2) priors = priors num = loc_data.size(0) # bs num_priors = priors.size(0) # (bs, 21824, 4) loc_t = torch.Tensor(num, num_priors, 4) # (bs, 21824) conf_t = torch.LongTensor(num, num_priors) # (bs,num_obj, 5) for idx in range(num): truths = targets[idx][:, :-1].data # cx,cy,w,h labels = targets[idx][:, -1].data # 1 or 0 defaults = priors.data # default boxes match(0.35, truths, defaults, [0.1, 0.2], labels, loc_t, conf_t, idx) if self.device.type == 'cuda': loc_t = loc_t.to(self.device) conf_t = conf_t.to(self.device) # 得到概率 >0 的idx pos = conf_t > 0 pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) # 全部展开 计算loss loc_p = loc_data[pos_idx].view(-1, 4) # predict loc_t = loc_t[pos_idx].view(-1, 4) # label loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') batch_conf = conf_data.view(-1, 2) loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) # Hard Negative Mining loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now loss_c = loss_c.view(num, -1) _, loss_idx = loss_c.sort(1, descending=True) _, idx_rank = loss_idx.sort(1) num_pos = pos.long().sum(1, keepdim=True) num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1) neg = idx_rank < num_neg.expand_as(idx_rank) pos_idx = pos.unsqueeze(2).expand_as(conf_data) neg_idx = neg.unsqueeze(2).expand_as(conf_data) conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, 2) targets_weighted = conf_t[(pos + neg).gt(0)] loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N N = max(num_pos.data.sum().float(), 1) loss_l /= N loss_c /= N return loss_l, loss_c
def optimize_model():
    """One DQN optimization step (classic PyTorch DQN-tutorial layout,
    legacy pre-0.4 Variable/volatile API). Uses module-level globals:
    memory, Transition, model, optimizer, ByteTensor, Tensor,
    BATCH_SIZE, GAMMA.
    """
    global last_sync
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see http://stackoverflow.com/a/19343/3343043 for
    # detailed explanation).
    batch = Transition(*zip(*transitions))
    # Compute a mask of non-final states and concatenate the batch elements
    non_final_mask = ByteTensor(tuple(map(lambda s: s is not None, batch.next_state)))
    # We don't want to backprop through the expected action values and volatile
    # will save us on temporarily changing the model parameters'
    # requires_grad to False!
    non_final_next_states = Variable(torch.cat([s for s in batch.next_state if s is not None]), volatile=True)
    state_batch = Variable(torch.cat(batch.state))
    action_batch = Variable(torch.cat(batch.action))
    reward_batch = Variable(torch.cat(batch.reward))
    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken
    state_action_values = model(state_batch).gather(1, action_batch)
    # Compute V(s_{t+1}) for all next states (terminal states keep 0).
    next_state_values = Variable(torch.zeros(BATCH_SIZE).type(Tensor))
    next_state_values[non_final_mask] = model(non_final_next_states).max(1)[0]
    # Now, we don't want to mess up the loss with a volatile flag, so let's
    # clear it. After this, we'll just end up with a Variable that has
    # requires_grad=False
    next_state_values.volatile = False
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch
    # Compute Huber loss
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)
    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    # Element-wise gradient clipping for stability.
    for param in model.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
def train(env, episodes, gamma=0.9):
    """Self-play REINFORCE-with-baseline training for a two-player board game.

    Trains one PolicyNet per player by playing them against each other,
    then returns greedy (train=False) controller functions.

    Args:
        env: game environment; states expose .cur_player.
        episodes: number of self-play games.
        gamma: discount factor for returns.

    Returns:
        list of two controller functions, one per player.
    """
    num_actions = env._board_size ** 2
    nets = [PolicyNet(num_actions), PolicyNet(num_actions)]
    optimizers = [optim.Adam(net.parameters(), lr=1e-2) for net in nets]
    ctrl_fns = [build_ctrl_fn(net, train=True) for net in nets]
    for episode in range(episodes):
        state = env.reset()
        rewards_all = []
        done = False
        while not done:
            # Players alternate; the controller of the player to move acts.
            action = ctrl_fns[state.cur_player](state)
            state, reward, done, _ = env.step(action)
            rewards_all.append(reward)
        rewards_all = np.array(rewards_all)
        # Zero-sum shaping: each move's reward is reduced by the opponent's
        # following reward, then the stream is split per player (even/odd).
        rewards_all[:-1] -= rewards_all[1:]
        rewards_all = [rewards_all[0::2], rewards_all[1::2]]
        for (net, optimizer, rewards) in zip(nets, optimizers, rewards_all):
            # Discounted returns in forward order.
            Rs, R = [], 0
            for r in reversed(rewards):
                R = gamma * R + r
                Rs.insert(0, R)
            Rs = torch.tensor(Rs)
            # Normalize returns for variance reduction.
            Rs = (Rs - Rs.mean()) / (Rs.std() + 1e-3)
            policy_loss = []
            value_loss = []
            for t, (R, value) in enumerate(zip(Rs, net.values)):
                # value.item() detaches the baseline from the policy term.
                policy_loss.append(-net.log_probs[t] * (R - value.item()))
                value_loss.append(F.smooth_l1_loss(value, torch.tensor([[R]])))
            policy_loss = torch.stack(policy_loss).sum()
            value_loss = torch.stack(value_loss).sum()
            loss = policy_loss + value_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Clear the per-episode buffers stored on the net, in place.
            del net.log_probs[:]
            del net.values[:]
        del rewards_all
    return [build_ctrl_fn(net, train=False) for net in nets]
def finish_episode():
    """REINFORCE-with-value-baseline episode update.

    NOTE(review): relies on the `Variable.reinforce()` stochastic-node API,
    which was removed from PyTorch (pre-0.4 only); this will not run on
    modern releases. Uses module-level `model`, `args`, `optimizer`.
    """
    R = 0
    saved_actions = model.saved_actions
    value_loss = 0
    rewards = []
    # Discounted returns, computed backwards then stored in forward order.
    for r in model.rewards[::-1]:
        R = r + args.gamma * R
        rewards.insert(0, R)
    rewards = torch.Tensor(rewards)
    # Normalize returns for variance reduction.
    rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
    for (action, value), r in zip(saved_actions, rewards):
        # Advantage with a detached value baseline.
        reward = r - value.data[0,0]
        action.reinforce(reward)  # attach the policy-gradient signal
        value_loss += F.smooth_l1_loss(value, Variable(torch.Tensor([r])))
    optimizer.zero_grad()
    # Backprop the value loss together with the reinforced action nodes.
    final_nodes = [value_loss] + list(map(lambda p: p.action, saved_actions))
    gradients = [torch.ones(1)] + [None] * len(saved_actions)
    autograd.backward(final_nodes, gradients)
    optimizer.step()
    # Clear per-episode buffers in place.
    del model.rewards[:]
    del model.saved_actions[:]
def finish_episode():
    """Actor-critic episode update: discounted, normalized returns drive
    an advantage-weighted policy loss and a smooth-L1 value loss.
    Uses module-level `model`, `args`, `optimizer` (legacy Variable API).
    """
    R = 0
    saved_actions = model.saved_actions
    policy_losses = []
    value_losses = []
    rewards = []
    # Discounted returns, computed backwards then stored in forward order.
    for r in model.rewards[::-1]:
        R = r + args.gamma * R
        rewards.insert(0, R)
    rewards = torch.Tensor(rewards)
    # Normalize returns for variance reduction.
    rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
    for (log_prob, value), r in zip(saved_actions, rewards):
        # Advantage; the baseline is detached via .data.
        reward = r - value.data[0]
        policy_losses.append(-log_prob * reward)
        value_losses.append(F.smooth_l1_loss(value, Variable(torch.Tensor([r]))))
    optimizer.zero_grad()
    loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
    loss.backward()
    optimizer.step()
    # Clear per-episode buffers in place.
    del model.rewards[:]
    del model.saved_actions[:]
def finish_episode():
    """Actor-critic episode update: compute normalized discounted returns,
    form policy and value losses, and take one optimizer step.
    Uses module-level `model`, `optimizer`, `args.gamma` and `eps`.
    """
    saved_actions = model.saved_actions
    # Discounted returns, computed backwards then flipped to forward order.
    returns = []
    running = 0
    for reward in reversed(model.rewards):
        running = reward + args.gamma * running
        returns.append(running)
    returns.reverse()
    returns = torch.tensor(returns)
    # Normalize for variance reduction.
    returns = (returns - returns.mean()) / (returns.std() + eps)

    policy_losses = []
    value_losses = []
    for (log_prob, value), ret in zip(saved_actions, returns):
        advantage = ret - value.item()  # .item() detaches the baseline
        policy_losses.append(-log_prob * advantage)
        value_losses.append(F.smooth_l1_loss(value, torch.tensor([ret])))

    optimizer.zero_grad()
    loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
    loss.backward()
    optimizer.step()

    # Clear the per-episode buffers in place.
    del model.rewards[:]
    del model.saved_actions[:]
def update_q(self, state, action, next_state):
    """Single-transition Q update toward a bootstrapped target.

    Target: q + ETA*(1 - q) at the goal state (reward 1), otherwise
    q + ETA*(GAMMA * max_a' Q(s', a') - q). Legacy Variable API; uses
    module-level GOAL, ETA, GAMMA.
    """
    self.model.eval()
    variable = Variable(self.create_input(state))
    qvalue = self.model(variable)[action]
    next_variable = Variable(self.create_input(next_state))
    next_qvalue_max = self.model(next_variable).max()
    if next_state == GOAL:
        print('goal')
        # Reward of 1 at the goal: move q toward 1 by step size ETA.
        target = qvalue + ETA * (1 - qvalue)
    else:
        # No immediate reward: move q toward GAMMA * max Q(s').
        target = qvalue + ETA * (GAMMA * next_qvalue_max - qvalue)
    self.model.train()
    # NOTE(review): `target` is built from `qvalue` without detaching;
    # wrapping in Variable() may not sever the graph on newer torch —
    # confirm the intended semantics.
    loss = F.smooth_l1_loss(qvalue, Variable(target))
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
def update_policy(self, history, episode):
    """Policy update from one episode of recorded transitions.

    Args:
        history: sequence of transition tuples; entry[2] is the reward,
            entry[3] the network output tensor for that step, and
            entry[4] the per-action target vector.
        episode: episode index (unused here; kept for interface parity).
    """
    self.model.train()
    rewards = np.zeros((len(history)))
    targets = np.zeros((len(history), NUM_ACTION))
    for i, entry in enumerate(history):
        rewards[i] = entry[2]
        targets[i] = entry[4]
    discounted_rewards = self.discount_reward(rewards)
    # NOTE(review): assumes discount_reward returns a shape broadcastable
    # against (len(history), NUM_ACTION) — verify against its definition.
    targets = targets * discounted_rewards
    # The original called `targets.reshape(-1, NUM_ACTION)` without using
    # the result (numpy reshape is not in-place); the no-op was removed.
    targets = torch.tensor(targets, dtype=torch.float32)
    self.optimizer.zero_grad()
    # Accumulate gradients over the whole episode, then take one step.
    for i, entry in enumerate(history):
        loss = F.smooth_l1_loss(entry[3], targets[i])
        loss.backward()
    self.optimizer.step()
def optimize_model():
    """One DQN optimization step (standard PyTorch DQN-tutorial layout).

    Uses module-level globals: memory, Transition, policy_net, target_net,
    optimizer, device, BATCH_SIZE, GAMMA.
    """
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see http://stackoverflow.com/a/19343/3343043 for
    # detailed explanation).
    batch = Transition(*zip(*transitions))
    # Mask of non-final states; `torch.bool` replaces the deprecated
    # uint8-mask indexing.
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)),
                                  device=device, dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)
    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken.
    state_action_values = policy_net(state_batch).gather(1, action_batch)
    # Compute V(s_{t+1}) for all next states via the target net; terminal
    # states keep value 0.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0].detach()
    # Compute the expected Q values.
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch
    # Compute Huber loss.
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))
    # Optimize the model with element-wise gradient clipping.
    optimizer.zero_grad()
    loss.backward()
    for param in policy_net.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
def point_form_loss(boxes_p, conf_p, boxes_gt, conf_gt, priors, match_threshold = 0.5, negative_odd = 3., verbose=True):
    '''SSD-style localization + confidence loss over point-form boxes.

    boxes_p: (batch_size, num_priors, 4) predicted boxes (point form).
    conf_p: (batch_size, num_priors, num_classes) predicted class scores.
    boxes_gt: [[(4),...],...] (length=batch_size) ground-truth boxes.
    conf_gt: [[(1),...],...] (length=batch_size) ground-truth labels.
    priors: point-form priors (num_priors, 4).
    match_threshold: minimum IoU (against priors) for a positive match.
    negative_odd: hard-negative-mining ratio (negatives per positive).

    Priors (not predictions) are matched against the ground truth so the
    matching stays stable; priors below the threshold become negatives.
    Returns (loc_loss, conf_loss) summed over the batch.
    '''
    batch_size = boxes_p.shape[0]
    loc_losses = torch.zeros(batch_size)
    conf_losses = torch.zeros(batch_size)
    for i, (b_p, c_p, b_g_list, c_g_list) in enumerate(zip(boxes_p, conf_p, boxes_gt, conf_gt)):
        # For every entry in the batch:
        #if len(b_g_list.squeeze()) == 0:
        if b_g_list.numel() == 0:
            # Skip if there is no ground-truth box; the background is not
            # trained here, per the Hard Negative Mining requirement.
            continue
        b_g = b_g_list.type(torch.float)  # (num_gt, 4)
        #iou_mat = iou(b_p, b_g)
        # Use priors instead of b_p to ensure strong matching.
        iou_mat = iou(priors, b_g)  # (num_priors, num_gt)
        # Best-matching ground-truth box for every prior.
        iou_max_value, iou_max_idx = torch.max(iou_mat, 1)  # (num_priors)
        mask_p = iou_max_value > match_threshold  # (num_priors) bool
        if mask_p.sum() == 0:
            if verbose:
                print("Prior box grid fail to match a given bbox. Maybe you need redesign network or redefine the transforms.")
            continue
        b_p_masked = b_p[mask_p,:]
        b_g_selected = torch.index_select(b_g, 0, iou_max_idx)
        b_g_selected = b_g_selected[mask_p,:]
        # Localization loss over positive priors only.
        loc_losses[i] = F.smooth_l1_loss(b_p_masked, b_g_selected)
        # Confidence loss. The network outputs classes in [0, num_class]
        # (0 denotes background), while conf_gt labels are in
        # [0, num_class-1] (0 denotes some object class), hence the +1 shift.
        '''
        # Confidence loss without Hard Negative Mining
        c_g = c_g_list + 1 #(num_gt) int
        c_g_selected = torch.index_select(c_g, 0, iou_max_idx)
        c_g_selected[~mask] = 0 # set background class
        conf_losses[i] = F.cross_entropy(c_p, c_g_selected.long()) # Why int32 int64 does matter???
        '''
        # Confidence loss with Hard Negative Mining
        c_g = c_g_list + 1  # (num_gt) int; shift so 0 = background
        c_g_selected = torch.index_select(c_g, 0, iou_max_idx)
        c_g_selected[~mask_p] = 0  # unmatched priors get the background class
        # Keep the `negative_odd` * (#positives) hardest negatives, ranked
        # by their best IoU.
        num_negative = (mask_p.sum().float() * negative_odd).floor().long()
        iou_max_value_n = iou_max_value[~mask_p]
        _, negative_idx = iou_max_value_n.sort(descending=True)
        _, negative_rank = negative_idx.sort()
        mask_n = negative_rank < num_negative
        loss_p = F.cross_entropy(c_p[mask_p,:], c_g_selected[mask_p].long())
        loss_n = F.cross_entropy(c_p[~mask_p][mask_n,:], c_g_selected[~mask_p][mask_n].long())
        # Not equivalent to indexing with [mask_p | mask_n] because
        # cross_entropy averages within each subset separately.
        conf_losses[i] = loss_p + loss_n
    loc_loss = loc_losses.sum()
    conf_loss = conf_losses.sum()
    #loss = loc_loss + alpha * conf_loss
    return loc_loss, conf_loss
#point_form_loss = point_form_loss_matched_with_priors
def optimize_model(optimizer, policy_net, target_net, memory, device, GAMMA=0.99, BATCH_SIZE=32, n_steps=20, double_dqn=False):
    """One (optionally double / n-step) DQN optimization step.

    Args:
        optimizer: optimizer over policy_net's parameters.
        policy_net: online Q-network (updated in place).
        target_net: target Q-network used for bootstrapping.
        memory: replay buffer; sample() returns (transitions, idx).
        device: torch device for all tensors.
        GAMMA: discount factor (used for the 1-step target).
        BATCH_SIZE: minimum buffer size / batch size.
        n_steps: 1 for the standard target, otherwise an n-step target
            computed by `nstep_target`.
        double_dqn: select actions with policy_net, evaluate with target_net.

    Returns:
        policy_net after the update, or None if the buffer is too small.
    """
    # NOTE(review): enabling anomaly detection on every call slows training
    # considerably; this looks like a debugging leftover.
    torch.autograd.set_detect_anomaly(True)
    if len(memory) < BATCH_SIZE:
        return
    transitions, idx = memory.sample()
    """
    zip(*transitions) unzips the transitions into
    Transition(*) creates new named tuple
    batch.state - tuple of all the states (each state is a tensor)
    batch.next_state - tuple of all the next states (each state is a tensor)
    batch.reward - tuple of all the rewards (each reward is a float)
    batch.action - tuple of all the actions (each action is an int)
    """
    batch = Transition(*zip(*transitions))
    # Per-element actions/rewards wrapped as tensors on the right device.
    actions = tuple((map(lambda a: torch.tensor([[a]], device=device), batch.action)))
    rewards = tuple((map(lambda r: torch.tensor([r], device=device), batch.reward)))
    non_final_mask = torch.tensor(tuple(
        map(lambda s: s is not None, batch.next_state)), device=device, dtype=torch.bool)
    non_final_next_states = torch.cat(
        [s for s in batch.next_state if s is not None]).to(device)
    state_batch = torch.cat(batch.state).to(device)
    action_batch = torch.cat(actions)
    reward_batch = torch.cat(rewards)
    # Q(s_t, a) for the actions actually taken.
    state_action_values = policy_net(state_batch).gather(1, action_batch)
    if n_steps == 1:
        next_state_values = torch.zeros(BATCH_SIZE, device=device)
        if double_dqn:
            # Double DQN: argmax from the online net, value from the target net.
            max_action = policy_net(non_final_next_states).max(
                1, keepdim=True)[1].detach()
            next_state_values[non_final_mask] = target_net(
                non_final_next_states).gather(1, max_action).squeeze(1).detach()
        else:
            next_state_values[non_final_mask] = target_net(
                non_final_next_states).max(1)[0].detach()
        # next_state_values.requires_grad = False
        expected_state_action_values = (next_state_values * GAMMA) + reward_batch
    else:
        # n-step bootstrapped target computed from the replay buffer.
        expected_state_action_values = nstep_target(idx=idx, policy_net=policy_net, target_net=target_net, steps=n_steps, memory=memory, device=device, double_dqn=double_dqn)
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))
    optimizer.zero_grad()
    loss.backward()
    # Element-wise gradient clipping for stability.
    for param in policy_net.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
    return policy_net
def forward(self, spt_ms, spt_rgb, qry_ms, qry_rgb, epoch):
    """MAML-style meta-training step for multispectral-to-RGB regression.

    :b: number of tasks/batches.
    :setsz: number of training pairs per task (support set).
    :querysz: number of test pairs per task (query set).
    :param spt_ms: [task_num, setsz, 16, h, w] support inputs.
    :param spt_rgb: [task_num, setsz, 3, h, w] support targets.
    :param qry_ms: [task_num, querysz, 16, h, w] query inputs.
    :param qry_rgb: [task_num, querysz, 3, h, w] query targets.
    :return: (accs, loss_q) — average PSNR after the last inner step and
        the meta loss used for the outer update.
    """
    spt_ms = spt_ms.squeeze()
    spt_rgb = spt_rgb.squeeze()
    qry_ms = qry_ms.squeeze()
    qry_rgb = qry_rgb.squeeze()
    task_num, setsz, c, h, w = spt_ms.size()
    _, querysz, c, _, _ = qry_ms.size()
    # losses_q[k] is the query loss after k inner-loop gradient steps
    losses_q = [0 for _ in range(self.update_step + 1)]
    # corrects[k] accumulates PSNR after k inner-loop gradient steps
    corrects = [0 for _ in range(self.update_step + 1)]
    # Halve the inner-loop learning rate every 2000 epochs (up to 4000).
    if (epoch < 4001):
        if (epoch % 2000 == 0) and (epoch > 1):
            decay = 2  #(epoch // 5) + 1
            self.update_lr = self.update_lr / decay
            print('outer loop lr is: ', self.update_lr)
    for i in range(task_num):
        # 1. run the i-th task and compute loss for k=0 (k is update step)
        logits = self.net(spt_ms[i], vars=None, bn_training=True)
        loss = F.smooth_l1_loss(logits, spt_rgb[i])
        #print(loss.item())
        # Gradients of the support loss w.r.t. the current meta-parameters.
        grad = torch.autograd.grad(loss, self.net.parameters())
        # One SGD step on a *copy* of the weights (fast weights).
        fast_weights = list(
            map(lambda p: p[1] - self.update_lr * p[0], zip(grad, self.net.parameters())))
        # The two no_grad blocks below only evaluate: the first measures
        # query loss/PSNR with the original weights, the second with the
        # fast weights after the first update — neither builds a graph.
        with torch.no_grad():
            # query evaluation before any inner update
            logits_q = self.net(qry_ms[i], self.net.parameters(), bn_training=True)
            loss_q = F.smooth_l1_loss(logits_q, qry_rgb[i])
            losses_q[0] += loss_q
            pred_q = logits_q
            # calculate PSNR as the "accuracy" metric
            correct = errors.find_psnr(pred_q, qry_rgb[i])
            corrects[0] = corrects[0] + correct
        # query evaluation after the first inner update
        with torch.no_grad():
            logits_q = self.net(qry_ms[i], fast_weights, bn_training=True)
            loss_q = F.smooth_l1_loss(logits_q, qry_rgb[i])
            losses_q[1] += loss_q
            pred_q = logits_q
            correct = errors.find_psnr(pred_q, qry_rgb[i])
            corrects[1] = corrects[1] + correct
        for k in range(1, self.update_step):
            # 1. run the i-th task and compute loss for k=1~K-1
            logits = self.net(spt_ms[i], fast_weights, bn_training=True)
            loss = F.smooth_l1_loss(logits, spt_rgb[i])
            # 2. compute grad on theta_pi
            grad = torch.autograd.grad(loss, fast_weights)
            # 3. theta_pi = theta_pi - train_lr * grad
            fast_weights = list(
                map(lambda p: p[1] - self.update_lr * p[0], zip(grad, fast_weights)))
            # Query loss with graph kept: this is what the meta-update uses.
            logits_q = self.net(qry_ms[i], fast_weights, bn_training=True)
            self.valid_img = logits_q
            # loss_q is accumulated per step; only losses_q[-1] (the last
            # update step) feeds the outer-loop update below.
            loss_q = F.smooth_l1_loss(logits_q, qry_rgb[i])
            losses_q[k + 1] += loss_q
            with torch.no_grad():
                pred_q = logits_q
                correct = errors.find_psnr(pred_q, qry_rgb[i])
                corrects[k + 1] = corrects[k + 1] + correct
    # end of all tasks
    # average the last-step query loss across tasks
    loss_q = losses_q[-1] / task_num
    # self.log[-1] += loss.item()
    # Outer (meta) update: unlike the inner loop, this optimizes the shared
    # meta-parameters against the query-set loss of each episode.
    self.meta_optim.zero_grad()
    loss_q.backward()  # second-order backprop through the inner updates
    # print('meta update')
    # for p in self.net.parameters()[:5]:
    #     print(torch.norm(p).item())
    self.meta_optim.step()
    accs = np.average(np.array(corrects[-1]))  #/ (querysz * task_num)
    print('inner loop lr is: ', self.get_lr(self.meta_optim))
    return accs, loss_q
    # NOTE(review): these two lines are the tail of a data-generating
    # function (presumably `get_batch`, called below) whose `def` line
    # precedes this chunk — confirm against the full file.
    y = f(x)
    return x, y

# Script: fit a linear model with smooth-L1 loss and manual SGD.
# Define model
fc = torch.nn.Linear(W_target.size(0), 1)
for batch_idx in count(1):
    # Get data
    batch_x, batch_y = get_batch()
    # Reset gradients
    fc.zero_grad()
    # Forward pass
    output = F.smooth_l1_loss(fc(batch_x), batch_y)
    loss = output.item()
    # Backward pass
    output.backward()
    # Apply gradients: manual SGD step with learning rate 0.1
    for param in fc.parameters():
        param.data.add_(-0.1 * param.grad.data)
    # Stop criterion
    if loss < 1e-3:
        break
print('Loss: {:.6f} after {} batches'.format(loss, batch_idx))
print('==> Learned function:\t' + poly_desc(fc.weight.view(-1), fc.bias))
def _td_loss(x_t, x_diff_t, x_tp1, terminal, discount_factor): x_target = x_diff_t + (1 - terminal.float()) * discount_factor * x_tp1 return functional.smooth_l1_loss(x_t, x_target)
def main():
    """Train a DQN agent on config.ENV_NAME with an epsilon-greedy policy.

    Runs config.EPOCHS episodes, pushing transitions into the agent's
    replay memory and optimizing a smooth-L1 TD error once the memory
    holds at least one batch. Every TEST_INTERVAL epochs the greedy
    policy is evaluated; training stops when the average evaluation
    reward exceeds 195, after which the learned policy is rendered
    indefinitely.
    """
    env = gym.make(config.ENV_NAME)
    agent = DQN(env)
    optimizer = optim.Adam(agent.parameters(), lr=0.001)
    finished = False
    for epoch in range(config.EPOCHS):
        state = env.reset()
        for step in range(config.ITERATIONS):
            action = agent.get_action(state, 'egreedy')
            next_state, reward, done, _ = env.step(action[0, 0])
            if done:
                # penalize episode termination
                reward = -1
            agent.replay_memory.push(Transition(
                config.FloatTensor([state]),
                action,
                config.FloatTensor([reward]),
                config.FloatTensor([next_state]) if not done else None))
            state = next_state
            if len(agent.replay_memory) >= config.BATCH_SIZE:
                batch = agent.replay_memory.sample(config.BATCH_SIZE)
                batch = Transition(*zip(*batch))
                non_final_mask = config.ByteTensor(
                    [s is not None for s in batch.next_state])
                non_final_next_state_batch = Variable(torch.cat([
                    s for s in batch.next_state if s is not None]))
                state_batch = Variable(torch.cat(batch.state),
                                       requires_grad=False)
                action_batch = Variable(torch.cat(batch.action).view(-1, 1),
                                        requires_grad=False)
                reward_batch = Variable(torch.cat(batch.reward),
                                        requires_grad=False)
                q_values = agent(state_batch).gather(1, action_batch)
                s_values = Variable(torch.zeros(config.BATCH_SIZE).type(
                    config.FloatTensor), requires_grad=False)
                # detach: no gradient may flow through the bootstrap target
                s_values[non_final_mask] = agent(
                    non_final_next_state_batch).max(1)[0].detach()
                expected_q_values = config.GAMMA * s_values + reward_batch
                # BUG FIX: compare Q-values element-wise. The previous code
                # took torch.sum() of both sides before the loss, which lets
                # per-sample errors cancel and destroys the gradient signal.
                loss = F.smooth_l1_loss(q_values,
                                        expected_q_values.unsqueeze(1))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            if done:
                break
        # linear epsilon annealing from EPSILON_START to EPSILON_END
        agent.epsilon = config.EPSILON_START - epoch / config.EPOCHS * (
            config.EPSILON_START - config.EPSILON_END)
        if epoch % config.TEST_INTERVAL == 0:
            sum_reward = 0
            for _epoch in range(config.TEST_EPOCHS):
                epoch_reward = 0
                state = env.reset()
                for step in range(config.TEST_ITERATIONS):
                    # env.render()
                    action = agent.get_action(state)  # Default (greedy)
                    state, reward, done, _ = env.step(action[0, 0])
                    if done:
                        break
                    epoch_reward += reward
                sum_reward += epoch_reward
            avg_reward = sum_reward / config.TEST_EPOCHS
            print('Epoch: {}, Average Reward: {}'.format(epoch, avg_reward))
            print('Current Epsilon:', agent.epsilon)
            # CartPole-style solved threshold
            if avg_reward > 195:
                finished = True
        if finished:
            break
    # demonstrate the learned policy forever
    while True:
        state = env.reset()
        round_reward = 0
        for step in range(config.TEST_ITERATIONS):
            env.render()
            action = agent.get_action(state)  # Default (greedy)
            state, reward, done, _ = env.step(action[0, 0])
            if done:
                break
            round_reward += reward
        print('Round reward:', round_reward)
def forward(self, predictions, targets):
    """Multibox Loss.

    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape: torch.size(batch_size, num_priors, 4)
            priors shape: torch.size(num_priors, 4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (loss_l, loss_c): localization and confidence losses, each already
        normalized by the number of positive (matched) priors.
    """
    loc_data, conf_data, priors = predictions
    num = loc_data.size(0)
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes; `match` fills
    # loc_t/conf_t in place for each image of the batch
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets (no gradients flow into the matched targets)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0  # priors matched to a real object
    num_pos = pos.sum(dim=1, keepdim=True)

    # Localization Loss (Smooth L1) over positive priors only
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior softmax cross-entropy (log-sum-exp minus the true-class
    # logit), used to rank negatives for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining: keep the negpos_ratio highest-loss negatives
    loss_c = loss_c.view(num, -1)
    loss_c[pos] = 0  # filter out pos boxes for now
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)  # rank of each prior by its loss
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    # NOTE(review): if a batch has zero positives N is 0 and the division
    # yields inf/nan — confirm upstream guarantees at least one match.
    N = num_pos.data.sum().double()
    loss_l = loss_l.double()
    loss_c = loss_c.double()
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def learn(self, entropy=0):
    """Run one PPO update over the transitions stored in self.memory.

    Performs self.n_epochs passes: each pass recomputes values with the
    recurrent critic (using the stored initial hidden states), builds
    GAE advantages from the TD residuals, and applies the clipped
    surrogate objective plus a smooth-L1 critic loss.

    Args:
        entropy: scalar carried only for logging; it does not enter the
            loss computed here.
    """
    state_arr, state_prime_arr, action_arr, old_probs_arr, vals_arr, \
        reward_arr, hidden_in, hidden_out, dones_arr, batches = \
        self.memory.generate_batches()
    state_arr = T.from_numpy(state_arr).float()
    state_prime_arr = T.from_numpy(state_prime_arr).float()
    action_arr = T.from_numpy(action_arr).float()
    old_probs_arr = T.from_numpy(old_probs_arr).float()
    vals_arr = T.from_numpy(vals_arr).float()
    reward_arr = T.from_numpy(reward_arr).float().unsqueeze(1)
    dones_arr = T.from_numpy(dones_arr).float().unsqueeze(1)
    # detach the stored recurrent states so backprop stays inside this update
    first_hidden = hidden_in[0].detach()
    second_hidden = hidden_out[0].detach()
    for _ in range(self.n_epochs):
        v_prime = self.get_value(state_prime_arr, second_hidden).squeeze(1)
        # NOTE(review): the bootstrap is multiplied by dones_arr directly —
        # this assumes dones_arr stores a continuation ("not done") mask;
        # if it stores raw done flags the mask should be (1 - dones_arr).
        td_target = reward_arr + self.gamma * v_prime * dones_arr
        v_s = self.get_value(state_arr, first_hidden).squeeze(1)
        delta = td_target - v_s
        delta = delta.detach().cpu().numpy()
        # GAE: accumulate discounted TD residuals from the last step back
        advantage_lst = []
        advantage = 0.0
        for item in delta[::-1]:
            advantage = self.gamma * self.gae_lambda * advantage + item[0]
            advantage_lst.append([advantage])
        advantage_lst.reverse()
        advantage = T.tensor(advantage_lst, dtype=T.float)
        pi = self.get_prob(state_arr, first_hidden).probs
        pi_a = pi.squeeze(1).gather(1, action_arr.unsqueeze(1).long())
        # importance ratio via log-space subtraction: a/b == exp(log a - log b)
        ratio = T.exp(T.log(pi_a) - old_probs_arr.unsqueeze(1))
        surr1 = ratio * advantage
        surr2 = T.clamp(ratio, 1 - self.policy_clip,
                        1 + self.policy_clip) * advantage
        # clipped surrogate (actor) + smooth-L1 value error (critic)
        aloss = -T.min(surr1, surr2)
        closs = F.smooth_l1_loss(v_s, td_target.detach())
        loss = aloss + closs
        self.actor.optimizer.zero_grad()
        self.critic.optimizer.zero_grad()
        # NOTE(review): retain_graph=True keeps the shared recurrent graph
        # alive across the n_epochs passes — confirm this is intentional,
        # as it also retains memory for the whole update.
        loss.mean().backward(retain_graph=True)
        self.actor.optimizer.step()
        self.critic.optimizer.step()
    self.memory.clear_memory()
def forward(self, predictions, priors, targets):
    """Multibox Loss.

    Args:
        predictions (tuple): A tuple containing loc preds and conf preds
            from SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape: torch.size(batch_size, num_priors, 4)
        priors (tensor): prior boxes, shape: torch.size(num_priors, 4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (loss_l, loss_c): localization and confidence losses, each
        normalized by the number of positive priors (at least 1).
    """
    loc_data, conf_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes; `match` fills
    # loc_t/conf_t in place per image
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets: no gradients flow into the matched targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0  # priors matched to a real object
    num_pos = pos.sum(dim=1, keepdim=True)

    # Localization Loss (Smooth L1) over positive priors only
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # NOTE: a large commented-out GIoU localization loss (built on
    # self.bbox_transform) and a focal-loss confidence variant
    # (self.f_loss) used to live here; removed as dead code — recover
    # from version control if they need to be restored.

    # Per-prior softmax cross-entropy used to rank negatives for
    # hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining: keep the negpos_ratio highest-loss negatives
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)  # rank of each prior by its loss
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    # categorical cross entropy
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    # clamp N to 1 so a batch with zero positives cannot divide by zero
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
dtype=torch.float32).cuda() else: target_scores = torch.zeros([BATCH_SIZE, agent_num, 5], dtype=torch.float32) for j in range(BATCH_SIZE): if done[j] is not True: target_scores[j] = target_model( states[j, 1:].unsqueeze(0)).squeeze(0).max(1)[0].view( agent_num, 1) #(batch, agent, 1) # print(reward) target_scores = target_scores * 0.999 + reward loss = F.smooth_l1_loss(pred_scores, target_scores) optim.zero_grad() loss.backward() # for param in model.parameters(): # param.grad.data.clamp_(-1, 1) optim.step() if i % TARGET_UPDATE == 0: target_model.load_state_dict(model.state_dict()) if (i + 1) % 100 == 0: print('Iter:%d | loss:%.4f | pred_scores:%.4f | target_scores:%.4f' % (i + 1, loss.item(), torch.mean( pred_scores[0]).item(), torch.mean(target_scores[0]).item())) if (i + 1) % 100 == 0:
def forward(self, src=None, tgt=None, src_lengths=None, tgt_lengths=None,
            bptt=False, batch=None):
    """Cascaded forward pass: ASR -> NMT -> TTS.

    The ASR model and the two teacher encoders (nmt_encoder,
    tts_encoder) are run in eval mode / without gradients; only the
    downstream NMT and TTS stages receive gradient. When `batch` is
    given, its fields override the individual src/tgt arguments.

    Returns a dict with decoder outputs and attentions of every stage
    plus the intermediate representations ('trans', 'trans2') and their
    frozen teacher counterparts ('trans_tgt', 'trans2_tgt').
    """
    self.asr.decoder.noise.sigma = 0.3  # fixed decoder noise level
    self.nmt_encoder.eval()
    self.tts_encoder.eval()
    if batch is not None:
        src, src_lengths = batch.src
        src_txt, src_txt_lengths = batch.src_txt
        tgt_txt, tgt_txt_lengths = batch.tgt_txt
        tgt, tgt_lengths = batch.tgt
    result = {}
    # ASR stage is frozen: run it without building a graph
    with torch.no_grad():
        self.asr.eval()
        asr_results = self.asr(src, src_txt, src_lengths, src_txt_lengths)
    result['asr_dec_out'] = asr_results['dec_out']
    result['asr_attns'] = asr_results['attns']
    # NMT encoder consumes the (detached) ASR attention context
    _, memory_bank, memory_lengths = self.nmt.encode(
        asr_results['attns']['context'][:-1].detach(),
        src_txt_lengths - 2)
    result['trans'] = memory_bank
    # teacher NMT encoding of the gold source text (frozen)
    with torch.no_grad():
        _, nmt_memory, _ = self.nmt_encoder(src_txt[1:-1],
                                            src_txt_lengths - 2)
    result['trans_tgt'] = nmt_memory.detach()
    # NOTE(review): when the student encoding drifts more than 0.005
    # (smooth-L1) from the teacher, the teacher memory is substituted —
    # confirm this scheduled-teacher-forcing threshold is intentional.
    if F.smooth_l1_loss(result['trans'],
                        result['trans_tgt']).item() > 0.005:
        memory_bank = nmt_memory
    dec_out, attns = self.nmt.decoder(tgt_txt[:-1], memory_bank,
                                      memory_lengths=memory_lengths,
                                      tgt_lengths=tgt_txt_lengths)
    result['nmt_dec_out'] = dec_out
    result['nmt_attns'] = attns
    # TTS encoder consumes the NMT attention context
    _, memory_bank, memory_lengths = self.tts.encoder(
        result['nmt_attns']['context'][:-1], tgt_txt_lengths)
    result['trans2'] = memory_bank
    # teacher TTS encoding of the gold target text (frozen)
    with torch.no_grad():
        _, tts_memory, _ = self.tts_encoder(tgt_txt[1:-1],
                                            tgt_txt_lengths - 2)
    result['trans2_tgt'] = tts_memory.detach()
    # NOTE(review): this branch assigns `nmt_memory` (not `tts_memory`)
    # as the fallback for the TTS stage — looks like a copy/paste bug;
    # verify which teacher memory is intended here.
    if F.smooth_l1_loss(result['trans2'],
                        result['trans2_tgt']).item() > 0.005:
        memory_bank = nmt_memory
    dec_out, attns = self.tts.decoder(tgt[:-1], memory_bank,
                                      memory_lengths=memory_lengths,
                                      tgt_lengths=tgt_lengths)
    result['tts_dec_out'] = dec_out
    result['tts_attns'] = attns
    return result
y = f(x) return x, y #%% 声明模型 fc = torch.nn.Linear(W_target.size(0), 1) for batch_idx in count(1): # 获取数据 batch_x, batch_y = get_batch() # 重置求导 fc.zero_grad() # 前向传播 output = F.smooth_l1_loss(fc(batch_x), batch_y) loss = output.item() # 后向传播 output.backward() # 应用导数 for param in fc.parameters(): param.data.add_(-0.1 * param.grad.data) # 停止条件 if loss < 1e-3: plt.cla() plt.scatter(batch_x.data.numpy()[:, 0], batch_y.data.numpy()[:, 0], label='real curve',
def train(self, model, data):
    """PPO update over a list of mini-batches of rollout data.

    Phase 1 computes fixed GAE advantages and TD targets (no gradient)
    for every mini-batch; phase 2 runs K_epoch optimization passes with
    the clipped surrogate, a smooth-L1 critic loss, and an entropy
    bonus.

    Returns:
        (mean total loss, mean policy loss, mean value loss,
         mean entropy, mean move-entropy) over all optimization steps.
    """
    tot_loss_lst = []
    pi_loss_lst = []
    entropy_lst = []
    move_entropy_lst = []
    v_loss_lst = []

    # Phase 1: calculate fixed advantages before the update
    data_with_adv = []
    for mini_batch in data:
        s, a, m, r, s_prime, done_mask, prob, need_move = mini_batch
        with torch.no_grad():
            pi, pi_move, v, _ = model(s)
            pi_prime, pi_m_prime, v_prime, _ = model(s_prime)
            # NOTE(review): done_mask multiplies the bootstrap directly —
            # assumes it stores a continuation ("not done") mask.
            td_target = r + self.gamma * v_prime * done_mask
            delta = td_target - v  # [horizon * batch_size * 1]
        delta = delta.detach().cpu().numpy()
        # GAE: accumulate discounted TD residuals back-to-front
        advantage_lst = []
        advantage = np.array([0])
        for delta_t in delta[::-1]:
            advantage = self.gamma * self.lmbda * advantage + delta_t
            advantage_lst.append(advantage)
        advantage_lst.reverse()
        advantage = torch.tensor(advantage_lst, dtype=torch.float,
                                 device=model.device)
        data_with_adv.append((s, a, m, r, s_prime, done_mask, prob,
                              need_move, td_target, advantage))

    # Phase 2: K_epoch clipped-PPO passes over the pre-computed batches
    for i in range(self.K_epoch):
        for mini_batch in data_with_adv:
            s, a, m, r, s_prime, done_mask, prob, need_move, \
                td_target, advantage = mini_batch
            pi, pi_move, v, _ = model(s)
            pi_prime, pi_m_prime, v_prime, _ = model(s_prime)
            pi_a = pi.gather(2, a)
            pi_m = pi_move.gather(2, m)
            # joint probability: the move head only applies where
            # need_move == 1
            pi_am = pi_a * (1 - need_move + need_move * pi_m)
            # a/b == exp(log(a) - log(b))
            ratio = torch.exp(torch.log(pi_am) - torch.log(prob))
            surr1 = ratio * advantage
            surr2 = torch.clamp(ratio, 1 - self.eps_clip,
                                1 + self.eps_clip) * advantage
            entropy = -torch.log(pi_am)
            move_entropy = -need_move * torch.log(pi_m)
            surr_loss = -torch.min(surr1, surr2)
            v_loss = F.smooth_l1_loss(v, td_target.detach())
            entropy_loss = -1 * self.entropy_coef * entropy
            loss = surr_loss + v_loss + entropy_loss.mean()
            loss = loss.mean()
            model.optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), self.grad_clip)
            model.optimizer.step()
            # bookkeeping for the returned averages
            tot_loss_lst.append(loss.item())
            pi_loss_lst.append(surr_loss.mean().item())
            v_loss_lst.append(v_loss.item())
            entropy_lst.append(entropy.mean().item())
            n_need_move = torch.sum(need_move).item()
            if n_need_move == 0:
                move_entropy_lst.append(0)
            else:
                move_entropy_lst.append(
                    (torch.sum(move_entropy) / n_need_move).item())
    return np.mean(tot_loss_lst), np.mean(pi_loss_lst), \
        np.mean(v_loss_lst), np.mean(entropy_lst), \
        np.mean(move_entropy_lst)
def part_forward(self, predictions, targets):
    """Multibox loss with focal-loss confidence term.

    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape: torch.size(batch_size, num_priors, 4)
            priors shape: torch.size(num_priors, 4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (conf_loss, loc_loss): focal confidence loss and smooth-L1
        localization loss, each normalized by the positive-anchor count
        (clamped to 1).
    """
    loc_data, conf_data, priors = predictions
    num = loc_data.size(0)
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        # SFD anchor-compensation matching vs. the plain SSD strategy
        if ac:
            sfd_match(self.threshold, truths, defaults, self.variance,
                      labels, loc_t, conf_t, idx)
        else:
            match(self.threshold, truths, defaults, self.variance,
                  labels, loc_t, conf_t, idx)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets: no gradients flow into the matched targets
    loc_targets = Variable(loc_t, requires_grad=False)
    conf_targets = Variable(conf_t, requires_grad=False)

    ############# Localization Loss part ##############
    pos = conf_targets > 0  # ignore background
    num_pos = pos.long().sum(1, keepdim=True)
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_targets[pos_idx].view(-1, 4)
    # reduction='sum' replaces the deprecated size_average=False
    # (consistent with the other loss modules in this file)
    loc_loss = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    ############### Confidence Loss part ###############
    # An alternative per-anchor focal-loss implementation used to sit
    # here as a commented-out block; removed as dead code.
    # Focal loss, paper eq.(5), computed from the summed cross-entropy.
    pos_cls = conf_targets > -1  # exclude ignored anchors
    mask = pos_cls.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[mask].view(-1, conf_data.size(2)).clone()
    p_t_log = -F.cross_entropy(conf_p, conf_targets[pos_cls],
                               reduction='sum')
    p_t = torch.exp(p_t_log)
    conf_loss = -self.alpha * ((1 - p_t) ** self.gamma * p_t_log)

    # Clamp to avoid divide-by-zero: data augmentation (cropping) can
    # occasionally leave an image with no positive anchors.
    N = max(1, num_pos.data.sum())
    conf_loss /= N
    loc_loss /= N
    return conf_loss, loc_loss
def regression_loss(logits, labels, seq_len, loss_type, normalize_indices,
                    var_lambda):
    """Loss based on regressing to the correct frame indices
    ("Cycle-back Regression").

    Three variants:
      i)  regression_mse: MSE between predicted and ground-truth indices.
      ii) regression_mse_var: variance-aware MSE — the squared error is
          scaled by the inverse variance of the soft index distribution,
          plus a log-variance regularizer weighted by `var_lambda`.
      iii) regression_huber: smooth-L1 (Huber) loss between predicted and
          ground-truth indices.

    Args:
        logits: Tensor [N, T, T], pre-softmax similarity scores after
            cycling back to the starting sequence.
        labels: Tensor [N, T], ground-truth (soft) index targets.
        seq_len: int, number of timesteps T.
        loss_type: str, one of 'regression_mse', 'regression_mse_var',
            'regression_huber'.
        normalize_indices: bool, currently unused by this implementation
            (kept for interface compatibility with callers).
        var_lambda: float, weight of the log-variance term in
            'regression_mse_var'.

    Returns:
        Scalar loss tensor.

    Raises:
        ValueError: if `loss_type` is not one of the supported variants.
    """
    # steps[b, i, j] = j — candidate frame indices. Created directly on
    # logits' device (the old CPU-only arange crashed on GPU inputs).
    steps = torch.arange(
        seq_len, device=logits.device, dtype=torch.float
    )[None, None, :].expand_as(logits)
    beta = F.softmax(logits, dim=2)
    true_time = labels
    # soft-argmax: expected index per position, shape [N, T]
    pred_time = torch.sum(steps * beta, 2)

    if loss_type in ['regression_mse', 'regression_mse_var']:
        if 'var' in loss_type:
            tiled_pred_time = pred_time.unsqueeze(1).expand(-1, seq_len, -1)
            # Variance-aware regression:
            # pred_time_variance has shape [N, T]
            pred_time_variance = torch.sum(
                torch.pow(steps - tiled_pred_time, 2) * beta, 2)
            # log-variance is numerically more stable
            pred_time_log_var = torch.log(pred_time_variance)
            squared_error = torch.pow(true_time - pred_time, 2)
            return torch.mean(
                torch.exp(-pred_time_log_var) * squared_error +
                var_lambda * pred_time_log_var)
        else:
            return F.mse_loss(pred_time, true_time)
    elif loss_type == 'regression_huber':
        return F.smooth_l1_loss(pred_time, true_time)
    else:
        raise ValueError(
            'Unsupported regression loss %s. Supported losses are: '
            'regression_mse, regression_mse_var and regression_huber.'
            % loss_type)
def optimize(): ### Perform experience replay and train the network. nonlocal last_sync if len(memory) < BATCH_SIZE: return transitions = memory.sample(BATCH_SIZE) # Use the replay buffer to sample a batch of transitions batch = Transition(*zip(*transitions)) # batch.state is a tuple of states # batch.action is a tuple os actions # batch.reward is a tuple of rewards state_batch = Variable(torch.cat(batch.state)).float() action_batch = Variable(torch.cat(batch.action)).long() reward_batch = Variable(torch.cat(batch.reward)).float() non_final_mask = torch.ByteTensor( tuple(map(lambda s: s is not None, batch.next_state))) non_final_next_states = Variable(torch.cat( [s for s in batch.next_state if s is not None]), volatile=True).float() # print(type(non_final_next_states.data)) # Compute current Q value, takes only state and output value for every state-action pair # We choose Q based on action taken. # save_image(state_batch.data,"before_conv.png") state_action_values = dqn(state_batch).gather(1, action_batch) # Compute next Q value based on which action gives max Q values next_state_values = Variable(torch.zeros(BATCH_SIZE)) next_state_values[non_final_mask] = dqn(non_final_next_states).max( 1)[0] next_state_values.volatile = False # Compute the target of the current Q values expected_state_action_values = (next_state_values * GAMMA) + reward_batch # same as SmoothL1Loss # Creates a criterion that uses a squared term if # the absolute element-wise error falls below 1 and an L1 term otherwise. loss = F.smooth_l1_loss(state_action_values, expected_state_action_values) # Clears the gradients of all optimized Variable optimizer.zero_grad() # Use autograd to compute the backward pass. This call will compute the # gradient of loss with respect to all Variables with requires_grad=True. # After this call w1.grad and w2.grad will be Variables holding the gradient # of the loss with respect to w1 and w2 respectively. 
loss.backward() #Clamps the gradients to (-1,1) in-place for param in dqn.parameters(): param.grad.data.clamp_(-1, 1) optimizer.step()
def forward(self, x, flip, loc_preds, loc_targets, cls_preds, cls_targets):
    '''Compute loss between (loc_preds, loc_targets) and
    (cls_preds, cls_targets), with a per-image weight derived from the
    backbone features.

    Args:
        x: input image batch, forwarded through self.features to derive
            the per-image weight `iw`.
        flip: per-image horizontal-flip flags (currently unused — the
            flip-aware weighting branch was removed as dead code).
        loc_preds: (tensor) predicted locations, [batch_size, #anchors, 4].
        loc_targets: (tensor) encoded target locations, [batch_size, #anchors, 4].
        cls_preds: (tensor) predicted class confidences,
            [batch_size, #anchors, #classes].
        cls_targets: (tensor) encoded target labels, [batch_size, #anchors].

    Returns:
        (loss, loc_loss/num_pos, cls_loss/num_pos, iw[0]) where
        loss = (SmoothL1Loss(loc) + FocalLoss(cls)) / num_pos.
    '''
    batch_size, num_boxes = cls_targets.size()
    pos = cls_targets > 0  # [N,#anchors]
    num_pos = pos.data.long().sum()

    # ==============================================================
    # per-image weight from backbone features (no flip info)
    # ==============================================================
    z = self.features(x.cuda())
    r = z.size(3)
    z = F.avg_pool2d(z, r)
    z = z.view(z.size(0), -1)
    z = F.relu(z)
    iw = torch.mean(z, 1)
    # rescale the empirical activation range [0.129, 0.180] to [0.5, 1.0]
    # (block 5 statistics)
    iw = (iw - 0.129) * (1.0 - 0.5) / (0.180 - 0.129) + 0.5

    ################################################################
    # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
    ################################################################
    mask = pos.unsqueeze(2).expand_as(loc_preds)  # [N,#anchors,4]
    masked_loc_preds = loc_preds[mask].view(-1, 4)      # [#pos,4]
    masked_loc_targets = loc_targets[mask].view(-1, 4)  # [#pos,4]
    # reduction='sum' replaces the deprecated size_average=False
    loc_loss = F.smooth_l1_loss(masked_loc_preds, masked_loc_targets,
                                reduction='sum')

    ################################################################
    # cls_loss = FocalLoss(cls_preds, cls_targets)
    ################################################################
    pos_neg = cls_targets > -1  # exclude ignored anchors
    mask = pos_neg.unsqueeze(2).expand_as(cls_preds)
    masked_cls_preds = cls_preds[mask].view(-1, self.num_classes)
    cls_loss = self.focal_loss_alt(masked_cls_preds,
                                   cls_targets[pos_neg], iw)

    # normalize by positive count, clamped so a positive-free batch
    # cannot divide by zero; int() avoids tensor/py-number mixing.
    # .item() replaces the removed `.data[0]` indexing of 0-dim tensors.
    num_pos = max(int(num_pos), 1)
    loss = (loc_loss + cls_loss) / num_pos
    return loss, loc_loss.item() / num_pos, cls_loss.item() / num_pos, \
        iw[0].item()
def __call__(self, proposals_with_gt, densepose_outputs,
             densepose_confidences):
    """Compute DensePose losses (I/S plus either U/V or confidence-UV).

    densepose_outputs is (s, index_uv, u, v); densepose_confidences is
    (sigma_1, sigma_2, kappa_u, kappa_v). Returns a dict of named scalar
    losses. When there are no ground-truth matches, zero-valued losses
    built from the outputs (Tensor.sum() * 0) are returned so the
    computation graph stays identical across GPUs for reduction.
    """
    losses = {}
    # densepose outputs are computed for all images and all bounding boxes;
    # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
    # the outputs will have size(0) == 3+1+2+1 == 7
    s, index_uv, u, v = densepose_outputs
    sigma_1, sigma_2, kappa_u, kappa_v = densepose_confidences
    conf_type = self.confidence_model_cfg.uv_confidence.type
    # all point heads must share the same spatial resolution
    assert u.size(2) == v.size(2)
    assert u.size(3) == v.size(3)
    assert u.size(2) == index_uv.size(2)
    assert u.size(3) == index_uv.size(3)
    with torch.no_grad():
        (
            index_uv_img,
            i_with_dp,
            bbox_xywh_est,
            bbox_xywh_gt,
            index_gt_all,
            x_norm,
            y_norm,
            u_gt_all,
            v_gt_all,
            s_gt,
            index_bbox,
        ) = _extract_single_tensors_from_matches(  # noqa
            proposals_with_gt
        )
    n_batch = len(i_with_dp)

    # NOTE: we need to keep the same computation graph on all the GPUs to
    # perform reduction properly. Hence even if we have no data on one
    # of the GPUs, we still need to generate the computation graph.
    # Add fake (zero) loss in the form Tensor.sum() * 0
    if not n_batch:
        losses["loss_densepose_I"] = index_uv.sum() * 0
        losses["loss_densepose_S"] = s.sum() * 0
        if self.confidence_model_cfg.uv_confidence.enabled:
            losses["loss_densepose_UV"] = (u.sum() + v.sum()) * 0
            if conf_type == DensePoseUVConfidenceType.IID_ISO:
                losses["loss_densepose_UV"] += sigma_2.sum() * 0
            elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
                losses["loss_densepose_UV"] += (
                    sigma_2.sum() + kappa_u.sum() + kappa_v.sum()
                ) * 0
        else:
            losses["loss_densepose_U"] = u.sum() * 0
            losses["loss_densepose_V"] = v.sum() * 0
        return losses

    zh = u.size(2)
    zw = u.size(3)
    # bilinear-sampling indices and weights for reading predictions at
    # the annotated ground-truth point locations
    (
        j_valid,
        y_lo,
        y_hi,
        x_lo,
        x_hi,
        w_ylo_xlo,
        w_ylo_xhi,
        w_yhi_xlo,
        w_yhi_xhi,
    ) = _grid_sampling_utilities(  # noqa
        zh, zw, bbox_xywh_est, bbox_xywh_gt, index_gt_all, x_norm,
        y_norm, index_bbox
    )
    # foreground points: valid samples that belong to a body part (> 0)
    j_valid_fg = j_valid * (index_gt_all > 0)

    u_gt = u_gt_all[j_valid_fg]
    u_est_all = _extract_at_points_packed(
        u[i_with_dp],
        index_bbox,
        index_gt_all,
        y_lo,
        y_hi,
        x_lo,
        x_hi,
        w_ylo_xlo,
        w_ylo_xhi,
        w_yhi_xlo,
        w_yhi_xhi,
    )
    u_est = u_est_all[j_valid_fg]
    v_gt = v_gt_all[j_valid_fg]
    v_est_all = _extract_at_points_packed(
        v[i_with_dp],
        index_bbox,
        index_gt_all,
        y_lo,
        y_hi,
        x_lo,
        x_hi,
        w_ylo_xlo,
        w_ylo_xhi,
        w_yhi_xlo,
        w_yhi_xhi,
    )
    v_est = v_est_all[j_valid_fg]

    # part-index classification uses all valid points (incl. background)
    index_uv_gt = index_gt_all[j_valid]
    index_uv_est_all = _extract_at_points_packed(
        index_uv[i_with_dp],
        index_bbox,
        slice(None),
        y_lo,
        y_hi,
        x_lo,
        x_hi,
        w_ylo_xlo[:, None],
        w_ylo_xhi[:, None],
        w_yhi_xlo[:, None],
        w_yhi_xhi[:, None],
    )
    index_uv_est = index_uv_est_all[j_valid, :]

    if self.confidence_model_cfg.uv_confidence.enabled:
        sigma_2_est_all = _extract_at_points_packed(
            sigma_2[i_with_dp],
            index_bbox,
            index_gt_all,
            y_lo,
            y_hi,
            x_lo,
            x_hi,
            w_ylo_xlo,
            w_ylo_xhi,
            w_yhi_xlo,
            w_yhi_xhi,
        )
        sigma_2_est = sigma_2_est_all[j_valid_fg]
        if conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]:
            kappa_u_est_all = _extract_at_points_packed(
                kappa_u[i_with_dp],
                index_bbox,
                index_gt_all,
                y_lo,
                y_hi,
                x_lo,
                x_hi,
                w_ylo_xlo,
                w_ylo_xhi,
                w_yhi_xlo,
                w_yhi_xhi,
            )
            kappa_u_est = kappa_u_est_all[j_valid_fg]
            kappa_v_est_all = _extract_at_points_packed(
                kappa_v[i_with_dp],
                index_bbox,
                index_gt_all,
                y_lo,
                y_hi,
                x_lo,
                x_hi,
                w_ylo_xlo,
                w_ylo_xhi,
                w_yhi_xlo,
                w_yhi_xhi,
            )
            kappa_v_est = kappa_v_est_all[j_valid_fg]

    # Resample everything to the estimated data size, no need to resample
    # S_est then:
    s_est = s[i_with_dp]
    with torch.no_grad():
        s_gt = _resample_data(
            s_gt.unsqueeze(1),
            bbox_xywh_gt,
            bbox_xywh_est,
            self.heatmap_size,
            self.heatmap_size,
            mode="nearest",
            padding_mode="zeros",
        ).squeeze(1)

    # add point-based losses:
    if self.confidence_model_cfg.uv_confidence.enabled:
        if conf_type == DensePoseUVConfidenceType.IID_ISO:
            uv_loss = (
                self.uv_loss_with_confidences(u_est, v_est, sigma_2_est,
                                              u_gt, v_gt)
                * self.w_points
            )
            losses["loss_densepose_UV"] = uv_loss
        elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
            uv_loss = (
                self.uv_loss_with_confidences(
                    u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est,
                    u_gt, v_gt
                )
                * self.w_points
            )
            losses["loss_densepose_UV"] = uv_loss
        else:
            raise ValueError(f"Unknown confidence model type: {conf_type}")
    else:
        u_loss = F.smooth_l1_loss(u_est, u_gt,
                                  reduction="sum") * self.w_points
        losses["loss_densepose_U"] = u_loss
        v_loss = F.smooth_l1_loss(v_est, v_gt,
                                  reduction="sum") * self.w_points
        losses["loss_densepose_V"] = v_loss
    index_uv_loss = F.cross_entropy(index_uv_est,
                                    index_uv_gt.long()) * self.w_part
    losses["loss_densepose_I"] = index_uv_loss

    # 2-channel segmentation head means binary fg/bg labels
    if self.n_segm_chan == 2:
        s_gt = s_gt > 0
    s_loss = F.cross_entropy(s_est, s_gt.long()) * self.w_segm
    losses["loss_densepose_S"] = s_loss
    return losses
y = x.mm(self.W) + self.b[0] # must use b[0] as a number return Variable(x), Variable(y) # Learning target poly = Polynom(degree=4) # The model nnet = torch.nn.Linear(poly.degree, 1) # Train it print('------- TRAINING ---------') for batch_idx in count(1): batch_x, batch_y = poly.get_batch(64) nnet.zero_grad() output = F.smooth_l1_loss(nnet(batch_x), batch_y) output.backward() batch_loss = output.data[0] # Upgrade model for param in nnet.parameters(): param.data.add_(-0.003 * param.grad.data) if 0 == batch_idx % 100: print('batch', batch_idx, 'loss', batch_loss) if batch_loss < 1e-3: break print('------- RESULT ---------') print('==> Learned function: {}'.format( Polynom.show(nnet.weight.data, nnet.bias.data)))
def learn(env, num_episodes, experiment_dir, replay_memory_size=500000, replay_memory_init_size=50000, update_target_estimator_every=10000, discount_factor=0.99, epsilon_start=1.0, epsilon_end=0.1, epsilon_decay_steps=500000, batch_size=32, record_video_every=50):
    """Deep-Q-Network training loop (generator).

    Relies on module-level globals: `policy_net`, `target_net`, `optimizer`,
    `device`, `process`, `Transition`, `plotting`.

    Yields (num_steps, EpisodeStats-so-far) after every episode and returns
    the complete stats object when all episodes are done.
    """
    replay_memory = []
    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))

    num_steps = 0
    state = env.reset()
    state = process(state)
    # FIX: stack the 4 initial frames with numpy, matching every other stacking
    # site in this function -- get_action() calls torch.from_numpy(state), so
    # `state` must remain a numpy array throughout.
    state = np.concatenate([state] * 4, axis=1)

    def get_action():
        """Epsilon-greedy selection; epsilon decays exponentially with num_steps."""
        sample = random.random()
        epsilon = epsilon_end + (epsilon_start - epsilon_end) * math.exp(-1. * num_steps / epsilon_decay_steps)
        if sample > epsilon:
            with torch.no_grad():
                return (policy_net(((torch.from_numpy(state)).float() / 255).to(device)).max(1)[1].data[0])
        else:
            return random.randrange(4)

    # Pre-fill the replay memory before any learning happens.
    for i in range(replay_memory_init_size):
        action = get_action()
        next_state, reward, done, _ = env.step(action)
        next_state = process(next_state)
        next_state = np.concatenate((state[:, 1:, :, :], next_state), axis=1)
        # `[not done]` stores a continuation mask (1.0 while the episode goes on).
        replay_memory.append(Transition(state, [[int(action)]], [reward], next_state, [not done]))
        if done:
            state = env.reset()
            state = process(state)
            state = np.concatenate([state] * 4, axis=1)
        else:
            state = next_state

    for i_episode in range(num_episodes):
        state = env.reset()
        state = process(state)
        state = np.concatenate([state] * 4, axis=1)
        loss = None
        for t in count():
            # Periodically sync the frozen target network with the online one.
            if num_steps % update_target_estimator_every == 0:
                target_net.load_state_dict(policy_net.state_dict())
            action = get_action()
            next_state, reward, done, _ = env.step(action)
            num_steps += 1
            next_state = process(next_state)
            next_state = np.concatenate((state[:, 1:, :, :], next_state), axis=1)
            if len(replay_memory) == replay_memory_size:
                replay_memory.pop(0)  # drop the oldest transition
            replay_memory.append(Transition(state, [[int(action)]], [reward], next_state, [not done]))

            # Update statistics
            stats.episode_rewards[i_episode] += reward
            stats.episode_lengths[i_episode] += 1

            # Optimize on a sampled minibatch every 4th environment step.
            if num_steps % 4 == 0:
                transitions = random.sample(replay_memory, batch_size)
                # FIX: the original loop variable `t` shadowed the outer step
                # counter `for t in count()`; renamed to `tr`.
                for j, tr in enumerate(transitions):
                    transitions[j] = Transition(
                        torch.tensor(tr.state, device=device, dtype=torch.float) / 255,
                        torch.tensor(tr.action, device=device, dtype=torch.long),
                        torch.tensor(tr.reward, device=device, dtype=torch.float),
                        torch.tensor(tr.next_state, device=device, dtype=torch.float) / 255,
                        torch.tensor(tr.done, device=device, dtype=torch.float))
                batch = Transition(*zip(*transitions))
                state_batch = (torch.cat(batch.state))
                action_batch = (torch.cat(batch.action))
                reward_batch = torch.cat(batch.reward)
                next_state_batch = torch.cat(batch.next_state)
                done_batch = torch.cat(batch.done)  # continuation mask (1 = not terminal)
                # Q(s, a) for the actions actually taken.
                state_action_values = policy_net(state_batch).gather(1, action_batch)
                # max_a' Q_target(s', a'); detached so no gradient reaches the target net.
                q_next_batch = target_net(next_state_batch).max(1)[0].detach()
                target_batch = reward_batch + discount_factor * done_batch * q_next_batch
                loss = F.smooth_l1_loss(state_action_values, target_batch.unsqueeze(1))
                optimizer.zero_grad()
                loss.backward()
                for param in policy_net.parameters():
                    param.grad.data.clamp_(-1, 1)  # gradient clipping
                optimizer.step()

            state = next_state
            if done:
                break

        if i_episode % 100 == 0:
            gc.collect()
        # FIX: the original `if(i_episode%1000):` saved a checkpoint on every
        # episode EXCEPT multiples of 1000; checkpoints are meant to be periodic.
        if i_episode % 1000 == 0:
            torch.save({
                'episode': i_episode // 1000 + 1,
                'state_dict': policy_net.state_dict(),
                'optimizer': optimizer.state_dict()},
                'dqn{}.model'.format(i_episode // 1000 + 1))
        yield num_steps, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode + 1],
            episode_rewards=stats.episode_rewards[:i_episode + 1])
    return stats
def forward(self, predictions, priors, targets, using_gpu):
    """Multibox loss with landmark regression (face-detection variant).

    Args:
        predictions (tuple): (loc preds, conf preds, landmark preds) from the net.
            conf shape:  torch.size(batch_size, num_priors, num_classes)
            loc shape:   torch.size(batch_size, num_priors, 4)
            landm shape: torch.size(batch_size, num_priors, 10)
        priors: prior boxes, shape torch.size(num_priors, 4).
        targets (tensor): per-image ground truth; columns are [:4] box,
            [4:14] landmark coordinates, [-1] class label.
        using_gpu (bool): move the matching buffers to CUDA when True.

    Returns:
        (loss_l, loss_c, loss_landm): localization, classification and
        landmark losses, each normalized by its positive-prior count.
    """
    loc_data, conf_data, landm_data = predictions
    priors = priors
    num = loc_data.size(0)  # num = batch_size
    num_priors = (priors.size(0))

    # Buffers filled in-place by match(): per-prior regression/class targets.
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :4].data    # [num_objs, 4]
        labels = targets[idx][:, -1].data    # [num_objs]
        landms = targets[idx][:, 4:14].data  # [num_objs, 10]
        defaults = priors.data
        # Key step: match prior (candidate) boxes against ground-truth boxes.
        match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx)
    zeros = torch.tensor(0)
    if using_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        landm_t = landm_t.cuda()
        zeros = zeros.cuda()

    # Landmark loss (Smooth L1), shape: [batch, num_priors, 10].
    # Only priors with a strictly positive label contribute (most are 0).
    pos1 = conf_t > zeros
    # Count the landmark-supervised priors.
    num_pos_landm = pos1.long().sum(1, keepdim=True)
    N1 = max(num_pos_landm.data.sum().float(), 1)  # divisor clamped to >= 1
    pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
    landm_p = landm_data[pos_idx1].view(-1, 10)
    landm_t = landm_t[pos_idx1].view(-1, 10)
    loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

    # From here on classification is binary: every non-zero label becomes
    # class 1 (foreground) -- note the `!=`, not `>`.
    pos = conf_t != zeros
    conf_t[pos] = 1

    # Localization Loss (Smooth L1), shape: [batch, num_priors, 4].
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Compute max conf across batch for hard negative mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    # Per-prior classification loss, shape [batch*num_priors, 1].
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining: rank negatives by loss and keep the hardest.
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)  # [batch*num_priors, 1] -> [batch, num_priors]
    # Sort descending, then rank every prior by its position in that order.
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    # num_pos: [batch, 1], positives per image.
    num_pos = pos.long().sum(1, keepdim=True)
    # Keep negpos_ratio negatives per positive (3:1 by convention).
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)  # divisor clamped to >= 1
    loss_l /= N
    loss_c /= N
    loss_landm /= N1
    return loss_l, loss_c, loss_landm
def learn_from_experience(self, data, entropy_coeff, normalize_returns=True, normalize_advantages=True, clip_grad=True):
    """One actor-critic policy-gradient update from a batch of experience.

    data: dict of parallel per-timestep lists ('tstep', 'obs', 'act', 'logp',
        'val', 'rew', 'entropy', 'disc_rtg_rews') plus per-episode lists
        ('per_episode_rews', 'per_episode_length').
    entropy_coeff: weight of the entropy bonus term.
    Returns a dict of loss tensors and logging statistics.
    """
    # Sanity Check: all per-timestep lists must share one length.
    # NOTE(review): the chain ends with 'disc_rtg_rews' compared against
    # itself -- one of the two was probably meant to be another key; confirm.
    assert len(data['tstep']) == len(data['obs']) == len(data['act']) == len(data['logp']) == len(data['val']) \
        == len(data['rew']) == len(data['entropy']) == len(data['disc_rtg_rews']) == len(data['disc_rtg_rews'])
    assert len(data['per_episode_rews']) == len(data['per_episode_length'])

    # Don't need to backprop through returns
    returns = torch.tensor(data['disc_rtg_rews'])
    if normalize_returns:
        # Scale-only normalization; mean subtraction deliberately disabled here.
        # returns = (returns - returns.mean()) / returns.std()
        returns = (returns) / returns.std()

    # Calculate advantages separately (to apply normalization)
    advantages = []
    for return_, value in zip(returns, data['val']):
        advantages.append(return_ - value)  # A = R - V(s): critic as baseline
        #advantages.append(return_)
    advantages = torch.tensor(advantages)
    if normalize_advantages:
        advantages = (advantages - advantages.mean()) / advantages.std()
    assert len(advantages) == len(data['tstep'])

    # Zero out gradients before calculating loss
    # NOTE(review): uses the module-level `model`, not `self` -- confirm intent.
    model.optimizer.zero_grad()

    # Calculate actor and critic loss per timestep.
    actor_loss = []
    critic_loss = []
    for logprob, advantage, return_, value in zip(data['logp'], advantages, returns, data['val']):
        actor_loss.append(-(logprob * advantage))  # REINFORCE with baseline
        # Why L1 loss? From pytorch doc:
        # It is less sensitive to outliers than the MSELoss and in some cases prevents exploding gradients
        critic_loss.append(F.smooth_l1_loss(return_, torch.squeeze(value)))
        # critic_loss.append(advantage.pow(2))

    # Entropy Loss (https://medium.com/@awjuliani/maximum-entropy-policies-in-reinforcement-learning-everyday-life-f5a1cc18d32d)
    # https://jaromiru.com/2017/03/26/lets-make-an-a3c-implementation/
    actor_loss = torch.stack(actor_loss).mean()
    critic_loss = 0.5 * torch.stack(critic_loss).mean()
    entropy_avg = torch.stack(data['entropy']).mean()
    entropy_loss = -(entropy_coeff * entropy_avg)
    total_loss = actor_loss + critic_loss + entropy_loss

    # Perform backprop step
    total_loss.backward()
    if clip_grad:
        torch.nn.utils.clip_grad_norm_(self.parameters(), 0.5)
    model.optimizer.step()

    # Compute info for logging
    avg_ep_len = torch.tensor(data['per_episode_length'], requires_grad=False, dtype=torch.float).mean().item()
    avg_ep_raw_rew = torch.tensor(data['per_episode_rews'], requires_grad=False, dtype=torch.float).mean().item()
    epoch_timesteps = data['tstep'][-1]
    num_episodes = len(data['per_episode_length'])

    # Return logging info
    return dict(actor_loss=actor_loss, critic_loss=critic_loss, entropy_loss=entropy_loss,
                entropy_avg=entropy_avg, total_loss=total_loss, avg_ep_len=avg_ep_len,
                avg_ep_raw_rew=avg_ep_raw_rew, epoch_timesteps=epoch_timesteps,
                num_episodes=num_episodes, advantages=advantages,
                pred_values=data['val'], disc_rews=returns)
def train(epoch):
    """Run one training epoch over `training_data_loader`.

    Uses module-level globals: `opt`, `model`, `optimizer`, `criterion`,
    `training_data_loader`, `cuda`. Which disparity outputs are supervised
    (and with which weights) depends on `opt.model`; on KITTI data
    (`opt.kitti`/`opt.kitti2015`) the final map uses `criterion` instead of
    smooth-L1.

    Returns the average loss over iterations that had at least one valid
    ground-truth pixel.
    """
    epoch_time = time.time()
    epoch_loss = 0
    epoch_error0 = 0
    epoch_error1 = 0
    epoch_error2 = 0
    valid_iteration = 0
    # NOTE(review): these variants skip model.train() and stay in whatever
    # mode they already have -- presumably intentional; confirm.
    if opt.model == 'MyGANet3' or opt.model == 'MyGANet4' or opt.model == 'MyGANet4_8' or opt.model == 'MyGANet5'\
            or opt.model == 'MyGANet4_8_rf'or opt.model == 'MyGANet9_t2'or opt.model == 'MyGANet9_t3':
        pass
    else:
        model.train()
    for iteration, batch in enumerate(training_data_loader):
        input1, input2, target = Variable(batch[0], requires_grad=True), Variable(batch[1], requires_grad=True), Variable(
            batch[2], requires_grad=False)
        if cuda:
            input1 = input1.cuda()
            input2 = input2.cuda()
            target = target.cuda()
        target = torch.squeeze(target, 1)
        # Supervise only pixels whose disparity lies below the valid maximum.
        mask = target < opt.max_disp
        mask.detach_()
        valid = target[mask].size()[0]
        start_full_time = time.time()
        if valid > 0:
            optimizer.zero_grad()
            # T1 train: model yields two maps; disp0 is their average.
            if opt.model == 'GANet11' or opt.model == 'MyGANet' or opt.model == 'MyGANet2' or opt.model == 'MyGANet4_8_t1' \
                    or opt.model == 'MyGANet5_t1' or opt.model == 'MyGANet4_8_rf_t1'or opt.model == 'MyGANet9_t1':
                disp1, disp2 = model(input1, input2)
                disp0 = (disp1 + disp2) / 2.
                if opt.kitti or opt.kitti2015:
                    loss = 0.4 * F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean') + 1.2 * criterion(
                        disp2[mask], target[mask])
                else:
                    loss = 0.4 * F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean') + 1.2 * F.smooth_l1_loss(
                        disp2[mask], target[mask], reduction='mean')
            # T2 train: losses expressed relative to the disp0 reference loss.
            elif opt.model == 'MyGANet5'or opt.model == 'MyGANet4_8_rf'or opt.model == 'MyGANet9_t2':
                disp0, disp1, disp2 = model(input1, input2)
                loss0 = F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean')
                if opt.kitti or opt.kitti2015:
                    loss = 0.4 * (0.9 - (loss0 - F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean'))) + \
                        1.2 * (0.9 - (loss0 - criterion(disp2[mask],target[mask])))
                else:
                    loss = 0.4 * (0.9 - (loss0 - F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean'))) + \
                        1.2 * (0.9 - (loss0 - F.smooth_l1_loss(disp2[mask], target[mask], reduction='mean')))
            # T3 train: single relative term between disp0 and disp2.
            elif opt.model == 'MyGANet3' or opt.model == 'MyGANet4' or opt.model == 'MyGANet4_8'or opt.model == 'MyGANet9_t3':
                disp0, disp1, disp2 = model(input1, input2)
                loss1 = F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean')
                if opt.kitti or opt.kitti2015:
                    loss = 0.9-(loss1-criterion(disp2[mask], target[mask]))
                else:
                    loss = 0.9-(loss1-F.smooth_l1_loss(disp2[mask], target[mask], reduction='mean'))
            # Five supervised outputs with increasing weights.
            elif opt.model == 'MyGANet9':
                disp00, disp0, disp11, disp1, disp2 = model(input1, input2)
                if opt.kitti or opt.kitti2015:
                    loss = 0.2 * F.smooth_l1_loss(disp00[mask], target[mask], reduction='mean') + \
                        0.4 * F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean') + \
                        0.6 * F.smooth_l1_loss(disp11[mask], target[mask], reduction='mean') + \
                        1 * F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean') + \
                        1 * criterion(disp2[mask], target[mask])
                else:
                    loss = 0.2 * F.smooth_l1_loss(disp00[mask], target[mask], reduction='mean') + \
                        0.4 * F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean') + \
                        0.6 * F.smooth_l1_loss(disp11[mask], target[mask], reduction='mean') + \
                        1 * F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean') + \
                        1 * F.smooth_l1_loss(disp2[mask], target[mask], reduction='mean')
            elif opt.model == 'GANet_deep' or opt.model == 'CasGANet10':
                disp0, disp1, disp2 = model(input1, input2)
                if opt.kitti or opt.kitti2015:
                    loss = 0.2 * F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean') + 0.6 * F.smooth_l1_loss(
                        disp1[mask], target[mask], reduction='mean') + criterion(disp2[mask], target[mask])
                else:
                    loss = 0.2 * F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean') + 0.6 * F.smooth_l1_loss(
                        disp1[mask], target[mask], reduction='mean') + F.smooth_l1_loss(disp2[mask], target[mask], reduction='mean')
            else:
                raise Exception("No suitable model found ...")
            loss.backward()
            optimizer.step()
            # Mean absolute disparity error for the three reported outputs.
            error0 = torch.mean(torch.abs(disp0[mask] - target[mask]))
            error1 = torch.mean(torch.abs(disp1[mask] - target[mask]))
            error2 = torch.mean(torch.abs(disp2[mask] - target[mask]))
            epoch_loss += loss.item()
            valid_iteration += 1
            epoch_error0 += error0.item()
            epoch_error1 += error1.item()
            epoch_error2 += error2.item()
            print("===> Epoch[{}]({}/{}): Loss: {:.4f}, Error: ({:.4f} {:.4f} {:.4f}), Time:{:.2f}s".format(epoch, iteration, len(
                training_data_loader), loss.item(), error0.item(), error1.item(), error2.item(), time.time() - start_full_time))
            sys.stdout.flush()
    # NOTE(review): if no iteration had valid pixels, valid_iteration is 0 and
    # the divisions below raise ZeroDivisionError -- confirm data guarantees.
    print("===> Epoch {} Complete: Avg. Loss: {:.4f}, Avg. Error: ({:.4f} {:.4f} {:.4f}), Time:{:.2f}min".format(epoch, epoch_loss / valid_iteration,
                                                                                                                 epoch_error0 / valid_iteration, epoch_error1 / valid_iteration, epoch_error2 / valid_iteration, (time.time() - epoch_time) / 60))
    return epoch_loss / valid_iteration
def finetunning(self, spt_ms, spt_rgb, qry_ms, qry_rgb):
    """MAML-style fine-tuning and evaluation on a single task.

    :param spt_ms:  support-set input,  4-D tensor (see NOTE below)
    :param spt_rgb: support-set target
    :param qry_ms:  query-set input
    :param qry_rgb: query-set target
    :return: np.array of length update_step_test + 1 with the "accuracy"
             before any update, after 1 step, ..., after the last step.

    NOTE(review): the original docstring advertised 5-D
    [task_num, setsz, 16, h, w] tensors, but the assert below requires a
    4-D input -- confirm which shape is intended.
    NOTE(review): `correct` counts exact elementwise equality of network
    output vs. target; for continuous outputs this is almost always 0 --
    confirm the metric is meaningful here.
    """
    assert len(spt_ms.shape) == 4
    querysz = qry_ms.size(0)
    corrects = [0 for _ in range(self.update_step_test + 1)]
    # in order to not ruin the state of running_mean/variance and bn_weight/bias
    # we finetunning on the copied model instead of self.net
    net = deepcopy(self.net)
    # 1. run the i-th task and compute loss for k=0
    logits = net(spt_ms)
    loss = F.cross_entropy(logits, spt_rgb)
    grad = torch.autograd.grad(loss, net.parameters())
    # One manual SGD step on a detached copy of the weights ("fast weights").
    fast_weights = list(
        map(lambda p: p[1] - self.update_lr * p[0], zip(grad, net.parameters())))
    # this is the loss and accuracy before first update
    with torch.no_grad():
        # [setsz, nway]
        pred_q = net(qry_ms, net.parameters(), bn_training=True)
        # [setsz]
        # pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
        # scalar
        correct = torch.eq(pred_q, qry_rgb).sum().item()
        corrects[0] = corrects[0] + correct
    # this is the loss and accuracy after the first update
    with torch.no_grad():
        # [setsz, nway]
        pred_q = net(qry_ms, fast_weights, bn_training=True)
        # [setsz]
        # pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
        # scalar
        correct = torch.eq(pred_q, qry_rgb).sum().item()
        corrects[1] = corrects[1] + correct
    for k in range(1, self.update_step_test):
        # 1. run the i-th task and compute loss for k=1~K-1
        logits = net(spt_ms, fast_weights, bn_training=True)
        loss = F.cross_entropy(logits, spt_rgb)
        # 2. compute grad on theta_pi
        grad = torch.autograd.grad(loss, fast_weights)
        # 3. theta_pi = theta_pi - train_lr * grad
        fast_weights = list(
            map(lambda p: p[1] - self.update_lr * p[0], zip(grad, fast_weights)))
        pred_q = net(qry_ms, fast_weights, bn_training=True)
        # loss_q will be overwritten and just keep the loss_q on last update step.
        loss_q = F.smooth_l1_loss(pred_q, qry_rgb)
        with torch.no_grad():
            # pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
            correct = torch.eq(pred_q, qry_rgb).sum().item()  # convert to numpy
            corrects[k + 1] = corrects[k + 1] + correct
    # Drop the task-specific copy so its state never leaks into self.net.
    del net
    accs = np.array(corrects) / querysz
    return accs
def regression_loss(prediction_normalized, meta, alpha=1., **kwargs):
    """Smooth-L1 loss between predicted and ground-truth normalized keypoints.

    Args:
        prediction_normalized: sequence whose first element is the predicted
            keypoint tensor of shape (batch, num_points, coords).
        meta: dict holding the target tensor under 'keypts_normalized'.
        alpha: scale applied to both prediction and target before the loss.
        **kwargs: ignored (accepted for interface compatibility).

    Returns:
        Scalar mean smooth-L1 loss tensor.
    """
    predictions = prediction_normalized[0]
    targets = meta['keypts_normalized'].to(predictions.device)
    # Unpack to implicitly assert the expected 3-D (batch, points, coords) rank.
    batch_size, num_points, _ = predictions.shape
    scaled_pred = predictions * alpha
    scaled_target = targets * alpha
    return F.smooth_l1_loss(scaled_pred, scaled_target)
def finish_episode(*, model, optimizer, history, gamma: float = 0.1, return_means: bool = False) -> Optional[tuple]: """Calculate the losses and backprop them through the models NN.""" # initialize a few variables # eps needs to be tensor now{} eps = Tensor([np.finfo(np.float32).eps]) # machine epsilon losses = deque() returns_to_average = deque() species_actions = deque() # init actions_per_agent = deque() # init for (_, agent_rewards, saved_actions) in history: R = 0 # The discounted reward rewards = deque() policy_losses = deque() state_value_losses = deque() # reverse rewards (its a deque!) agent_rewards.reverse() # iterate over all rewards that we got during the play for r in agent_rewards: # backwards to account for the more recent actions returns_to_average.append(r) # for later averaging R = r + gamma * R # discount! rewards.appendleft(R) # deque power baby! rewards = torch.Tensor(rewards).type(dtype) # use gpu if available rewards = (rewards - rewards.mean()) / (rewards.std() + eps) # I think the eps should take care of my problem of NaNs. Somehow it # doesn't work, but the effect is the same as if I just switch the NaNs # to 0. # converting NaNs to 0. 
rewards[rewards != rewards] = 0 # should convert all NaN to 0 actions_per_agent.clear() # clear the deque # now interate over all probability-state value-reward pairs for (log_prob, state_value, action), r in zip(saved_actions, rewards): actions_per_agent.append(action) # save action for later reward = r - state_value.item() # get the value, needs `Variable` policy_losses.append(-log_prob * reward) # calculate the (smooth) L^1 loss = least absolute deviation state_value_losses.append( F.smooth_l1_loss(state_value, Variable(torch.Tensor([r]).type(dtype)))) species_actions.append(actions_per_agent.copy()) # empty the gradient of the optimizer optimizer.zero_grad() # calculate the loss losses.append( torch.stack(list(policy_losses)).sum() + torch.stack(list(state_value_losses)).sum()) # average all losses loss = torch.stack(list(losses)).mean() # backpropagate the loss loss.backward() optimizer.step() # free memory losses.clear() # its a deque # if output is wanted if return_means: ret_avg = np.mean(returns_to_average) returns_to_average.clear() return loss, ret_avg, species_actions.copy()
def forward(self, odm_data, priors, loc_targets, cls_targets, arm_data=None, filter_object=False):
    """Multibox loss for the ODM branch (RefineDet-style detector).

    Args:
        odm_data (tuple): (loc preds [batch, num_priors, 4],
                           conf preds [batch, num_priors, num_classes]).
        priors: prior boxes, shape [num_priors, 4].
        loc_targets: per-image ground-truth boxes.
        cls_targets: per-image ground-truth labels (0-based; shifted so
            background becomes class 0).
        arm_data (tuple): optional (arm_loc, arm_conf) from the anchor
            refinement module; when given, priors are refined by arm_loc
            before matching.
        filter_object: when True (and arm_data given), priors whose ARM
            objectness score is <= self.object_score are removed from the
            positive set.

    Returns:
        (loss_l, loss_c): localization and confidence losses, each divided
        by the number of positive priors.
    """
    loc_data, conf_data = odm_data
    if arm_data:
        arm_loc, arm_conf = arm_data
    num = loc_data.size(0)  # batch size
    num_priors = (priors.size(0))

    # Match priors (default boxes) and ground-truth boxes, one image at a
    # time; match()/refine_match() fill loc_t / conf_t in place.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.Tensor(num, num_priors)
    for idx in range(num):
        truths = loc_targets[idx]
        labels = cls_targets[idx] + 1  # background as 0
        truths = truths.to(self.opt.device)
        labels = labels.to(self.opt.device)
        # objectness-only detection collapses all classes to one
        if self.num_classes == 2:
            labels = labels > 0
        if arm_data:
            refine_match(self.threshold, truths, priors, self.variance,
                         labels, loc_t, conf_t, idx, arm_loc[idx])
        else:
            match(self.threshold, truths, priors, self.variance, labels,
                  loc_t, conf_t, idx)

    if arm_data and filter_object:
        # Drop priors that the ARM already classifies as background.
        arm_conf_data = arm_conf[:, :, 1]
        pos = conf_t > 0
        object_score_index = arm_conf_data <= self.object_score
        pos[object_score_index] = 0
    else:
        pos = conf_t > 0

    # Localization Loss (Smooth L1) over positive priors only.
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loc_t = loc_t.detach()
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior classification loss, used only to rank negatives for mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1).long())

    # Hard Negative Mining.
    # FIX: mask out positives with a boolean mask. The original used
    # `loss_c[pos.view(-1).long()] = 0`, i.e. integer indices 0/1, which only
    # zeroed the first two rows instead of every positive prior.
    loss_c[pos.view(-1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1).detach()
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives plus the mined hard negatives.
    # (Bitwise-or replaces the legacy `(a + b).gt(0)` idiom, which errors on
    # modern bool tensors; semantics are identical.)
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx | neg_idx)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos | neg)]
    loss_c = F.cross_entropy(conf_p, targets_weighted.long(), reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.sum().item(), 1)  # guard against a batch with no positives
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
# Fit the linear model `fc` to polynomial batches with a manual SGD loop.
optimizer = torch.optim.SGD(fc.parameters(), lr=0.1, momentum=0.9)
# NOTE(review): `optimizer` is constructed but never stepped -- the update
# below is a hand-rolled SGD on param.data. Kept to preserve behavior; confirm.
y_old = []  # history of predictions (one array per batch)
x_old = []  # history of the first input feature per batch
for batch_idx in count(1):
    # Get data
    batch_x, batch_y = get_batch()
    # Reset gradients
    fc.zero_grad()
    # Forward pass
    y_est = fc(batch_x)
    y_old.append(y_est.squeeze().data.numpy())
    x_old.append(batch_x[:, 0].data.numpy())
    output = F.smooth_l1_loss(y_est, batch_y)
    # FIX: `output.data[0]` raises IndexError on 0-dim loss tensors
    # (PyTorch >= 0.5); .item() is the supported way to read a scalar.
    loss = output.item()
    # Backward pass
    output.backward()
    # Apply gradients (plain SGD, lr=0.1)
    for param in fc.parameters():
        param.data.add_(-0.1 * param.grad.data)
    # Stop criterion
    if loss < 1e-3:
        break
print('Loss: {:.6f} after {} batches'.format(loss, batch_idx))
print('==> Learned function:\t' + poly_desc(fc.weight.data.view(-1), fc.bias.data))
q_t = agent.forward(s_t) # epsilon greedy action selection if np.random.uniform() > epsilon: a_t = torch.argmax(q_t) else: a_t = np.random.randint(n_actions) # transition and get reward r_t = env.step(a_t) # get next states info s_next = to_torch(env.get_agent_loc().reshape(1, -1)) max_q_next = torch.max(agent.forward(s_next)) # compute TD target q_target = r_t + gamma * max_q_next # update weights loss = F.smooth_l1_loss(q_t[:, a_t], q_target.data) optimizer.zero_grad() loss.backward() optimizer.step() # update R and n steps step += 1 cumulative_reward += r_t * gamma**step # termination condition if env.is_terminal(): break log_return.append(cumulative_reward) log_steps.append(step) '''
def forward(self, predictions, priors, targets):
    """SSD Multibox Loss.

    Args:
        predictions (tuple): (loc preds [batch, num_priors, 4],
                              conf preds [batch, num_priors, num_classes])
            from the SSD net.
        priors: prior boxes, shape torch.size(num_priors, 4).
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (loss_l, loss_c): localization and confidence losses, each divided
        by the number of positive priors.
    """
    loc_data, conf_data = predictions
    priors = priors
    num = loc_data.size(0)  # batch size
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes, one image at a
    # time; match() fills loc_t / conf_t in place.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:,:-1].data
        labels = targets[idx][:,-1].data
        defaults = priors.data
        match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets (legacy Variable API; no gradients flow into the targets)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t,requires_grad=False)

    pos = conf_t > 0  # positive (matched) priors

    # Localization Loss (Smooth L1) over positive priors only.
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1,4)
    loc_t = loc_t[pos_idx].view(-1,4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1,self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1))

    # Hard Negative Mining: zero out the positives, rank the rest by loss and
    # keep the hardest negpos_ratio * num_pos negatives.
    loss_c[pos.view(-1)] = 0 # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _,loss_idx = loss_c.sort(1, descending=True)
    _,idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1,keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
    targets_weighted = conf_t[(pos+neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    # NOTE(review): N is unguarded; an all-background batch (num_pos == 0)
    # would divide by zero -- confirm upstream guarantees positives exist.
    N = num_pos.data.sum()
    loss_l/=N
    loss_c/=N
    return loss_l,loss_c
def forward(self, inputs_, return_frames=False, autoreg=False):
    """Run the recurrent horizontal/top-down network over a video clip.

    Args:
        inputs_: 5-D video tensor (time on dim 2, per the `x.size(2)` /
            `inputs_[:, :, i+1]` usage below); a list's first element is
            unwrapped.
        return_frames: additionally return the predicted frames (only
            meaningful when self.pred_gn is enabled).
        autoreg: after the first half of the timesteps, feed the model's own
            predicted frame back in instead of the next input frame.

    Returns:
        dict with 'logits' (classification head on the detached top hidden
        state) and, depending on configuration, 'pred_errors' (accumulated
        smooth-L1 next-frame prediction error), 'frames', and 'cpc_loss'
        (contrastive predictive-coding loss).
    """
    if isinstance(inputs_, list):
        inputs_ = inputs_[0]
    x = inputs_
    # Reset per-clip statistics on all group-norm layers.
    for gnl in self.group_norm_layers:
        gnl.reset_stats()
    current_loc = 0
    timesteps = x.size(2)
    # Fold time into the batch dimension so the stem runs on every frame at once.
    x = x.transpose(1,2)
    x = x.reshape((-1, x.size(2), x.size(3), x.size(4)))
    conv_input = self.stem(x)
    conv_input = self.ds_block(conv_input, current_loc, self.h_units[0][0])
    # Unfold back to (batch, channels, time, h, w).
    conv_input = conv_input.reshape((-1, timesteps, conv_input.size(1), conv_input.size(2), conv_input.size(3))).transpose(1,2)
    current_loc = self.h_units[0][0]
    hidden_states= {}
    if self.pred_gn:
        errors = 0
        if return_frames:
            frames = []
    if self.cpc_gn:
        cpc_targets = []
        cpc_preds = {step: [] for step in self.cpc_steps}
    output = {}
    # Optional learned initialization: one bottom-up + top-down pass at t=0
    # starting from zeros, to seed the hidden states.
    if self.hidden_init=='learned':
        # h_unit, h_name = self.h_units_and_names[-1]
        # # x = torch.zeros_like(conv_input[:,:,0])[:,:,None]
        # x = inputs_.new(torch.zeros([inputs_.shape[0],
        #                              self._out_feature_channels[-1],
        #                              1,
        #                              inputs_.shape[3]//self._strides[-1],
        #                              inputs_.shape[4]//self._strides[-1]]))
        # hidden_states[h_name] = x #torch.zeros_like(x)
        # hidden_states[h_name] = h_unit(x, hidden_states[h_name], timestep=0, return_extra=[])
        # x = hidden_states[h_name]
        x = torch.zeros_like(conv_input[:,:,0])
        current_loc = self.h_units[0][0]
        for j, (h_unit, h_name) in enumerate(self.h_units_and_names):
            loc = int(h_name.strip('horizontal'))
            if j > 0:
                # x = self.ds_block(x[:,:,None], current_loc, loc).squeeze(2)
                x = self.ds_block(x, current_loc, loc)
            hidden_states[h_name] = F.softplus(torch.zeros_like(x))
            hidden_states[h_name] = h_unit(F.softplus(x), hidden_states[h_name], timestep=0)
            x = hidden_states[h_name]
            x = self.horizontal_norms[h_name](x)
            x = F.relu_(x)
            current_loc = loc
        for j, (td_unit, td_name) in enumerate(self.td_units_and_names):
            loc = int(td_name.strip('topdown'))
            h_name = 'horizontal'+str(loc)
            # print(x.shape)
            # hidden_states[h_name] = x.new(torch.zeros([x.shape[0],
            #                                            self._out_feature_channels[loc],
            #                                            1,
            #                                            inputs_.shape[3]//self._strides[loc],
            #                                            inputs_.shape[4]//self._strides[loc]]))
            hidden_states[h_name] = td_unit(hidden_states[h_name], x, timestep=0)
            x = hidden_states[h_name]
    # Main recurrence over time.
    for i in range(timesteps):
        if autoreg and i>timesteps//2:
            # Autoregressive rollout: feed back the last predicted frame.
            x = frame
        else:
            # conv_input is sliced by one frame each step, so index 0 is
            # always the current frame.
            x = conv_input[:,:,0]
        current_loc = self.h_units[0][0]
        # Bottom-up sweep through the horizontal (recurrent) units.
        for j, (h_unit, h_name) in enumerate(self.h_units_and_names):
            loc = int(h_name.strip('horizontal'))
            if j > 0:
                # x = self.ds_block(x[:,:,None], current_loc, loc).squeeze(2)
                x = self.ds_block(x, current_loc, loc)
            if i == 0 and h_name not in hidden_states:
                hidden_states[h_name] = F.softplus(torch.zeros_like(x))
            hidden_states[h_name], extra = h_unit(F.softplus(x), hidden_states[h_name], timestep=i, return_extra=['error'])
            # errors = errors + torch.norm(extra['error'].view(extra['error'].shape[0],-1), p=1, dim=1)/1e4
            # if i > 0:
            #     errors = errors + torch.abs(extra['error'].view(extra['error'].shape[0],-1)).mean(-1)
            x = hidden_states[h_name]
            if (x>1e6).any():
                logger.info('variable %s at timestep %d out of bound: %f'%(h_name,i, x.max().item()))
            x = self.horizontal_norms[h_name](x)
            x = F.relu_(x)
            current_loc = loc
        if self.cpc_gn:
            # Collect CPC targets (detached) and k-step-ahead predictions.
            if i >= min(self.cpc_steps):
                cpc_targets.append(self.W_cpc_target(x.transpose(1,3).detach()).view([-1,self.cpc_fan_out]))
            for step in self.cpc_steps:
                if i < timesteps-step:
                    cpc_preds[step].append(self.W_cpc_preds[step](x.transpose(1,3)).view([-1,self.cpc_fan_out]))
        if i <timesteps-1:
            # Top-down sweep updates the hidden states for the next timestep.
            for j, (td_unit, td_name) in enumerate(self.td_units_and_names):
                loc = int(td_name.strip('topdown'))
                h_name = 'horizontal'+str(loc)
                hidden_states[h_name] = td_unit(hidden_states[h_name], x, timestep=i)
                x = hidden_states[h_name]
                if (x>1e6).any():
                    logger.info('variable %s at timestep %d out of bound: %f'%(td_name, i, x.max().item()))
            # prediction error -> next step lower layer is detached to avoid gradients flowing through lower layers
            # pred_error = F.interpolate(x, conv_input.shape[2:], mode='trilinear', align_corners=True)
            # pred_error = self.final_remap(pred_error) - conv_input[:,:,1][:,:,None].detach()
            if self.pred_gn:
                # Predict the next input frame from the top-down state.
                frame = self.final_remap(x)
                if (frame != frame).any():
                    logger.info('variable frame at timestep %d out of bound'%(i))
                if return_frames:
                    frames.append(frame)
                pred_error = F.smooth_l1_loss(frame, inputs_[:,:,i+1]) # conv_input[:,:,1][:,:,None].detach()
                errors = errors + pred_error
            # Advance the input window by one frame.
            conv_input = conv_input[:,:,1:]
    logits = self.head(hidden_states[self.h_units_and_names[-1][1]].detach()) #[:,:,None]
    if (logits != logits).any():
        logger.info('variable logits out of bound')
    output['logits'] = logits
    # del hidden_states
    # del conv_input
    # del x
    if self.pred_gn:
        output['pred_errors'] = errors
        if return_frames:
            frames = torch.stack(frames, 2)
            output['frames'] = frames
    if self.cpc_gn:
        # calculate CPC
        # levels of difficulty
        #   easy   : across batches (dim = 0)
        #   medium : across space within sample (fixed dim = 0, dims = 3,4)
        #   hard   : across time within sample (fixed dim = 0, fixed dims = 3,4, dim=2)
        # label smoothing -> S,T block diag matrix within B,S block diag matrix
        cpc_loss = 0
        cpc_targets = torch.stack(cpc_targets,0)
        for step in self.cpc_steps:
            if len(cpc_preds[step])>1:
                cpc_preds[step] = torch.cat(cpc_preds[step], 0)
                # Similarity of every target against every prediction; the
                # diagonal (matching positions) provides the positive pairs.
                cpc_output = torch.matmul(cpc_targets[step-min(self.cpc_steps):].view([-1, cpc_preds[step].shape[-1]]), cpc_preds[step].t())
                labels = torch.cumsum(torch.ones_like(cpc_preds[step][:,0]).long(), 0) -1
                cpc_loss = cpc_loss + F.cross_entropy(cpc_output, labels)
                if (cpc_loss != cpc_loss).any():
                    logger.info('variable CPC at timestep %d out of bound'%(step,))
        output['cpc_loss'] = cpc_loss
    # {'logits': output,
    #  'cpc_loss': cpc_loss,
    #  'frames': frames
    #  'pred_errors': errors}
    return output
def part_forward(self, predictions, targets, arm_data=None, filter_negative=False):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
        arm_data (tuple, optional): (arm_loc, arm_conf) from the anchor
            refinement module; when given, priors are refined by the ARM
            offsets before matching (RefineDet-style).
        filter_negative (bool): NOTE(review) -- accepted but never used in
            this method; confirm whether it should gate negative filtering.

    Returns:
        (loss_l, loss_c): localisation (smooth L1) and confidence
        (cross-entropy with hard negative mining) losses, each averaged
        over the total number of positive matches N.
    """
    loc_data, conf_data, priors = predictions
    if arm_data:
        arm_loc_data, arm_conf_data = arm_data
    num = loc_data.size(0)  # batch size
    # Trim priors to the number of predicted locations (normally a no-op).
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes
    # loc_t / conf_t are filled in-place, one batch item at a time.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data  # gt boxes
        labels = targets[idx][:, -1].data   # gt class labels
        defaults = priors.data
        # sft match strategy , swordli
        # NOTE(review): `ac` is not defined in this method or its
        # parameters -- presumably a module-level flag selecting the SFD
        # matching strategy; verify it exists at module scope.
        if ac:
            sfd_match(self.threshold, truths, defaults, self.variance, labels,
                      loc_t, conf_t, idx)
        else:
            if arm_data:
                # Refine priors with ARM offsets before matching.
                refine_match(self.threshold, truths, defaults, self.variance,
                             labels, loc_t, conf_t, idx,
                             arm_loc_data[idx].data)
            else:
                match(self.threshold, truths, defaults, self.variance, labels,
                      loc_t, conf_t, idx)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # compute matched anchor number for each gt
    '''
    for i in targets:
        self.tmp[0] = self.tmp[0]+i.shape[0]
        self.tmp[1] = self.tmp[1] + conf_t.sum()
    print(self.tmp[1]/self.tmp[0])
    '''
    # wrap targets (legacy autograd API; Variable is a no-op on modern torch)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0  # positive (matched) priors
    num_pos = pos.sum(dim=1, keepdim=True)
    '''
    numpos1 = sum(sum(pos[:,:160*160].data))
    numpos2 = sum(sum(pos[:,160*160: 160*160+80*80].data))
    numpos3 = sum(sum(pos[:,160*160+80*80: 160*160+80*80+40*40].data))
    numpos4 = sum(sum(pos[:,160*160+80*80+40*40: 160*160+80*80+40*40+20*20].data))
    numpos5 = sum(sum(pos[:,-10*10-5*5:-5*5:].data))
    numpos6 = sum(sum(pos[:,-5*5:].data))
    numposall = sum(sum(pos.data))
    print(numpos1, numpos2, numpos3, numpos4, numpos5, numpos6 , numposall)
    '''
    # Localization Loss (Smooth L1), positives only.
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # size_average=False -> summed loss; normalised by N below.
    # (size_average is deprecated in modern torch; equivalent to reduction='sum'.)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

    # Compute max conf across batch for hard negative mining.
    # conf_t == -1 marks ignored priors; zero them so gather() is valid.
    ignore = conf_t < 0
    #print(sum(conf_t[0].data.cpu().numpy()==1) , sum(conf_t[0].data.cpu().numpy()==-1))
    conf_t[ignore] = 0
    batch_conf = conf_data.view(-1, self.num_classes)
    # Per-prior classification loss: logsumexp(logits) - logit[true class].
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

    # Hard Negative Mining
    # NOTE(review): loss_c has shape (num*num_priors, 1) here while pos /
    # ignore have shape (num, num_priors) -- this mask-shape mismatch is a
    # known quirk of old ssd.pytorch forks (works only on old torch
    # versions); confirm against the torch version in use.
    loss_c[pos] = 0  # filter out pos boxes for now
    loss_c[ignore] = 0  # filter out ignore
    loss_c = loss_c.view(num, -1)
    #loss_c[pos] = 0 # filter out pos boxes for now
    # Double argsort yields each prior's rank by descending loss.
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    # Keep at most negpos_ratio negatives per positive.
    num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos+neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    #pdb.set_trace()
    N = num_pos.data.sum()
    loss_l /= N
    loss_c /= N
    #pdb.set_trace()
    return loss_l, loss_c
def loss_fnc(self, state_action_values, expected_state_action_values): return F.smooth_l1_loss(state_action_values, expected_state_action_values)
def train_seg_semisup_ict(
        submit_config: job_helper.SubmitConfig, dataset, model, arch, freeze_bn,
        opt_type, sgd_momentum, sgd_nesterov, sgd_weight_decay, learning_rate,
        lr_sched, lr_step_epochs, lr_step_gamma, lr_poly_power, teacher_alpha,
        bin_fill_holes, crop_size, aug_hflip, aug_vflip, aug_hvflip,
        aug_scale_hung, aug_max_scale, aug_scale_non_uniform, aug_rot_mag,
        ict_alpha, cons_loss_fn, cons_weight, conf_thresh, conf_per_pixel,
        rampup, unsup_batch_ratio, num_epochs, iters_per_epoch, batch_size,
        n_sup, n_unsup, n_val, split_seed, split_path, val_seed, save_preds,
        save_model, num_workers):
    """Train a semi-supervised segmentation network with Interpolation
    Consistency Training (ICT).

    A student network is trained with supervised cross-entropy on labelled
    samples plus a consistency loss on unlabelled pairs: the student's
    prediction for a mixed image ``(1-a)*x0 + a*x1`` is pushed towards the
    same mix of the teacher's predictions for ``x0`` and ``x1``, where the
    mix factor ``a`` is drawn from Beta(ict_alpha, ict_alpha) per sample.
    ``model='mean_teacher'`` uses an EMA copy of the student as teacher;
    ``model='pi'`` uses the student as its own teacher.

    The parameters are the experiment's hyper-parameters: dataset / split
    selection (dataset, n_sup, n_unsup, n_val, split_seed, split_path,
    val_seed), architecture and optimizer (arch, opt_type, sgd_*,
    learning_rate, lr_*, freeze_bn, teacher_alpha), augmentation (crop_size,
    aug_*), consistency (ict_alpha, cons_loss_fn, cons_weight, conf_thresh,
    conf_per_pixel, rampup, unsup_batch_ratio) and run control (num_epochs,
    iters_per_epoch, batch_size, save_preds, save_model, num_workers).

    Per-epoch losses and validation mIoU are printed; the evaluation network
    and its predictions are optionally saved under ``submit_config.run_dir``,
    and a final test-set mIoU is reported when a test split exists.
    """
    settings = locals().copy()
    del settings['submit_config']

    # Local imports keep heavyweight project dependencies out of module load.
    import os
    import math
    import time
    import itertools
    import numpy as np
    import torch.nn as nn, torch.nn.functional as F
    from architectures import network_architectures
    import torch.utils.data
    from datapipe import datasets
    from datapipe import seg_data, seg_transforms, seg_transforms_cv
    import evaluation
    import optim_weight_ema
    import lr_schedules

    # crop_size arrives as a comma-separated string; '' means "no cropping".
    if crop_size == '':
        crop_size = None
    else:
        crop_size = [int(x.strip()) for x in crop_size.split(',')]

    torch_device = torch.device('cuda:0')

    #
    # Load data sets
    #
    ds_dict = datasets.load_dataset(dataset, n_val, val_seed, n_sup, n_unsup,
                                    split_seed, split_path)

    ds_src = ds_dict['ds_src']
    ds_tgt = ds_dict['ds_tgt']
    tgt_val_ndx = ds_dict['val_ndx_tgt']
    # A separate source validation set only exists in the domain-shift case.
    src_val_ndx = ds_dict['val_ndx_src'] if ds_src is not ds_tgt else None
    test_ndx = ds_dict['test_ndx_tgt']
    sup_ndx = ds_dict['sup_ndx']
    unsup_ndx = ds_dict['unsup_ndx']

    n_classes = ds_src.num_classes
    # Used to normalise logit-space consistency losses w.r.t. class count.
    root_n_classes = math.sqrt(n_classes)

    if bin_fill_holes and n_classes != 2:
        print(
            'Binary hole filling can only be used with binary (2-class) segmentation datasets'
        )
        return

    print('Loaded data')

    # Build network
    NetClass = network_architectures.seg.get(arch)

    student_net = NetClass(ds_src.num_classes).to(torch_device)

    # Pre-trained backbone parameters get a 10x smaller learning rate than
    # freshly initialised layers.
    if opt_type == 'adam':
        student_optim = torch.optim.Adam([
            dict(params=student_net.pretrained_parameters(), lr=learning_rate * 0.1),
            dict(params=student_net.new_parameters(), lr=learning_rate)
        ])
    elif opt_type == 'sgd':
        student_optim = torch.optim.SGD([
            dict(params=student_net.pretrained_parameters(), lr=learning_rate * 0.1),
            dict(params=student_net.new_parameters(), lr=learning_rate)
        ],
                                        momentum=sgd_momentum,
                                        nesterov=sgd_nesterov,
                                        weight_decay=sgd_weight_decay)
    else:
        raise ValueError('Unknown opt_type {}'.format(opt_type))

    if model == 'mean_teacher':
        # Teacher is an EMA copy of the student; its weights are updated by
        # the EMA optimizer, never by back-prop.
        teacher_net = NetClass(ds_src.num_classes).to(torch_device)
        for p in teacher_net.parameters():
            p.requires_grad = False
        teacher_optim = optim_weight_ema.EMAWeightOptimizer(
            teacher_net, student_net, teacher_alpha)
        eval_net = teacher_net
    elif model == 'pi':
        # Pi-model: the student acts as its own teacher.
        teacher_net = student_net
        teacher_optim = None
        eval_net = student_net
    else:
        print('Unknown model type {}'.format(model))
        return

    BLOCK_SIZE = student_net.BLOCK_SIZE
    NET_MEAN, NET_STD = seg_transforms.get_mean_std(ds_tgt, student_net)

    if freeze_bn:
        if not hasattr(student_net, 'freeze_batchnorm'):
            raise ValueError(
                'Network {} does not support batchnorm freezing'.format(arch))

    # Label value 255 marks "unlabelled" pixels and is excluded from the loss.
    clf_crossent_loss = nn.CrossEntropyLoss(ignore_index=255)

    print('Built network')

    # iters_per_epoch == -1 means "one pass over the unsupervised set".
    if iters_per_epoch == -1:
        iters_per_epoch = len(unsup_ndx) // batch_size
    total_iters = iters_per_epoch * num_epochs

    lr_epoch_scheduler, lr_iter_scheduler = lr_schedules.make_lr_schedulers(
        optimizer=student_optim,
        total_iters=total_iters,
        schedule_type=lr_sched,
        step_epochs=lr_step_epochs,
        step_gamma=lr_step_gamma,
        poly_power=lr_poly_power)

    # Train data pipeline: transforms
    train_transforms = []

    if crop_size is not None:
        if aug_scale_hung:
            train_transforms.append(
                seg_transforms_cv.SegCVTransformRandomCropScaleHung(
                    crop_size, (0, 0), uniform_scale=not aug_scale_non_uniform))
        elif aug_max_scale != 1.0 or aug_rot_mag != 0.0:
            train_transforms.append(
                seg_transforms_cv.SegCVTransformRandomCropRotateScale(
                    crop_size, (0, 0),
                    rot_mag=aug_rot_mag,
                    max_scale=aug_max_scale,
                    uniform_scale=not aug_scale_non_uniform,
                    constrain_rot_scale=True))
        else:
            train_transforms.append(
                seg_transforms_cv.SegCVTransformRandomCrop(crop_size, (0, 0)))
    else:
        if aug_scale_hung:
            raise NotImplementedError('aug_scale_hung requires a crop_size')

    if aug_hflip or aug_vflip or aug_hvflip:
        train_transforms.append(
            seg_transforms_cv.SegCVTransformRandomFlip(aug_hflip, aug_vflip,
                                                       aug_hvflip))
    train_transforms.append(
        seg_transforms_cv.SegCVTransformNormalizeToTensor(NET_MEAN, NET_STD))

    # Train data pipeline: supervised and unsupervised data sets
    train_sup_ds = ds_src.dataset(
        labels=True,
        mask=False,
        xf=False,
        pair=False,
        transforms=seg_transforms.SegTransformCompose(train_transforms),
        pipeline_type='cv')
    train_unsup_ds = ds_src.dataset(
        labels=False,
        mask=True,
        xf=False,
        pair=False,
        transforms=seg_transforms.SegTransformCompose(train_transforms),
        pipeline_type='cv')

    collate_fn = seg_data.SegCollate(BLOCK_SIZE)

    # Train data pipeline: data loaders. RepeatSampler makes the iterators
    # effectively infinite so one iterator can span all epochs.
    sup_sampler = seg_data.RepeatSampler(
        torch.utils.data.SubsetRandomSampler(sup_ndx))
    train_sup_loader = torch.utils.data.DataLoader(train_sup_ds,
                                                   batch_size,
                                                   sampler=sup_sampler,
                                                   collate_fn=collate_fn,
                                                   num_workers=num_workers)
    if cons_weight > 0.0:
        unsup_sampler = seg_data.RepeatSampler(
            torch.utils.data.SubsetRandomSampler(unsup_ndx))
        train_unsup_loader = torch.utils.data.DataLoader(
            train_unsup_ds,
            batch_size,
            sampler=unsup_sampler,
            collate_fn=collate_fn,
            num_workers=num_workers)
    else:
        train_unsup_loader = None

    # Eval pipeline
    src_val_loader, tgt_val_loader, test_loader = datasets.eval_data_pipeline(
        ds_src, ds_tgt, src_val_ndx, tgt_val_ndx, test_ndx, batch_size,
        collate_fn, NET_MEAN, NET_STD, num_workers)

    # Report settings
    print('Settings:')
    print(', '.join([
        '{}={}'.format(key, settings[key])
        for key in sorted(list(settings.keys()))
    ]))

    # Report dataset size
    print('Dataset:')
    print('len(sup_ndx)={}'.format(len(sup_ndx)))
    print('len(unsup_ndx)={}'.format(len(unsup_ndx)))
    if ds_src is not ds_tgt:
        # FIX: previously formatted len(tgt_val_ndx) under the src label.
        print('len(src_val_ndx)={}'.format(len(src_val_ndx)))
        print('len(tgt_val_ndx)={}'.format(len(tgt_val_ndx)))
    else:
        print('len(val_ndx)={}'.format(len(tgt_val_ndx)))
    if test_ndx is not None:
        print('len(test_ndx)={}'.format(len(test_ndx)))
    if n_sup != -1:
        print('sup_ndx={}'.format(sup_ndx.tolist()))

    # Track mIoU for early stopping
    # NOTE(review): these are initialised but never updated below -- the
    # early-stopping bookkeeping appears unfinished; confirm intent.
    best_tgt_miou = None
    best_epoch = 0

    eval_net_state = {
        key: value.detach().cpu().numpy()
        for key, value in eval_net.state_dict().items()
    }

    # Create iterators
    train_sup_iter = iter(train_sup_loader)
    train_unsup_iter = iter(
        train_unsup_loader) if train_unsup_loader is not None else None

    iter_i = 0
    print('Training...')
    for epoch_i in range(num_epochs):
        if lr_epoch_scheduler is not None:
            lr_epoch_scheduler.step(epoch_i)

        t1 = time.time()

        # Sigmoid ramp-up of the consistency weight over the first `rampup`
        # epochs avoids training on noisy early teacher predictions.
        if rampup > 0:
            ramp_val = network_architectures.sigmoid_rampup(epoch_i, rampup)
        else:
            ramp_val = 1.0

        student_net.train()
        if teacher_net is not student_net:
            teacher_net.train()

        if freeze_bn:
            student_net.freeze_batchnorm()
            if teacher_net is not student_net:
                teacher_net.freeze_batchnorm()

        sup_loss_acc = 0.0
        consistency_loss_acc = 0.0
        conf_rate_acc = 0.0
        n_sup_batches = 0
        n_unsup_batches = 0

        src_val_iter = iter(
            src_val_loader) if src_val_loader is not None else None
        tgt_val_iter = iter(
            tgt_val_loader) if tgt_val_loader is not None else None

        for sup_batch in itertools.islice(train_sup_iter, iters_per_epoch):
            if lr_iter_scheduler is not None:
                lr_iter_scheduler.step(iter_i)
            student_optim.zero_grad()

            #
            # Supervised branch
            #

            batch_x = sup_batch['image'].to(torch_device)
            batch_y = sup_batch['labels'].to(torch_device)

            logits_sup = student_net(batch_x)
            # labels carry a singleton channel dim; drop it for CE loss.
            sup_loss = clf_crossent_loss(logits_sup, batch_y[:, 0, :, :])
            sup_loss.backward()

            if cons_weight > 0.0:
                for _ in range(unsup_batch_ratio):
                    #
                    # Unsupervised branch
                    #

                    # Mix mode: batch consists of paired unsupervised samples
                    unsup_batch0 = next(train_unsup_iter)
                    unsup_batch1 = next(train_unsup_iter)
                    batch_ux0 = unsup_batch0['image'].to(torch_device)
                    batch_um0 = unsup_batch0['mask'].to(torch_device)
                    batch_ux1 = unsup_batch1['image'].to(torch_device)
                    batch_um1 = unsup_batch1['mask'].to(torch_device)

                    # ICT mix factors, one per sample, broadcast over C,H,W.
                    ict_mix_factors = np.random.beta(ict_alpha,
                                                     ict_alpha,
                                                     size=(len(batch_ux0), 1,
                                                           1, 1))
                    ict_mix_factors = torch.tensor(ict_mix_factors,
                                                   dtype=torch.float,
                                                   device=torch_device)

                    # Mix images and their validity masks with the same factors.
                    batch_ux_mixed = batch_ux0 * (
                        1.0 - ict_mix_factors) + batch_ux1 * ict_mix_factors
                    batch_um_mixed = batch_um0 * (
                        1.0 - ict_mix_factors) + batch_um1 * ict_mix_factors

                    # Get teacher predictions for original images
                    with torch.no_grad():
                        logits_u0_tea = teacher_net(batch_ux0).detach()
                        logits_u1_tea = teacher_net(batch_ux1).detach()

                    # Get student prediction for mixed image
                    logits_cons_stu = student_net(batch_ux_mixed)

                    # Logits -> probs
                    prob_u0_tea = F.softmax(logits_u0_tea, dim=1)
                    prob_u1_tea = F.softmax(logits_u1_tea, dim=1)
                    prob_cons_stu = F.softmax(logits_cons_stu, dim=1)

                    # Mix teacher predictions using same mask
                    # It makes no difference whether we do this with logits or
                    # probabilities as the mask pixels are either 1 or 0
                    logits_cons_tea = logits_u0_tea * (
                        1 - ict_mix_factors) + logits_u1_tea * ict_mix_factors
                    prob_cons_tea = prob_u0_tea * (
                        1 - ict_mix_factors) + prob_u1_tea * ict_mix_factors

                    loss_mask = batch_um_mixed

                    # Confidence thresholding
                    if conf_thresh > 0.0:
                        # Confidence of each teacher prediction (the teacher
                        # probabilities were already computed above; the
                        # previous redundant softmax recomputation was removed).
                        conf_u0_tea = prob_u0_tea.max(dim=1, keepdim=True)[0]
                        conf_u1_tea = prob_u1_tea.max(dim=1, keepdim=True)[0]

                        # Mix confidences
                        conf_tea = conf_u0_tea * (
                            1 - ict_mix_factors) + conf_u1_tea * ict_mix_factors

                        # Compute confidence mask
                        # NOTE(review): conf_tea already has a singleton class
                        # dim (keepdim=True), so the extra [:, None, :, :]
                        # slice looks suspect -- confirm the intended shape.
                        conf_mask = (conf_tea >= conf_thresh).float()[:, None, :, :]

                        # Record rate for reporting
                        conf_rate_acc += float(conf_mask.mean())

                        # Average confidence mask if requested
                        if not conf_per_pixel:
                            conf_mask = conf_mask.mean()

                        loss_mask = loss_mask * conf_mask
                    elif rampup > 0:
                        conf_rate_acc += ramp_val

                    # Compute per-pixel consistency loss
                    # Note that the way we aggregate the loss across the
                    # class/channel dimension (1) depends on the loss function
                    # used. Generally, summing over the class dimension keeps
                    # the magnitude of the gradient of the loss w.r.t. the
                    # logits nearly constant w.r.t. the number of classes.
                    # When using logit-variance, dividing by `sqrt(num_classes)`
                    # helps.
                    if cons_loss_fn == 'var':
                        delta_prob = prob_cons_stu - prob_cons_tea
                        consistency_loss = delta_prob * delta_prob
                        consistency_loss = consistency_loss.sum(dim=1,
                                                                keepdim=True)
                    elif cons_loss_fn == 'logits_var':
                        delta_logits = logits_cons_stu - logits_cons_tea
                        consistency_loss = delta_logits * delta_logits
                        consistency_loss = consistency_loss.sum(
                            dim=1, keepdim=True) / root_n_classes
                    elif cons_loss_fn == 'logits_smoothl1':
                        consistency_loss = F.smooth_l1_loss(logits_cons_stu,
                                                            logits_cons_tea,
                                                            reduce=False)
                        consistency_loss = consistency_loss.sum(
                            dim=1, keepdim=True) / root_n_classes
                    elif cons_loss_fn == 'bce':
                        consistency_loss = network_architectures.robust_binary_crossentropy(
                            prob_cons_stu, prob_cons_tea)
                        consistency_loss = consistency_loss.sum(dim=1,
                                                                keepdim=True)
                    elif cons_loss_fn == 'kld':
                        consistency_loss = F.kl_div(F.log_softmax(
                            logits_cons_stu, dim=1),
                                                    prob_cons_tea,
                                                    reduce=False)
                        consistency_loss = consistency_loss.sum(dim=1,
                                                                keepdim=True)
                    else:
                        raise ValueError(
                            'Unknown consistency loss function {}'.format(
                                cons_loss_fn))

                    # Apply consistency loss mask and take the mean over
                    # pixels and images
                    consistency_loss = (consistency_loss * loss_mask).mean()

                    # Modulate with rampup if desired
                    if rampup > 0:
                        consistency_loss = consistency_loss * ramp_val

                    # Weight the consistency loss and back-prop
                    unsup_loss = consistency_loss * cons_weight
                    unsup_loss.backward()

                    consistency_loss_acc += float(consistency_loss.detach())

                    n_unsup_batches += 1

            student_optim.step()
            if teacher_optim is not None:
                teacher_optim.step()

            sup_loss_acc += float(sup_loss.detach())
            n_sup_batches += 1
            iter_i += 1

        sup_loss_acc /= n_sup_batches
        if n_unsup_batches > 0:
            consistency_loss_acc /= n_unsup_batches
            conf_rate_acc /= n_unsup_batches

        eval_net.eval()

        # Per-epoch validation: source-domain set exists only when source and
        # target datasets differ.
        if ds_src is not ds_tgt:
            src_iou_eval = evaluation.EvaluatorIoU(ds_src.num_classes,
                                                   bin_fill_holes)
            with torch.no_grad():
                for batch in src_val_iter:
                    batch_x = batch['image'].to(torch_device)
                    batch_y = batch['labels'].numpy()

                    logits = eval_net(batch_x)
                    pred_y = torch.argmax(logits, dim=1).detach().cpu().numpy()

                    for sample_i in range(len(batch_y)):
                        src_iou_eval.sample(batch_y[sample_i, 0],
                                            pred_y[sample_i],
                                            ignore_value=255)

            src_iou = src_iou_eval.score()
            src_miou = src_iou.mean()
        else:
            src_iou_eval = src_iou = src_miou = None

        tgt_iou_eval = evaluation.EvaluatorIoU(ds_tgt.num_classes,
                                               bin_fill_holes)
        with torch.no_grad():
            for batch in tgt_val_iter:
                batch_x = batch['image'].to(torch_device)
                batch_y = batch['labels'].numpy()

                logits = eval_net(batch_x)
                pred_y = torch.argmax(logits, dim=1).detach().cpu().numpy()

                for sample_i in range(len(batch_y)):
                    tgt_iou_eval.sample(batch_y[sample_i, 0],
                                        pred_y[sample_i],
                                        ignore_value=255)

        tgt_iou = tgt_iou_eval.score()
        tgt_miou = tgt_iou.mean()

        t2 = time.time()

        if ds_src is not ds_tgt:
            print(
                'Epoch {}: took {:.3f}s, TRAIN clf loss={:.6f}, consistency loss={:.6f}, conf rate={:.3%}, '
                'SRC VAL mIoU={:.3%}, TGT VAL mIoU={:.3%}'.format(
                    epoch_i + 1, t2 - t1, sup_loss_acc, consistency_loss_acc,
                    conf_rate_acc, src_miou, tgt_miou))
            print('-- SRC {}'.format(', '.join(
                ['{:.3%}'.format(x) for x in src_iou])))
            print('-- TGT {}'.format(', '.join(
                ['{:.3%}'.format(x) for x in tgt_iou])))
        else:
            print(
                'Epoch {}: took {:.3f}s, TRAIN clf loss={:.6f}, consistency loss={:.6f}, conf rate={:.3%}, VAL mIoU={:.3%}'
                .format(epoch_i + 1, t2 - t1, sup_loss_acc,
                        consistency_loss_acc, conf_rate_acc, tgt_miou))
            print('-- {}'.format(', '.join(
                ['{:.3%}'.format(x) for x in tgt_iou])))

    # Save the evaluation network after training, if requested.
    if save_model:
        model_path = os.path.join(submit_config.run_dir, "model.pth")
        torch.save(eval_net, model_path)

    # Save predictions for the target validation set, if requested.
    if save_preds:
        out_dir = os.path.join(submit_config.run_dir, 'preds')
        os.makedirs(out_dir, exist_ok=True)
        with torch.no_grad():
            for batch in tgt_val_loader:
                batch_x = batch['image'].to(torch_device)
                batch_ndx = batch['index'].numpy()

                logits = eval_net(batch_x)
                pred_y = torch.argmax(logits, dim=1).detach().cpu().numpy()

                for sample_i, sample_ndx in enumerate(batch_ndx):
                    ds_tgt.save_prediction_by_index(
                        out_dir, pred_y[sample_i].astype(np.uint32),
                        sample_ndx)
    else:
        out_dir = None

    # Final test-set evaluation.
    if test_loader is not None:
        test_iou_eval = evaluation.EvaluatorIoU(ds_tgt.num_classes,
                                                bin_fill_holes)
        with torch.no_grad():
            for batch in test_loader:
                batch_x = batch['image'].to(torch_device)
                batch_ndx = batch['index'].numpy()
                # FIX: batch_y was previously never assigned in this loop, so
                # the evaluator scored stale labels left over from the last
                # validation batch; read this batch's labels instead.
                batch_y = batch['labels'].numpy()

                logits = eval_net(batch_x)
                pred_y = torch.argmax(logits, dim=1).detach().cpu().numpy()

                for sample_i, sample_ndx in enumerate(batch_ndx):
                    if save_preds:
                        ds_tgt.save_prediction_by_index(
                            out_dir, pred_y[sample_i].astype(np.uint32),
                            sample_ndx)
                    test_iou_eval.sample(batch_y[sample_i, 0],
                                         pred_y[sample_i],
                                         ignore_value=255)

        test_iou = test_iou_eval.score()
        test_miou = test_iou.mean()

        print('FINAL TEST: mIoU={:.3%}'.format(test_miou))
        print('-- TEST {}'.format(', '.join(
            ['{:.3%}'.format(x) for x in test_iou])))
def loss(self, input, target, weights): if self.config.use_IS: loss = torch.abs(target - input) * torch.from_numpy(weights).to(device=self.config.device) return loss.mean() return F.smooth_l1_loss(input, target)
def learn(self, epoch_index):
    # Run one PPO learning phase over the episodes stored in self.memory:
    # build discounted value targets and advantages with the critic, then
    # perform PPO_UPDATE_PERIOD passes of clipped-surrogate updates on
    # random mini-batches.
    self.training_step += 1
    # NOTE(review): the string below is not a real docstring -- it follows a
    # statement, so it is a discarded expression; consider moving it above
    # `self.training_step += 1` so help() picks it up.
    """Update policy and value parameters using given batch of experience tuples.
    Q_targets = r + γ * critic_target(next_state, actor_target(next_state))
    where:
        actor_target(state) -> action
        critic_target(state, action) -> Q-value
    Params
    ======
        experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
        gamma (float): discount factor
    """
    # Stack the whole replay memory into batched tensors on the device.
    state = torch.tensor([t.state for t in self.memory],
                         dtype=torch.float).to(device)
    action = torch.tensor([t.action for t in self.memory],
                          dtype=torch.float).to(device)
    reward = torch.tensor([t.reward for t in self.memory],
                          dtype=torch.float).to(device).unsqueeze(-1)
    next_state = torch.tensor([t.next_state for t in self.memory],
                              dtype=torch.float).to(device)
    old_action_log_prob = torch.tensor([t.a_log_prob for t in self.memory],
                                       dtype=torch.float).to(device)
    # Normalise rewards to zero mean / unit variance (epsilon avoids /0).
    reward = (reward - reward.mean()) / (reward.std() + 0.00001)
    # One-step TD targets: r + gamma * V(s'), computed without gradients.
    target_v = []
    for mem_index in range(len(state)):
        with torch.no_grad():
            target_v.append(reward[mem_index] +
                            self.gamma * self.critic_local(next_state[mem_index]))
    target_v = torch.stack(target_v).to(device)
    # Advantage estimate: TD target minus current value baseline.
    advantage = []
    for mem_index in range(len(state)):
        with torch.no_grad():
            advantage.append(target_v[mem_index] -
                             self.critic_local(state[mem_index]))
    advantage = torch.stack(advantage).to(device)
    for i in range(PPO_UPDATE_PERIOD):
        for index in BatchSampler(
                SubsetRandomSampler(range(len(self.memory))), BATCH_SIZE,
                False):
            # Current policy distribution for the sampled states.
            (mu, sigma) = self.actor_local(state[index])
            dist = Normal(mu, sigma)
            action_prob = dist.log_prob(action[index])
            # Probability ratio pi_new / pi_old for the stored actions.
            ratio = torch.exp(action_prob - old_action_log_prob[index])
            # Clipped PPO surrogate objective.
            # NOTE(review): the clip interval is widened by +/-0.5 beyond the
            # standard [1-clip, 1+clip] PPO range -- confirm this is
            # intentional and not a leftover experiment.
            L_left = ratio * advantage[index]
            L_right = torch.clamp(ratio, 1 - self.clip_param - 0.5,
                                  1 + self.clip_param + 0.5) * advantage[index]
            # update actor network
            action_loss = -torch.min(L_left, L_right).mean()
            self.actor_optimizer.zero_grad()
            action_loss.backward()
            nn.utils.clip_grad_norm_(self.actor_local.parameters(), 1.0)
            self.actor_optimizer.step()
            # update critic optimizer (Huber loss against the TD targets)
            value_loss = F.smooth_l1_loss(self.critic_local(state[index]),
                                          target_v[index])
            # Debug print on the 6th PPO pass only.
            if i == 5:
                print('val_loss', value_loss)
            self.critic_optimizer.zero_grad()
            value_loss.backward()
            nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1.0)
            self.critic_optimizer.step()
        print('PPO step', i)
    # Periodically clear the on-policy memory (every 10th epoch after the
    # first). `&` works here because both operands are bools, but `and`
    # would be the idiomatic choice.
    if (epoch_index % 10 == 0) & (epoch_index > 1):
        del self.memory[:]
def forward(self, predicts, targets):
    """SSD MultiBox loss.

    Matches ground-truth boxes to prior (default) boxes, then computes the
    smooth-L1 localisation loss over positive matches and the cross-entropy
    confidence loss over positives plus hard-mined negatives.

    Args:
        predicts: (loc_data, conf_data, priors) from the SSD net --
            loc: [batch, num_priors, 4], conf: [batch, num_priors, classes],
            priors: [num_priors, 4].
        targets: per-image ground truth; each row is a box with its class
            label in the last column.

    Returns:
        LossTuple(loss_l, loss_c, total_loss), each normalised by the total
        number of positive matches N.
    """
    # Localisation predictions, class confidences, prior boxes.
    loc_data, conf_data, priors = predicts
    # print(conf_data.shape) torch.Size([batch_size, 8732, num_classes+1])
    # Batch size.
    num = loc_data.size(0)
    # print(loc_data.shape) torch.Size([1, 8732, 4])
    # Trim priors so their count matches loc_data/conf_data (normally a no-op).
    priors = priors[:loc_data.size(1), :]
    # Number of prior boxes.
    num_priors = (priors.size(0))
    # Buffers filled in-place by match(), one batch item at a time.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    loc_t = loc_t.cuda()
    conf_t = conf_t.cuda()
    priors = priors.cuda()
    for idx in range(num):
        # Ground-truth boxes: every row of targets[idx] is one object,
        # coordinates first, class label last.
        truths = targets[idx][:, :-1]
        # Ground-truth class labels.
        labels = targets[idx][:, -1]
        # Prior (default) boxes.
        defaults = priors
        # Assign each prior a ground-truth box and label. match() has no
        # return value: it writes the results into loc_t / conf_t in place.
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    # Wrap as Variables (legacy autograd API; no-op on modern torch).
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)
    # Wherever conf_t > 0 the prior contains an object (positive match).
    pos = conf_t > 0
    # print(pos.shape) torch.Size([1, 8732])
    # Number of positive priors per image.
    num_pos = pos.sum(dim=1, keepdim=True)
    # print(num_pos) tensor([[12]], device='cuda:0')
    # Localisation loss: smooth L1 over positive priors only.
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    # Select the predicted offsets for positive priors.
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # size_average=False -> summed loss; normalised by N below.
    # (size_average is deprecated; equivalent to reduction='sum'.)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
    # Flatten confidences for the per-prior classification loss.
    batch_conf = conf_data.view(-1, self.num_classes)
    # Per-prior loss: logsumexp(logits) - logit[true class].
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
    loss_c = loss_c.view(num, -1)
    # Hard negative mining: exclude positives, then rank remaining priors by
    # loss (double argsort yields each prior's rank) and keep the hardest.
    loss_c[pos] = 0
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    # Positive count per image.
    num_pos = pos.long().sum(1, keepdim=True)
    # Cap negatives at negpos_ratio times the positive count.
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    # Confidence loss over positives and the selected hard negatives.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)
    # Normalise both losses by the total number of positives.
    N = num_pos.data.sum()
    loss_l /= N
    loss_c /= N
    total_loss = loss_l + loss_c
    losses = [loss_l,loss_c,total_loss]
    return LossTuple(*losses)
a_lst.append(a) r_lst.append(r / 100.0) mask_lst.append(1 - done) s = s_prime step_idx += 1 s_final = torch.from_numpy(s_prime).float() v_final = model.v(s_final).detach().clone().numpy() td_target = compute_target(v_final, r_lst, mask_lst) td_target_vec = td_target.reshape(-1) s_vec = torch.tensor(s_lst).float().reshape( -1, 4) # 4 == Dimension of state a_vec = torch.tensor(a_lst).reshape(-1).unsqueeze(1) advantage = td_target_vec - model.v(s_vec).reshape(-1) pi = model.pi(s_vec, softmax_dim=1) pi_a = pi.gather(1, a_vec).reshape(-1) loss = -(torch.log(pi_a) * advantage.detach()).mean() + \ F.smooth_l1_loss(model.v(s_vec).reshape(-1), td_target_vec) optimizer.zero_grad() loss.backward() optimizer.step() if step_idx % PRINT_INTERVAL == 0: test(step_idx, model) envs.close()
def train(self):
    """Run one optimisation sweep of the dual-stream (grayscale + depth) DQN
    over the whole replay memory, in chunks of ``minibatch_size``.

    For each minibatch: preprocess the stored frame stacks into tensors,
    compute Q(s, a) with the two policy nets, bootstrap V(s') with the two
    target nets, and take one clipped-gradient Huber-loss step per stream.
    """
    # Not enough experience collected yet to form a single minibatch.
    if len(self.memory) < self.minibatch_size:
        return
    for i in range(0, len(self.memory), self.minibatch_size):
        #transitions = self.memory.sample(self.minibatch_size)
        transitions = self.memory.pull(self.minibatch_size)
        print('Batch train: ' + str(int(i / self.minibatch_size) + 1) + "/" +
              str(int(len(self.memory) / self.minibatch_size) + 1))
        # Convert each stored transition's raw images into stacked tensors.
        aux_transitions = []
        for t in transitions:
            # One (state_size, H, W) stack per stream; torch.Tensor() leaves
            # the storage uninitialised -- every slot is overwritten below.
            proc_sgray = torch.Tensor(self.state_size, self.state_dim,
                                      self.state_dim).to(self.device)
            proc_sdepth = torch.Tensor(self.state_size, self.state_dim,
                                       self.state_dim).to(self.device)
            proc_next_sgray = torch.Tensor(self.state_size, self.state_dim,
                                           self.state_dim).to(self.device)
            proc_next_sdepth = torch.Tensor(self.state_size, self.state_dim,
                                            self.state_dim).to(self.device)
            count = 0
            # Fill each frame slot of the stacks.
            # NOTE(review): assumes each transition holds exactly state_size
            # frames per stream -- confirm against the memory writer.
            for sgray, sdepth, next_sgray, next_sdepth in zip(
                    t.sgray, t.sdepth, t.next_sgray, t.next_sdepth):
                proc_sgray[count] = self.get_tensor_from_image(sgray)
                proc_sdepth[count] = self.get_tensor_from_image(sdepth)
                proc_next_sgray[count] = self.get_tensor_from_image(
                    next_sgray)
                proc_next_sdepth[count] = self.get_tensor_from_image(
                    next_sdepth)
                count += 1
            # Add the batch dimension expected by the networks.
            proc_sgray = proc_sgray.unsqueeze(0).to(self.device)
            proc_sdepth = proc_sdepth.unsqueeze(0).to(self.device)
            proc_next_sgray = proc_next_sgray.unsqueeze(0).to(self.device)
            proc_next_sdepth = proc_next_sdepth.unsqueeze(0).to(
                self.device)
            #('sgray','sdepth','action','next_sgray','next_sdepth','reward')
            one_transition = Transition(proc_sgray, proc_sdepth, t.action,
                                        proc_next_sgray, proc_next_sdepth,
                                        t.reward)
            aux_transitions.append(one_transition)
        transitions = aux_transitions
        # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
        # detailed explanation). This converts batch-array of Transitions
        # to Transition of batch-arrays.
        batch = Transition(*zip(*transitions))
        #print(batch.sgray)
        # Compute a mask of non-final states and concatenate the batch elements
        # (a final state would've been the one after which simulation ended)
        gray_non_final_mask = torch.tensor(tuple(
            map(lambda s: s is not None, batch.next_sgray)),
                                           device=self.device,
                                           dtype=torch.bool)
        gray_non_final_next_states = torch.cat(
            [s for s in batch.next_sgray if s is not None])
        depth_non_final_mask = torch.tensor(tuple(
            map(lambda s: s is not None, batch.next_sdepth)),
                                            device=self.device,
                                            dtype=torch.bool)
        depth_non_final_next_states = torch.cat(
            [s for s in batch.next_sdepth if s is not None])
        sgray_batch = torch.cat(batch.sgray)
        sdepth_batch = torch.cat(batch.sdepth)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)
        # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
        # columns of actions taken. These are the actions which would've been taken
        # for each batch state according to policy_net
        sgray_action_values = self.gray_policy_net(sgray_batch).gather(
            1, action_batch)
        sdepth_action_values = self.depth_policy_net(sdepth_batch).gather(
            1, action_batch)
        # Compute V(s_{t+1}) for all next states.
        # Expected values of actions for non_final_next_states are computed based
        # on the "older" target_net; selecting their best reward with max(1)[0].
        # This is merged based on the mask, such that we'll have either the expected
        # state value or 0 in case the state was final.
        next_sgray_values = torch.zeros(self.minibatch_size,
                                        device=self.device)
        next_sgray_values[gray_non_final_mask] = self.gray_target_net(
            gray_non_final_next_states).max(1)[0].detach()
        next_sdepth_values = torch.zeros(self.minibatch_size,
                                         device=self.device)
        next_sdepth_values[depth_non_final_mask] = self.depth_target_net(
            depth_non_final_next_states).max(1)[0].detach()
        # Compute the expected Q values
        expected_sgray_action_values = (next_sgray_values *
                                        self.discount) + reward_batch
        expected_sdepth_action_values = (next_sdepth_values *
                                         self.discount) + reward_batch
        # Compute Huber loss
        gray_loss = F.smooth_l1_loss(
            sgray_action_values, expected_sgray_action_values.unsqueeze(1))
        depth_loss = F.smooth_l1_loss(
            sdepth_action_values, expected_sdepth_action_values.unsqueeze(1))
        # Optimize the model (grayscale stream), clipping per-element
        # gradients to [-1, 1] for stability.
        self.gray_optimizer.zero_grad()
        gray_loss.backward()
        for param in self.gray_policy_net.parameters():
            param.grad.data.clamp_(-1, 1)
        self.gray_optimizer.step()
        # Optimize the model (depth stream).
        self.depth_optimizer.zero_grad()
        depth_loss.backward()
        for param in self.depth_policy_net.parameters():
            param.grad.data.clamp_(-1, 1)
        self.depth_optimizer.step()