def mrcnn_bbox_loss(target_bbox, target_class_ids, pred_bbox):
    """Loss for Mask R-CNN bounding box refinement.

    target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
    target_class_ids: [batch, num_rois]. Integer class IDs.
    pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))]
    """
    # Reshape to merge batch and ROI dimensions for simplicity.
    target_class_ids = target_class_ids.contiguous().view(-1)
    target_bbox = target_bbox.contiguous().view(-1, 4)
    pred_bbox = pred_bbox.contiguous().view(-1, pred_bbox.size(2), 4)

    # Only positive ROIs contribute to the loss, and only the predicted
    # deltas for each ROI's ground-truth class. Get their indices.
    positive_roi_ix = torch.nonzero(target_class_ids > 0).squeeze(1)
    positive_roi_class_ids = target_class_ids[positive_roi_ix].long()

    # Gather the deltas (predicted and true) that contribute to the loss.
    target_bbox = target_bbox[positive_roi_ix]
    pred_bbox = pred_bbox[positive_roi_ix, positive_roi_class_ids]

    loss = F.smooth_l1_loss(pred_bbox, target_bbox, reduction='mean')
    return loss
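A quick sanity check of the shape contract above, using dummy values (hypothetical; assumes at least one positive ROI is sampled):

# batch=2 images, num_rois=5, num_classes=4.
target_bbox = torch.randn(2, 5, 4)
target_class_ids = torch.randint(0, 4, (2, 5))
pred_bbox = torch.randn(2, 5, 4, 4)
print(mrcnn_bbox_loss(target_bbox, target_class_ids, pred_bbox))  # 0-dim tensor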
Example #2
def train_batch(param):
    if len(memory) < param['batch_size']:
        return 0
    batch = memory.sample(param['batch_size'])
    batch_states = default_states_preprocessor([m.state for m in batch])
    batch_next_states = default_states_preprocessor([m.next_state for m in batch])
    batch_ended = torch.tensor([m.ended for m in batch])
    batch_rewards = torch.tensor([m.reward for m in batch]).to(device)
    batch_actions = torch.tensor([m.action for m in batch]).to(device)

    ## Calculate the expected reward:
    with torch.no_grad():
        not_ended_batch = ~batch_ended.bool().to(device)
        next_states_non_final = batch_next_states[not_ended_batch]
        next_state_values = torch.zeros(param['batch_size']).to(device)
        reward_hat = target_dqn(next_states_non_final)
        next_state_values[not_ended_batch] = reward_hat.max(1)[0]
        expected_state_action_values = next_state_values * param['GAMMA'] + batch_rewards

    # Predict the value function:
    yhat = dqn(batch_states)
    state_action_values = yhat.gather(1, batch_actions.unsqueeze(1)).squeeze()

    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)
    optimizer.zero_grad()
    loss.backward()
    # Clamp gradients (not the weights themselves) to stabilize training;
    # `p` avoids shadowing the `param` dict argument.
    for p in dqn.parameters():
        p.grad.data.clamp_(-1, 1)
    optimizer.step()
    return float(loss.detach().cpu().numpy())
Example #3
    def forward(self, loc_preds, loc_targets, cls_preds, cls_targets):
        """Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).

        Args:
          loc_preds: (tensor) predicted locations, sized [N, #anchors, 4].
          loc_targets: (tensor) encoded target locations, sized [N, #anchors, 4].
          cls_preds: (tensor) predicted class confidences, sized [N, #anchors, #classes].
          cls_targets: (tensor) encoded target labels, sized [N, #anchors].

        loss:
          (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + CrossEntropyLoss(cls_preds, cls_targets).
        """
        pos = cls_targets > 0  # [N,#anchors]
        batch_size = pos.size(0)
        num_pos = pos.sum().item()

        # ===============================================================
        # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
        # ===============================================================
        mask = pos.unsqueeze(2).expand_as(loc_preds)  # [N,#anchors,4]
        loc_loss = F.smooth_l1_loss(loc_preds[mask], loc_targets[mask], reduction='sum')

        # ===============================================================
        # cls_loss = CrossEntropyLoss(cls_preds, cls_targets)
        # ===============================================================
        cls_loss = F.cross_entropy(cls_preds.view(-1, self.num_classes), cls_targets.view(-1), reduction='none')  # [N*#anchors,]
        cls_loss = cls_loss.view(batch_size, -1)
        cls_loss[cls_targets < 0] = 0  # set ignored loss to 0
        neg = self._hard_negative_mining(cls_loss, pos)  # [N,#anchors]
        cls_loss = cls_loss[pos | neg].sum()

        print('loc_loss: {} | cls_loss: {}'.format(loc_loss.item() / num_pos, cls_loss.item() / num_pos))
        loss = (loc_loss + cls_loss) / num_pos
        return loss
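    # _hard_negative_mining is not shown in this snippet; a typical sketch of
    # such a helper, assuming the common 3:1 negative-to-positive ratio (the
    # ratio and names here are illustrative, not the original):
    def _hard_negative_mining_sketch(self, cls_loss, pos, neg_ratio=3):
        """Select the highest-loss negatives, at most neg_ratio per positive.

        cls_loss: [N, #anchors] per-anchor loss (ignored anchors zeroed).
        pos: [N, #anchors] boolean mask of positive anchors.
        """
        cls_loss = cls_loss.clone()
        cls_loss[pos] = 0                            # rank negatives only
        _, idx = cls_loss.sort(1, descending=True)   # sort anchors by loss
        _, rank = idx.sort(1)                        # rank of each anchor
        num_neg = neg_ratio * pos.long().sum(1, keepdim=True)  # [N,1]
        return rank < num_neg                        # [N, #anchors] bool mask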
    def backward(self):
        # Calculate step returns in reverse order.
        rewards = self.rewards

        returns = torch.Tensor(len(rewards) - 1, *self.outputs[-1].value.data.size())
        step_return = self.outputs[-1].value.data.cpu()
        for i in range(len(rewards) - 2, -1, -1):
            step_return.mul_(self.discounts[i]).add_(rewards[i])
            returns[i] = step_return

        if USE_CUDA:
            returns = returns.cuda()
        # Calculate losses.
        policy_loss = 0
        value_loss = 0
        steps = len(self.outputs) - 1
        for i in range(steps):
            advantage = Variable(returns[i] - self.outputs[i].value.data)
            policy_loss += -self.outputs[i].log_action * advantage
            value_loss += F.smooth_l1_loss(self.outputs[i].value, Variable(returns[i]))

        weights_l2 = 0
        for param in self.parameters():
            weights_l2 += param.norm(2)

        loss = policy_loss.mean() / steps + value_loss / steps + 0.00001 * weights_l2
        loss.backward()

        # reset state
        self.reset()
def optimize_model():
    global last_sync
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*transitions))

    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                            batch.next_state)), dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state
                                       if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    state_action_values = model(state_batch).gather(1, action_batch)

    # Don't backprop through the bootstrap term.
    next_state_values = torch.zeros(BATCH_SIZE).type(FloatTensor)
    with torch.no_grad():
        next_state_values[non_final_mask] = model(non_final_next_states).max(1)[0]

    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)

    optimizer.zero_grad()
    loss.backward()
    for param in model.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
Example #6
    def forward(self, loc_preds, loc_targets, cls_preds, cls_targets):
        '''Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).

        Args:
          loc_preds: (tensor) predicted locations, sized [batch_size, #anchors, 4].
          loc_targets: (tensor) encoded target locations, sized [batch_size, #anchors, 4].
          cls_preds: (tensor) predicted class confidences, sized [batch_size, #anchors, #classes].
          cls_targets: (tensor) encoded target labels, sized [batch_size, #anchors].

        loss:
          (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + FocalLoss(cls_preds, cls_targets).
        '''
        batch_size, num_boxes = cls_targets.size()
        pos = cls_targets > 0  # [N,#anchors]
        num_pos = pos.long().sum().item()

        ################################################################
        # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
        ################################################################
        mask = pos.unsqueeze(2).expand_as(loc_preds)       # [N,#anchors,4]
        masked_loc_preds = loc_preds[mask].view(-1,4)      # [#pos,4]
        masked_loc_targets = loc_targets[mask].view(-1,4)  # [#pos,4]
        loc_loss = F.smooth_l1_loss(masked_loc_preds, masked_loc_targets, reduction='sum')

        ################################################################
        # cls_loss = FocalLoss(cls_preds, cls_targets)
        ################################################################
        pos_neg = cls_targets > -1  # exclude ignored anchors
        mask = pos_neg.unsqueeze(2).expand_as(cls_preds)
        masked_cls_preds = cls_preds[mask].view(-1,self.num_classes)
        cls_loss = self.focal_loss_alt(masked_cls_preds, cls_targets[pos_neg])

        print('loc_loss: %.3f | cls_loss: %.3f' % (loc_loss.item()/num_pos, cls_loss.item()/num_pos), end=' | ')
        loss = (loc_loss+cls_loss)/num_pos
        return loss
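    # focal_loss_alt is defined elsewhere in the original class. For reference,
    # a standard softmax focal loss sketch (alpha=0.25, gamma=2 are the usual
    # RetinaNet paper defaults, not necessarily what focal_loss_alt uses):
    def focal_loss_sketch(self, x, y):
        """x: [M, num_classes] logits; y: [M] integer labels."""
        ce = F.cross_entropy(x, y, reduction='none')  # per-sample -log p_t
        p_t = torch.exp(-ce)
        return (0.25 * (1.0 - p_t) ** 2.0 * ce).sum()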
    def reply(self):
        if (len(self.memory) < BATCH_SIZE):
            return

        transitions = self.memory.sample(BATCH_SIZE)

        batch = Transition(*zip(*transitions))

        non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)), dtype=torch.bool)

        state_batch = torch.cat(batch.state)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)
        non_final_next_state = torch.cat([s for s in batch.next_state if s is not None])
        
        self.model.eval()

        state_action_values = torch.squeeze(self.model(state_batch).gather(1, action_batch))

        next_state_values = torch.zeros(BATCH_SIZE).type(torch.FloatTensor)
        next_state_values[non_final_mask] = self.model(non_final_next_state).data.max(1)[0]

        expected_state_action_values = reward_batch + GAMMA * next_state_values
        
        self.model.train()

        loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
Example #8
    def ohem_detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):

        def log_sum_exp(x):
            x_max = x.data.max()
            return torch.log(torch.sum(torch.exp(x - x_max), dim=1, keepdim=True)) + x_max

        num_hard = cfg.TRAIN.BATCH_SIZE * self.batch_size
        pos_idx = rois_label > 0
        num_pos = pos_idx.int().sum()

        # classification loss
        num_classes = cls_score.size(1)
        weight = cls_score.data.new(num_classes).fill_(1.)
        weight[0] = num_pos.item() / num_hard

        conf_p = cls_score.detach()
        conf_t = rois_label.detach()

        # rank on cross_entropy loss
        loss_c = log_sum_exp(conf_p) - conf_p.gather(1, conf_t.view(-1,1))
        loss_c[pos_idx] = 100. # include all positive samples
        _, topk_idx = torch.topk(loss_c.view(-1), num_hard)
        loss_cls = F.cross_entropy(cls_score[topk_idx], rois_label[topk_idx], weight=weight)

        # bounding box regression L1 loss
        pos_idx = pos_idx.unsqueeze(1).expand_as(bbox_pred)
        loc_p = bbox_pred[pos_idx].view(-1, 4)
        loc_t = rois_target[pos_idx].view(-1, 4)
        loss_box = F.smooth_l1_loss(loc_p, loc_t)

        return loss_cls, loss_box
    def accumulate_gradient(self, batch_sz, states, actions, rewards,
                            next_states, mask):
        """ Compute the temporal difference error.
            td_error = (r + gamma * max Q(s_,a)) - Q(s,a)
        """
        # Compute Q(s, a)
        q_values = self.policy(states)
        q_values = q_values.gather(1, actions.unsqueeze(1))

        # Compute max_a' Q(s_, a') for the bootstrap term without tracking
        # gradients, so the Huber loss does not backprop into the target net.
        q_target_values = torch.zeros(batch_sz, device=states.device)

        # Bootstrap for non-terminal states
        with torch.no_grad():
            q_target_values[mask] = self.target_policy(next_states).max(1)[0][mask]

        expected_q_values = (q_target_values * self.gamma) + rewards

        # Compute Huber loss
        loss = F.smooth_l1_loss(q_values, expected_q_values)

        # Accumulate gradients
        loss.backward()
def rpn_bbox_loss(target_bbox, rpn_match, rpn_bbox, config):
    """Return the RPN bounding box loss graph.

    config: the model config object.
    target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))].
        Uses 0 padding to fill in unused bbox deltas.
    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]
    """
    # Positive anchors contribute to the loss, but negative and
    # neutral anchors (match value of 0 or -1) don't.
    indices = torch.eq(rpn_match, 1).squeeze(-1)      # [batch, anchors] bool
    rpn_bbox = rpn_bbox[indices]                      # [num_pos, 4]
    batch_counts = torch.sum(indices.float(), dim=1)  # positives per image

    # target_bbox is zero-padded; keep only the first batch_counts[i]
    # rows of each image's deltas.
    outputs = []
    for i in range(config.IMAGES_PER_GPU):
        count = int(batch_counts[i].item())
        outputs.append(target_bbox[i, :count])

    target_bbox = torch.cat(outputs, dim=0)           # [num_pos, 4]

    loss = F.smooth_l1_loss(rpn_bbox, target_bbox, reduction='mean')
    return loss
Example #11
    def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
        outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
        next_outputs = self.model(batch_next_state).detach().max(1)[0]
        target = self.gamma * next_outputs + batch_reward
        td_loss = F.smooth_l1_loss(outputs, target)
        self.optimizer.zero_grad()
        td_loss.backward(retain_graph=True)
        self.optimizer.step()
Example #12
    def optim_fn(reward, value, next_value, log_prob):
        target = reward + gamma * next_value
        delta = target - value
        policy_loss = -log_prob * delta.data
        value_loss = F.smooth_l1_loss(value, target.data)
        loss = policy_loss + value_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Example #13
    def forward(self, predictions, priors, targets):
        loc_data, conf_data, _ = predictions  # predicted boxes and class scores: (bs, -1, 4), (bs, -1, 2)
        num = loc_data.size(0)  # batch size
        num_priors = priors.size(0)

        # (bs, 21824, 4)
        loc_t = torch.Tensor(num, num_priors, 4)
        # (bs, 21824)
        conf_t = torch.LongTensor(num, num_priors)
        # (bs,num_obj, 5)
        for idx in range(num):
            truths = targets[idx][:, :-1].data  # cx,cy,w,h
            labels = targets[idx][:, -1].data  # 1 or 0
            defaults = priors.data  # default boxes
            match(0.35, truths, defaults, [0.1, 0.2], labels, loc_t, conf_t, idx)

        if self.device.type == 'cuda':
            loc_t = loc_t.to(self.device)
            conf_t = conf_t.to(self.device)

        # Indices of priors matched to a positive (class > 0) box.
        pos = conf_t > 0
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)

        # Flatten everything and compute the localization loss.
        loc_p = loc_data[pos_idx].view(-1, 4)  # predict
        loc_t = loc_t[pos_idx].view(-1, 4)  # label
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        batch_conf = conf_data.view(-1, 2)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[pos_idx | neg_idx].view(-1, 2)
        targets_weighted = conf_t[pos | neg]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N

        return loss_l, loss_c
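The log_sum_exp helper used above is not defined in this snippet; it matches the one defined inline in the OHEM example (Example #8) earlier on this page:

def log_sum_exp(x):
    # Numerically stable log(sum(exp(x), dim=1)), used to rank anchors by
    # classification loss for hard negative mining.
    x_max = x.data.max()
    return torch.log(torch.sum(torch.exp(x - x_max), dim=1, keepdim=True)) + x_max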
Example #14
def optimize_model():
    global last_sync
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see http://stackoverflow.com/a/19343/3343043 for
    # detailed explanation).
    batch = Transition(*zip(*transitions))

    # Compute a mask of non-final states and concatenate the batch elements
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                            batch.next_state)), dtype=torch.bool)

    non_final_next_states = torch.cat([s for s in batch.next_state
                                       if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken
    state_action_values = model(state_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states; detach so we don't backprop
    # through the expected action values.
    next_state_values = torch.zeros(BATCH_SIZE).type(Tensor)
    next_state_values[non_final_mask] = model(non_final_next_states).max(1)[0].detach()
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Compute Huber loss
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in model.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
def train(env, episodes, gamma=0.9):
    num_actions = env._board_size ** 2
    nets = [PolicyNet(num_actions), PolicyNet(num_actions)]
    optimizers = [optim.Adam(net.parameters(), lr=1e-2) for net in nets]
    ctrl_fns = [build_ctrl_fn(net, train=True) for net in nets]

    for episode in range(episodes):
        state = env.reset()
        rewards_all = []

        done = False
        while not done:
            action = ctrl_fns[state.cur_player](state)
            state, reward, done, _ = env.step(action)
            rewards_all.append(reward)

        rewards_all = np.array(rewards_all)
        rewards_all[:-1] -= rewards_all[1:]

        rewards_all = [rewards_all[0::2], rewards_all[1::2]]
        for (net, optimizer, rewards) in zip(nets, optimizers, rewards_all):
            Rs, R = [], 0
            for r in reversed(rewards):
                R = gamma * R + r
                Rs.insert(0, R)
            Rs = torch.tensor(Rs)
            Rs = (Rs - Rs.mean()) / (Rs.std() + 1e-3)

            policy_loss = []
            value_loss = []
            for t, (R, value) in enumerate(zip(Rs, net.values)):
                policy_loss.append(-net.log_probs[t] * (R - value.item()))
                value_loss.append(F.smooth_l1_loss(value, torch.tensor([[R]])))
            policy_loss = torch.stack(policy_loss).sum()
            value_loss = torch.stack(value_loss).sum()
            loss = policy_loss + value_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            del net.log_probs[:]
            del net.values[:]
        del rewards_all

    return [build_ctrl_fn(net, train=False) for net in nets]
Example #16
def finish_episode():
    R = 0
    saved_actions = model.saved_actions
    value_loss = 0
    rewards = []
    for r in model.rewards[::-1]:
        R = r + args.gamma * R
        rewards.insert(0, R)
    rewards = torch.Tensor(rewards)
    rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
    for (action, value), r in zip(saved_actions, rewards):
        reward = r - value.data[0,0]
        action.reinforce(reward)
        value_loss += F.smooth_l1_loss(value, Variable(torch.Tensor([r])))
    optimizer.zero_grad()
    final_nodes = [value_loss] + list(map(lambda p: p.action, saved_actions))
    gradients = [torch.ones(1)] + [None] * len(saved_actions)
    autograd.backward(final_nodes, gradients)
    optimizer.step()
    del model.rewards[:]
    del model.saved_actions[:]
Example #17
def finish_episode():
    R = 0
    saved_actions = model.saved_actions
    policy_losses = []
    value_losses = []
    rewards = []
    for r in model.rewards[::-1]:
        R = r + args.gamma * R
        rewards.insert(0, R)
    rewards = torch.Tensor(rewards)
    rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
    for (log_prob, value), r in zip(saved_actions, rewards):
        reward = r - value.data[0]
        policy_losses.append(-log_prob * reward)
        value_losses.append(F.smooth_l1_loss(value, Variable(torch.Tensor([r]))))
    optimizer.zero_grad()
    loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
    loss.backward()
    optimizer.step()
    del model.rewards[:]
    del model.saved_actions[:]
Example #18
def finish_episode():
    R = 0
    saved_actions = model.saved_actions
    policy_losses = []
    value_losses = []
    returns = []
    for r in model.rewards[::-1]:
        R = r + args.gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    returns = (returns - returns.mean()) / (returns.std() + eps)
    for (log_prob, value), R in zip(saved_actions, returns):
        advantage = R - value.item()
        policy_losses.append(-log_prob * advantage)
        value_losses.append(F.smooth_l1_loss(value, torch.tensor([R])))
    optimizer.zero_grad()
    loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
    loss.backward()
    optimizer.step()
    del model.rewards[:]
    del model.saved_actions[:]
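The finish_episode variants above assume the policy records per-step bookkeeping during the rollout. A minimal sketch of that side, following the pytorch/examples actor-critic script (names are assumptions):

from collections import namedtuple

SavedAction = namedtuple('SavedAction', ['log_prob', 'value'])

# At each step of the episode the model would record:
#   model.saved_actions.append(SavedAction(dist.log_prob(action), state_value))
#   model.rewards.append(reward)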
    def update_q(self, state, action, next_state):
        self.model.eval()

        variable = Variable(self.create_input(state))
        qvalue = self.model(variable)[action]

        next_variable = Variable(self.create_input(next_state))
        next_qvalue_max = self.model(next_variable).max()

        if next_state == GOAL:
            print('goal')
            target = qvalue + ETA * (1 - qvalue)
        else:
            target = qvalue + ETA * (GAMMA * next_qvalue_max - qvalue)

        self.model.train()

        loss = F.smooth_l1_loss(qvalue, target.detach())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
    def update_policy(self, history, episode):
        self.model.train()

        rewards = np.zeros((len(history)))
        targets = np.zeros((len(history), NUM_ACTION))
        for i, entry in enumerate(history):
            rewards[i] = entry[2]
            targets[i] = entry[4]
            
        discounted_rewards = self.discount_reward(rewards)
        targets = targets * discounted_rewards

        targets = torch.tensor(targets.reshape(-1, NUM_ACTION), dtype=torch.float32)

        self.optimizer.zero_grad()
        for i, entry in enumerate(history):
            # print(entry)
            loss = F.smooth_l1_loss(entry[3], targets[i])
            loss.backward()

        self.optimizer.step()
def optimize_model():
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see http://stackoverflow.com/a/19343/3343043 for
    # detailed explanation).
    batch = Transition(*zip(*transitions))

    # Compute a mask of non-final states and concatenate the batch elements
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                          batch.next_state)), device=device, dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state
                                                if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0].detach()
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Compute Huber loss
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in policy_net.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
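The optimize_model variants on this page assume a Transition namedtuple and a replay buffer. A minimal sketch in the style of the PyTorch DQN tutorial:

import random
from collections import namedtuple, deque

Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))

class ReplayMemory:
    def __init__(self, capacity):
        self.memory = deque(maxlen=capacity)

    def push(self, *args):
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)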
Example #22
def point_form_loss(boxes_p, conf_p, boxes_gt, conf_gt, priors,
                    match_threshold = 0.5, negative_odd = 3., verbose=True):
    '''
    boxes_predict, boxes_gt are point-form tensor/list come from network/ground truth.
    
    boxes_p: (batch_size, num_priors, 4)
    conf_p: (batch_size, num_priors, num_classes)
    boxes_gt: [[(4),...],...] (length=batch_size)
    conf_gt: [[(1),...],...] (length=batch_size)
    priors: point-form priors (num_priors, 4)
    
    Every prior will be matched against boxes_gt; priors whose best IoU falls
    below match_threshold are treated as background.
    batch_size = boxes_p.shape[0]    
    
    loc_losses = torch.zeros(batch_size)
    conf_losses = torch.zeros(batch_size)
    for i, (b_p, c_p, b_g_list, c_g_list) in enumerate(zip(boxes_p, conf_p, boxes_gt, conf_gt)):
        # Process each entry in the batch.
        if b_g_list.numel() == 0:
            # Skip images with no ground-truth boxes; pure background is not
            # trained on here, which keeps Hard Negative Mining meaningful.
            continue
        
        b_g = b_g_list.type(torch.float) # (num_gt, 4) 
        iou_mat = iou(priors, b_g) # use priors instead of b_p to ensure strong matching
        iou_max_value, iou_max_idx = torch.max(iou_mat, 1) # (num_priors)
        mask_p = iou_max_value > match_threshold # (num_priors) bool
        
        if mask_p.sum() == 0:
            if verbose:
                print("Prior box grid fail to match a given bbox. Maybe you need redesign network or redefine the transforms.")
            continue
        
        b_p_masked = b_p[mask_p,:]
        b_g_selected = torch.index_select(b_g, 0, iou_max_idx)
        b_g_selected = b_g_selected[mask_p,:]
        
        # Localization loss
        loc_losses[i] = F.smooth_l1_loss(b_p_masked, b_g_selected)
        
        # Confidence loss
        # The network outputs classes in [0, num_class], where 0 denotes
        # background (for the switch data: {0, 1, 2}). conf_gt, however, is in
        # [0, num_class - 1], where 0 denotes some object class.
        
        '''
        # Confidence loss without Hard Negative Mining
        c_g = c_g_list + 1 #(num_gt) int
        c_g_selected = torch.index_select(c_g, 0, iou_max_idx)
        c_g_selected[~mask] = 0 # set background class
        
        conf_losses[i] = F.cross_entropy(c_p, c_g_selected.long()) # cross_entropy requires int64 targets
        '''
        
        # Confidence loss with Hard Negative Mining
        c_g = c_g_list + 1 #(num_gt) int
        c_g_selected = torch.index_select(c_g, 0, iou_max_idx)
        c_g_selected[~mask_p] = 0 # set background class
        
        num_negative = (mask_p.sum().float() * negative_odd).floor().long()
        iou_max_value_n = iou_max_value[~mask_p]
        _, negative_idx = iou_max_value_n.sort(descending=True)
        _, negative_rank = negative_idx.sort()
        mask_n = negative_rank < num_negative
                
        loss_p = F.cross_entropy(c_p[mask_p,:], c_g_selected[mask_p].long())
        loss_n = F.cross_entropy(c_p[~mask_p][mask_n,:], c_g_selected[~mask_p][mask_n].long())
        
        conf_losses[i] = loss_p + loss_n 
        # Note: this is not equivalent to a single cross_entropy over
        # [mask_p | mask_n], since each call averages over its own subset.
    
    loc_loss = loc_losses.sum() 
    conf_loss = conf_losses.sum() 
    #loss = loc_loss + alpha * conf_loss
    
    return loc_loss, conf_loss
    
#point_form_loss = point_form_loss_matched_with_priors
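The iou helper used above is not shown. A common point-form implementation, sketched here assuming (x1, y1, x2, y2) boxes with positive area:

import torch

def iou(boxes_a, boxes_b):
    """Pairwise IoU: boxes_a [A, 4], boxes_b [B, 4] -> [A, B]."""
    tl = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])  # [A,B,2]
    br = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])  # [A,B,2]
    wh = (br - tl).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    area_a = ((boxes_a[:, 2] - boxes_a[:, 0]) *
              (boxes_a[:, 3] - boxes_a[:, 1]))[:, None]
    area_b = ((boxes_b[:, 2] - boxes_b[:, 0]) *
              (boxes_b[:, 3] - boxes_b[:, 1]))[None, :]
    return inter / (area_a + area_b - inter)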
Example #23
def optimize_model(optimizer,
                   policy_net,
                   target_net,
                   memory,
                   device,
                   GAMMA=0.99,
                   BATCH_SIZE=32,
                   n_steps=20,
                   double_dqn=False):
    torch.autograd.set_detect_anomaly(True)
    if len(memory) < BATCH_SIZE:
        return
    transitions, idx = memory.sample()
    """
    zip(*transitions) unzips the transitions into
    Transition(*) creates new named tuple
    batch.state - tuple of all the states (each state is a tensor)
    batch.next_state - tuple of all the next states (each state is a tensor)
    batch.reward - tuple of all the rewards (each reward is a float)
    batch.action - tuple of all the actions (each action is an int)    
    """
    batch = Transition(*zip(*transitions))

    actions = tuple((map(lambda a: torch.tensor([[a]], device=device),
                         batch.action)))
    rewards = tuple((map(lambda r: torch.tensor([r], device=device),
                         batch.reward)))

    non_final_mask = torch.tensor(tuple(
        map(lambda s: s is not None, batch.next_state)),
                                  device=device,
                                  dtype=torch.bool)

    non_final_next_states = torch.cat(
        [s for s in batch.next_state if s is not None]).to(device)

    state_batch = torch.cat(batch.state).to(device)
    action_batch = torch.cat(actions)
    reward_batch = torch.cat(rewards)

    state_action_values = policy_net(state_batch).gather(1, action_batch)

    if n_steps == 1:

        next_state_values = torch.zeros(BATCH_SIZE, device=device)
        if double_dqn:

            max_action = policy_net(non_final_next_states).max(
                1, keepdim=True)[1].detach()
            next_state_values[non_final_mask] = target_net(
                non_final_next_states).gather(1,
                                              max_action).squeeze(1).detach()

        else:
            next_state_values[non_final_mask] = target_net(
                non_final_next_states).max(1)[0].detach()

        # next_state_values.requires_grad = False

        expected_state_action_values = (next_state_values *
                                        GAMMA) + reward_batch

    else:
        expected_state_action_values = nstep_target(idx=idx,
                                                    policy_net=policy_net,
                                                    target_net=target_net,
                                                    steps=n_steps,
                                                    memory=memory,
                                                    device=device,
                                                    double_dqn=double_dqn)

    loss = F.smooth_l1_loss(state_action_values,
                            expected_state_action_values.unsqueeze(1))

    optimizer.zero_grad()
    loss.backward()

    for param in policy_net.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()

    return policy_net
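nstep_target is defined elsewhere; the idea is that the n-step return sums n discounted rewards and then bootstraps with the target network. A hypothetical sketch (not the original helper):

def nstep_return_sketch(rewards, bootstrap_q, gamma):
    # rewards: list of n reward tensors [B]; bootstrap_q: [B] target-net value
    # at step n (zero where the episode terminated earlier).
    G = bootstrap_q
    for r in reversed(rewards):
        G = r + gamma * G  # G_t = r_t + gamma * G_{t+1}
    return G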
    def forward(self, spt_ms, spt_rgb, qry_ms, qry_rgb, epoch):
        """
        :b:             number of tasks/batches.
        :setsz:         number of training pairs?
        :querysz        number of test pairs for few shot
        :param spt_ms:    [task_num, setsz, 16, h, w]
        :param spt_rgb:   [task_num, querysz, 3, h, w] 
        :param qry_ms:    [task_num, setsz, 16, h, w]
        :param qry_rgb:   [task_num, querysz, 3, h, w]

        :return:
        """

        spt_ms = spt_ms.squeeze()
        spt_rgb = spt_rgb.squeeze()
        qry_ms = qry_ms.squeeze()
        qry_rgb = qry_rgb.squeeze()

        task_num, setsz, c, h, w = spt_ms.size()
        _, querysz, c, _, _ = qry_ms.size()
        # losses_q[k] is the loss on step k of gradient descent (inner loop)
        losses_q = [0 for _ in range(self.update_step + 1)]
        # accuracy on step i of gradient descent (inner loop)
        corrects = [0 for _ in range(self.update_step + 1)]
        if (epoch < 4001):
            if (epoch % 2000 == 0) and (epoch > 1):
                decay = 2  #(epoch // 5) + 1
                self.update_lr = self.update_lr / decay
        print('inner-loop lr is: ', self.update_lr)
        for i in range(task_num):

            # 1. run the i-th task and compute loss for k=0, k is update step
            logits = self.net(spt_ms[i], vars=None, bn_training=True)
            loss = F.smooth_l1_loss(logits, spt_rgb[i])
            # gradients of the support loss w.r.t. the current parameters
            grad = torch.autograd.grad(loss, self.net.parameters())
            fast_weights = list(
                map(lambda p: p[1] - self.update_lr * p[0],
                    zip(grad, self.net.parameters())))
            # The two torch.no_grad() blocks below only record query-set
            # loss/PSNR for logging: the first before any inner-loop update,
            # the second after the first update. Neither needs gradients,
            # since only the last-step query loss is backpropagated.
            # This is the loss and accuracy before the first update:
            with torch.no_grad():
                # [setsz, nway]
                logits_q = self.net(qry_ms[i],
                                    self.net.parameters(),
                                    bn_training=True)
                loss_q = F.smooth_l1_loss(logits_q, qry_rgb[i])
                losses_q[0] += loss_q  # accumulate query loss across tasks

                pred_q = logits_q  # logits_q used to be cross_entropy loss, and
                # go through softmax to become pred_q.
                # calculate PSNR
                correct = errors.find_psnr(pred_q, qry_rgb[i])
                corrects[0] = corrects[0] + correct

            # this is the loss and accuracy after the first update
            with torch.no_grad():
                # [setsz, nway]
                logits_q = self.net(qry_ms[i], fast_weights, bn_training=True)
                loss_q = F.smooth_l1_loss(logits_q, qry_rgb[i])
                losses_q[1] += loss_q
                # [setsz]
                pred_q = logits_q
                correct = errors.find_psnr(pred_q, qry_rgb[i])
                corrects[1] = corrects[1] + correct

            for k in range(1, self.update_step):
                # 1. run the i-th task and compute loss for k=1~K-1
                logits = self.net(spt_ms[i], fast_weights, bn_training=True)
                loss = F.smooth_l1_loss(logits, spt_rgb[i])
                # 2. compute grad on theta_pi
                grad = torch.autograd.grad(loss, fast_weights)
                # 3. theta_pi = theta_pi - train_lr * grad
                fast_weights = list(
                    map(lambda p: p[1] - self.update_lr * p[0],
                        zip(grad, fast_weights)))

                logits_q = self.net(qry_ms[i], fast_weights, bn_training=True)
                self.valid_img = logits_q
                # loss_q will be overwritten and we just keep the loss_q on
                # last update step ==> losses_q[-1]
                loss_q = F.smooth_l1_loss(logits_q, qry_rgb[i])
                losses_q[k + 1] += loss_q

                with torch.no_grad():
                    pred_q = logits_q
                    # convert to numpy
                    correct = errors.find_psnr(pred_q, qry_rgb[i])
                    corrects[k + 1] = corrects[k + 1] + correct

        # end of all tasks
        # sum over all losses on query set across all tasks
        loss_q = losses_q[-1] / task_num
        # self.log[-1] += loss.item()
        # optimize theta parameters
        # In the Learner the update is with respect to accuracy of the training
        # set, but for meta_learner the meta_update is with respect to the test
        # set of each episode.
        self.meta_optim.zero_grad()
        loss_q.backward()  # second-order backward through the inner-loop updates
        # print('meta update')
        # for p in self.net.parameters()[:5]:
        # 	print(torch.norm(p).item())
        self.meta_optim.step()
        accs = np.average(np.array(corrects[-1]))  #/ (querysz * task_num)
        print('meta (outer-loop) lr is: ', self.get_lr(self.meta_optim))
        return accs, loss_q
Example #25
    y = f(x)
    return x, y


# Define model
fc = torch.nn.Linear(W_target.size(0), 1)

for batch_idx in count(1):
    # Get data
    batch_x, batch_y = get_batch()

    # Reset gradients
    fc.zero_grad()

    # Forward pass
    output = F.smooth_l1_loss(fc(batch_x), batch_y)
    loss = output.item()

    # Backward pass
    output.backward()

    # Apply gradients
    for param in fc.parameters():
        param.data.add_(-0.1 * param.grad.data)

    # Stop criterion
    if loss < 1e-3:
        break

print('Loss: {:.6f} after {} batches'.format(loss, batch_idx))
print('==> Learned function:\t' + poly_desc(fc.weight.view(-1), fc.bias))
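The manual update above is plain SGD on the smooth L1 objective; an equivalent formulation with torch.optim, shown for comparison (same learning rate):

optimizer = torch.optim.SGD(fc.parameters(), lr=0.1)
# per batch, instead of the manual parameter update:
#   optimizer.zero_grad()
#   F.smooth_l1_loss(fc(batch_x), batch_y).backward()
#   optimizer.step()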
def _td_loss(x_t, x_diff_t, x_tp1, terminal, discount_factor):
    x_target = x_diff_t + (1 - terminal.float()) * discount_factor * x_tp1
    return functional.smooth_l1_loss(x_t, x_target)
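Hypothetical usage of _td_loss for a batch of transitions (value_net, target_net, and the batch tensors are assumed):

v_t = value_net(states)                    # V(s_t), shape [B]
v_tp1 = target_net(next_states).detach()   # V(s_{t+1}), no gradient
loss = _td_loss(v_t, rewards, v_tp1, terminal=dones, discount_factor=0.99)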
Example #27
def main():
    env = gym.make(config.ENV_NAME)
    agent = DQN(env)
    optimizer = optim.Adam(agent.parameters(), lr=0.001)
    finished = False

    for epoch in range(config.EPOCHS):
        state = env.reset()
        for step in range(config.ITERATIONS):
            action = agent.get_action(state, 'egreedy')
            next_state, reward, done, _ = env.step(action[0, 0])
            if done:
                reward = -1
            agent.replay_memory.push(Transition(
                config.FloatTensor([state]),
                action,
                config.FloatTensor([reward]),
                config.FloatTensor([next_state]) if not done else None))
            state = next_state
            if len(agent.replay_memory) >= config.BATCH_SIZE:
                batch = agent.replay_memory.sample(config.BATCH_SIZE)
                batch = Transition(*zip(*batch))
                non_final_mask = config.ByteTensor(
                    [s is not None for s in batch.next_state])
                non_final_next_state_batch = Variable(torch.cat([
                    s for s in batch.next_state if s is not None]))

                state_batch = Variable(torch.cat(batch.state),
                                       requires_grad=False)
                action_batch = Variable(torch.cat(batch.action).view(-1, 1),
                                        requires_grad=False)
                reward_batch = Variable(torch.cat(batch.reward),
                                        requires_grad=False)

                q_values = agent(state_batch).gather(1, action_batch)
                s_values = torch.zeros(config.BATCH_SIZE).type(
                    config.FloatTensor)
                # Detach the bootstrap term so gradients don't flow into it.
                s_values[non_final_mask] = agent(
                    non_final_next_state_batch).max(1)[0].detach()
                expected_q_values = config.GAMMA * s_values + reward_batch
                # Elementwise Huber loss; comparing the sums would let
                # per-sample errors cancel.
                loss = F.smooth_l1_loss(q_values.squeeze(1),
                                        expected_q_values)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            if done:
                break
        agent.epsilon = config.EPSILON_START - epoch / config.EPOCHS * (
            config.EPSILON_START - config.EPSILON_END)
        if epoch % config.TEST_INTERVAL == 0:
            sum_reward = 0
            for _epoch in range(config.TEST_EPOCHS):
                epoch_reward = 0
                state = env.reset()
                for step in range(config.TEST_ITERATIONS):
                    # env.render()
                    action = agent.get_action(state)  # Default
                    state, reward, done, _ = env.step(action[0, 0])
                    if done:
                        break
                    epoch_reward += reward
                sum_reward += epoch_reward
            avg_reward = sum_reward / config.TEST_EPOCHS
            print('Epoch: {}, Average Reward: {}'.format(epoch, avg_reward))
            print('Current Epsilon:', agent.epsilon)
            if avg_reward > 195:
                finished = True
        if finished:
            break

    while True:
        state = env.reset()
        round_reward = 0
        for step in range(config.TEST_ITERATIONS):
            env.render()
            action = agent.get_action(state)  # Default
            state, reward, done, _ = env.step(action[0, 0])
            if done:
                break
            round_reward += reward
        print('Round reward:', round_reward)
    def forward(self, predictions, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)
            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        loc_data, conf_data, priors = predictions
        num = loc_data.size(0)
        priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0
        num_pos = pos.sum(dim=1, keepdim=True)

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c = loss_c.view(num, -1)
        loss_c[pos] = 0  # filter out pos boxes for now
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[pos_idx | neg_idx].view(
            -1, self.num_classes)
        targets_weighted = conf_t[pos | neg]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N

        N = num_pos.data.sum().double()
        loss_l = loss_l.double()
        loss_c = loss_c.double()
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
Example #29
    def learn(self, entropy=0):
        state_arr, state_prime_arr,action_arr, old_probs_arr, vals_arr, \
                reward_arr, hidden_in,hidden_out, dones_arr, batches = \
                    self.memory.generate_batches()

        state_arr = T.from_numpy(state_arr).float()
        state_prime_arr = T.from_numpy(state_prime_arr).float()
        action_arr = T.from_numpy(action_arr).float()
        old_probs_arr = T.from_numpy(old_probs_arr).float()
        vals_arr = T.from_numpy(vals_arr).float()
        reward_arr = T.from_numpy(reward_arr).float().unsqueeze(1)
        dones_arr = T.from_numpy(dones_arr).float().unsqueeze(1)

        first_hidden = hidden_in[0].detach()
        second_hidden = hidden_out[0].detach()
        for _ in range(self.n_epochs):
            v_prime = self.get_value(state_prime_arr, second_hidden).squeeze(1)
            td_target = reward_arr + self.gamma * v_prime * dones_arr
            v_s = self.get_value(state_arr, first_hidden).squeeze(1)
            delta = td_target - v_s
            delta = delta.detach().numpy()

            advantage_lst = []
            advantage = 0.0
            for item in delta[::-1]:
                advantage = self.gamma * self.gae_lambda * advantage + item[0]
                advantage_lst.append([advantage])
            advantage_lst.reverse()
            advantage = T.tensor(advantage_lst, dtype=T.float)
            # print("Advantage",advantage.shape)
            pi = self.get_prob(state_arr, first_hidden).probs

            pi_a = pi.squeeze(1).gather(1, action_arr.unsqueeze(1).long())
            ratio = T.exp(T.log(pi_a) - old_probs_arr.unsqueeze(1))
            surr1 = ratio * advantage
            surr2 = T.clamp(ratio, 1 - self.policy_clip,
                            1 + self.policy_clip) * advantage
            aloss = -T.min(surr1, surr2)
            closs = F.smooth_l1_loss(v_s, td_target.detach())
            loss = aloss + closs
            self.actor.optimizer.zero_grad()
            self.critic.optimizer.zero_grad()
            loss.mean().backward(retain_graph=True)
            self.actor.optimizer.step()
            self.critic.optimizer.step()
        self.memory.clear_memory()
    def forward(self, predictions, priors, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """

        loc_data, conf_data = predictions
        priors = priors
        num = loc_data.size(0)
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:,:-1].data
            labels = targets[idx][:,-1].data
            defaults = priors.data
            match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t,requires_grad=False)

        pos = conf_t > 0
        num_pos = pos.sum(dim=1, keepdim=True)

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1,4)
        loc_t = loc_t[pos_idx].view(-1,4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        #GIoU
        # transform_weights = (10.,10.,10.,10.)
        # x1p,y1p,x2p,y2p = self.bbox_transform(loc_p,transform_weights)
        # x1gt,y1gt,x2gt,y2gt = self.bbox_transform(loc_t,transform_weights)

        # #For predicted box Bp, ensuring x2p > x1p and y2p > y1p
        # x1p_hat = torch.min(x1p,x2p)
        # x2p_hat = torch.max(x1p,x2p)
        # y1p_hat = torch.min(y1p,y2p)
        # y2p_hat = torch.max(y1p,y2p)

        # #Ensuring x2g > x1g and y2g > y1g
        # x1g = torch.min(x1gt,x2gt)
        # x2g = torch.max(x1gt,x2gt)
        # y1g = torch.min(y1gt,y2gt)
        # y2g = torch.max(y1gt,y2gt)

        # #Calculating area Bg : Ag = (x2gt - x1gt)*(y2gt - y1gt)
        # Ag = (x2g - x1g)*(y2g - y1g)

        # #Calculating area Bp : Ap = (x2p - x1p)*(y2p - y1p)
        # Ap = (x2p_hat - x1p_hat)*(y2p_hat - y1p_hat)

        # #Calculating intersection I between Bp and Bg
        # x1I = torch.max(x1p_hat,x1g)
        # x2I = torch.min(x2p_hat,x2g)
        # y1I = torch.max(y1p_hat,y1g)
        # y2I = torch.min(y2p_hat,y2g)

        # I=torch.zeros(loc_p.size(0))

        # for i in range(loc_p.size(0)):
        #     if(x2I[i] > x1I[i] and y2I[i] > y1I[i]):
        #         I[i] = (x2I[i] - x1I[i])*(y2I[i] - y1I[i])   

        # #Finding the coordinate of smallest enclosing box Bc
        # x1c = torch.min(x1p_hat,x1g)
        # x2c = torch.max(x2p_hat,x2g)
        # y1c = torch.min(y1p_hat,y1g)
        # y2c = torch.max(y2p_hat,y2g)

        # #Calculating area of Bc : Ac = (x2c - x1c)*(y2c - y1c)
        # Ac = (x2c - x1c)*(y2c - y1c)

        # #IoU = I/U, where U = Ap + Ag - I
        # U = Ap + Ag - I
        # IoU = I/U
        # GIoU = IoU - (Ac-U)/Ac

        # #Loss GIoU
        # loss_l = torch.sum(1-GIoU)
        # loss_l = loss_l.mean()

        #Focal Loss
        # conf_p = conf_data.view(-1,self.num_classes)
        # loss_c = self.f_loss(conf_p,conf_t) #Focal Loss

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1,self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1))

        # Hard Negative Mining
        loss_c[pos.view(-1,1)] = 0 # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _,loss_idx = loss_c.sort(1, descending=True)
        _,idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1,keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[pos_idx | neg_idx].view(-1, self.num_classes)
        targets_weighted = conf_t[pos | neg]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')  # categorical cross-entropy
        # loss_c = self.f_loss(conf_p,targets_weighted) #Focal Loss
 
        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
Example #31
                                    dtype=torch.float32).cuda()
    else:
        target_scores = torch.zeros([BATCH_SIZE, agent_num, 5],
                                    dtype=torch.float32)

    for j in range(BATCH_SIZE):
        if done[j] is not True:
            target_scores[j] = target_model(
                states[j, 1:].unsqueeze(0)).squeeze(0).max(1)[0].view(
                    agent_num, 1)  #(batch, agent, 1)

    # print(reward)

    target_scores = target_scores * 0.999 + reward

    loss = F.smooth_l1_loss(pred_scores, target_scores)
    optim.zero_grad()
    loss.backward()
    # for param in model.parameters():
    #     param.grad.data.clamp_(-1, 1)
    optim.step()

    if i % TARGET_UPDATE == 0:
        target_model.load_state_dict(model.state_dict())

    if (i + 1) % 100 == 0:
        print('Iter:%d | loss:%.4f | pred_scores:%.4f | target_scores:%.4f' %
              (i + 1, loss.item(), torch.mean(
                  pred_scores[0]).item(), torch.mean(target_scores[0]).item()))

    if (i + 1) % 100 == 0:
Example #32
    def forward(self,
                src=None,
                tgt=None,
                src_lengths=None,
                tgt_lengths=None,
                bptt=False,
                batch=None):
        self.asr.decoder.noise.sigma = 0.3
        self.nmt_encoder.eval()
        self.tts_encoder.eval()
        if batch is not None:
            src, src_lengths = batch.src
            src_txt, src_txt_lengths = batch.src_txt
            tgt_txt, tgt_txt_lengths = batch.tgt_txt
            tgt, tgt_lengths = batch.tgt

        result = {}
        with torch.no_grad():
            self.asr.eval()
            asr_results = self.asr(src, src_txt, src_lengths, src_txt_lengths)
            result['asr_dec_out'] = asr_results['dec_out']
            result['asr_attns'] = asr_results['attns']

        _, memory_bank, memory_lengths = self.nmt.encode(
            asr_results['attns']['context'][:-1].detach(), src_txt_lengths - 2)
        #nmt_results = self.nmt(asr_results['attns']['context'][:-1].detach(), tgt_txt, src_txt_lengths, tgt_txt_lengths)
        result['trans'] = memory_bank
        with torch.no_grad():
            _, nmt_memory, _ = self.nmt_encoder(src_txt[1:-1],
                                                src_txt_lengths - 2)
            result['trans_tgt'] = nmt_memory.detach()
            if F.smooth_l1_loss(result['trans'],
                                result['trans_tgt']).item() > 0.005:
                memory_bank = nmt_memory
                #return result

        dec_out, attns = self.nmt.decoder(tgt_txt[:-1],
                                          memory_bank,
                                          memory_lengths=memory_lengths,
                                          tgt_lengths=tgt_txt_lengths)
        result['nmt_dec_out'] = dec_out
        result['nmt_attns'] = attns

        _, memory_bank, memory_lengths = self.tts.encoder(
            result['nmt_attns']['context'][:-1], tgt_txt_lengths)
        result['trans2'] = memory_bank
        with torch.no_grad():
            _, tts_memory, _ = self.tts_encoder(tgt_txt[1:-1],
                                                tgt_txt_lengths - 2)
            result['trans2_tgt'] = tts_memory.detach()
            if F.smooth_l1_loss(result['trans2'],
                                result['trans2_tgt']).item() > 0.005:
                memory_bank = tts_memory  # fall back to the reference TTS encoding
        dec_out, attns = self.tts.decoder(tgt[:-1],
                                          memory_bank,
                                          memory_lengths=memory_lengths,
                                          tgt_lengths=tgt_lengths)
        result['tts_dec_out'] = dec_out
        result['tts_attns'] = attns
        return result
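    # Note (illustrative, not from the original): the two smooth-L1 checks in
    # forward() act as a teacher-forcing gate. When the cascaded representation
    # drifts by more than 0.005 (mean smooth-L1) from the frozen reference
    # encoder's output, the decoder is fed the reference memory instead:
    #
    #   if F.smooth_l1_loss(student_mem, teacher_mem).item() > 0.005:
    #       memory_bank = teacher_mem  # hypothetical names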
Beispiel #33
    y = f(x)
    return x, y


#%% Declare the model
fc = torch.nn.Linear(W_target.size(0), 1)

for batch_idx in count(1):
    # Get data
    batch_x, batch_y = get_batch()

    # Reset gradients
    fc.zero_grad()

    # Forward pass
    output = F.smooth_l1_loss(fc(batch_x), batch_y)
    loss = output.item()

    # Backward pass
    output.backward()

    # Apply gradients
    for param in fc.parameters():
        param.data.add_(-0.1 * param.grad.data)

    # Stopping criterion
    if loss < 1e-3:
        plt.cla()
        plt.scatter(batch_x.data.numpy()[:, 0],
                    batch_y.data.numpy()[:, 0],
                    label='real curve',
Beispiel #34
    def train(self, model, data):
        tot_loss_lst = []
        pi_loss_lst = []
        entropy_lst = []
        move_entropy_lst = []
        v_loss_lst = []

        # to calculate fixed advantages before update
        data_with_adv = []
        for mini_batch in data:
            s, a, m, r, s_prime, done_mask, prob, need_move = mini_batch
            with torch.no_grad():
                pi, pi_move, v, _ = model(s)
                pi_prime, pi_m_prime, v_prime, _ = model(s_prime)

            td_target = r + self.gamma * v_prime * done_mask
            delta = td_target - v                           # [horizon * batch_size * 1]
            delta = delta.detach().cpu().numpy()

            advantage_lst = []
            advantage = np.array([0])
            for delta_t in delta[::-1]:
                advantage = self.gamma * self.lmbda * advantage + delta_t           
                advantage_lst.append(advantage)
            advantage_lst.reverse()
            advantage = torch.tensor(advantage_lst, dtype=torch.float, device=model.device)

            data_with_adv.append((s, a, m, r, s_prime, done_mask, prob, need_move, td_target, advantage))

        for i in range(self.K_epoch):
            for mini_batch in data_with_adv:
                s, a, m, r, s_prime, done_mask, prob, need_move, td_target, advantage = mini_batch
                pi, pi_move, v, _ = model(s)
                pi_prime, pi_m_prime, v_prime, _ = model(s_prime)

                pi_a = pi.gather(2, a)
                pi_m = pi_move.gather(2, m)
                pi_am = pi_a * (1 - need_move + need_move * pi_m)
                ratio = torch.exp(torch.log(pi_am) - torch.log(prob))  # a/b == exp(log(a)-log(b))

                surr1 = ratio * advantage
                surr2 = torch.clamp(ratio, 1-self.eps_clip, 1+self.eps_clip) * advantage
                entropy = -torch.log(pi_am)
                move_entropy = -need_move*torch.log(pi_m)

                surr_loss = -torch.min(surr1, surr2)
                v_loss = F.smooth_l1_loss(v, td_target.detach())
                entropy_loss = -1*self.entropy_coef*entropy
                loss = surr_loss + v_loss + entropy_loss.mean()
                loss = loss.mean()

                model.optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), self.grad_clip)
                model.optimizer.step()

                tot_loss_lst.append(loss.item())
                pi_loss_lst.append(surr_loss.mean().item())
                v_loss_lst.append(v_loss.item())
                entropy_lst.append(entropy.mean().item())
                n_need_move = torch.sum(need_move).item()
                if n_need_move == 0:
                    move_entropy_lst.append(0)
                else:
                    move_entropy_lst.append((torch.sum(move_entropy)/n_need_move).item())
        return np.mean(tot_loss_lst), np.mean(pi_loss_lst), np.mean(v_loss_lst), np.mean(entropy_lst), np.mean(move_entropy_lst)
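    # Side note (illustrative, not from the original): with gamma = lmbda = 1
    # the advantage recursion above is a reversed cumulative sum. For
    # delta = [1, 2, 3] the backwards loop gives advantage = [6, 5, 3]:
    # adv_2 = 3, adv_1 = 3 + 2 = 5, adv_0 = 5 + 1 = 6.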
Beispiel #35
    def part_forward(self, predictions, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        loc_data, conf_data, priors = predictions
    
        num = loc_data.size(0)
        priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            # SFD match strategy, swordli
            if ac: 
                sfd_match(self.threshold, truths, defaults, self.variance, labels,
                      loc_t, conf_t, idx)
            else:
                match(self.threshold, truths, defaults, self.variance, labels,
                      loc_t, conf_t, idx)
           
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_targets = Variable(loc_t, requires_grad=False)
        conf_targets = Variable(conf_t, requires_grad=False)

        ############# Localization Loss part ##############
        pos = conf_targets > 0 # ignore background
        num_pos = pos.long().sum(1, keepdim = True)
        
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_targets[pos_idx].view(-1, 4)
        loc_loss = F.smooth_l1_loss(loc_p, loc_t, size_average = False)

        ############## Confidence Loss part ###############
        """
        #focal loss implementation(1)
        pos_cls = conf_targets > -1 # exclude ignored anchors
        mask = pos_cls.unsqueeze(2).expand_as(conf_preds)
        conf_p = conf_preds[mask].view(-1, conf_preds.size(2)).clone()
        conf_t = conf_targets[pos_cls].view(-1).clone()
        p = F.softmax(conf_p, 1)
        p = p.clamp(1e-7, 1. - 1e-7) # to avoid loss going to inf
        c_mask = conf_p.data.new(conf_p.size(0), conf_p.size(1)).fill_(0)
        c_mask = Variable(c_mask)
        ids = conf_t.view(-1, 1)
        c_mask.scatter_(1, ids, 1.)
        p_t = (p*c_mask).sum(1).view(-1, 1)
        p_t_log = p_t.log()
        # This is the focal loss presented in the paper, eq. (5)
        conf_loss = -self.alpha * ((1 - p_t)**self.gamma * p_t_log)
        conf_loss = conf_loss.sum()
        """
        # focal loss implementation(2)
        pos_cls = conf_targets >-1
        mask = pos_cls.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[mask].view(-1, conf_data.size(2)).clone()
        p_t_log = -F.cross_entropy(conf_p, conf_targets[pos_cls], size_average = False)
        p_t = torch.exp(p_t_log)
        # This is focal loss presented in the paper eq(5)
        conf_loss = -self.alpha * ((1 - p_t)**self.gamma * p_t_log)

        # Guard against division by zero: cropping during data augmentation
        # can distort the boxes so that no positive anchors remain.
        N = max(1, num_pos.data.sum())
        conf_loss /= N  # normalized by the number of positives, not background
        loc_loss /= N
        return conf_loss, loc_loss
Beispiel #36
def regression_loss(logits, labels, seq_len, loss_type, normalize_indices,
                    var_lambda):
    """Loss function based on regressing to the correct indices.
  In the paper, this is called Cycle-back Regression. There are 3 variants
  of this loss:
  i) regression_mse: MSE of the predicted indices and ground truth indices.
  ii) regression_mse_var: MSE of the predicted indices that takes into account
  the variance of the similarities. This is important when the rate at which
  sequences go through different phases changes a lot. The variance scaling
  allows dynamic weighting of the MSE loss based on the similarities.
  iii) regression_huber: Huber loss between the predicted indices and ground
  truth indices.
  Args:
    logits: Tensor, Pre-softmax similarity scores after cycling back to the
      starting sequence, of shape (batch_size, seq_len, seq_len).
    labels: Tensor, Ground truth indices (the index where the cycle started),
      of shape (batch_size, seq_len).
    seq_len: Integer, Number of timesteps in the sequence embeddings.
    loss_type: String, This specifies the kind of regression loss function.
      Currently supported loss functions: regression_mse, regression_mse_var,
      regression_huber.
    normalize_indices: Boolean, If True, normalizes indices by sequence
      lengths. Useful for ensuring numerical instabilities don't arise as
      sequence indices can be large numbers.
    var_lambda: Float, Weight of the variance of the similarity predictions
      while cycling back. If this is high then low-variance similarities are
      preferred by the loss, while making this term low results in high
      variance of the similarities (more uniform/random matching).
  Returns:
     loss: Tensor, A scalar loss calculated using a variant of regression.
  """
    # logits (bs, seq_len, seq_len), labels (bs, seq_len)

    # steps of shape (bs, seq_len, seq_len) are indexes
    steps = torch.arange(seq_len)[None, None, :].expand_as(logits)
    steps = steps.float()

    beta = F.softmax(logits, dim=2)
    true_time = labels
    # pred_time of shape (bs, seq_len)
    pred_time = torch.sum(steps * beta, 2)

    if loss_type in ['regression_mse', 'regression_mse_var']:
        if 'var' in loss_type:
            tiled_pred_time = pred_time.unsqueeze(1).expand(-1, seq_len, -1)
            # Variance aware regression.
            # pred_time_variance of shape (batch_size, seq_len)
            pred_time_variance = torch.sum(
                torch.pow(steps - tiled_pred_time, 2) * beta, 2)

            # Using log of variance as it is numerically stabler.
            pred_time_log_var = torch.log(pred_time_variance)
            squared_error = torch.pow(true_time - pred_time, 2)
            return torch.mean(
                torch.exp(-pred_time_log_var) * squared_error +
                var_lambda * pred_time_log_var)

        else:
            return F.mse_loss(pred_time, true_time)
    elif loss_type == 'regression_huber':
        return F.smooth_l1_loss(pred_time, true_time)
    else:
        raise ValueError(
            'Unsupported regression loss %s. Supported losses are: '
            'regression_mse, regression_mse_var and regression_huber.' %
            loss_type)
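# A minimal smoke test for the loss above (illustrative; the tensor shapes
# follow the comment in the function body: logits (bs, seq_len, seq_len),
# labels (bs, seq_len)). The values here are made up.
import torch

logits_demo = torch.randn(2, 8, 8)
labels_demo = torch.arange(8, dtype=torch.float).expand(2, 8)  # identity alignment
loss_demo = regression_loss(logits_demo, labels_demo, seq_len=8,
                            loss_type='regression_mse_var',
                            normalize_indices=False, var_lambda=0.001)
print(loss_demo.item())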
Beispiel #37
    def optimize():
        ### Perform experience replay and train the network.
        nonlocal last_sync

        if len(memory) < BATCH_SIZE:
            return

        transitions = memory.sample(BATCH_SIZE)
        # Use the replay buffer to sample a batch of transitions

        batch = Transition(*zip(*transitions))

        # batch.state is a tuple of states
        # batch.action is a tuple of actions
        # batch.reward is a tuple of rewards

        state_batch = Variable(torch.cat(batch.state)).float()
        action_batch = Variable(torch.cat(batch.action)).long()
        reward_batch = Variable(torch.cat(batch.reward)).float()

        non_final_mask = torch.ByteTensor(
            tuple(map(lambda s: s is not None, batch.next_state)))

        non_final_next_states = Variable(torch.cat(
            [s for s in batch.next_state if s is not None]),
                                         volatile=True).float()
        # print(type(non_final_next_states.data))
        # Compute current Q value, takes only state and output value for every state-action pair
        # We choose Q based on action taken.
        # save_image(state_batch.data,"before_conv.png")
        state_action_values = dqn(state_batch).gather(1, action_batch)
        # Compute next Q value based on which action gives max Q values
        next_state_values = Variable(torch.zeros(BATCH_SIZE))
        next_state_values[non_final_mask] = dqn(non_final_next_states).max(
            1)[0]

        next_state_values.volatile = False
        # Compute the target of the current Q values
        expected_state_action_values = (next_state_values *
                                        GAMMA) + reward_batch

        # same as SmoothL1Loss
        # Creates a criterion that uses a squared term if
        # the absolute element-wise error falls below 1 and an L1 term otherwise.
        loss = F.smooth_l1_loss(state_action_values,
                                expected_state_action_values)

        # Clears the gradients of all optimized Variable
        optimizer.zero_grad()

        # Use autograd to compute the backward pass. This call will compute the
        # gradient of loss with respect to all Variables with requires_grad=True.
        # After this call w1.grad and w2.grad will be Variables holding the gradient
        # of the loss with respect to w1 and w2 respectively.
        loss.backward()

        #Clamps the gradients to (-1,1) in-place
        for param in dqn.parameters():
            param.grad.data.clamp_(-1, 1)

        optimizer.step()
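# Quick check of the "squared term below 1, L1 term otherwise" comment above
# (illustrative): with the default beta of 1, an error of 0.5 falls in the
# quadratic region and an error of 2.0 in the linear region.
import torch
import torch.nn.functional as F

x = torch.tensor([0.5, 2.0])
y = torch.zeros(2)
# 0.5 * 0.5**2 = 0.125 ; 2.0 - 0.5 = 1.5
print(F.smooth_l1_loss(x, y, reduction='none'))  # tensor([0.1250, 1.5000])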
Beispiel #38
    def forward(self, x, flip, loc_preds, loc_targets, cls_preds, cls_targets):
        '''Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).

        Args:
          loc_preds: (tensor) predicted locations, sized [batch_size, #anchors, 4].
          loc_targets: (tensor) encoded target locations, sized [batch_size, #anchors, 4].
          cls_preds: (tensor) predicted class confidences, sized [batch_size, #anchors, #classes].
          cls_targets: (tensor) encoded target labels, sized [batch_size, #anchors].

        loss:
          (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + FocalLoss(cls_preds, cls_targets).
        '''
        batch_size, num_boxes = cls_targets.size()
        pos = cls_targets > 0  # [N,#anchors]
        num_pos = pos.data.long().sum()
        # ==============================================================
        # weight
        # ==============================================================
        ''' 
	#####flip info
	n=flip.size()[0]
        iw=[]
        i=0
        while i<n:
            if flip[i]==1:
                y = x[i].data.cpu().numpy()
                y = np.flip(y,2)#left to right
                y = torch.from_numpy(y.copy())
                y = y.unsqueeze(0)
                y = Variable(y)
                z = self.features(y.cuda())
                r = z.size(3)       
                z = F.avg_pool2d(z, r)
                z = z.view(z.size(0), -1)
                z = F.relu(z)
                iw.append(torch.mean(z,1) )
            else:
                #train min 0.311, max 0.465  
                img = x[i].unsqueeze(0)
                z = self.features(img.cuda())
                r = z.size(3)       
                z = F.avg_pool2d(z, r)
                z = z.view(z.size(0), -1)
                z = F.relu(z)
                iw.append(torch.mean(z,1) )
                
            i+=1  
        iw=torch.cat(iw, dim=0) 
	'''
        #########no flip info
        z = self.features(x.cuda())
        r = z.size(3)
        z = F.avg_pool2d(z, r)
        z = z.view(z.size(0), -1)
        z = F.relu(z)
        iw = torch.mean(z, 1)

        #iw=(iw-0.311)*(1.0-0.5)/(0.465-0.311)+0.5#median 0.4497
        #iw=(iw-0.017)*(1.0-0.5)/(0.042-0.017)+0.5
        iw = (iw - 0.129) * (1.0 - 0.5) / (0.180 - 0.129) + 0.5  #block 5
        ################################################################
        # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
        ################################################################
        mask = pos.unsqueeze(2).expand_as(loc_preds)  # [N,#anchors,4]
        masked_loc_preds = loc_preds[mask].view(-1, 4)  # [#pos,4]
        masked_loc_targets = loc_targets[mask].view(-1, 4)  # [#pos,4]
        loc_loss = F.smooth_l1_loss(masked_loc_preds,
                                    masked_loc_targets,
                                    size_average=False)

        ################################################################
        # cls_loss = FocalLoss(loc_preds, loc_targets)
        ################################################################
        pos_neg = cls_targets > -1  # exclude ignored anchors
        mask = pos_neg.unsqueeze(2).expand_as(cls_preds)
        masked_cls_preds = cls_preds[mask].view(-1, self.num_classes)
        cls_loss = self.focal_loss_alt(masked_cls_preds, cls_targets[pos_neg],
                                       iw)  #cls_targets[pos_neg].unsqueeze(1)

        #print('loc_loss: %.3f|cls_loss: %.3f|iw: %.2f' % (loc_loss.data[0]/num_pos, cls_loss.data[0]/num_pos, iw.data[0]), end=' | ')
        loss = (loc_loss + cls_loss) / num_pos
        return loss, loc_loss.data[0] / num_pos, cls_loss.data[
            0] / num_pos, iw.data[0]
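    # Illustrative check of the rescaling used above: it maps mean activations
    # from the observed range [0.129, 0.180] linearly onto image weights in
    # [0.5, 1.0]:
    #
    #   iw = torch.tensor([0.129, 0.1545, 0.180])
    #   (iw - 0.129) * (1.0 - 0.5) / (0.180 - 0.129) + 0.5
    #   # -> tensor([0.5000, 0.7500, 1.0000])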
Beispiel #39
    def __call__(self, proposals_with_gt, densepose_outputs, densepose_confidences):
        losses = {}
        # densepose outputs are computed for all images and all bounding boxes;
        # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
        # the outputs will have size(0) == 3+1+2+1 == 7
        s, index_uv, u, v = densepose_outputs
        sigma_1, sigma_2, kappa_u, kappa_v = densepose_confidences
        conf_type = self.confidence_model_cfg.uv_confidence.type
        assert u.size(2) == v.size(2)
        assert u.size(3) == v.size(3)
        assert u.size(2) == index_uv.size(2)
        assert u.size(3) == index_uv.size(3)

        with torch.no_grad():
            (
                index_uv_img,
                i_with_dp,
                bbox_xywh_est,
                bbox_xywh_gt,
                index_gt_all,
                x_norm,
                y_norm,
                u_gt_all,
                v_gt_all,
                s_gt,
                index_bbox,
            ) = _extract_single_tensors_from_matches(  # noqa
                proposals_with_gt
            )
        n_batch = len(i_with_dp)

        # NOTE: we need to keep the same computation graph on all the GPUs to
        # perform reduction properly. Hence even if we have no data on one
        # of the GPUs, we still need to generate the computation graph.
        # Add fake (zero) loss in the form Tensor.sum() * 0
        if not n_batch:
            losses["loss_densepose_I"] = index_uv.sum() * 0
            losses["loss_densepose_S"] = s.sum() * 0
            if self.confidence_model_cfg.uv_confidence.enabled:
                losses["loss_densepose_UV"] = (u.sum() + v.sum()) * 0
                if conf_type == DensePoseUVConfidenceType.IID_ISO:
                    losses["loss_densepose_UV"] += sigma_2.sum() * 0
                elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
                    losses["loss_densepose_UV"] += (
                        sigma_2.sum() + kappa_u.sum() + kappa_v.sum()
                    ) * 0
            else:
                losses["loss_densepose_U"] = u.sum() * 0
                losses["loss_densepose_V"] = v.sum() * 0
            return losses

        zh = u.size(2)
        zw = u.size(3)

        (
            j_valid,
            y_lo,
            y_hi,
            x_lo,
            x_hi,
            w_ylo_xlo,
            w_ylo_xhi,
            w_yhi_xlo,
            w_yhi_xhi,
        ) = _grid_sampling_utilities(  # noqa
            zh, zw, bbox_xywh_est, bbox_xywh_gt, index_gt_all, x_norm, y_norm, index_bbox
        )

        j_valid_fg = j_valid * (index_gt_all > 0)

        u_gt = u_gt_all[j_valid_fg]
        u_est_all = _extract_at_points_packed(
            u[i_with_dp],
            index_bbox,
            index_gt_all,
            y_lo,
            y_hi,
            x_lo,
            x_hi,
            w_ylo_xlo,
            w_ylo_xhi,
            w_yhi_xlo,
            w_yhi_xhi,
        )
        u_est = u_est_all[j_valid_fg]

        v_gt = v_gt_all[j_valid_fg]
        v_est_all = _extract_at_points_packed(
            v[i_with_dp],
            index_bbox,
            index_gt_all,
            y_lo,
            y_hi,
            x_lo,
            x_hi,
            w_ylo_xlo,
            w_ylo_xhi,
            w_yhi_xlo,
            w_yhi_xhi,
        )
        v_est = v_est_all[j_valid_fg]

        index_uv_gt = index_gt_all[j_valid]
        index_uv_est_all = _extract_at_points_packed(
            index_uv[i_with_dp],
            index_bbox,
            slice(None),
            y_lo,
            y_hi,
            x_lo,
            x_hi,
            w_ylo_xlo[:, None],
            w_ylo_xhi[:, None],
            w_yhi_xlo[:, None],
            w_yhi_xhi[:, None],
        )
        index_uv_est = index_uv_est_all[j_valid, :]

        if self.confidence_model_cfg.uv_confidence.enabled:
            sigma_2_est_all = _extract_at_points_packed(
                sigma_2[i_with_dp],
                index_bbox,
                index_gt_all,
                y_lo,
                y_hi,
                x_lo,
                x_hi,
                w_ylo_xlo,
                w_ylo_xhi,
                w_yhi_xlo,
                w_yhi_xhi,
            )
            sigma_2_est = sigma_2_est_all[j_valid_fg]
            if conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]:
                kappa_u_est_all = _extract_at_points_packed(
                    kappa_u[i_with_dp],
                    index_bbox,
                    index_gt_all,
                    y_lo,
                    y_hi,
                    x_lo,
                    x_hi,
                    w_ylo_xlo,
                    w_ylo_xhi,
                    w_yhi_xlo,
                    w_yhi_xhi,
                )
                kappa_u_est = kappa_u_est_all[j_valid_fg]
                kappa_v_est_all = _extract_at_points_packed(
                    kappa_v[i_with_dp],
                    index_bbox,
                    index_gt_all,
                    y_lo,
                    y_hi,
                    x_lo,
                    x_hi,
                    w_ylo_xlo,
                    w_ylo_xhi,
                    w_yhi_xlo,
                    w_yhi_xhi,
                )
                kappa_v_est = kappa_v_est_all[j_valid_fg]

        # Resample everything to the estimated data size; s_est itself then
        # needs no resampling:
        s_est = s[i_with_dp]
        with torch.no_grad():
            s_gt = _resample_data(
                s_gt.unsqueeze(1),
                bbox_xywh_gt,
                bbox_xywh_est,
                self.heatmap_size,
                self.heatmap_size,
                mode="nearest",
                padding_mode="zeros",
            ).squeeze(1)

        # add point-based losses:
        if self.confidence_model_cfg.uv_confidence.enabled:
            if conf_type == DensePoseUVConfidenceType.IID_ISO:
                uv_loss = (
                    self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt)
                    * self.w_points
                )
                losses["loss_densepose_UV"] = uv_loss
            elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
                uv_loss = (
                    self.uv_loss_with_confidences(
                        u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt
                    )
                    * self.w_points
                )
                losses["loss_densepose_UV"] = uv_loss
            else:
                raise ValueError(f"Unknown confidence model type: {conf_type}")
        else:
            u_loss = F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points
            losses["loss_densepose_U"] = u_loss
            v_loss = F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points
            losses["loss_densepose_V"] = v_loss
        index_uv_loss = F.cross_entropy(index_uv_est, index_uv_gt.long()) * self.w_part
        losses["loss_densepose_I"] = index_uv_loss

        if self.n_segm_chan == 2:
            s_gt = s_gt > 0
        s_loss = F.cross_entropy(s_est, s_gt.long()) * self.w_segm
        losses["loss_densepose_S"] = s_loss
        return losses
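# Note on the "fake (zero) loss" trick above (illustrative): summing an output
# and multiplying by zero produces a zero-valued loss that still builds a
# graph over the parameters, so multi-GPU gradient reduction stays in sync
# even when one GPU has no matched ground truth.
import torch

u_demo = torch.randn(3, requires_grad=True)
(u_demo.sum() * 0).backward()
print(u_demo.grad)  # tensor([0., 0., 0.]) -- gradients exist and are zero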
Beispiel #40
        y = x.mm(self.W) + self.b[0]  # must use b[0] as a number
        return Variable(x), Variable(y)


# Learning target
poly = Polynom(degree=4)

# The model
nnet = torch.nn.Linear(poly.degree, 1)

# Train it
print('------- TRAINING ---------')
for batch_idx in count(1):
    batch_x, batch_y = poly.get_batch(64)
    nnet.zero_grad()
    output = F.smooth_l1_loss(nnet(batch_x), batch_y)
    output.backward()
    batch_loss = output.data[0]

    # Upgrade model
    for param in nnet.parameters():
        param.data.add_(-0.003 * param.grad.data)

    if batch_idx % 100 == 0:
        print('batch', batch_idx, 'loss', batch_loss)
    if batch_loss < 1e-3:
        break

print('------- RESULT ---------')
print('==> Learned function: {}'.format(
      Polynom.show(nnet.weight.data, nnet.bias.data)))
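# Note (illustrative): the manual parameter update above is plain SGD; an
# equivalent formulation with the optimizer API would be
#
#   opt = torch.optim.SGD(nnet.parameters(), lr=0.003)
#   ...
#   opt.zero_grad(); output.backward(); opt.step()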
Beispiel #41
def learn(env,
          num_episodes,
          experiment_dir,
          replay_memory_size=500000,
          replay_memory_init_size=50000,
          update_target_estimator_every=10000,
          discount_factor=0.99,
          epsilon_start=1.0,
          epsilon_end=0.1,
          epsilon_decay_steps=500000,
          batch_size=32,
          record_video_every=50):
    replay_memory = []
    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))
    
    #monitor_path = os.path.join(experiment_dir, "monitor")
    
    #if not os.path.exists(monitor_path):
        #os.makedirs(monitor_path)
    
    num_steps = 0
    state = env.reset()
    state = process(state)
    state = torch.cat(tuple([state] * 4), dim=1)
    
    def get_action():
        sample = random.random()
        epsilon = epsilon_end + (epsilon_start - epsilon_end) * math.exp(-1. * num_steps / epsilon_decay_steps)
        if sample > epsilon:
            with torch.no_grad():
                return (policy_net(((torch.from_numpy(state)).float()/255).to(device)).max(1)[1].data[0])
        else:
            return random.randrange(4)
        
    for i in range(replay_memory_init_size):
        action = get_action()
        next_state, reward, done, _ = env.step(action)
        next_state = process(next_state)
        next_state = np.concatenate((state[:,1:,:,:],next_state), axis=1)
        replay_memory.append(Transition(state, [[int(action)]],[reward], next_state, [not done]))
        if done:
            state = env.reset()
            state = process(state)
            state = np.concatenate([state] * 4, axis = 1)
        else:
            state = next_state

    #env = Monitor(env, directory=monitor_path, video_callable=lambda count: count % record_video_every == 0, resume=True)
    for i_episode in range(num_episodes):
        state = env.reset()
        state = process(state)
        state = np.concatenate([state] * 4, axis = 1)
        loss = None
        for t in count():
            if num_steps % update_target_estimator_every == 0:
                target_net.load_state_dict(policy_net.state_dict())

            action = get_action()
            next_state, reward, done, _ = env.step(action)
            num_steps+=1
            next_state = process(next_state)
            next_state = np.concatenate((state[:,1:,:,:],next_state), axis=1)

            if len(replay_memory) == replay_memory_size:
                replay_memory.pop(0)
            replay_memory.append(Transition(state, [[int(action)]],[reward], next_state, [not done]))
            # Update statistics
            stats.episode_rewards[i_episode] += reward
            stats.episode_lengths[i_episode] +=1
            if num_steps % 4 == 0:
                transitions = random.sample(replay_memory, batch_size)
                for i, t in enumerate(transitions):
                    transitions[i] = Transition(
                        torch.tensor(t.state, device=device, dtype=torch.float) / 255,
                        torch.tensor(t.action, device=device, dtype=torch.long),
                        torch.tensor(t.reward, device=device, dtype=torch.float),
                        torch.tensor(t.next_state, device=device, dtype=torch.float) / 255,
                        torch.tensor(t.done, device=device, dtype=torch.float))
                batch = Transition(*zip(*transitions))
                state_batch = torch.cat(batch.state)
                action_batch = torch.cat(batch.action)
                reward_batch = torch.cat(batch.reward)
                next_state_batch = torch.cat(batch.next_state)
                done_batch = torch.cat(batch.done)
                state_action_values = policy_net(state_batch).gather(1, action_batch)
                q_next_batch = target_net(next_state_batch).max(1)[0].detach()
                target_batch = reward_batch + discount_factor*done_batch*q_next_batch
                loss = F.smooth_l1_loss(state_action_values, target_batch.unsqueeze(1))
                optimizer.zero_grad()
                loss.backward()
                for param in policy_net.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()
            state = next_state
            if done:
                break
            if i_episode % 100 == 0:
                gc.collect()
            if i_episode % 1000 == 0:  # checkpoint every 1000 episodes
                torch.save({
                    'episode': i_episode // 1000 + 1,
                    'state_dict': policy_net.state_dict(),
                    'optimizer': optimizer.state_dict()},
                    'dqn{}.model'.format(i_episode // 1000 + 1))

                
        yield num_steps, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode+1],
            episode_rewards=stats.episode_rewards[:i_episode+1])

    return stats
Beispiel #42
    def forward(self, predictions, priors, targets, using_gpu):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """

        loc_data, conf_data, landm_data = predictions
        priors = priors
        num = loc_data.size(0)  # num = batch_size
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :4].data  # [num_objs, 4]
            labels = targets[idx][:, -1].data  # [num_objs]
            landms = targets[idx][:, 4:14].data  # [num_objs, 10]
            defaults = priors.data
            # Key step: match the prior (default) boxes to the ground-truth boxes
            match(self.threshold, truths, defaults, self.variance, labels,
                  landms, loc_t, conf_t, landm_t, idx)

        zeros = torch.tensor(0)
        if using_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()
            zeros = zeros.cuda()

        # landm Loss (Smooth L1)
        # Shape: [batch,num_priors,10]
        pos1 = conf_t > zeros  # indices of boxes with label > 0 (most are 0)
        # sum to count how many boxes qualify
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        pos = conf_t != zeros
        conf_t[pos] = 1

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        # conf_t: [batch, num_priors]
        # loss_c: [batch*num_priors, 1], the loss of each prior-box prediction
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        # sort by loss and take the highest-loss negatives for the update
        # zero out the loss at positive indices (pos marks the positive samples)
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        # reshape loss_c from [batch*num_priors, 1] to [batch, num_priors]
        loss_c = loss_c.view(num, -1)
        # sort in descending order and take the sort indices
        _, loss_idx = loss_c.sort(1, descending=True)
        # sort those indices ascending to obtain each box's rank
        _, idx_rank = loss_idx.sort(1)
        # num_pos: [batch, 1], the number of objects in each sample
        num_pos = pos.long().sum(1, keepdim=True)
        # derive the number of negatives from the object count (negpos_ratio x positives)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        # 获取到负样本的下标
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        # compute the confidence loss over both positive and negative samples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        # select the predictions indexed by pos_idx and neg_idx for the loss
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # normalize the losses and return
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1

        return loss_l, loss_c, loss_landm
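# Side note (illustrative, not from the original): the hard-negative-mining
# double sort above converts per-box losses into ranks. A tiny standalone demo:
import torch

loss_demo = torch.tensor([[0.1, 0.9, 0.4, 0.7]])
_, loss_idx = loss_demo.sort(1, descending=True)  # box order by loss: [1, 3, 2, 0]
_, idx_rank = loss_idx.sort(1)                    # rank of each box:  [3, 0, 2, 1]
neg = idx_rank < torch.tensor([[2]])              # keep the 2 hardest negatives
print(neg)  # tensor([[False,  True, False,  True]])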
Beispiel #43
    def learn_from_experience(self,
                              data,
                              entropy_coeff,
                              normalize_returns=True,
                              normalize_advantages=True,
                              clip_grad=True):

        # Sanity Check
        assert len(data['tstep']) == len(data['obs']) == len(data['act']) == len(data['logp']) == len(data['val']) \
               == len(data['rew']) == len(data['entropy']) == len(data['disc_rtg_rews'])
        assert len(data['per_episode_rews']) == len(data['per_episode_length'])

        # Don't need to backprop through returns
        returns = torch.tensor(data['disc_rtg_rews'])

        if normalize_returns:
            # returns = (returns - returns.mean()) / returns.std()
            returns = returns / returns.std()

        # Calculate advantages separately (to apply normalization)
        advantages = []
        for return_, value in zip(returns, data['val']):
            advantages.append(return_ - value)
            #advantages.append(return_)

        advantages = torch.tensor(advantages)

        if normalize_advantages:
            advantages = (advantages - advantages.mean()) / advantages.std()

        assert len(advantages) == len(data['tstep'])

        # Zero out gradients before calculating loss
        model.optimizer.zero_grad()

        # Calculate actor and critic loss
        actor_loss = []
        critic_loss = []
        for logprob, advantage, return_, value in zip(data['logp'], advantages,
                                                      returns, data['val']):
            actor_loss.append(-(logprob * advantage))
            # Why L1 loss? From pytorch doc:
            # It is less sensitive to outliers than the MSELoss and in some cases prevents exploding gradients
            critic_loss.append(F.smooth_l1_loss(return_, torch.squeeze(value)))
            # critic_loss.append(advantage.pow(2))

            # Entropy Loss (https://medium.com/@awjuliani/maximum-entropy-policies-in-reinforcement-learning-everyday-life-f5a1cc18d32d)
            # https://jaromiru.com/2017/03/26/lets-make-an-a3c-implementation/

        actor_loss = torch.stack(actor_loss).mean()
        critic_loss = 0.5 * torch.stack(critic_loss).mean()
        entropy_avg = torch.stack(data['entropy']).mean()
        entropy_loss = -(entropy_coeff * entropy_avg)
        total_loss = actor_loss + critic_loss + entropy_loss

        # Perform backprop step
        total_loss.backward()
        if clip_grad:
            torch.nn.utils.clip_grad_norm_(self.parameters(), 0.5)
        model.optimizer.step()

        # Compute info for logging
        avg_ep_len = torch.tensor(data['per_episode_length'],
                                  requires_grad=False,
                                  dtype=torch.float).mean().item()
        avg_ep_raw_rew = torch.tensor(data['per_episode_rews'],
                                      requires_grad=False,
                                      dtype=torch.float).mean().item()
        epoch_timesteps = data['tstep'][-1]
        num_episodes = len(data['per_episode_length'])

        # Return logging info
        return dict(actor_loss=actor_loss,
                    critic_loss=critic_loss,
                    entropy_loss=entropy_loss,
                    entropy_avg=entropy_avg,
                    total_loss=total_loss,
                    avg_ep_len=avg_ep_len,
                    avg_ep_raw_rew=avg_ep_raw_rew,
                    epoch_timesteps=epoch_timesteps,
                    num_episodes=num_episodes,
                    advantages=advantages,
                    pred_values=data['val'],
                    disc_rews=returns)
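# Illustrative check of the "less sensitive to outliers" comment above: for a
# large error the smooth-L1 gradient saturates at 1, while the MSE gradient
# keeps growing with the error.
import torch
import torch.nn.functional as F

v = torch.tensor([10.0], requires_grad=True)
F.smooth_l1_loss(v, torch.zeros(1)).backward()
print(v.grad)  # tensor([1.])
v.grad = None
F.mse_loss(v, torch.zeros(1)).backward()
print(v.grad)  # tensor([20.])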
Beispiel #44
def train(epoch):
    epoch_time = time.time()
    epoch_loss = 0
    epoch_error0 = 0
    epoch_error1 = 0
    epoch_error2 = 0
    valid_iteration = 0

    if opt.model not in ('MyGANet3', 'MyGANet4', 'MyGANet4_8', 'MyGANet5',
                         'MyGANet4_8_rf', 'MyGANet9_t2', 'MyGANet9_t3'):
        model.train()

    for iteration, batch in enumerate(training_data_loader):
        input1, input2, target = Variable(batch[0], requires_grad=True), Variable(batch[1],
                                                                                  requires_grad=True), Variable(
            batch[2], requires_grad=False)
        if cuda:
            input1 = input1.cuda()
            input2 = input2.cuda()
            target = target.cuda()

        target = torch.squeeze(target, 1)
        mask = target < opt.max_disp
        mask.detach_()
        valid = target[mask].size()[0]

        start_full_time = time.time()
        if valid > 0:
            optimizer.zero_grad()
            # T1 train
            if opt.model in ('GANet11', 'MyGANet', 'MyGANet2', 'MyGANet4_8_t1',
                             'MyGANet5_t1', 'MyGANet4_8_rf_t1', 'MyGANet9_t1'):
                disp1, disp2 = model(input1, input2)
                disp0 = (disp1 + disp2) / 2.
                if opt.kitti or opt.kitti2015:
                    loss = 0.4 * F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean') + 1.2 * criterion(
                        disp2[mask], target[mask])
                else:
                    loss = 0.4 * F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean') + 1.2 * F.smooth_l1_loss(
                        disp2[mask], target[mask], reduction='mean')
            # T2 train
            elif opt.model in ('MyGANet5', 'MyGANet4_8_rf', 'MyGANet9_t2'):
                disp0, disp1, disp2 = model(input1, input2)
                loss0 = F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean')
                if opt.kitti or opt.kitti2015:
                    loss = 0.4 * (0.9 - (loss0 - F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean'))) + \
                           1.2 * (0.9 - (loss0 - criterion(disp2[mask],target[mask])))
                else:
                    loss = 0.4 * (0.9 - (loss0 - F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean'))) + \
                           1.2 * (0.9 - (loss0 - F.smooth_l1_loss(disp2[mask], target[mask], reduction='mean')))
            # T3 train
            elif opt.model in ('MyGANet3', 'MyGANet4', 'MyGANet4_8', 'MyGANet9_t3'):
                disp0, disp1, disp2 = model(input1, input2)
                loss1 = F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean')
                if opt.kitti or opt.kitti2015:
                    loss = 0.9-(loss1-criterion(disp2[mask], target[mask]))
                else:
                    loss = 0.9-(loss1-F.smooth_l1_loss(disp2[mask], target[mask], reduction='mean'))
            elif opt.model == 'MyGANet9':
                disp00, disp0, disp11, disp1, disp2 = model(input1, input2)
                if opt.kitti or opt.kitti2015:
                    loss = 0.2 * F.smooth_l1_loss(disp00[mask], target[mask], reduction='mean') + \
                           0.4 * F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean') + \
                           0.6 * F.smooth_l1_loss(disp11[mask], target[mask], reduction='mean') + \
                           1 * F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean') + \
                           1 * criterion(disp2[mask], target[mask])
                else:
                    loss = 0.2 * F.smooth_l1_loss(disp00[mask], target[mask], reduction='mean') + \
                           0.4 * F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean') + \
                           0.6 * F.smooth_l1_loss(disp11[mask], target[mask], reduction='mean') + \
                           1 * F.smooth_l1_loss(disp1[mask], target[mask], reduction='mean') + \
                           1 * F.smooth_l1_loss(disp2[mask], target[mask], reduction='mean')
            elif opt.model in ('GANet_deep', 'CasGANet10'):
                disp0, disp1, disp2 = model(input1, input2)
                if opt.kitti or opt.kitti2015:
                    loss = 0.2 * F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean') + 0.6 * F.smooth_l1_loss(
                        disp1[mask], target[mask], reduction='mean') + criterion(disp2[mask], target[mask])
                else:
                    loss = 0.2 * F.smooth_l1_loss(disp0[mask], target[mask], reduction='mean') + 0.6 * F.smooth_l1_loss(
                        disp1[mask], target[mask], reduction='mean') + F.smooth_l1_loss(disp2[mask], target[mask],
                                                                                        reduction='mean')
            else:
                raise Exception("No suitable model found ...")

            loss.backward()
            optimizer.step()
            error0 = torch.mean(torch.abs(disp0[mask] - target[mask]))
            error1 = torch.mean(torch.abs(disp1[mask] - target[mask]))
            error2 = torch.mean(torch.abs(disp2[mask] - target[mask]))

            epoch_loss += loss.item()
            valid_iteration += 1
            epoch_error0 += error0.item()
            epoch_error1 += error1.item()
            epoch_error2 += error2.item()
            print("===> Epoch[{}]({}/{}): Loss: {:.4f}, Error: ({:.4f} {:.4f} {:.4f}), Time:{:.2f}s".format(epoch,
                                                                                                            iteration,
                                                                                                            len(
                                                                                                                training_data_loader),
                                                                                                            loss.item(),
                                                                                                            error0.item(),
                                                                                                            error1.item(),
                                                                                                            error2.item(),
                                                                                                            time.time() - start_full_time))
            sys.stdout.flush()

    print("===> Epoch {} Complete: Avg. Loss: {:.4f}, Avg. Error: ({:.4f} {:.4f} {:.4f}), Time:{:.2f}min".format(epoch,
                                                                                                                 epoch_loss / valid_iteration,
                                                                                                                 epoch_error0 / valid_iteration,
                                                                                                                 epoch_error1 / valid_iteration,
                                                                                                                 epoch_error2 / valid_iteration,
                                                                                                                 (time.time() - epoch_time) / 60))
    return epoch_loss / valid_iteration
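# Illustrative note on the masking pattern above: indexing predictions and
# targets with the same boolean mask flattens both to the valid pixels, so
# the loss ignores disparities at or beyond max_disp (192 is an assumed value).
import torch
import torch.nn.functional as F

disp_demo = torch.tensor([[1.0, 50.0], [3.0, 4.0]])
target_demo = torch.tensor([[1.5, 999.0], [2.0, 4.0]])
mask_demo = target_demo < 192
print(F.smooth_l1_loss(disp_demo[mask_demo], target_demo[mask_demo],
                       reduction='mean'))  # tensor(0.2083)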
Beispiel #45
    def finetunning(self, spt_ms, spt_rgb, qry_ms, qry_rgb):
        """
        :param spt_ms:    [task_num, setsz, 16, h, w]
        :param spt_rgb:   [task_num, setsz, 16, h, w]
        :param qry_ms:    [task_num, setsz, 16, h, w]
        :param qry_rgb:   [task_num, setsz, 16, h, w]
        :return:
        """
        assert len(spt_ms.shape) == 4

        querysz = qry_ms.size(0)

        corrects = [0 for _ in range(self.update_step_test + 1)]

        # In order not to ruin the state of running_mean/variance and
        # bn_weight/bias, we fine-tune on a copy of the model instead of self.net
        net = deepcopy(self.net)

        # 1. run the i-th task and compute loss for k=0
        logits = net(spt_ms)
        loss = F.cross_entropy(logits, spt_rgb)
        grad = torch.autograd.grad(loss, net.parameters())
        fast_weights = list(
            map(lambda p: p[1] - self.update_lr * p[0],
                zip(grad, net.parameters())))

        # this is the loss and accuracy before first update
        with torch.no_grad():
            # [setsz, nway]
            pred_q = net(qry_ms, net.parameters(), bn_training=True)
            # [setsz]
            # pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
            # scalar
            correct = torch.eq(pred_q, qry_rgb).sum().item()
            corrects[0] = corrects[0] + correct

        # this is the loss and accuracy after the first update
        with torch.no_grad():
            # [setsz, nway]
            pred_q = net(qry_ms, fast_weights, bn_training=True)
            # [setsz]
            # pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
            # scalar
            correct = torch.eq(pred_q, qry_rgb).sum().item()
            corrects[1] = corrects[1] + correct

        for k in range(1, self.update_step_test):
            # 1. run the i-th task and compute loss for k=1~K-1
            logits = net(spt_ms, fast_weights, bn_training=True)
            loss = F.cross_entropy(logits, spt_rgb)
            # 2. compute grad on theta_pi
            grad = torch.autograd.grad(loss, fast_weights)
            # 3. theta_pi = theta_pi - train_lr * grad
            fast_weights = list(
                map(lambda p: p[1] - self.update_lr * p[0],
                    zip(grad, fast_weights)))

            pred_q = net(qry_ms, fast_weights, bn_training=True)
            # loss_q is overwritten each step; only the last update step's value is kept.
            loss_q = F.smooth_l1_loss(pred_q, qry_rgb)

            with torch.no_grad():
                # pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                correct = torch.eq(pred_q,
                                   qry_rgb).sum().item()  # convert to a Python number
                corrects[k + 1] = corrects[k + 1] + correct

        del net

        accs = np.array(corrects) / querysz

        return accs
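# Illustrative one-step version of the fast-weights update used above:
# theta' = theta - update_lr * grad, computed functionally so the original
# parameters (and their .grad fields) are untouched.
import torch

w = torch.tensor([1.0, 2.0], requires_grad=True)
(g,) = torch.autograd.grad((w ** 2).sum(), [w])
fast_w = w - 0.1 * g
print(fast_w)  # tensor([0.8000, 1.6000], grad_fn=<SubBackward0>)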
Beispiel #46
def regression_loss(prediction_normalized, meta, alpha=1., **kwargs):
    pred = prediction_normalized[0]
    kp = meta['keypts_normalized'].to(pred.device)
    B, nA, _ = pred.shape
    return F.smooth_l1_loss(pred * alpha, kp * alpha)
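# Note (illustrative): multiplying both inputs by alpha is not a plain loss
# rescale for smooth L1 -- it shifts errors between the quadratic and linear
# regions of the loss.
import torch
import torch.nn.functional as F

p, t = torch.tensor([0.4]), torch.tensor([0.0])
print(F.smooth_l1_loss(p, t))            # tensor(0.0800): 0.5 * 0.4**2
print(F.smooth_l1_loss(p * 10, t * 10))  # tensor(3.5000): 4.0 - 0.5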
Beispiel #47
def finish_episode(*,
                   model,
                   optimizer,
                   history,
                   gamma: float = 0.1,
                   return_means: bool = False) -> Optional[tuple]:
    """Calculate the losses and backprop them through the models NN."""
    # initialize a few variables
    # eps needs to be a tensor now
    eps = Tensor([np.finfo(np.float32).eps])  # machine epsilon
    losses = deque()
    returns_to_average = deque()
    species_actions = deque()  # init
    actions_per_agent = deque()  # init
    for (_, agent_rewards, saved_actions) in history:

        R = 0  # The discounted reward
        rewards = deque()
        policy_losses = deque()
        state_value_losses = deque()

        # reverse rewards (it's a deque!)
        agent_rewards.reverse()

        # iterate over the rewards in reverse so the discounted return
        # accumulates from the most recent action backwards
        for r in agent_rewards:
            returns_to_average.append(r)  # for later averaging
            R = r + gamma * R  # discount!
            rewards.appendleft(R)  # deque power baby!

        rewards = torch.Tensor(rewards).type(dtype)  # use gpu if available
        rewards = (rewards - rewards.mean()) / (rewards.std() + eps)
        # The eps should prevent NaNs from a zero std, but in practice it does
        # not always; as a fallback, convert any remaining NaNs to 0.
        rewards[rewards != rewards] = 0  # should convert all NaN to 0

        actions_per_agent.clear()  # clear the deque
        # now iterate over all (log-probability, state-value, reward) triples
        for (log_prob, state_value, action), r in zip(saved_actions, rewards):
            actions_per_agent.append(action)  # save action for later
            reward = r - state_value.item()  # advantage: return minus value estimate
            policy_losses.append(-log_prob * reward)
            # calculate the (smooth) L^1 loss = least absolute deviation
            state_value_losses.append(
                F.smooth_l1_loss(state_value,
                                 Variable(torch.Tensor([r]).type(dtype))))

        species_actions.append(actions_per_agent.copy())

        # empty the gradient of the optimizer
        optimizer.zero_grad()

        # calculate the loss
        losses.append(
            torch.stack(list(policy_losses)).sum() +
            torch.stack(list(state_value_losses)).sum())

    # average all losses
    loss = torch.stack(list(losses)).mean()

    # backpropagate the loss
    loss.backward()
    optimizer.step()

    # free memory
    losses.clear()  # it's a deque

    # if output is wanted
    if return_means:
        ret_avg = np.mean(returns_to_average)
        returns_to_average.clear()
        return loss, ret_avg, species_actions.copy()
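# Illustrative check of the NaN guard above: `x != x` is True only for NaN,
# so the assignment rewrites NaN entries to 0 and leaves the rest intact.
import torch

r = torch.tensor([1.0, float('nan'), 2.0])
r[r != r] = 0
print(r)  # tensor([1., 0., 2.])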
Beispiel #48
    def forward(self,
                odm_data,
                priors,
                loc_targets,
                cls_targets,
                arm_data=None,
                filter_object=False):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)
            loc_targets, cls_targets (tensors): Ground truth boxes and labels
                for a batch.
            arm_data (tuple): ARM branch outputs containing arm_loc and arm_conf
            filter_object: whether to filter out predictions according to the
                ARM confidence score
        """

        loc_data, conf_data = odm_data
        if arm_data:
            arm_loc, arm_conf = arm_data

        num = loc_data.size(0)
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.Tensor(num, num_priors)
        for idx in range(num):
            truths = loc_targets[idx]
            labels = cls_targets[idx] + 1  # background as 0

            truths = truths.to(self.opt.device)
            labels = labels.to(self.opt.device)

            # for object detection
            if self.num_classes == 2:
                labels = labels > 0
            if arm_data:
                refine_match(self.threshold, truths, priors, self.variance,
                             labels, loc_t, conf_t, idx, arm_loc[idx])
            else:
                match(self.threshold, truths, priors, self.variance, labels,
                      loc_t, conf_t, idx)

        if arm_data and filter_object:
            arm_conf_data = arm_conf[:, :, 1]
            pos = conf_t > 0
            object_score_index = arm_conf_data <= self.object_score
            pos[object_score_index] = 0

        else:
            pos = conf_t > 0

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loc_t = loc_t.detach()
        loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1,
            conf_t.view(-1, 1).long())

        # Hard Negative Mining
        loss_c[pos.view(-1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1).detach()
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p,
                                 targets_weighted.long(),
                                 size_average=False)

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = num_pos.sum().item()
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
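# Illustrative sketch of the filter_object step above: positives whose ARM
# objectness score is at or below object_score are dropped before computing
# the ODM losses (0.5 is an assumed threshold).
import torch

conf_demo = torch.tensor([[0., 1., 2., 1.]])
arm_obj = torch.tensor([[0.9, 0.2, 0.8, 0.4]])
pos = conf_demo > 0
pos[arm_obj <= 0.5] = False
print(pos)  # tensor([[False, False,  True, False]])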
Beispiel #49
optimizer = torch.optim.SGD(fc.parameters(), lr=0.1, momentum=0.9)

y_old = []
x_old = []
for batch_idx in count(1):
    # Get data
    batch_x, batch_y = get_batch()

    # Reset gradients
    fc.zero_grad()

    # Forward pass
    y_est = fc(batch_x)
    y_old.append(y_est.squeeze().data.numpy())
    x_old.append(batch_x[:,0].data.numpy())
    output = F.smooth_l1_loss(y_est, batch_y)
    loss = output.data[0]

    # Backward pass
    output.backward()

    # Apply gradients
    for param in fc.parameters():
        param.data.add_(-0.1 * param.grad.data)

    # Stop criterion
    if loss < 1e-3:
        break

print('Loss: {:.6f} after {} batches'.format(loss, batch_idx))
print('==> Learned function:\t' + poly_desc(fc.weight.data.view(-1), fc.bias.data))
Beispiel #50
        q_t = agent.forward(s_t)
        # epsilon greedy action selection
        if np.random.uniform() > epsilon:
            a_t = torch.argmax(q_t)
        else:
            a_t = np.random.randint(n_actions)
        # transition and get reward
        r_t = env.step(a_t)
        # get next states info
        s_next = to_torch(env.get_agent_loc().reshape(1, -1))
        max_q_next = torch.max(agent.forward(s_next))
        # compute TD target
        q_target = r_t + gamma * max_q_next

        # update weights
        loss = F.smooth_l1_loss(q_t[:, a_t], q_target.detach())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # update R and n steps
        step += 1
        cumulative_reward += r_t * gamma**step

        # termination condition
        if env.is_terminal():
            break

    log_return.append(cumulative_reward)
    log_steps.append(step)
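# to_torch above is assumed to be a small conversion helper along these lines
# (hypothetical, not from the source):
def to_torch(x):
    return torch.tensor(x, dtype=torch.float32)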
Beispiel #51
    def forward(self, predictions, priors, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds and conf preds
            from SSD net; prior boxes are passed separately via `priors`.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """

        loc_data, conf_data = predictions
        num = loc_data.size(0)
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:,:-1].data
            labels = targets[idx][:,-1].data
            defaults = priors.data
            match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets (Variable wrapping is a no-op in modern PyTorch)
        loc_t = loc_t.detach()
        conf_t = conf_t.detach()

        pos = conf_t > 0

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1,4)
        loc_t = loc_t[pos_idx].view(-1,4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1,self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1))

        # Hard Negative Mining
        loss_c[pos.view(-1)] = 0 # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _,loss_idx = loss_c.sort(1, descending=True)
        _,idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1,keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx | neg_idx)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos | neg)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N

        N = num_pos.data.sum()
        loss_l/=N
        loss_c/=N
        return loss_l,loss_c
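# log_sum_exp is referenced above but not defined in this snippet; ports of
# ssd.pytorch commonly define it like this (a sketch):
import torch

def log_sum_exp(x):
    """Numerically stable log(sum(exp(x))) across the class dimension,
    for x of shape [N*num_priors, num_classes]."""
    x_max = x.detach().max()
    return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max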
Beispiel #52
    def forward(self, inputs_, return_frames=False, autoreg=False):
        if isinstance(inputs_, list):
            inputs_ = inputs_[0]
        x = inputs_
        
        for gnl in self.group_norm_layers:
            gnl.reset_stats()

        current_loc = 0
        
        timesteps = x.size(2)
        
        x = x.transpose(1,2)
        x = x.reshape((-1, x.size(2), x.size(3), x.size(4)))

        conv_input = self.stem(x)

        conv_input = self.ds_block(conv_input, current_loc, self.h_units[0][0])
        
        conv_input = conv_input.reshape((-1, timesteps, conv_input.size(1), conv_input.size(2), conv_input.size(3))).transpose(1,2)

        current_loc = self.h_units[0][0]
        
        hidden_states = {}
        if self.pred_gn:
            errors = 0
            if return_frames:
                frames = []
        if self.cpc_gn:
            cpc_targets = []
            cpc_preds = {step: [] for step in self.cpc_steps}
        
        output = {}

        if self.hidden_init == 'learned':
            x = torch.zeros_like(conv_input[:,:,0])
            current_loc = self.h_units[0][0]
            for j, (h_unit, h_name) in enumerate(self.h_units_and_names):
                loc = int(h_name.strip('horizontal'))
                
                if j > 0:
                    # x = self.ds_block(x[:,:,None], current_loc, loc).squeeze(2)
                    x = self.ds_block(x, current_loc, loc)

                hidden_states[h_name] = F.softplus(torch.zeros_like(x))
                
                hidden_states[h_name] = h_unit(F.softplus(x), hidden_states[h_name], timestep=0)
                
                x = hidden_states[h_name]

                x = self.horizontal_norms[h_name](x)
                x = F.relu_(x)
            
                current_loc = loc
            
            for j, (td_unit, td_name) in enumerate(self.td_units_and_names):
                loc = int(td_name.strip('topdown'))
                h_name = 'horizontal'+str(loc)
        
                hidden_states[h_name] = td_unit(hidden_states[h_name], x, timestep=0)
                x = hidden_states[h_name]
        
        
        for i in range(timesteps):
            if autoreg and i>timesteps//2:
                x = frame
            else:
                x = conv_input[:,:,0]
            current_loc = self.h_units[0][0]
            for j, (h_unit, h_name) in enumerate(self.h_units_and_names):
                loc = int(h_name.strip('horizontal'))
                
                if j > 0:
                    # x = self.ds_block(x[:,:,None], current_loc, loc).squeeze(2)
                    x = self.ds_block(x, current_loc, loc)
                
                if i == 0 and h_name not in hidden_states:    
                    hidden_states[h_name] = F.softplus(torch.zeros_like(x))
                
                hidden_states[h_name], extra = h_unit(F.softplus(x), hidden_states[h_name], timestep=i, return_extra=['error'])
                
                x = hidden_states[h_name]
                
                if (x>1e6).any():
                    logger.info('variable %s at timestep %d out of bound: %f'%(h_name,i, x.max().item()))

                x = self.horizontal_norms[h_name](x)
                x = F.relu_(x)
            
                current_loc = loc
            
            if self.cpc_gn:
                if i >= min(self.cpc_steps):
                    cpc_targets.append(self.W_cpc_target(x.transpose(1,3).detach()).view([-1,self.cpc_fan_out]))
                for step in self.cpc_steps:
                    if i < timesteps-step: 
                        cpc_preds[step].append(self.W_cpc_preds[step](x.transpose(1,3)).view([-1,self.cpc_fan_out]))

            if i <timesteps-1:
                for j, (td_unit, td_name) in enumerate(self.td_units_and_names):
                    loc = int(td_name.strip('topdown'))
                    h_name = 'horizontal'+str(loc)
                    
                    hidden_states[h_name] = td_unit(hidden_states[h_name], x, timestep=i)
                    x = hidden_states[h_name]

                    if (x>1e6).any():
                        logger.info('variable %s at timestep %d out of bound: %f'%(td_name, i, x.max().item()))

                # prediction error -> next step lower layer is detached to avoid gradients flowing through lower layers
                
                ## change architecture to take different locations into account
                if self.pred_gn:
                    frame = self.final_remap(x)
                    if (frame != frame).any():  # NaN check
                        logger.info('variable frame at timestep %d out of bound'%(i))
                    if return_frames:
                        frames.append(frame)
                    pred_error = F.smooth_l1_loss(frame, inputs_[:,:,i+1]) # conv_input[:,:,1][:,:,None].detach()
                    errors = errors + pred_error

                conv_input = conv_input[:,:,1:]
         
        logits = self.head(hidden_states[self.h_units_and_names[-1][1]].detach()) #[:,:,None]
        
        if (logits != logits).any():  # NaN check
            logger.info('variable logits out of bound')
        
        output['logits'] = logits
        if self.pred_gn:
            output['pred_errors'] = errors
            if return_frames:
                frames = torch.stack(frames, 2)
                output['frames'] = frames

        if self.cpc_gn:
            # calculate CPC
        
            # levels of difficulty
            # easy : across batches (dim = 0)
            # medium : across space within sample (fixed dim = 0, dims = 3,4)
            # hard : across time within sample (fixed dim = 0, fixed dims = 3,4, dim=2) 
            
            # label smoothing -> S,T block diag matrix within B,S block diag matrix
            
            cpc_loss = 0
            cpc_targets = torch.stack(cpc_targets,0)
            for step in self.cpc_steps:
                if len(cpc_preds[step])>1:
                    cpc_preds[step] = torch.cat(cpc_preds[step], 0)
        
                    cpc_output = torch.matmul(cpc_targets[step-min(self.cpc_steps):].view([-1, cpc_preds[step].shape[-1]]), cpc_preds[step].t())

                    labels = torch.cumsum(torch.ones_like(cpc_preds[step][:,0]).long(), 0) - 1  # equivalent to arange(num_preds)
                    cpc_loss = cpc_loss + F.cross_entropy(cpc_output, labels)

                    if (cpc_loss != cpc_loss).any():
                        logger.info('variable CPC at timestep %d out of bound'%(step,))
            
            output['cpc_loss'] = cpc_loss

        # output keys: 'logits'; 'pred_errors' (and optionally 'frames') when
        # pred_gn is set; 'cpc_loss' when cpc_gn is set
        return output
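# A minimal sketch of the InfoNCE pattern used for the CPC loss above:
# scores are all pairwise (target, prediction) dot products, and the
# positive pair for row i sits at column i (all tensors here are toy data).
import torch
import torch.nn.functional as F

targets = torch.randn(8, 64)   # encoded future states
preds = torch.randn(8, 64)     # context-based predictions
scores = targets @ preds.t()   # [8, 8] similarity matrix
labels = torch.arange(8)       # positives on the diagonal
loss = F.cross_entropy(scores, labels)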
    def part_forward(self, predictions, targets, arm_data=None, filter_negative=False):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        loc_data, conf_data, priors = predictions
        if arm_data:
            arm_loc_data, arm_conf_data = arm_data

        num = loc_data.size(0)
        priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            # SFD match strategy (swordli); `ac` is assumed to be a
            # module-level flag selecting it
            if ac:
                sfd_match(self.threshold, truths, defaults, self.variance, labels, loc_t, conf_t, idx)
            else:
                if arm_data:
                    refine_match(self.threshold, truths, defaults, self.variance, labels, loc_t, conf_t, idx, arm_loc_data[idx].data)
                else:
                    match(self.threshold, truths, defaults, self.variance, labels, loc_t, conf_t, idx)
                
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
       
        # wrap targets (Variable wrapping is a no-op in modern PyTorch)
        loc_t = loc_t.detach()
        conf_t = conf_t.detach()
        pos = conf_t > 0
        num_pos = pos.sum(dim=1, keepdim=True)
        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
        
        # Compute max conf across batch for hard negative mining
        # priors matched to the ignore label (-1) are excluded from both
        # positives and mined negatives
        ignore = conf_t < 0
        conf_t[ignore] = 0
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
        # Hard Negative Mining (reshape before masking so the [num, num_priors]
        # boolean masks line up with loss_c)
        loss_c = loss_c.view(num, -1)
        loss_c[pos] = 0     # filter out pos boxes for now
        loss_c[ignore] = 0  # filter out ignored boxes
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        
        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx | neg_idx)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos | neg)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = num_pos.data.sum()
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
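# The double argsort above is the standard trick for per-row top-k selection
# in hard negative mining; a worked toy example:
import torch

loss_c = torch.tensor([[0.3, 0.9, 0.1, 0.5]])
_, loss_idx = loss_c.sort(1, descending=True)  # [[1, 3, 0, 2]]
_, idx_rank = loss_idx.sort(1)                 # [[2, 0, 3, 1]]: rank of each prior by loss
num_neg = 2
neg = idx_rank < num_neg                       # [[False, True, False, True]]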
Beispiel #54
    def loss_fnc(self, state_action_values, expected_state_action_values):
        return F.smooth_l1_loss(state_action_values,
                                expected_state_action_values)
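# For reference, smooth L1 (Huber with beta = 1) written out directly; this
# sketch matches F.smooth_l1_loss with the default beta and mean reduction:
import torch

def smooth_l1(x, y, beta=1.0):
    diff = (x - y).abs()
    return torch.where(diff < beta, 0.5 * diff.pow(2) / beta, diff - 0.5 * beta).mean()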
def train_seg_semisup_ict(
        submit_config: job_helper.SubmitConfig, dataset, model, arch,
        freeze_bn, opt_type, sgd_momentum, sgd_nesterov, sgd_weight_decay,
        learning_rate, lr_sched, lr_step_epochs, lr_step_gamma, lr_poly_power,
        teacher_alpha, bin_fill_holes, crop_size, aug_hflip, aug_vflip,
        aug_hvflip, aug_scale_hung, aug_max_scale, aug_scale_non_uniform,
        aug_rot_mag, ict_alpha, cons_loss_fn, cons_weight, conf_thresh,
        conf_per_pixel, rampup, unsup_batch_ratio, num_epochs, iters_per_epoch,
        batch_size, n_sup, n_unsup, n_val, split_seed, split_path, val_seed,
        save_preds, save_model, num_workers):
    settings = locals().copy()
    del settings['submit_config']

    import os
    import math
    import time
    import itertools
    import numpy as np
    import torch.nn as nn
    import torch.nn.functional as F
    from architectures import network_architectures
    import torch.utils.data
    from datapipe import datasets
    from datapipe import seg_data, seg_transforms, seg_transforms_cv
    import evaluation
    import optim_weight_ema
    import lr_schedules

    if crop_size == '':
        crop_size = None
    else:
        crop_size = [int(x.strip()) for x in crop_size.split(',')]

    torch_device = torch.device('cuda:0')

    #
    # Load data sets
    #
    ds_dict = datasets.load_dataset(dataset, n_val, val_seed, n_sup, n_unsup,
                                    split_seed, split_path)

    ds_src = ds_dict['ds_src']
    ds_tgt = ds_dict['ds_tgt']
    tgt_val_ndx = ds_dict['val_ndx_tgt']
    src_val_ndx = ds_dict['val_ndx_src'] if ds_src is not ds_tgt else None
    test_ndx = ds_dict['test_ndx_tgt']
    sup_ndx = ds_dict['sup_ndx']
    unsup_ndx = ds_dict['unsup_ndx']

    n_classes = ds_src.num_classes
    root_n_classes = math.sqrt(n_classes)

    if bin_fill_holes and n_classes != 2:
        print(
            'Binary hole filling can only be used with binary (2-class) segmentation datasets'
        )
        return

    print('Loaded data')

    # Build network
    NetClass = network_architectures.seg.get(arch)

    student_net = NetClass(ds_src.num_classes).to(torch_device)

    if opt_type == 'adam':
        student_optim = torch.optim.Adam([
            dict(params=student_net.pretrained_parameters(),
                 lr=learning_rate * 0.1),
            dict(params=student_net.new_parameters(), lr=learning_rate)
        ])
    elif opt_type == 'sgd':
        student_optim = torch.optim.SGD([
            dict(params=student_net.pretrained_parameters(),
                 lr=learning_rate * 0.1),
            dict(params=student_net.new_parameters(), lr=learning_rate)
        ],
                                        momentum=sgd_momentum,
                                        nesterov=sgd_nesterov,
                                        weight_decay=sgd_weight_decay)
    else:
        raise ValueError('Unknown opt_type {}'.format(opt_type))

    if model == 'mean_teacher':
        teacher_net = NetClass(ds_src.num_classes).to(torch_device)

        for p in teacher_net.parameters():
            p.requires_grad = False

        teacher_optim = optim_weight_ema.EMAWeightOptimizer(
            teacher_net, student_net, teacher_alpha)
        eval_net = teacher_net
    elif model == 'pi':
        teacher_net = student_net
        teacher_optim = None
        eval_net = student_net
    else:
        print('Unknown model type {}'.format(model))
        return

    BLOCK_SIZE = student_net.BLOCK_SIZE
    NET_MEAN, NET_STD = seg_transforms.get_mean_std(ds_tgt, student_net)

    if freeze_bn:
        if not hasattr(student_net, 'freeze_batchnorm'):
            raise ValueError(
                'Network {} does not support batchnorm freezing'.format(arch))

    clf_crossent_loss = nn.CrossEntropyLoss(ignore_index=255)

    print('Built network')

    if iters_per_epoch == -1:
        iters_per_epoch = len(unsup_ndx) // batch_size
    total_iters = iters_per_epoch * num_epochs

    lr_epoch_scheduler, lr_iter_scheduler = lr_schedules.make_lr_schedulers(
        optimizer=student_optim,
        total_iters=total_iters,
        schedule_type=lr_sched,
        step_epochs=lr_step_epochs,
        step_gamma=lr_step_gamma,
        poly_power=lr_poly_power)

    # Train data pipeline: transforms
    train_transforms = []

    if crop_size is not None:
        if aug_scale_hung:
            train_transforms.append(
                seg_transforms_cv.SegCVTransformRandomCropScaleHung(
                    crop_size, (0, 0),
                    uniform_scale=not aug_scale_non_uniform))
        elif aug_max_scale != 1.0 or aug_rot_mag != 0.0:
            train_transforms.append(
                seg_transforms_cv.SegCVTransformRandomCropRotateScale(
                    crop_size, (0, 0),
                    rot_mag=aug_rot_mag,
                    max_scale=aug_max_scale,
                    uniform_scale=not aug_scale_non_uniform,
                    constrain_rot_scale=True))
        else:
            train_transforms.append(
                seg_transforms_cv.SegCVTransformRandomCrop(crop_size, (0, 0)))
    else:
        if aug_scale_hung:
            raise NotImplementedError('aug_scale_hung requires a crop_size')

    if aug_hflip or aug_vflip or aug_hvflip:
        train_transforms.append(
            seg_transforms_cv.SegCVTransformRandomFlip(aug_hflip, aug_vflip,
                                                       aug_hvflip))
    train_transforms.append(
        seg_transforms_cv.SegCVTransformNormalizeToTensor(NET_MEAN, NET_STD))

    # Train data pipeline: supervised and unsupervised data sets
    train_sup_ds = ds_src.dataset(
        labels=True,
        mask=False,
        xf=False,
        pair=False,
        transforms=seg_transforms.SegTransformCompose(train_transforms),
        pipeline_type='cv')
    train_unsup_ds = ds_src.dataset(
        labels=False,
        mask=True,
        xf=False,
        pair=False,
        transforms=seg_transforms.SegTransformCompose(train_transforms),
        pipeline_type='cv')

    collate_fn = seg_data.SegCollate(BLOCK_SIZE)

    # Train data pipeline: data loaders
    sup_sampler = seg_data.RepeatSampler(
        torch.utils.data.SubsetRandomSampler(sup_ndx))
    train_sup_loader = torch.utils.data.DataLoader(train_sup_ds,
                                                   batch_size,
                                                   sampler=sup_sampler,
                                                   collate_fn=collate_fn,
                                                   num_workers=num_workers)
    if cons_weight > 0.0:
        unsup_sampler = seg_data.RepeatSampler(
            torch.utils.data.SubsetRandomSampler(unsup_ndx))
        train_unsup_loader = torch.utils.data.DataLoader(
            train_unsup_ds,
            batch_size,
            sampler=unsup_sampler,
            collate_fn=collate_fn,
            num_workers=num_workers)
    else:
        train_unsup_loader = None

    # Eval pipeline
    src_val_loader, tgt_val_loader, test_loader = datasets.eval_data_pipeline(
        ds_src, ds_tgt, src_val_ndx, tgt_val_ndx, test_ndx, batch_size,
        collate_fn, NET_MEAN, NET_STD, num_workers)

    # Report settings
    print('Settings:')
    print(', '.join([
        '{}={}'.format(key, settings[key])
        for key in sorted(list(settings.keys()))
    ]))

    # Report dataset size
    print('Dataset:')
    print('len(sup_ndx)={}'.format(len(sup_ndx)))
    print('len(unsup_ndx)={}'.format(len(unsup_ndx)))
    if ds_src is not ds_tgt:
        print('len(src_val_ndx)={}'.format(len(src_val_ndx)))
        print('len(tgt_val_ndx)={}'.format(len(tgt_val_ndx)))
    else:
        print('len(val_ndx)={}'.format(len(tgt_val_ndx)))
    if test_ndx is not None:
        print('len(test_ndx)={}'.format(len(test_ndx)))

    if n_sup != -1:
        print('sup_ndx={}'.format(sup_ndx.tolist()))

    # Track mIoU for early stopping
    best_tgt_miou = None
    best_epoch = 0

    eval_net_state = {
        key: value.detach().cpu().numpy()
        for key, value in eval_net.state_dict().items()
    }

    # Create iterators
    train_sup_iter = iter(train_sup_loader)
    train_unsup_iter = iter(
        train_unsup_loader) if train_unsup_loader is not None else None

    iter_i = 0
    print('Training...')
    for epoch_i in range(num_epochs):
        if lr_epoch_scheduler is not None:
            lr_epoch_scheduler.step(epoch_i)

        t1 = time.time()

        if rampup > 0:
            ramp_val = network_architectures.sigmoid_rampup(epoch_i, rampup)
        else:
            ramp_val = 1.0

        student_net.train()
        if teacher_net is not student_net:
            teacher_net.train()

        if freeze_bn:
            student_net.freeze_batchnorm()
            if teacher_net is not student_net:
                teacher_net.freeze_batchnorm()

        sup_loss_acc = 0.0
        consistency_loss_acc = 0.0
        conf_rate_acc = 0.0
        n_sup_batches = 0
        n_unsup_batches = 0

        src_val_iter = iter(
            src_val_loader) if src_val_loader is not None else None
        tgt_val_iter = iter(
            tgt_val_loader) if tgt_val_loader is not None else None

        for sup_batch in itertools.islice(train_sup_iter, iters_per_epoch):
            if lr_iter_scheduler is not None:
                lr_iter_scheduler.step(iter_i)
            student_optim.zero_grad()

            #
            # Supervised branch
            #

            batch_x = sup_batch['image'].to(torch_device)
            batch_y = sup_batch['labels'].to(torch_device)

            logits_sup = student_net(batch_x)
            sup_loss = clf_crossent_loss(logits_sup, batch_y[:, 0, :, :])
            sup_loss.backward()

            if cons_weight > 0.0:
                for _ in range(unsup_batch_ratio):
                    #
                    # Unsupervised branch
                    #

                    # Mix mode: batch consists of paired unsupervised samples
                    unsup_batch0 = next(train_unsup_iter)
                    unsup_batch1 = next(train_unsup_iter)
                    batch_ux0 = unsup_batch0['image'].to(torch_device)
                    batch_um0 = unsup_batch0['mask'].to(torch_device)
                    batch_ux1 = unsup_batch1['image'].to(torch_device)
                    batch_um1 = unsup_batch1['mask'].to(torch_device)

                    # ICT mix factors
                    ict_mix_factors = np.random.beta(ict_alpha,
                                                     ict_alpha,
                                                     size=(len(batch_ux0), 1,
                                                           1, 1))
                    ict_mix_factors = torch.tensor(ict_mix_factors,
                                                   dtype=torch.float,
                                                   device=torch_device)

                    # Mix images
                    batch_ux_mixed = batch_ux0 * (
                        1.0 - ict_mix_factors) + batch_ux1 * ict_mix_factors
                    batch_um_mixed = batch_um0 * (
                        1.0 - ict_mix_factors) + batch_um1 * ict_mix_factors

                    # Get teacher predictions for original images
                    with torch.no_grad():
                        logits_u0_tea = teacher_net(batch_ux0).detach()
                        logits_u1_tea = teacher_net(batch_ux1).detach()
                    # Get student prediction for mixed image
                    logits_cons_stu = student_net(batch_ux_mixed)

                    # Logits -> probs
                    prob_u0_tea = F.softmax(logits_u0_tea, dim=1)
                    prob_u1_tea = F.softmax(logits_u1_tea, dim=1)
                    prob_cons_stu = F.softmax(logits_cons_stu, dim=1)

                    # Mix teacher predictions using the same mix factors.
                    # Both mixed logits and mixed probabilities are kept,
                    # since the consistency losses below consume different
                    # forms; softmax is non-linear, so the two mixes are not
                    # interchangeable for fractional mix factors
                    logits_cons_tea = logits_u0_tea * (
                        1 - ict_mix_factors) + logits_u1_tea * ict_mix_factors
                    prob_cons_tea = prob_u0_tea * (
                        1 - ict_mix_factors) + prob_u1_tea * ict_mix_factors

                    loss_mask = batch_um_mixed

                    # Confidence thresholding
                    if conf_thresh > 0.0:
                        # Confidence of each teacher prediction (the
                        # probabilities were already computed above)
                        conf_u0_tea = prob_u0_tea.max(dim=1, keepdim=True)[0]
                        conf_u1_tea = prob_u1_tea.max(dim=1, keepdim=True)[0]
                        # Mix confidences
                        conf_tea = conf_u0_tea * (
                            1 -
                            ict_mix_factors) + conf_u1_tea * ict_mix_factors
                        # Compute confidence mask
                        conf_mask = (conf_tea >=
                                     conf_thresh).float()[:, None, :, :]
                        # Record rate for reporting
                        conf_rate_acc += float(conf_mask.mean())
                        # Average confidence mask if requested
                        if not conf_per_pixel:
                            conf_mask = conf_mask.mean()

                        loss_mask = loss_mask * conf_mask
                    elif rampup > 0:
                        conf_rate_acc += ramp_val

                    # Compute per-pixel consistency loss
                    # Note that the way we aggregate the loss across the class/channel dimension (1)
                    # depends on the loss function used. Generally, summing over the class dimension
                    # keeps the magnitude of the gradient of the loss w.r.t. the logits
                    # nearly constant w.r.t. the number of classes. When using logit-variance,
                    # dividing by `sqrt(num_classes)` helps.
                    if cons_loss_fn == 'var':
                        delta_prob = prob_cons_stu - prob_cons_tea
                        consistency_loss = delta_prob * delta_prob
                        consistency_loss = consistency_loss.sum(dim=1,
                                                                keepdim=True)
                    elif cons_loss_fn == 'logits_var':
                        delta_logits = logits_cons_stu - logits_cons_tea
                        consistency_loss = delta_logits * delta_logits
                        consistency_loss = consistency_loss.sum(
                            dim=1, keepdim=True) / root_n_classes
                    elif cons_loss_fn == 'logits_smoothl1':
                        consistency_loss = F.smooth_l1_loss(logits_cons_stu,
                                                            logits_cons_tea,
                                                            reduction='none')
                        consistency_loss = consistency_loss.sum(
                            dim=1, keepdim=True) / root_n_classes
                    elif cons_loss_fn == 'bce':
                        consistency_loss = network_architectures.robust_binary_crossentropy(
                            prob_cons_stu, prob_cons_tea)
                        consistency_loss = consistency_loss.sum(dim=1,
                                                                keepdim=True)
                    elif cons_loss_fn == 'kld':
                        consistency_loss = F.kl_div(F.log_softmax(
                            logits_cons_stu, dim=1),
                                                    prob_cons_tea,
                                                    reduction='none')
                        consistency_loss = consistency_loss.sum(dim=1,
                                                                keepdim=True)
                    else:
                        raise ValueError(
                            'Unknown consistency loss function {}'.format(
                                cons_loss_fn))

                    # Apply consistency loss mask and take the mean over pixels and images
                    consistency_loss = (consistency_loss * loss_mask).mean()

                    # Modulate with rampup if desired
                    if rampup > 0:
                        consistency_loss = consistency_loss * ramp_val

                    # Weight the consistency loss and back-prop
                    unsup_loss = consistency_loss * cons_weight
                    unsup_loss.backward()

                    consistency_loss_acc += float(consistency_loss.detach())

                    n_unsup_batches += 1

            student_optim.step()
            if teacher_optim is not None:
                teacher_optim.step()

            sup_loss_acc += float(sup_loss.detach())
            n_sup_batches += 1
            iter_i += 1

        sup_loss_acc /= n_sup_batches
        if n_unsup_batches > 0:
            consistency_loss_acc /= n_unsup_batches
            conf_rate_acc /= n_unsup_batches

        eval_net.eval()

        if ds_src is not ds_tgt:
            src_iou_eval = evaluation.EvaluatorIoU(ds_src.num_classes,
                                                   bin_fill_holes)
            with torch.no_grad():
                for batch in src_val_iter:
                    batch_x = batch['image'].to(torch_device)
                    batch_y = batch['labels'].numpy()

                    logits = eval_net(batch_x)
                    pred_y = torch.argmax(logits, dim=1).detach().cpu().numpy()

                    for sample_i in range(len(batch_y)):
                        src_iou_eval.sample(batch_y[sample_i, 0],
                                            pred_y[sample_i],
                                            ignore_value=255)

            src_iou = src_iou_eval.score()
            src_miou = src_iou.mean()
        else:
            src_iou_eval = src_iou = src_miou = None

        tgt_iou_eval = evaluation.EvaluatorIoU(ds_tgt.num_classes,
                                               bin_fill_holes)
        with torch.no_grad():
            for batch in tgt_val_iter:
                batch_x = batch['image'].to(torch_device)
                batch_y = batch['labels'].numpy()

                logits = eval_net(batch_x)
                pred_y = torch.argmax(logits, dim=1).detach().cpu().numpy()

                for sample_i in range(len(batch_y)):
                    tgt_iou_eval.sample(batch_y[sample_i, 0],
                                        pred_y[sample_i],
                                        ignore_value=255)

        tgt_iou = tgt_iou_eval.score()
        tgt_miou = tgt_iou.mean()

        t2 = time.time()

        if ds_src is not ds_tgt:
            print(
                'Epoch {}: took {:.3f}s, TRAIN clf loss={:.6f}, consistency loss={:.6f}, conf rate={:.3%}, '
                'SRC VAL mIoU={:.3%}, TGT VAL mIoU={:.3%}'.format(
                    epoch_i + 1, t2 - t1, sup_loss_acc, consistency_loss_acc,
                    conf_rate_acc, src_miou, tgt_miou))
            print('-- SRC {}'.format(', '.join(
                ['{:.3%}'.format(x) for x in src_iou])))
            print('-- TGT {}'.format(', '.join(
                ['{:.3%}'.format(x) for x in tgt_iou])))
        else:
            print(
                'Epoch {}: took {:.3f}s, TRAIN clf loss={:.6f}, consistency loss={:.6f}, conf rate={:.3%}, VAL mIoU={:.3%}'
                .format(epoch_i + 1, t2 - t1, sup_loss_acc,
                        consistency_loss_acc, conf_rate_acc, tgt_miou))
            print('-- {}'.format(', '.join(
                ['{:.3%}'.format(x) for x in tgt_iou])))

    if save_model:
        model_path = os.path.join(submit_config.run_dir, "model.pth")
        torch.save(eval_net, model_path)

    if save_preds:
        out_dir = os.path.join(submit_config.run_dir, 'preds')
        os.makedirs(out_dir, exist_ok=True)
        with torch.no_grad():
            for batch in tgt_val_loader:
                batch_x = batch['image'].to(torch_device)
                batch_ndx = batch['index'].numpy()

                logits = eval_net(batch_x)
                pred_y = torch.argmax(logits, dim=1).detach().cpu().numpy()

                for sample_i, sample_ndx in enumerate(batch_ndx):
                    ds_tgt.save_prediction_by_index(
                        out_dir, pred_y[sample_i].astype(np.uint32),
                        sample_ndx)
    else:
        out_dir = None

    if test_loader is not None:
        test_iou_eval = evaluation.EvaluatorIoU(ds_tgt.num_classes,
                                                bin_fill_holes)
        with torch.no_grad():
            for batch in test_loader:
                batch_x = batch['image'].to(torch_device)
                batch_y = batch['labels'].numpy()
                batch_ndx = batch['index'].numpy()

                logits = eval_net(batch_x)
                pred_y = torch.argmax(logits, dim=1).detach().cpu().numpy()

                for sample_i, sample_ndx in enumerate(batch_ndx):
                    if save_preds:
                        ds_tgt.save_prediction_by_index(
                            out_dir, pred_y[sample_i].astype(np.uint32),
                            sample_ndx)
                    test_iou_eval.sample(batch_y[sample_i, 0],
                                         pred_y[sample_i],
                                         ignore_value=255)

        test_iou = test_iou_eval.score()
        test_miou = test_iou.mean()

        print('FINAL TEST: mIoU={:.3%}'.format(test_miou))
        print('-- TEST {}'.format(', '.join(
            ['{:.3%}'.format(x) for x in test_iou])))
    def loss(self, input, target, weights):
        if self.config.use_IS:
            loss = torch.abs(target - input) * torch.from_numpy(weights).to(device=self.config.device)
            return loss.mean()

        return F.smooth_l1_loss(input, target)
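# Hypothetical usage of the importance-sampling branch above: with
# prioritized replay, per-sample IS weights correct for non-uniform sampling
# (all tensors below are stand-ins):
import numpy as np
import torch

pred = torch.randn(32, 1)
target = torch.randn(32, 1)
weights = np.random.rand(32, 1).astype(np.float32)  # stand-in for buffer weights
loss = (torch.abs(target - pred) * torch.from_numpy(weights)).mean()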
Beispiel #57
    def learn(self, epoch_index):
        """Update policy (PPO clipped objective) and value parameters using
        the stored experience tuples.
        V_targets = r + γ * critic_local(next_state)

        Params
        ======
            epoch_index (int): current epoch; the memory is cleared every 10 epochs
        """
        self.training_step += 1
        state = torch.tensor([t.state for t in self.memory],
                             dtype=torch.float).to(device)
        action = torch.tensor([t.action for t in self.memory],
                              dtype=torch.float).to(device)
        reward = torch.tensor([t.reward for t in self.memory],
                              dtype=torch.float).to(device).unsqueeze(-1)
        next_state = torch.tensor([t.next_state for t in self.memory],
                                  dtype=torch.float).to(device)
        old_action_log_prob = torch.tensor([t.a_log_prob for t in self.memory],
                                           dtype=torch.float).to(device)

        # normalise rewards
        reward = (reward - reward.mean()) / (reward.std() + 1e-5)

        target_v = []

        for mem_index in range(len(state)):
            with torch.no_grad():
                target_v.append(reward[mem_index] + self.gamma *
                                self.critic_local(next_state[mem_index]))

        target_v = torch.stack(target_v).to(device)

        advantage = []
        for mem_index in range(len(state)):
            with torch.no_grad():
                advantage.append(target_v[mem_index] -
                                 self.critic_local(state[mem_index]))
        advantage = torch.stack(advantage).to(device)

        for i in range(PPO_UPDATE_PERIOD):
            for index in BatchSampler(
                    SubsetRandomSampler(range(len(self.memory))), BATCH_SIZE,
                    False):
                (mu, sigma) = self.actor_local(state[index])
                dist = Normal(mu, sigma)

                action_prob = dist.log_prob(action[index])
                ratio = torch.exp(action_prob - old_action_log_prob[index])

                L_left = ratio * advantage[index]
                # note: the clip window is widened by 0.5 relative to the
                # standard PPO range [1 - clip, 1 + clip]
                L_right = torch.clamp(ratio,
                                      1 - self.clip_param - 0.5,
                                      1 + self.clip_param + 0.5) * advantage[index]

                #update actor network
                action_loss = -torch.min(L_left, L_right).mean()
                self.actor_optimizer.zero_grad()
                action_loss.backward()
                nn.utils.clip_grad_norm_(self.actor_local.parameters(), 1.0)
                self.actor_optimizer.step()

                #update critic optimizer
                value_loss = F.smooth_l1_loss(self.critic_local(state[index]),
                                              target_v[index])
                if i == 5:
                    print('val_loss', value_loss)
                self.critic_optimizer.zero_grad()
                value_loss.backward()
                nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1.0)
                self.critic_optimizer.step()

            print('PPO step', i)

        if epoch_index % 10 == 0 and epoch_index > 1:
            del self.memory[:]
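# The memory entries accessed above (t.state, t.action, t.a_log_prob,
# t.reward, t.next_state) suggest a container along these lines; this is an
# assumption, not taken from the source:
from collections import namedtuple

Transition = namedtuple('Transition',
                        ('state', 'action', 'a_log_prob', 'reward', 'next_state'))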
Beispiel #58
    def forward(self, predicts, targets):
        # localization predictions, class confidences, prior boxes
        loc_data, conf_data, priors = predicts
        # conf_data shape: [batch_size, num_priors, num_classes]
        # batch size
        num = loc_data.size(0)
        # loc_data shape: [batch_size, num_priors, 4]; priors shape: [num_priors, 4]
        # take the prior boxes; this guarantees the number of priors matches
        # loc_data and conf_data (in practice it already does)
        priors = priors[:loc_data.size(1), :]
        # number of prior boxes
        num_priors = (priors.size(0))
        # allocate the matching targets
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)

        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        priors = priors.cuda()

        for idx in range(num):
            # ground-truth boxes; each row of targets holds one image's
            # boxes, with the class label in the last column
            truths = targets[idx][:, :-1]
            # ground-truth labels
            labels = targets[idx][:, -1]
            # prior boxes
            defaults = priors
            # match ground truth to prior boxes; match() returns nothing but
            # fills loc_t and conf_t in place, so the caller sees the result
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        # wrap targets (Variable wrapping is a no-op in modern PyTorch)
        loc_t = loc_t.detach()
        conf_t = conf_t.detach()

        # entries with conf_t > 0 contain an object: pos marks which of the
        # priors of each image were matched as positive samples
        pos = conf_t > 0
        # number of positive samples per image
        num_pos = pos.sum(dim=1, keepdim=True)
        # localization loss, computed over the positive samples only
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)  # select the positive locations
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # per-prior classification loss against the matched label, used to
        # rank candidates for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
        loss_c = loss_c.view(num, -1)

        loss_c[pos] = 0
        # rank each image's negatives by their loss
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        # positives per image
        num_pos = pos.long().sum(1, keepdim=True)
        # cap the number of mined negatives
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # confidence loss over the positives and the mined negatives
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx | neg_idx)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos | neg)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        N = num_pos.data.sum()
        loss_l /= N
        loss_c /= N
        total_loss = loss_l + loss_c
        losses = [loss_l, loss_c, total_loss]
        return LossTuple(*losses)
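# LossTuple is not defined in this snippet; presumably it is a simple named
# container along these lines (an assumption):
from collections import namedtuple

LossTuple = namedtuple('LossTuple', ('loss_l', 'loss_c', 'total_loss'))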
Beispiel #59
            a_lst.append(a)
            r_lst.append(r / 100.0)
            mask_lst.append(1 - done)

            s = s_prime
            step_idx += 1

        s_final = torch.from_numpy(s_prime).float()
        v_final = model.v(s_final).detach().clone().numpy()
        td_target = compute_target(v_final, r_lst, mask_lst)

        td_target_vec = td_target.reshape(-1)
        s_vec = torch.tensor(s_lst).float().reshape(
            -1, 4)  # 4 == Dimension of state
        a_vec = torch.tensor(a_lst).reshape(-1).unsqueeze(1)
        advantage = td_target_vec - model.v(s_vec).reshape(-1)

        pi = model.pi(s_vec, softmax_dim=1)
        pi_a = pi.gather(1, a_vec).reshape(-1)
        loss = -(torch.log(pi_a) * advantage.detach()).mean() + \
               F.smooth_l1_loss(model.v(s_vec).reshape(-1), td_target_vec)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step_idx % PRINT_INTERVAL == 0:
            test(step_idx, model)

    envs.close()
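# compute_target is referenced above but not defined in this snippet; a
# plausible sketch of the n-step TD target, bootstrapped backwards from the
# value of the final state (gamma and the array-per-step layout are assumptions):
import numpy as np
import torch

def compute_target(v_final, r_lst, mask_lst, gamma=0.98):
    G = v_final.reshape(-1)
    td_target = []
    for r, mask in zip(r_lst[::-1], mask_lst[::-1]):
        G = r + gamma * G * mask  # mask zeroes the bootstrap at episode ends
        td_target.append(G)
    return torch.tensor(np.array(td_target[::-1])).float()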
Beispiel #60
    def train(self):
        if len(self.memory) < self.minibatch_size:
            return
        for i in range(0, len(self.memory), self.minibatch_size):
            #transitions = self.memory.sample(self.minibatch_size)
            transitions = self.memory.pull(self.minibatch_size)

            print('Batch train: ' + str(int(i / self.minibatch_size) + 1) +
                  "/" + str(int(len(self.memory) / self.minibatch_size) + 1))

            aux_transitions = []
            for t in transitions:
                proc_sgray = torch.Tensor(self.state_size, self.state_dim,
                                          self.state_dim).to(self.device)
                proc_sdepth = torch.Tensor(self.state_size, self.state_dim,
                                           self.state_dim).to(self.device)
                proc_next_sgray = torch.Tensor(self.state_size, self.state_dim,
                                               self.state_dim).to(self.device)
                proc_next_sdepth = torch.Tensor(self.state_size,
                                                self.state_dim,
                                                self.state_dim).to(self.device)
                count = 0
                for sgray, sdepth, next_sgray, next_sdepth in zip(
                        t.sgray, t.sdepth, t.next_sgray, t.next_sdepth):
                    proc_sgray[count] = self.get_tensor_from_image(sgray)
                    proc_sdepth[count] = self.get_tensor_from_image(sdepth)
                    proc_next_sgray[count] = self.get_tensor_from_image(
                        next_sgray)
                    proc_next_sdepth[count] = self.get_tensor_from_image(
                        next_sdepth)
                    count += 1

                proc_sgray = proc_sgray.unsqueeze(0).to(self.device)
                proc_sdepth = proc_sdepth.unsqueeze(0).to(self.device)
                proc_next_sgray = proc_next_sgray.unsqueeze(0).to(self.device)
                proc_next_sdepth = proc_next_sdepth.unsqueeze(0).to(
                    self.device)
                #('sgray','sdepth','action','next_sgray','next_sdepth','reward')
                one_transition = Transition(proc_sgray, proc_sdepth, t.action,
                                            proc_next_sgray, proc_next_sdepth,
                                            t.reward)
                aux_transitions.append(one_transition)
            transitions = aux_transitions

            # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
            # detailed explanation). This converts batch-array of Transitions
            # to Transition of batch-arrays.
            batch = Transition(*zip(*transitions))
            #print(batch.sgray)

            # Compute a mask of non-final states and concatenate the batch elements
            # (a final state would've been the one after which simulation ended)
            gray_non_final_mask = torch.tensor(tuple(
                map(lambda s: s is not None, batch.next_sgray)),
                                               device=self.device,
                                               dtype=torch.bool)
            gray_non_final_next_states = torch.cat(
                [s for s in batch.next_sgray if s is not None])

            depth_non_final_mask = torch.tensor(tuple(
                map(lambda s: s is not None, batch.next_sdepth)),
                                                device=self.device,
                                                dtype=torch.bool)
            depth_non_final_next_states = torch.cat(
                [s for s in batch.next_sdepth if s is not None])
            sgray_batch = torch.cat(batch.sgray)
            sdepth_batch = torch.cat(batch.sdepth)

            action_batch = torch.cat(batch.action)
            reward_batch = torch.cat(batch.reward)

            # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
            # columns of actions taken. These are the actions which would've been taken
            # for each batch state according to policy_net
            sgray_action_values = self.gray_policy_net(sgray_batch).gather(
                1, action_batch)
            sdepth_action_values = self.depth_policy_net(sdepth_batch).gather(
                1, action_batch)

            # Compute V(s_{t+1}) for all next states.
            # Expected values of actions for non_final_next_states are computed based
            # on the "older" target_net; selecting their best reward with max(1)[0].
            # This is merged based on the mask, such that we'll have either the expected
            # state value or 0 in case the state was final.
            next_sgray_values = torch.zeros(self.minibatch_size,
                                            device=self.device)
            next_sgray_values[gray_non_final_mask] = self.gray_target_net(
                gray_non_final_next_states).max(1)[0].detach()

            next_sdepth_values = torch.zeros(self.minibatch_size,
                                             device=self.device)
            next_sdepth_values[depth_non_final_mask] = self.depth_target_net(
                depth_non_final_next_states).max(1)[0].detach()
            # Compute the expected Q values
            expected_sgray_action_values = (next_sgray_values *
                                            self.discount) + reward_batch
            expected_sdepth_action_values = (next_sdepth_values *
                                             self.discount) + reward_batch

            # Compute Huber loss
            gray_loss = F.smooth_l1_loss(
                sgray_action_values, expected_sgray_action_values.unsqueeze(1))
            depth_loss = F.smooth_l1_loss(
                sdepth_action_values,
                expected_sdepth_action_values.unsqueeze(1))

            # Optimize the model
            self.gray_optimizer.zero_grad()
            gray_loss.backward()
            for param in self.gray_policy_net.parameters():
                param.grad.data.clamp_(-1, 1)
            self.gray_optimizer.step()

            # Optimize the model
            self.depth_optimizer.zero_grad()
            depth_loss.backward()
            for param in self.depth_policy_net.parameters():
                param.grad.data.clamp_(-1, 1)
            self.depth_optimizer.step()
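# The target networks above are presumably refreshed from the policy networks
# elsewhere in the agent; a typical sketch (method name and cadence are
# assumptions):
def update_target_nets(self):
    self.gray_target_net.load_state_dict(self.gray_policy_net.state_dict())
    self.depth_target_net.load_state_dict(self.depth_policy_net.state_dict())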