Example #1
def forward(self, inp, mask):
        if self.att:
            # Learned attention: score each position, mask out padding, softmax over the sequence.
            weights_unnorm = self.att(inp).squeeze(-1)
            weights_unnorm = weights_unnorm.masked_fill(mask, self.pre_softmax_mask_fill)
            weights = F.softmax(weights_unnorm, dim=1)
        else:
            # No attention module: uniform average over the unmasked positions.
            weights_unnorm = mask.logical_not().type_as(inp)
            weights = weights_unnorm / weights_unnorm.sum(dim=1)[:, None]

        self.last_weights = weights.detach().cpu()
        if self.agg_dims:
            to_agg = self.agg(inp)
        else:
            to_agg = inp
        self.last_features = to_agg.detach().cpu()
        # Weighted sum over the sequence dimension.
        weighted = to_agg * weights.unsqueeze(-1).expand_as(to_agg)
        res = weighted.sum(1)
        return res
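A hedged usage sketch for the pooling above (construction of the module is an assumption; the convention here is that `mask` is True at padded positions):

# `pool` stands for an instance of the module above, e.g. with
# att = nn.Linear(hidden, 1) for learned weights, or att = None for a plain mean.
inp = torch.randn(2, 5, 16)                      # (batch, seq, hidden)
mask = torch.tensor([[False] * 5,
                     [False] * 3 + [True] * 2])  # True marks padding
# pooled = pool(inp, mask)                       # -> (batch, hidden)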
Example #2
 def forward(self, x):
     x = x[::-1]  # to lowest resolution first
     top_down_feature = None
     for i, feature in enumerate(x):
         feature = self.backbone_feature_reduction[i](feature)
         if i == 0:
             top_down_feature = feature
         else:
             upsampled_feature = F.interpolate(top_down_feature,
                                               size=feature.size()[-2:],
                                               mode='bilinear',
                                               align_corners=True)
             if i < len(x) - 1:
                 top_down_feature = self.top_down_feature_reduction[i - 1](
                     feature + upsampled_feature)
             else:
                 top_down_feature = feature + upsampled_feature
     return top_down_feature
Example #3
def metrop_step(grid, conv, beta):
    # Random sublattice offset: over many steps every site gets a chance to update.
    rix = np.random.randint(0, high=3, size=2)
    grid = T.roll(grid, shifts=tuple(rix), dims=(2, 3))

    # Energy cost of flipping each candidate spin, from the convolutional coupling.
    dE = 2 * conv(grid)[0, 0]

    # Take every third site in each dimension so no two updated spins interact.
    scatter_ixs = [np.arange(1, d - 1, 3) for d in grid.shape[2:]]
    ixs = (0, 0) + np.ix_(*scatter_ixs)
    sub = grid[ixs]
    dE = sub * (dE + 2 * conv.mu)

    # Metropolis rule: accept dE <= 0 outright, else with probability exp(-beta*dE).
    acc_prob = T.exp(-beta * F.relu(dE))
    random = T.rand_like(acc_prob)
    sub[acc_prob > random] *= -1
    grid[ixs] = sub
    # Zero rejected moves so the returned energy/magnetization deltas count accepted flips only.
    dE[acc_prob < random] *= 0
    sub[acc_prob < random] *= 0
    return grid, float(dE.sum().detach()), 2 * float(sub.sum().detach())
Example #4
 def forward(self, node, node_mask, adj_mat):
     out = self._compute_one_direction(node, self.fc_dir_weight[1],
                                       self.fc_dir_bias2, adj_mat,
                                       range(2, self.num_relations + 1),
                                       self.fc_gate_weight[1],
                                       self.fc_gate_bias2)
     adj_mat = adj_mat.transpose(-1, -2)
     out += self._compute_one_direction(node, self.fc_dir_weight[2],
                                        self.fc_dir_bias3, adj_mat,
                                        range(2, self.num_relations + 1),
                                        self.fc_gate_weight[2],
                                        self.fc_gate_bias3)
     # adj_mat = torch.eye(adj_mat.size(1)).type_as(adj_mat)
     out += self._compute_one_direction(node, self.fc_dir_weight[0],
                                        self.fc_dir_bias1, adj_mat, [1],
                                        self.fc_gate_weight[0],
                                        self.fc_gate_bias1)
     return F.relu(out)
Example #5
def _compute_embedding_grad_sample(
    layer: nn.Embedding, A: torch.Tensor, B: torch.Tensor, batch_dim: int = 0
) -> None:
    """
    Computes per sample gradients for ``nn.Embedding`` layer

    Args:
        layer: Layer
        A: Activations
        B: Backpropagations
        batch_dim: Batch dimension position
    """
    # Cast to B's dtype: torch.einsum cannot mix integer and floating tensors.
    one_hot = F.one_hot(A, num_classes=layer.weight.shape[0]).to(B.dtype)
    gs = torch.einsum("n...i,n...j->n...ij", one_hot, B)

    _create_or_extend_grad_sample(
        layer.weight, torch.einsum("n...ij->nij", gs), batch_dim
    )
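A small sanity check of the per-sample gradient einsum above on a toy embedding (a sketch; shapes follow the docstring, and the cast mirrors the dtype requirement noted in the code):

emb = nn.Embedding(10, 4)
A = torch.randint(0, 10, (2, 3))       # token ids for a batch of 2
B = torch.randn(2, 3, 4)               # backpropagated grads w.r.t. the embedding output
one_hot = F.one_hot(A, num_classes=10).to(B.dtype)
gs = torch.einsum("n...i,n...j->n...ij", one_hot, B)
per_sample = torch.einsum("n...ij->nij", gs)   # (2, 10, 4): one weight-gradient per sample
assert per_sample.shape == (2, *emb.weight.shape)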
Example #6
    def forward(self, x):
        h_diff = self.conv1_tdn(roll(x, shift=-1, dim=2) - x)
        h_diff = self.maxpool(h_diff)  # x112 ->  x56
        h = self.conv1(x)   # x224 -> x112
        h = self.maxpool(h)  # x112 ->  x56
        h_diff = self.conv2_tdn(h_diff + (roll(h, shift=-1, dim=2) - h))
        h = self.conv2(h)  # x56 ->  x56
        h_diff = self.conv3_tdn(h_diff + (roll(h, shift=-1, dim=2) - h))
        h = self.conv3(h)  # x56 ->  x28
        h_diff = self.conv4_tdn(h_diff + (roll(h, shift=-1, dim=2) - h))
        h = self.conv4(h)  # x28 ->  x14
        h_diff = self.conv5_tdn(h_diff + (roll(h, shift=-1, dim=2) - h))
        h = self.conv5(h)  # x14 ->   x7

        h = self.tail(h)
        h_diff = self.tail_tdn(h_diff)
        coords, heatmaps, probabilities = None, None, None
        if self.num_coords > 0:
            coords, heatmaps, probabilities = self.coord_layers(h)

        if not self.training and self.ensemble_eval: # not fully supported yet
            h_ens = F.avg_pool3d(h, (1, self.s_dim_in//32, self.s_dim_in//32), (1, 1, 1))
            h_ens = h_ens.view(h_ens.shape[0], h_ens.shape[1], -1)
            h_ens = [self.classifier_list(h_ens[:, :, ii]) for ii in range(h_ens.shape[2])]

        h = self.globalpool(h)
        h = h.view(h.shape[0], -1)
        h_out = self.classifier_list(h)

        h_diff = self.globalpool(h_diff)
        h_diff = h_diff.view(h_diff.shape[0], -1)
        h_diff_out = self.classifier_list_tdn(h_diff)

        objects = None
        # if self.num_objects:
        #     objects = [self.__getattr__('object_presence_layer_{}'.format(ii))(h) for ii in range(len(self.num_objects))]
        cat_obj = None
        # if self.num_obj_cat:
        #     cat_obj = [self.__getattr__('objcat_presence_layer_{}'.format(ii))(h) for ii in range(len(self.num_obj_cat))]
        if not self.training and self.ensemble_eval:
            h_out = [h_out, h_ens]
        else:
            h_out = [h_out, h_diff_out]
        return h_out, coords, heatmaps, probabilities, objects, cat_obj
Example #7
def interpolate_batch(frames, factor):

    frame0 = torch.stack(frames[:-1])
    frame1 = torch.stack(frames[1:])

    i0 = frame0.to(device)
    i1 = frame1.to(device)
    ix = torch.cat([i0, i1], dim=1)

    flow_out = flow(ix)
    f01 = flow_out[:, :2, :, :]
    f10 = flow_out[:, 2:, :, :]

    frame_buffer = []
    for i in range(1, factor):
        t = i / factor
        temp = -t * (1 - t)
        co_eff = [temp, t * t, (1 - t) * (1 - t), temp]

        ft0 = co_eff[0] * f01 + co_eff[1] * f10
        ft1 = co_eff[2] * f01 + co_eff[3] * f10

        gi0ft0 = back_warp(i0, ft0)
        gi1ft1 = back_warp(i1, ft1)

        iy = torch.cat((i0, i1, f01, f10, ft1, ft0, gi1ft1, gi0ft0), dim=1)
        io = interp(iy)

        ft0f = io[:, :2, :, :] + ft0
        ft1f = io[:, 2:4, :, :] + ft1
        vt0 = torch.sigmoid(io[:, 4:5, :, :])  # F.sigmoid is deprecated
        vt1 = 1 - vt0

        gi0ft0f = back_warp(i0, ft0f)
        gi1ft1f = back_warp(i1, ft1f)

        co_eff = [1 - t, t]

        ft_p = (co_eff[0] * vt0 * gi0ft0f + co_eff[1] * vt1 * gi1ft1f) / \
               (co_eff[0] * vt0 + co_eff[1] * vt1)

        frame_buffer.append(ft_p)

    return frame_buffer
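A hedged usage note: `flow`, `interp`, `back_warp`, and `device` are module-level globals in the source (the flow-estimation net, the refinement net, and the backward-warping module), so this only sketches the expected shapes:

# frames: list of N tensors of shape (3, H, W), consecutive video frames
# mids = interpolate_batch(frames, factor=4)
# -> list of factor-1 = 3 batches, each of shape (N-1, 3, H, W), holding the
#    interpolated frames at t = 1/4, 2/4, 3/4 between every adjacent pair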
Example #8
    def forward(self, s, enc_output):
        """
        :param s: (batch_size, dec_hid_dim)
        :param enc_output: (seq_len, batch_size, enc_hidden_dim)
        :return:
        """
        batch_size = enc_output.shape[1]
        src_len = enc_output.shape[0]

        # nn.Linear expects the batch dimension first, so reshape:
        #   s          -> (batch_size, seq_len, dec_hid_dim)
        #   enc_output -> (batch_size, seq_len, enc_hidden_dim)
        s = s.unsqueeze(1).repeat(1, src_len, 1)
        enc_output = enc_output.transpose(0, 1)

        # (batch_size, seq_len, 1).squeeze(2) -> (batch_size, seq_len)
        score = self.v(torch.tanh(self.attn(torch.cat((s, enc_output), dim=2)))).squeeze(2)

        return F.softmax(score, dim=1)
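A hedged shape check for the additive attention above (the constructor is assumed to define `self.attn` mapping dec_hid_dim + enc_hidden_dim to an attention dimension, and `self.v` mapping that to a scalar):

s = torch.randn(4, 32)              # (batch_size, dec_hid_dim)
enc_output = torch.randn(7, 4, 64)  # (seq_len, batch_size, enc_hidden_dim)
# a = attention(s, enc_output)      # -> (batch_size, seq_len), each row sums to 1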
Example #9
def test(model, loader):
    global device
    model.eval()
    loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss += F.nll_loss(output, target,
                               reduction='sum').item()  # sum up batch loss
            pred = output.max(
                dim=1,
                keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).cpu().sum().item()

    loss /= len(loader.dataset)
    return loss, correct / len(loader.dataset)
Example #10
    def forward(self, lstm_output):
        # page 1482 top right
        # eq 5, with tanh (from our report)
        u_it = torch.tanh(self.dropout(self.word_attn(lstm_output)))
        # eq 6
        a_it = F.softmax(self.dropout(self.context_vec(u_it)), dim=1)
        # eq 7: scale each hidden state by its attention weight
        # (this loop is equivalent to the broadcast product a_it * lstm_output)
        attns = torch.Tensor().to(device)
        for (h, a) in zip(lstm_output, a_it):
            h_i = a * h
            h_i = h_i.unsqueeze(0)
            # collect the attended states
            attns = torch.cat([attns, h_i])

        s_i = torch.sum(self.dropout(attns), 1)
        # unsqueeze to give back to FC layers
        s_i = s_i.unsqueeze(0)

        return s_i, attns
Example #11
 def __call__(self, outputs, targets):
     loss_dice = 0
     eps = 1e-7
     smooth = 1.
     outputs = F.softmax(outputs, dim=1)
     for cls in range(self.num_classes):
         jaccard_target = (targets == cls).float()
         jaccard_output = outputs[:, cls]
         intersection = (jaccard_output * jaccard_target).sum()
         if self.class_weights is not None:
             w = self.class_weights[cls]
         else:
             w = 1.
         union = jaccard_output.sum() + jaccard_target.sum()
         #                loss -= torch.log((intersection + eps) / (union - intersection + eps)) * self.jaccard_weight
         loss_dice += w * (1 - (2. * intersection + smooth) /
                           (union + smooth + eps))
         # three kinds of loss formulas: (1) 1 - iou (2) -iou (3) -torch.log(iou)
     return loss_dice / self.num_classes
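A hedged usage sketch for the Dice loss above (the constructor is an assumption; the class only needs `num_classes` and optional `class_weights`):

# criterion = SoftDiceLoss(num_classes=3)    # hypothetical constructor
outputs = torch.randn(4, 3, 32, 32)          # raw logits, (B, C, H, W)
targets = torch.randint(0, 3, (4, 32, 32))   # class indices, (B, H, W)
# loss = criterion(outputs, targets)         # scalar, averaged over classes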
Example #12
    def forward(self, h, encoder_out):
        attended_encoder_out = torch.zeros_like(encoder_out)
        for seq_index in range(self.max_seq_len):
            cat_for_attn = torch.cat((h[seq_index], encoder_out[seq_index]), 1)
            attn = self.attn(cat_for_attn)
            attn = F.softmax(attn, dim=1)
            attn_applied = torch.bmm(attn.unsqueeze(1),
                                     torch.transpose(encoder_out, 0, 1))
            temp_encoder_out = torch.cat((h[seq_index], attn_applied[:, 0, :]),
                                         1)
            attended_encoder_out[seq_index] = self.attn_combine(
                temp_encoder_out)

        batch_size = h.size(1)
        h0 = torch.zeros(1, batch_size, self.hidden_size, device=h.device)
        c0 = torch.zeros(1, batch_size, self.hidden_size, device=h.device)
        # Decode the attended sequence; feeding raw `h` here would discard the attention computed above.
        decoder_out, (ht, ct) = self.decoder(attended_encoder_out, (h0, c0))

        return decoder_out
Example #13
 def forward(self, x):
     out = F.relu(self.conv1(x))
     out = F.max_pool2d(out, 2)
     out = F.relu(self.conv2(out))
     out = F.max_pool2d(out, 2)
     out = out.view(out.size(0), -1)
     # print(out.data.size())
     out = F.relu(self.fc1(out))
     out = F.relu(self.fc2(out))
     out = out.view(out.size(0), -1)
     # print(out.data.size())
     out = self.fc3(out)
     # out = self.sig(out)
     return out
Example #14
    def forward(self, x):
        h = self.conv1(x)   # x224 -> x112
        h = self.maxpool(h)  # x112 ->  x56
        h = self.conv2(h)  # x56 ->  x56
        h = self.conv3(h)  # x56 ->  x28
        h = self.conv4(h)  # x28 ->  x14

        # local branch
        h2 = self.conv5_2(h)
        h2 = F.interpolate(h2, scale_factor=(1, 2, 2), mode='trilinear')
        h2 = torch.cat([h, h2], dim=1)

        h = self.conv5(h)  # x14 ->   x7
        h = self.tail(h)
        coords, heatmaps, probabilities = None, None, None
        if self.num_coords > 0:
            coords, heatmaps, probabilities = self.coord_layers(h)

        # if not self.training and self.ensemble_eval: # not fully supported yet
        #     h_ens = F.avg_pool3d(h, (1, self.s_dim_in//32, self.s_dim_in//32), (1, 1, 1))
        #     h_ens = h_ens.view(h_ens.shape[0], h_ens.shape[1], -1)
        #     h_ens = [self.classifier_list(h_ens[:, :, ii]) for ii in range(h_ens.shape[2])]

        h_ch, h_max = self.dfb_classifier_list(h2)
        # h_ch = self.dfb_classifier_list(h)

        h = self.globalpool(h)
        h = h.view(h.shape[0], -1)
        h_out = self.classifier_list(h)

        objects = None
        # if self.num_objects:
        #     objects = [self.__getattr__('object_presence_layer_{}'.format(ii))(h) for ii in range(len(self.num_objects))]
        cat_obj = None
        # if self.num_obj_cat:
        #     cat_obj = [self.__getattr__('objcat_presence_layer_{}'.format(ii))(h) for ii in range(len(self.num_obj_cat))]
        # if not self.training and self.ensemble_eval:
        #     return h_out, h_ens, coords, heatmaps, probabilities, objects, cat_obj
        h_out = [h_out, h_ch, h_max]
        # h_out = h_ch
        # h_out = [out + ch + hmax for out, ch, hmax in zip(h_out, h_ch, h_max)]
        return h_out, coords, heatmaps, probabilities, objects, cat_obj
Example #15
 def attention(
     self,
     char_embed_matrix,
     batch_size,
     hid,
 ):
     att = self.v_c(
         torch.tanh(
             self.Wchar(char_embed_matrix.contiguous().view(
                 -1, char_embed_matrix.size(2))) +
             self.Wh(hid.squeeze(0))))  # (b*6,2h) + (b*6,2h) --> (b*6,1)
     # print(att.size())
     # print(hid.size())
     attn_score = F.softmax(att.view(batch_size,
                                     hid.squeeze(0).size()[-1]),
                            dim=1)  # (b, 1)
     # char_attn = torch.bmm(attn_score.unsqueeze(0), hid)  # [b x 1 x 6] * [b x 6 x hidden*2]
     char_attn = attn_score.unsqueeze(0) * hid
     char_attn = char_attn.squeeze(1)  # [x b hidden*2]
     return char_attn
Example #16
    def forward(self, input, sample=False, calculate_log_probs=False):
        # 1. Sample weights and bias from variational posterior
        if self.training or sample:
            weight = self.weight.sample()
            bias = self.bias.sample()
        else:
            weight = self.weight.mu
            bias = self.bias.mu

        # 2. Update log_prior and log_posterior according to current approximation
        if self.training or calculate_log_probs:
            self.log_prior = self.weight_prior.log_prob(
                weight) + self.bias_prior.log_prob(bias)
            self.log_variational_posterior = self.weight.log_prob(
                weight) + self.bias.log_prob(bias)
        else:
            self.log_prior, self.log_variational_posterior = 0, 0

        # 3. Do a forward pass through the layer
        return F.linear(input, weight, bias)
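The forward above assumes `self.weight` and `self.bias` expose `mu`, `sample()`, and `log_prob()`. A minimal sketch of such a Gaussian variational posterior in the Bayes-by-Backprop style (the class name and details are assumptions, not necessarily the source repo's exact code):

class Gaussian:
    """Diagonal Gaussian with the reparameterization trick."""
    def __init__(self, mu, rho):
        self.mu = mu    # nn.Parameter: mean
        self.rho = rho  # nn.Parameter: pre-softplus scale

    @property
    def sigma(self):
        return torch.log1p(torch.exp(self.rho))  # softplus keeps sigma > 0

    def sample(self):
        eps = torch.randn_like(self.rho)          # reparameterization trick
        return self.mu + self.sigma * eps

    def log_prob(self, value):
        return torch.distributions.Normal(self.mu, self.sigma).log_prob(value).sum()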
Example #17
    def forward(self, h):
        probabilities = torch.zeros(
            0, device=h.device
        )  # torch.nn.ReLU(self.probability(torch.squeeze(h)))
        # 1. Use a 1x1 conv to get one unnormalized heatmap per location
        if self.temporal_interpolate > 1:
            h = F.interpolate(h,
                              scale_factor=(self.temporal_interpolate, 1, 1),
                              mode='trilinear')
        unnormalized_heatmaps = self.hm_conv(h)
        # 2. Transpose the heatmap volume to keep the temporal dimension in the volume
        unnormalized_heatmaps.transpose_(2, 1).transpose_(1, 0)
        # 3. Normalize the heatmaps
        heatmaps = [dsntnn.flat_softmax(uhm) for uhm in unnormalized_heatmaps]
        # 4. Calculate the coordinates
        coords = [dsntnn.dsnt(hm) for hm in heatmaps]
        heatmaps = torch.stack(heatmaps, 1)
        coords = torch.stack(coords, 1)

        return coords, heatmaps, probabilities
Example #18
def lovasz_loss_flat(logits, labels, error_func):
    """
    Binary Lovasz hinge loss
      logits: [P] Tensor, logits at each prediction (between -inf and +inf)
      labels: [P] Tensor, binary ground truth labels (0 or 1)
      error_func: callable mapping (logits, labels) to per-prediction errors
    """
    if len(labels) == 0:
        # only void pixels, the gradients should be 0
        return logits.sum() * 0.

    errors = error_func(logits, labels)

    errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
    perm = perm.data
    gt_sorted = labels[perm]
    grad = lovasz_grad(gt_sorted)
    # loss = torch.dot(F.relu(errors_sorted), grad)
    loss = torch.dot(F.elu(errors_sorted) + 1, grad)  # the autograd.Variable wrapper is obsolete
    return loss
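The `lovasz_grad` helper called above computes the gradient of the Lovász extension of the Jaccard index for errors sorted in decreasing order; a sketch following the reference implementation (Berman et al.):

def lovasz_grad(gt_sorted):
    """Gradient of the Lovasz extension w.r.t. sorted errors."""
    p = len(gt_sorted)
    gts = gt_sorted.sum()
    intersection = gts - gt_sorted.float().cumsum(0)
    union = gts + (1 - gt_sorted).float().cumsum(0)
    jaccard = 1. - intersection / union
    if p > 1:  # difference the cumulative Jaccard to get per-position gradients
        jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
    return jaccard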
Example #19
    def update_from_memory(self):
        if isinstance(self.memory, WeightedMemory):
            tree_idx, batch, sample_weights = self.memory.sample(self.batch_size)
            sample_weights = torch.tensor(sample_weights, device=device)
        else:
            batch = self.memory.sample(self.batch_size)
        batch_t = Transition(*zip(*batch))  # transposed batch
        s_batch, a_batch, r_batch, done_batch, s_next_batch = batch_t
        s_batch = torch.cat(s_batch)
        a_batch = torch.stack(a_batch)
        r_batch = torch.stack(r_batch).view(-1, 1)
        s_next_batch = torch.cat(s_next_batch)
        done_batch = torch.stack(done_batch).view(-1, 1)
        q = self._state_action_value(s_batch, a_batch)

        # Get Actual Q values

        double_actions = self.policy_net(s_next_batch).max(1)[1].detach()  # used for double q learning
        q_next = self._state_action_value(s_next_batch, double_actions)

        q_next_actual = (~done_batch) * q_next  # Removes elements that are done
        q_target = r_batch + self.gamma * q_next_actual
        ###TEST if clamping works or is even good practise
        q_target = q_target.clamp(-1, 1)
        ###/TEST

        if isinstance(self.memory, WeightedMemory):
            absolute_loss = torch.abs(q - q_target).detach().cpu().numpy()
            loss = weighted_smooth_l1_loss(
                q, q_target, sample_weights
            )  # TODO fix potential non-linearities using huber loss
            self.memory.batch_update(tree_idx, absolute_loss)

        else:
            loss = F.smooth_l1_loss(q, q_target)

        self.optim.zero_grad()
        loss.backward()
        for param in self.policy_net.parameters():  # see if this ends up doing anything - should just be relu
            param.grad.data.clamp_(-1, 1)
        self.optim.step()
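`weighted_smooth_l1_loss` is referenced but not defined here; a plausible sketch (an assumption, not the source's definition) scales the per-element Huber loss by the importance-sampling weights:

def weighted_smooth_l1_loss(input, target, weights):
    # per-element Huber loss, weighted per sample before averaging
    loss = F.smooth_l1_loss(input, target, reduction='none')
    return (loss * weights.view(-1, 1)).mean()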
Example #20
    def forward(self, s):
        x = self.preprocess(s)
        x = F.leaky_relu(self.bn1(self.conv1(x)))
        x = F.leaky_relu(self.bn2(self.conv2(x)))
        x = F.leaky_relu(self.bn3(self.conv3(x)))
        x = F.leaky_relu(self.bn4(self.conv4(x)))
        x = F.leaky_relu(self.bn5(self.conv5(x)))
        x = F.leaky_relu(self.bn6(self.conv6(x)))

        value = self.value(self.value_fc(x.view(x.size(0), -1)))
        advantage = self.advantage(self.advantage_fc(x.view(x.size(0), -1)))

        output = value + (advantage -
                          torch.mean(advantage, dim=1, keepdim=True))
        return output
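The last two lines implement the dueling aggregation Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a')); subtracting the mean advantage keeps the value and advantage streams identifiable.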
Example #21
    def forward(self, s):
        x = self.preprocess(s)
        x = F.leaky_relu(self.bn1(self.conv1(x)))
        x = F.leaky_relu(self.bn2(self.conv2(x)))
        x = F.leaky_relu(self.bn3(self.conv3(x)))
        # x = x.view(x.size(0), -1)

        policy = F.leaky_relu(self.policy_bn(self.conv_policy(x))).view(x.size(0), -1)
        policy = self.softmax(self.linear_policy(policy))

        value = F.leaky_relu(self.value_bn(self.conv_value(x))).view(x.size(0), -1)
        value = F.leaky_relu(self.fc_value(value))
        value = torch.tanh(self.linear_output(value))

        return policy, value
Example #22
    def forward(self, query, keys, keys_length):
        """
        Parameters
        ----------
        query: 2D tensor, [B, H]
        keys: 3D tensor, [B, T, H]
        keys_length: 1D tensor, [B]

        Returns
        -------
        outputs: 2D tensor, if return_scores=False [B, H], otherwise [B, T]
        """
        batch_size, max_length, dim = keys.size()

        query = query.unsqueeze(1).expand(-1, max_length, -1)

        din_all = torch.cat([query, keys, query - keys, query * keys], dim=-1)

        din_all = din_all.view(batch_size * max_length, -1)

        outputs = self.mlp(din_all)

        outputs = self.fc(outputs).view(batch_size, max_length)  # [B, T]

        # Scale
        outputs = outputs / (dim**0.5)

        # Mask
        mask = (torch.arange(max_length, device=keys_length.device).repeat(
            batch_size, 1) < keys_length.view(-1, 1))
        outputs[~mask] = -np.inf

        # Activation
        outputs = F.softmax(outputs, dim=1)  # [B, T]

        if not self.return_scores:
            # Weighted sum
            outputs = torch.matmul(outputs.unsqueeze(1),
                                   keys).squeeze()  # [B, H]

        return outputs
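A hedged shape sketch for the DIN-style attention above (construction of the module is assumed):

query = torch.randn(4, 8)                    # [B, H]
keys = torch.randn(4, 10, 8)                 # [B, T, H]
keys_length = torch.tensor([10, 7, 3, 1])    # [B]
# out = attention(query, keys, keys_length)  # [B, H], or [B, T] of normalized
#                                            # scores when return_scores=True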
Example #23
    def forward(self, frames, seglens, x, node_mask):
        """
        frames [B, seg, vdim] segfeats
        seglens [B]
        x [B, len, wdim] wordfeats
        node_mask [B, len] wordmasks
        """
        frames_len = frames.shape[1]
        #attentive
        x1_att, x2_att, _, _ = self.atten(frames, x, node_mask)
        x1_m, x2_m = x1_att, x2_att  # self.message_v(x1_att), self.message_s(x2_att)
        frames1 = self.update_v(x1_m, frames)
        x1 = self.update_s(x2_m, x)

        x1_m, _, a1, _ = self.intra_v(frames1, frames1, node_mask)
        x2_m, _, a2, _ = self.intra_s(x1, x1, node_mask)
        frames1 = self.update_v_intra(x1_m, frames1)
        x1 = self.update_s_intra(x2_m, x1)
        
        """
        Below is what exactly appeared in CSMGAN's offical code
        """
        #layer 2
        #x1_att, x2_att, a1, a2 = self.atten(frames1, x1, node_mask)
        #x1_m, x2_m = x1_att, x2_att#self.message_v(x1_att), self.message_s(x2_att)
        #frames1 = self.update_v(x1_m, frames1)
        #x1 = self.update_s(x2_m, x1)
        #x1_m, _, a1, _ = self.intra_v(frames1, frames1, node_mask)
        #x2_m, _, a2, _ = self.intra_s(x1, x1, node_mask)
        #frames1 = self.update_v_intra(x1_m, frames1)
        #x1 = self.update_s_intra(x2_m, x1)
        
        #frames1, x1 = frames, x
        #a1, a2 = 1, 1
        # interactive
        x1 = self.v2s(frames1, x1, node_mask)
        x = torch.cat([frames1, x1], -1) #x1
        x = self.rnn(x, seglens, frames_len)
        x = F.dropout(x, self.dropout, self.training)
        
        return x
Example #24
def model_training(model,
                   device,
                   train_dataloader,
                   optimizer,
                   train_acc,
                   train_losses,
                   l1_loss=False):

    model.train()
    pbar = tqdm(train_dataloader)
    correct = 0
    processed = 0

    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        y_pred = model(data)
        loss = F.nll_loss(y_pred, target)

        # If L1 regularization is enabled
        if l1_loss:
            lambda_l1 = 0.0001
            l1 = 0
            for p in model.parameters():
                l1 = l1 + p.abs().sum()
            # Apply the penalty once, after summing over all parameters
            # (applying it inside the loop would compound it per parameter).
            loss = loss + lambda_l1 * l1

        train_losses.append(loss.item())  # store the float, not the graph-holding tensor
        loss.backward()
        optimizer.step()

        pred = y_pred.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)

        pbar.set_description(
            desc=
            f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}'
        )
        train_acc.append(100 * correct / processed)
        torch.save(model.state_dict(), model_dir)  # checkpoints after every batch; `model_dir` is a module-level path
Example #25
def test(network, testloader, writer, epoch, i):
    network.eval()
    test_loss = 0
    correct = 0
    test_losses = []
    with torch.no_grad():
        for data, target in testloader:
            data = data.to(network.device)    # .to() already places the tensor; the extra .cuda() was redundant
            target = target.to(network.device)
            output = network(data)
            # size_average is deprecated; reduction='sum' accumulates the batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()

        test_loss /= len(testloader.dataset)
        writer.add_scalar('Test loss', test_loss, epoch * len(testloader) + i)
        test_losses.append(test_loss)
        print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
              format(test_loss, correct, len(testloader.dataset),
                     100. * correct / len(testloader.dataset)))
    return
Example #26
 def compose(self, word1, word2):
     """
     Composes two input representations with the transformation weighting model.
     If ``self.normalize_embeddings`` is True, the composed representation is
     normalized to unit norm.
     :param word1: the representation of the first word (torch tensor)
     :param word2: the representation of the second word (torch tensor)
     :return: the composed vector representation, optionally normalized
     """
     composed_phrase = transweigh(
         word1=word1,
         word2=word2,
         transformation_tensor=self.transformation_tensor,
         transformation_bias=self.transformation_bias,
         combining_bias=self.combining_bias,
         combining_tensor=self.combining_tensor,
         dropout_rate=self.dropout_rate,
         training=self.training)
     if self.normalize_embeddings:
         composed_phrase = F.normalize(composed_phrase, p=2, dim=1)
     return composed_phrase
Example #27
    def compute_critic_loss(self, trajectory):
        states = [transition[0] for transition in trajectory]
        rewards = [transition[2] for transition in trajectory]
        dones = [transition[3] for transition in trajectory]

        discounted_rewards = []
        cumulative_reward = 0
        for step in reversed(range(len(rewards))):
            cumulative_reward = rewards[step] + self.gamma * cumulative_reward * (1 - int(dones[step]))
            discounted_rewards.insert(0, cumulative_reward)
        discounted_rewards = torch.FloatTensor(discounted_rewards).to(self.device)

        # discounted_rewards already includes the immediate reward, so it serves as
        # the target on its own; adding `rewards` again would double-count r_t.
        target_values = discounted_rewards.view(-1, 1)

        states = torch.cat(states).to(self.device)
        _, actual_values = self.model.forward(states)

        critic_loss = F.l1_loss(actual_values, target_values.view(-1, 1))
        advantage = target_values - actual_values

        return critic_loss, advantage.detach()
Example #28
    def train(self, states, actions, qtargets):
        with autograd.detect_anomaly():
            # transform to torch tensors
            states = torch.from_numpy(states).float().to(self._device)
            actions = torch.from_numpy(actions).float().to(self._device)
            qtargets = torch.from_numpy(qtargets).float().to(self._device)

            # compute q-values for Q(s,a), where s,a come from the given ...
            # states and actions batches passed along the q-targets
            _qvalues = self._backbone([states, actions])

            # compute loss for the critic
            self._optimizer.zero_grad()
            _lossCritic = F.mse_loss(_qvalues, qtargets)
            _lossCritic.backward()
            if self._backbone.config.clipGradients:
                # clip_grad_norm (without the trailing underscore) is deprecated
                nn.utils.clip_grad_norm_(
                    self._backbone.parameters(),
                    self._backbone.config.gradientsClipNorm)
            # take a step with the optimizer
            self._optimizer.step()
Example #29
    def evaluate(self, sample, model_out):
        depth_pred = model_out[1][0][1]
        label = sample[1]
        loss_fn = l2_loss if self.use_l2 else l1_loss

        # Resize ground-truth appropriately
        out_res = depth_pred.shape[2]
        label = F.interpolate(label.float(), size=out_res)[:, 2]

        # Remap 0 to max dist
        label[label == 0] = label.max()

        loss, d_thr, d_strict, rmse = depth_loss_and_accuracy(
            depth_pred[:, 0].float(), label / 25, loss_fn=loss_fn)

        return loss, {
            'accuracy': d_thr,
            'depth_loss': loss,
            'rmse': rmse,
            'd_strict': d_strict
        }
Example #30
 def __call__(self, scores2d, ious2d):
     # clamp() arguments: input (Tensor), min (lower bound), max (upper bound),
     # out (Tensor, optional). The line below clips the scaled IoUs to [0, 1].
     ious2d = self.scale(ious2d).clamp(0, 1)
     # binary_cross_entropy_with_logits accepts input of any shape, with a target
     # of the same shape whose values must lie in [0, 1] (otherwise the loss can
     # come out negative). It is cross entropy with a sigmoid applied to the
     # input internally, so the input need not be in [0, 1]; the default
     # reduction is 'mean'.
     return F.binary_cross_entropy_with_logits(
         # masked_select picks the entries where the boolean mask is set and
         # returns them as a new 1D tensor that does not share memory with the
         # input; the mask must broadcast against the input, though the shapes
         # need not match exactly.
         scores2d.masked_select(self.mask2d),
         ious2d.masked_select(self.mask2d))