Beispiel #1
0
 def gradient_default(self, X, Y):
     """
     Compute analytic gradients of the mean-squared-error loss for the
     affine model ``y = W x + w0`` with respect to ``W`` and ``w0``.

     :param X: input batch; ``X.size()[1]`` is read as the sample count N,
         so columns appear to index samples — TODO confirm layout.
     :param Y: target batch, assumed to use the same column-per-sample
         layout as ``X``.
     :return: tuple ``(dL/dW, dL/dw0)``.
     """
     # Number of samples taken from the second dimension of X.
     N = X.size()[1]
     # Broadcast model parameters along a new batch dimension so the
     # per-sample residuals can be formed with batched matrix products.
     W_ext = unsqueeze(self.forward_model.W, 0).expand(N, -1, -1)
     w0_ext = unsqueeze(self.forward_model.w0, 0).expand(N, -1, -1)
     # Reshape X and Y so each sample becomes its own (features x 1) matrix.
     X_ext = transpose(unsqueeze(X, 0), 0, 2)
     Y_ext = transpose(unsqueeze(Y, 0), 0, 2)
     # NOTE(review): synchronizing before a pure computation looks
     # unnecessary — presumably left over from timing/debugging; confirm.
     cuda.synchronize()
     return (
         # dL/dW = (2/N) * sum_i (W x_i + w0 - y_i) x_i^T
         torch_sum(bmm(
             bmm(W_ext, X_ext) + w0_ext - Y_ext, transpose(X_ext, 1, 2)),
                   dim=0) * 2 / N,  # W gradient
         # dL/dw0 = (2/N) * sum_i (f(x_i) - y_i), kept as a column vector
         unsqueeze(torch_sum(self.forward_model(X) - Y, dim=1) * 2 / N,
                   1)  # w0 gradient
     )
Beispiel #2
0
 def lossMAE(self, v, t):
   """
   Mean-absolute-error style loss: the summed absolute difference
   between prediction and target (no averaging).

   :param v: predicted tensor
   :param t: target tensor
   :return: scalar tensor, sum(|v - t|)
   """
   residual = v - t
   return torch_sum(torch_abs(residual))
Beispiel #3
0
    def forward(self, input, target, mask):
        """
        Masked negative log-likelihood over gathered token scores.

        ``input`` holds per-token scores (batch x time x vocab); ``target``
        holds token indices and ``mask`` marks valid positions.  Returns
        the mask-weighted average of ``-input[b, t, target[b, t]]``.
        """
        # Collapse an extra leading dimension, if present, so both target
        # and mask are 2-D.
        if target.ndim == 3:
            target = target.reshape(-1, target.shape[2])
            mask = mask.reshape(-1, mask.shape[2])

        # Clip target/mask to the number of time steps actually predicted.
        steps = input.size(1)
        target = target[:, :steps]
        mask = mask[:, :steps]

        # Pick out the score of each gold token, negate it, and zero the
        # masked-out positions.
        picked = input.gather(2, target.unsqueeze(2)).squeeze(2)
        masked_nll = -picked * mask

        # Normalize by the number of unmasked tokens.
        return torch_sum(masked_nll) / torch_sum(mask)
Beispiel #4
0
 def lossMSE(self, v, t):
   """
   Mean-squared-error style loss: the summed squared difference
   between prediction and target (no averaging).

   :param v: predicted tensor
   :param t: target tensor
   :return: scalar tensor, sum((v - t)^2)
   """
   diff = v - t
   return torch_sum(diff * diff)
 def update(self, state: np.array, reward_baseline: Tensor,
            action: np.array):
     # REINFORCE-style policy update: one optimizer step on
     # -log pi(a|s) * (reward - baseline); returns the scalar loss.
     state_tensor = FloatTensor(state).to(device=self.device)
     action_tensor = FloatTensor(np.array(
         action, dtype=np.float32)).to(device=self.device)
     # NOTE(review): misplaced docstring — it sits after the first two
     # statements, so Python treats it as a plain no-op expression.
     """ Update logic from the Policy Gradient theorem. """
     # Forward pass: the model outputs action probabilities for the state.
     action_probabilities = self.model(state_tensor)
     action_distribution = Categorical(action_probabilities)
     # log pi(a|s) for the actions that were actually taken.
     selected_log_probabilities = action_distribution.log_prob(
         action_tensor)
     # Policy-gradient surrogate loss weighted by the baselined reward.
     loss = torch_sum(-selected_log_probabilities * reward_baseline)
     self.optimizer.zero_grad()
     loss.backward()
     self.optimizer.step()
     # Move the loss off the GPU before converting to numpy; self.device
     # is compared as a string here — presumably "cuda"/"cpu", confirm.
     if self.device == "cuda":
         return loss.detach().cpu().numpy()
     else:
         return loss.detach().numpy()
 def torch_sum2(x, y):
     """Reduce ``x`` over dimension ``y[1]`` first, then ``y[0]``."""
     inner = torch_sum(x, y[1])
     return torch_sum(inner, y[0])
 def torch_sum1(x, y):
     """Reduce ``x`` over the single dimension ``y[0]``."""
     dim = y[0]
     return torch_sum(x, dim)
Beispiel #8
0
    def forward(self, input, seq, data_gts):
        """
        Structured sequence-level loss over multiple samples per image.

        Input is either logits or log softmax — which one is expected
        depends on ``self.loss_type`` (see the per-branch comments).

        :param input: per-token scores, shape (batch, time, vocab), where
            batch = num_images * seq_per_img — TODO confirm shape.
        :param seq: sampled token sequences, shape (batch, time).
        :param data_gts: ground-truth references, one entry per image.
        :return: dict with per-sample rewards under "reward" and the
            scalar loss under "loss".
        """
        out = {}

        batch_size = input.size(0)  # batch_size = sample_size * seq_per_img
        seq_per_img = batch_size // len(data_gts)

        assert seq_per_img == self.opt.train_sample_n, seq_per_img

        # Valid-token mask, shifted right one step so the first generated
        # token is always counted.
        mask = (seq > 0).float()
        mask = torch_cat([mask.new_full((mask.size(0), 1), 1), mask[:, :-1]],
                         1)

        # External scorer produces one reward per sampled sequence.
        scores = get_scores(data_gts, seq, self.opt)
        scores = from_numpy(scores).type_as(input).view(-1, seq_per_img)
        out["reward"] = scores  # .mean()
        if self.opt.entropy_reward_weight > 0:
            # Token-level entropy of the predictive distribution, averaged
            # over valid positions, added as an exploration bonus.
            entropy = (-(F.softmax(input, dim=2) *
                         F.log_softmax(input, dim=2)).sum(2).data)
            entropy = (entropy * mask).sum(1) / mask.sum(1)
            print("entropy", entropy.mean().item())
            scores = scores + self.opt.entropy_reward_weight * entropy.view(
                -1, seq_per_img)
        # rescale cost to [0,1]
        costs = -scores
        if self.loss_type == "risk" or self.loss_type == "softmax_margin":
            costs = costs - costs.min(1, keepdim=True)[0]
            costs = costs / costs.max(1, keepdim=True)[0]
        # in principle
        # Only risk need such rescale
        # margin should be alright; Let's try.

        # Gather input: BxTxD -> BxT (score of each sampled token)
        input = input.gather(2, seq.unsqueeze(2)).squeeze(2)

        if self.loss_type == "seqnll":
            # input is logsoftmax
            # Length-normalized sequence log-probability per sample; the
            # lowest-cost sample becomes the cross-entropy target.
            input = input * mask
            input = input.sum(1) / mask.sum(1)
            input = input.view(-1, seq_per_img)

            target = costs.min(1)[1]
            output = F.cross_entropy(input, target)
        elif self.loss_type == "risk":
            # input is logsoftmax
            # Expected risk: cost weighted by renormalized sequence
            # probabilities.
            input = input * mask
            input = input.sum(1)
            input = input.view(-1, seq_per_img)

            # NOTE(review): F.softmax without an explicit dim is deprecated,
            # and .exp() before softmax is unusual — confirm intended.
            output = (F.softmax(input.exp()) * costs).sum(1).mean()

            # test
            # avg_scores = input
            # probs = F.softmax(avg_scores.exp_())
            # loss = (probs * costs.type_as(probs)).sum() / input.size(0)
            # print(output.item(), loss.item())

        elif self.loss_type == "max_margin":
            # input is logits
            input = input * mask
            input = input.sum(1) / mask.sum(1)
            input = input.view(-1, seq_per_img)
            # costs_star / input_star: cost and model score of the best
            # (lowest-cost) sample, used as the margin reference.
            _, __ = costs.min(1, keepdim=True)
            costs_star = _
            input_star = input.gather(1, __)
            # Hinge on the single worst margin violation per image.
            output = F.relu(costs - costs_star - input_star +
                            input).max(1)[0] / 2
            output = output.mean()

            # sanity test
            # avg_scores = input + costs
            # scores_with_high_target = avg_scores.clone()
            # scores_with_high_target.scatter_(1, costs.min(1)[1].view(-1, 1), 1e10)

            # target_and_offender_index = scores_with_high_target.sort(1, True)[1][:, 0:2]
            # avg_scores = avg_scores.gather(1, target_and_offender_index)
            # target_index = avg_scores.new_zeros(avg_scores.size(0), dtype=torch.long)
            # loss = F.multi_margin_loss(avg_scores, target_index, size_average=True, margin=0)
            # print(loss.item() * 2, output.item())

        elif self.loss_type == "multi_margin":
            # input is logits
            # Same hinge as max_margin but averaged over all violations
            # instead of taking only the worst one.
            input = input * mask
            input = input.sum(1) / mask.sum(1)
            input = input.view(-1, seq_per_img)
            _, __ = costs.min(1, keepdim=True)
            costs_star = _
            input_star = input.gather(1, __)
            output = F.relu(costs - costs_star - input_star + input)
            output = output.mean()

            # sanity test
            # avg_scores = input + costs
            # loss = F.multi_margin_loss(avg_scores, costs.min(1)[1], margin=0)
            # print(output, loss)

        elif self.loss_type == "softmax_margin":
            # input is logsoftmax
            # Cost-augmented cross-entropy against the lowest-cost sample.
            input = input * mask
            input = input.sum(1) / mask.sum(1)
            input = input.view(-1, seq_per_img)

            input = input + costs
            target = costs.min(1)[1]
            output = F.cross_entropy(input, target)

        elif self.loss_type == "real_softmax_margin":
            # input is logits
            # This is what originally defined in Kevin's paper
            # The result should be equivalent to softmax_margin
            # NOTE(review): this branch's body is identical to the
            # softmax_margin branch; only the expected input type differs.
            input = input * mask
            input = input.sum(1) / mask.sum(1)
            input = input.view(-1, seq_per_img)

            input = input + costs
            target = costs.min(1)[1]
            output = F.cross_entropy(input, target)

        elif self.loss_type == "new_self_critical":
            """
            A different self critical
            Self critical uses greedy decoding score as baseline;
            This setting uses the average score of the rest samples as baseline
            (suppose c1...cn n samples, reward1 = score1 - 1/(n-1)(score2+..+scoren) )
            """
            # Leave-one-out baseline: mean score of the other samples.
            baseline = (scores.sum(1, keepdim=True) -
                        scores) / (scores.shape[1] - 1)
            scores = scores - baseline
            # self cider used as reward to promote diversity (not working that much in this way)
            if getattr(self.opt, "self_cider_reward_weight", 0) > 0:
                _scores = get_self_cider_scores(data_gts, seq, self.opt)
                _scores = from_numpy(_scores).type_as(scores).view(-1, 1)
                _scores = _scores.expand_as(scores - 1)
                scores += self.opt.self_cider_reward_weight * _scores
            # REINFORCE-style loss: advantage-weighted negative log-prob,
            # averaged over valid tokens.
            output = -input * mask * scores.view(-1, 1)
            output = torch_sum(output) / torch_sum(mask)

        out["loss"] = output
        return out
    def test_gradients_and_parameter_updates(self):
        """
        Test that all parameters undergo loss gradient computation with
        respect to them and are subsequently updated.
        """
        # Training mode so every parameter participates in backprop.
        self.layer.train()

        # An exaggerated learning rate guarantees a visible update even
        # for parameters whose gradients are extremely weak.
        optimizer = SGD(self.layer.parameters(), lr=1e12)

        # Start from a clean slate: no gradient is defined yet for any
        # parameter.
        optimizer.zero_grad(set_to_none=True)

        # Snapshot all parameter values before any backpropagation pass.
        snapshot = {
            name: deepcopy(tensor)
            for name, tensor in self.layer.named_parameters()
        }

        # Forward pass followed by a scalar pseudo-loss (the output mean
        # is used purely for convenience).
        loss = self.layer(**self.forward_propagation_kwargs).mean()

        # Backward pass populates .grad for all trainable parameters.
        loss.backward()

        # Every trainable parameter must now carry a non-zero gradient.
        for name, tensor in self.layer.named_parameters():
            with self.subTest("gradients" + ' - ' + name):
                if tensor.requires_grad:
                    grad = tensor.grad
                    self.assertIsNotNone(grad)
                    # At least one component of the gradient vector must
                    # differ from zero.
                    self.assertNotEqual(0., torch_sum(torch_abs(grad)))

        # Apply one optimization step using the computed gradients.
        optimizer.step()

        # Every trainable parameter must have moved from its snapshot.
        for name, tensor in self.layer.named_parameters():
            with self.subTest("parameter updates" + ' - ' + name):
                if tensor.requires_grad:
                    self.assertFalse(
                        torch_equal(
                            snapshot[name],  # initial values
                            tensor  # updated values
                        ))
Beispiel #10
0
def inner_product(xs, ys):
    """
    Sum of elementwise dot products between two equal-length sequences
    of tensors.

    :param xs: iterable of tensors
    :param ys: iterable of tensors, paired with ``xs`` positionally
    :return: scalar tensor (or 0 for empty input), sum_i <x_i, y_i>
    """
    # Generator expression instead of a materialized list: same result,
    # no throwaway intermediate allocation.
    return sum(torch_sum(x * y) for x, y in zip(xs, ys))
Beispiel #11
0
    def _ssd_discrete_metrics(self, predictions, targets, is_cuda=False, *unused_args, **unused_kwargs):
        """
        Per-class detection metrics for SSD predictions, matching a
        detection to a ground-truth box when the detection's box *center*
        falls inside a still-unmatched ground-truth box (no IoU test).

        Boxes are passed positionally to ``shapely_box`` — presumably
        (x_min, y_min, x_max, y_max) corner coordinates; TODO confirm.

        :param predictions: dict of per-image tensors under 'boxes',
            'labels' and 'scores'.
        :param targets: dict of per-image tensors under 'boxes', 'labels'.
        :param is_cuda: move working tensors to the GPU when True.
        :return: (average_precisions, recalls, precisions), each a list
            with one float per class.
        """
        def __to_cuda(obj):
            # Optionally migrate a tensor to the GPU.
            if is_cuda:
                obj = obj.cuda()
            return obj

        predicted_boxes = predictions['boxes']
        predicted_labels = predictions['labels']
        predicted_class_scores = predictions['scores']

        target_boxes = targets['boxes']
        target_labels = targets['labels']

        assert len(predicted_boxes) == len(predicted_labels) == len(predicted_class_scores) == len(
            target_boxes) == len(target_labels)

        # Flatten the per-image ground-truth lists into single tensors,
        # remembering which image each box came from.
        target_images = list()
        for i in range(len(target_labels)):
            target_images.extend([i] * target_labels[i].size(0))
        target_images = __to_cuda(LongTensor(target_images))
        target_boxes = torch_cat(target_boxes, dim=0)
        target_labels = torch_cat(target_labels, dim=0)

        assert target_images.size(0) == target_boxes.size(0) == target_labels.size(0)

        # Same flattening for the predictions.
        predicted_images = list()
        for i in range(len(predicted_labels)):
            predicted_images.extend([i] * predicted_labels[i].size(0))
        predicted_images = __to_cuda(LongTensor(predicted_images))
        predicted_boxes = torch_cat(predicted_boxes, dim=0)
        predicted_labels = torch_cat(predicted_labels, dim=0)
        predicted_class_scores = torch_cat(predicted_class_scores, dim=0)

        assert predicted_images.size(0) == predicted_boxes.size(0) == predicted_labels.size(
            0) == predicted_class_scores.size(0)

        average_precisions = torch_zeros(self.num_classes, dtype=torch_float)
        recalls = torch_zeros(self.num_classes, dtype=torch_float)
        precisions = torch_zeros(self.num_classes, dtype=torch_float)
        for c in range(self.num_classes):
            # Restrict ground truth and predictions to the current class.
            target_class_images = target_images[target_labels == c]
            target_class_boxes = target_boxes[target_labels == c]

            total_objects = target_class_boxes.size(0)

            # Tracks which ground-truth boxes have already been claimed.
            target_class_boxes_detected = __to_cuda(torch_zeros(total_objects, dtype=torch_uint8))

            class_c_predicted_images = predicted_images[predicted_labels == c]
            class_c_predicted_boxes = predicted_boxes[predicted_labels == c]
            class_c_predicted_class_scores = predicted_class_scores[predicted_labels == c]
            class_c_num_detections = class_c_predicted_boxes.size(0)
            if class_c_num_detections == 0:
                continue

            # Process detections in decreasing confidence order, as a
            # precision/recall curve requires.
            class_c_predicted_class_scores, sort_ind = torch_sort(class_c_predicted_class_scores, dim=0,
                                                                  descending=True)
            class_c_predicted_images = class_c_predicted_images[sort_ind]
            class_c_predicted_boxes = class_c_predicted_boxes[sort_ind]

            true_positives = __to_cuda(torch_zeros(class_c_num_detections, dtype=torch_float))
            false_positives = __to_cuda(torch_zeros(class_c_num_detections, dtype=torch_float))
            for d in range(class_c_num_detections):
                this_detection_box = shapely_box(*class_c_predicted_boxes[d].data)
                this_image = class_c_predicted_images[d]

                # Ground-truth boxes of this class in the same image; no
                # candidates means the detection is a false positive.
                object_boxes = target_class_boxes[target_class_images == this_image]
                if object_boxes.size(0) == 0:
                    false_positives[d] = 1
                    continue

                # A detection counts as a hit when its center lies inside
                # a ground-truth box that has not been matched yet.
                ground_truth_contains_prediction_center = [
                    shapely_box(*box.data).contains(this_detection_box.centroid) for box in object_boxes]
                for ind, prediction_center_in_ground_truth in enumerate(ground_truth_contains_prediction_center):
                    # Map the within-image index back to the class-level index.
                    original_ind = LongTensor(range(target_class_boxes.size(0)))[target_class_images == this_image][ind]

                    if prediction_center_in_ground_truth:
                        if target_class_boxes_detected[original_ind] == 0:
                            true_positives[d] = 1
                            target_class_boxes_detected[original_ind] = 1
                        else:
                            # Duplicate detection of an already-claimed box.
                            false_positives[d] = 1
                    else:
                        false_positives[d] = 1

            # Cumulative precision/recall along the confidence ranking.
            cumul_true_positives = torch_cumsum(true_positives, dim=0)
            cumul_false_positives = torch_cumsum(false_positives, dim=0)
            cumul_precision = cumul_true_positives / (cumul_true_positives + cumul_false_positives + 1e-10)
            cumul_recall = cumul_true_positives / total_objects

            # 11-point interpolated average precision (PASCAL VOC style).
            recall_thresholds = [x / 10 for x in range(11)]
            interpolated_precisions = __to_cuda(torch_zeros((len(recall_thresholds)), dtype=torch_float))
            for i, threshold in enumerate(recall_thresholds):
                recalls_above_threshold = cumul_recall >= threshold
                if recalls_above_threshold.any():
                    interpolated_precisions[i] = cumul_precision[recalls_above_threshold].max()
                else:
                    interpolated_precisions[i] = 0.
            average_precisions[c] = interpolated_precisions.mean()

            total_true_positives = torch_sum(true_positives)
            # Epsilon guards avoid division by zero for empty classes.
            recalls[c] = total_true_positives / max(float(total_objects), 1e-10)
            precisions[c] = total_true_positives / max(
                total_true_positives + torch_sum(false_positives), torch_tensor(1e-10))
        return average_precisions.tolist(), recalls.tolist(), precisions.tolist()
Beispiel #12
0
    def _ssd_discrete_metrics(self, predictions, targets, iou_threshold=0.5, is_cuda=False):
        """
        Per-class detection metrics for SSD predictions, matching a
        detection to the still-unmatched ground-truth box with which it
        has the highest Jaccard (IoU) overlap above ``iou_threshold``.

        :param predictions: dict of per-image tensors under 'boxes',
            'labels' and 'scores'.
        :param targets: dict of per-image tensors under 'boxes', 'labels'.
        :param iou_threshold: minimum overlap for a detection to count as
            a true positive (strictly greater-than comparison below).
        :param is_cuda: move working tensors to the GPU when True.
        :return: (average_precisions, recalls, precisions), each a list
            with one float per class.
        """
        def __to_cuda(obj):
            # Optionally migrate a tensor to the GPU.
            if is_cuda:
                obj = obj.cuda()
            return obj

        predicted_boxes = predictions['boxes']
        predicted_labels = predictions['labels']
        predicted_class_scores = predictions['scores']

        target_boxes = targets['boxes']
        target_labels = targets['labels']

        assert len(predicted_boxes) == len(predicted_labels) == len(predicted_class_scores) == len(
            target_boxes) == len(target_labels)

        # Flatten the per-image ground-truth lists into single tensors,
        # remembering which image each box came from.
        target_images = list()
        for i in range(len(target_labels)):
            target_images.extend([i] * target_labels[i].size(0))
        target_images = __to_cuda(LongTensor(target_images))
        target_boxes = torch_cat(target_boxes, dim=0)
        target_labels = torch_cat(target_labels, dim=0)

        assert target_images.size(0) == target_boxes.size(0) == target_labels.size(0)

        # Same flattening for the predictions.
        predicted_images = list()
        for i in range(len(predicted_labels)):
            predicted_images.extend([i] * predicted_labels[i].size(0))
        predicted_images = __to_cuda(LongTensor(predicted_images))
        predicted_boxes = torch_cat(predicted_boxes, dim=0)
        predicted_labels = torch_cat(predicted_labels, dim=0)
        predicted_class_scores = torch_cat(predicted_class_scores, dim=0)

        assert predicted_images.size(0) == predicted_boxes.size(0) == predicted_labels.size(
            0) == predicted_class_scores.size(0)

        average_precisions = torch_zeros(self.num_classes, dtype=torch_float)
        recalls = torch_zeros(self.num_classes, dtype=torch_float)
        precisions = torch_zeros(self.num_classes, dtype=torch_float)
        for c in range(self.num_classes):
            # Restrict ground truth and predictions to the current class.
            target_class_images = target_images[target_labels == c]
            target_class_boxes = target_boxes[target_labels == c]

            total_objects = target_class_boxes.size(0)

            # Tracks which ground-truth boxes have already been claimed.
            target_class_boxes_detected = __to_cuda(torch_zeros(total_objects, dtype=torch_uint8))

            class_c_predicted_images = predicted_images[predicted_labels == c]
            class_c_predicted_boxes = predicted_boxes[predicted_labels == c]
            class_c_predicted_class_scores = predicted_class_scores[predicted_labels == c]
            class_c_num_detections = class_c_predicted_boxes.size(0)
            if class_c_num_detections == 0:
                continue

            # Process detections in decreasing confidence order, as a
            # precision/recall curve requires.
            class_c_predicted_class_scores, sort_ind = torch_sort(class_c_predicted_class_scores, dim=0,
                                                                  descending=True)
            class_c_predicted_images = class_c_predicted_images[sort_ind]
            class_c_predicted_boxes = class_c_predicted_boxes[sort_ind]

            true_positives = __to_cuda(torch_zeros(class_c_num_detections, dtype=torch_float))
            false_positives = __to_cuda(torch_zeros(class_c_num_detections, dtype=torch_float))
            for d in range(class_c_num_detections):
                this_detection_box = class_c_predicted_boxes[d].unsqueeze(0)
                this_image = class_c_predicted_images[d]

                # Ground-truth boxes of this class in the same image; no
                # candidates means the detection is a false positive.
                object_boxes = target_class_boxes[target_class_images == this_image]
                if object_boxes.size(0) == 0:
                    false_positives[d] = 1
                    continue

                # Best-overlapping ground-truth box for this detection.
                overlaps = find_jaccard_overlap(this_detection_box, object_boxes)
                max_overlap, ind = torch_max(overlaps.squeeze(0), dim=0)

                # Map the within-image index back to the class-level index.
                original_ind = LongTensor(range(target_class_boxes.size(0)))[target_class_images == this_image][ind]

                # A hit requires sufficient IoU with a not-yet-claimed box.
                if max_overlap.item() > iou_threshold:
                    if target_class_boxes_detected[original_ind] == 0:
                        true_positives[d] = 1
                        target_class_boxes_detected[original_ind] = 1
                    else:
                        # Duplicate detection of an already-claimed box.
                        false_positives[d] = 1
                else:
                    false_positives[d] = 1

            # Cumulative precision/recall along the confidence ranking.
            cumul_true_positives = torch_cumsum(true_positives, dim=0)
            cumul_false_positives = torch_cumsum(false_positives, dim=0)
            cumul_precision = cumul_true_positives / (cumul_true_positives + cumul_false_positives + 1e-10)
            cumul_recall = cumul_true_positives / total_objects

            # 11-point interpolated average precision (PASCAL VOC style).
            recall_thresholds = [x / 10 for x in range(11)]
            interpolated_precisions = __to_cuda(torch_zeros((len(recall_thresholds)), dtype=torch_float))
            for i, threshold in enumerate(recall_thresholds):
                recalls_above_threshold = cumul_recall >= threshold
                if recalls_above_threshold.any():
                    interpolated_precisions[i] = cumul_precision[recalls_above_threshold].max()
                else:
                    interpolated_precisions[i] = 0.
            average_precisions[c] = interpolated_precisions.mean()

            total_true_positives = torch_sum(true_positives)
            # Epsilon guards avoid division by zero for empty classes.
            recalls[c] = total_true_positives / max(float(total_objects), 1e-10)
            precisions[c] = total_true_positives / max(
                total_true_positives + torch_sum(false_positives), torch_tensor(1e-10))
        return average_precisions.tolist(), recalls.tolist(), precisions.tolist()