Example #1
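Reads a sequence of token ids from the user, pads it to max_sequence_length
with torch_zeros, and builds the matching source mask (ones over real tokens,
zeros over padding). Throughout these examples, torch functions appear under
flat aliases such as torch_zeros and torch_cat; a minimal sketch of the
imports this first snippet assumes:

from typing import Tuple

from torch import Tensor, cat as torch_cat, long as torch_long
from torch import tensor, unsqueeze as torch_unsqueeze
from torch import ones as torch_ones, zeros as torch_zeros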
def get_sequence_from_user(max_sequence_length: int) -> Tuple[Tensor, Tensor]:
    """
    Ask the user to enter a sequence of token ids and convert it to source
    token tensor and source mask tensor for feeding the model.
    """
    enter_message = (
        "\nEnter the desired source sequence token ids separated by spaces: ")

    # asking for user input and splitting it into a sequence of token ids:
    src_seq = list(map(int, input(enter_message).split()))
    n_tokens = len(src_seq)

    if n_tokens > max_sequence_length:
        # truncating the sequence if its length is higher than allowed:
        n_tokens = max_sequence_length
        src_seq = src_seq[:max_sequence_length]

    # padding the sequence up to the maximum length and converting it to the
    # expected format:
    src_seq = torch_cat(
        (
            tensor(src_seq, dtype=torch_long),  # noqa: E501 pylint: disable=not-callable
            torch_zeros((max_sequence_length - n_tokens), dtype=torch_long)),
        dim=-1)
    src_seq = torch_unsqueeze(input=src_seq, dim=0)

    # creating the sequence mask based on the padding done:
    src_seq_mask = torch_cat(
        (torch_ones((1, 1, n_tokens), dtype=torch_long),
         torch_zeros(
             (1, 1, max_sequence_length - n_tokens), dtype=torch_long)),
        dim=-1)

    return src_seq, src_seq_mask
Example #2
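An SSD-style MultiBox loss. torch_zeros pre-allocates the per-prior
ground-truth location and class tensors, which are then filled by matching
each prior to its best-overlapping object; the confidence loss combines the
positive priors with hard negative mining (neg_pos_ratio negatives per
positive), and the background class is hardcoded as label 120.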
    def forward(self, predicted_locations, predicted_scores, boxes, labels):
        batch_size = predicted_locations.size(0)
        n_priors = self.priors_cxcy.size(0)
        n_classes = predicted_scores.size(2)

        assert n_priors == predicted_locations.size(
            1) == predicted_scores.size(1)

        ground_truth_locations = torch_zeros(
            (batch_size, n_priors, 4),
            dtype=torch_float).to(self._get_device())
        ground_truth_classes = torch_zeros(
            (batch_size, n_priors), dtype=torch_long).to(self._get_device())

        for i in range(batch_size):
            n_objects = boxes[i].size(0)

            overlap = find_jaccard_overlap(boxes[i], self.priors_xy)
            overlap_for_each_prior, object_for_each_prior = overlap.max(dim=0)

            _, prior_for_each_object = overlap.max(dim=1)
            object_for_each_prior[prior_for_each_object] = LongTensor(
                range(n_objects)).to(self._get_device())
            overlap_for_each_prior[prior_for_each_object] = 1.

            label_for_each_prior = labels[i][object_for_each_prior]
            label_for_each_prior[overlap_for_each_prior < self.threshold] = 120

            ground_truth_classes[i] = label_for_each_prior
            ground_truth_locations[i] = cxcy_to_gcxgcy(
                xy_to_cxcy(boxes[i][object_for_each_prior]), self.priors_cxcy)

        positive_priors = ground_truth_classes != 120

        localization_loss = self.smooth_l1(
            predicted_locations[positive_priors],
            ground_truth_locations[positive_priors])

        n_positives = positive_priors.sum(dim=1)
        n_hard_negatives = self.neg_pos_ratio * n_positives

        conf_loss_all = self.cross_entropy(
            predicted_scores.view(-1, n_classes),
            ground_truth_classes.view(-1)).view(batch_size, n_priors)
        conf_loss_pos = conf_loss_all[positive_priors]

        conf_loss_neg = conf_loss_all.clone()
        conf_loss_neg[positive_priors] = 0.
        conf_loss_neg, _ = conf_loss_neg.sort(dim=1, descending=True)
        hardness_ranks = LongTensor(
            range(n_priors)).unsqueeze(0).expand_as(conf_loss_neg).to(
                self._get_device())
        hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(1)
        conf_loss_hard_neg = conf_loss_neg[hard_negatives]

        confidence_loss = (conf_loss_hard_neg.sum() +
                           conf_loss_pos.sum()) / n_positives.sum().float()

        return confidence_loss + self.alpha * localization_loss
Example #3
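Continuous recall and precision for an SSD detector. torch_zeros allocates
the per-image metric accumulators; predicted and target boxes are scaled to
the (hardcoded) 300x300 input size and merged into shapely unions, whose
intersection area yields the per-image scores.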
    def _ssd_continuous_metrics(self, predictions, targets, is_cuda=False):
        def __to_cuda(obj):
            if is_cuda:
                obj = obj.cuda()
            return obj

        predicted_boxes = predictions['boxes']
        target_boxes = targets['boxes']

        assert len(predicted_boxes) == len(target_boxes)
        total_images = len(target_boxes)

        image_ground_truths = LongTensor([target.size(0) for target in target_boxes])
        image_predictions = LongTensor([prediction.size(0) for prediction in predicted_boxes])
        continuous_recalls = torch_zeros(total_images, dtype=torch_float)
        continuous_precisions = torch_zeros(total_images, dtype=torch_float)
        image_dimensions = __to_cuda(LongTensor([300, 300, 300, 300]))
        for image_index in range(total_images):
            if len(target_boxes[image_index]) == 0:
                continue
            image_predicted_boxes = (predicted_boxes[image_index] * image_dimensions).tolist()
            image_target_boxes = (target_boxes[image_index] * image_dimensions).tolist()

            image_predicted_boxes = [shapely_box(*box) for box in image_predicted_boxes]
            image_target_boxes = [shapely_box(*box) for box in image_target_boxes]

            total_predictions = len(image_predicted_boxes)
            total_targets = len(image_target_boxes)
            if total_predictions == 0 or total_targets == 0:
                continue

            ground_truth_union = image_target_boxes[0]
            for image_target_box in image_target_boxes[1:]:
                ground_truth_union = ground_truth_union.union(image_target_box)

            prediction_union = image_predicted_boxes[0]
            for image_predicted_box in image_predicted_boxes[1:]:
                prediction_union = prediction_union.union(image_predicted_box)

            prediction_ground_truth_intersection = prediction_union.intersection(ground_truth_union)

            ground_truth_union = ground_truth_union.area
            prediction_union = prediction_union.area
            prediction_ground_truth_intersection = prediction_ground_truth_intersection.area

            continuous_recalls[image_index] = torch_tensor(
                prediction_ground_truth_intersection / max(ground_truth_union, 1e-10))
            continuous_precisions[image_index] = torch_tensor(
                prediction_ground_truth_intersection / max(prediction_union, 1e-10))

        overall_recall = (image_ground_truths * continuous_recalls).sum() / max(
            image_ground_truths.sum(), torch_tensor(1e-10))
        overall_precision = (image_predictions * continuous_precisions).sum() / max(
            image_predictions.sum(), torch_tensor(1e-10))

        return overall_recall.item(), overall_precision.item(), continuous_recalls.tolist(), \
               continuous_precisions.tolist()
Example #4
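The YOLO counterpart of the previous example: the same union/intersection
computation, except that boxes are extracted from raw prediction/target value
lists and used without rescaling.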
    def _yolo_continuous_metrics(self, predictions, targets):
        predicted_boxes = predictions['values']
        target_boxes = targets['values']

        predicted_boxes = [Tensor([box[2:] for box in image]) for image in predicted_boxes]
        target_boxes = [Tensor([box[1:] for box in image]) for image in target_boxes]

        assert len(predicted_boxes) == len(target_boxes)
        total_images = len(target_boxes)

        image_ground_truths = LongTensor([target.size(0) for target in target_boxes])
        image_predictions = LongTensor([prediction.size(0) for prediction in predicted_boxes])
        continuous_recalls = torch_zeros(total_images, dtype=torch_float)
        continuous_precisions = torch_zeros(total_images, dtype=torch_float)
        for image_index in range(total_images):
            image_predicted_boxes = (predicted_boxes[image_index]).tolist()
            image_target_boxes = (target_boxes[image_index]).tolist()

            image_predicted_boxes = [shapely_box(*box) for box in image_predicted_boxes]
            image_target_boxes = [shapely_box(*box) for box in image_target_boxes]

            total_predictions = len(image_predicted_boxes)
            total_targets = len(image_target_boxes)
            if total_predictions == 0 or total_targets == 0:
                continue

            ground_truth_union = image_target_boxes[0]
            for image_target_box in image_target_boxes[1:]:
                ground_truth_union = ground_truth_union.union(image_target_box)

            prediction_union = image_predicted_boxes[0]
            for image_predicted_box in image_predicted_boxes[1:]:
                prediction_union = prediction_union.union(image_predicted_box)

            prediction_ground_truth_intersection = prediction_union.intersection(ground_truth_union)

            ground_truth_union = ground_truth_union.area
            prediction_union = prediction_union.area
            prediction_ground_truth_intersection = prediction_ground_truth_intersection.area

            continuous_recalls[image_index] = torch_tensor(
                prediction_ground_truth_intersection / max(ground_truth_union, 1e-10))
            continuous_precisions[image_index] = torch_tensor(
                prediction_ground_truth_intersection / max(prediction_union, 1e-10))

        overall_recall = (image_ground_truths * continuous_recalls).sum() / max(
            image_ground_truths.sum(), torch_tensor(1e-10))
        overall_precision = (image_predictions * continuous_precisions).sum() / max(
            image_predictions.sum(), torch_tensor(1e-10))

        return overall_recall.item(), overall_precision.item(), continuous_recalls.tolist(), \
               continuous_precisions.tolist()
Example #5
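Inverse STFT of a denoised spectrogram. A torch_zeros tensor with the
original channel-data shape is passed through stft only to build a template
STFT structure, whose 'stft' field is then overwritten with the denoised
coefficients before calling istft.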
def r4_dnn_istft(target_dirname,
                 chandat_obj=None,
                 new_stft_object=None,
                 is_saving_chandat_dnn=True):
    LOGGER.info(
        '{}: r4: Doing istft on denoised stft...'.format(target_dirname))
    assert os_path_isdir(target_dirname)
    if chandat_obj is None:
        chandat_obj = loadmat(os_path_join(target_dirname, CHANDAT_FNAME))
    chandat_data = chandat_obj['chandat']
    num_rows, num_elements, num_beams = chandat_data.shape
    beam_position_x = chandat_obj['beam_position_x']
    if 'depth' in chandat_obj:
        depth = chandat_obj['depth']
    else:
        depth = chandat_obj['t'] / chandat_obj['fs'] * chandat_obj['c'] / 2
    f0 = chandat_obj['f0']

    del chandat_obj

    if new_stft_object is None:
        new_stft_object = loadmat(os_path_join(target_dirname, NEW_STFT_FNAME))
    new_stft = torch_stack(
        (torch_from_numpy(new_stft_object['new_stft_real']),
         torch_from_numpy(new_stft_object['new_stft_imag'])),
        dim=-1)
    # new_stft = new_stft_real + 1j*new_stft_imag

    del new_stft_object

    chandat_stft = stft(
        torch_zeros(num_rows, num_elements, num_beams, dtype=torch_float64),
        LEN_EACH_SECTION, FRAC_OVERLAP, PADDING)
    chandat_stft['origSigSize'] = [num_rows, num_elements, num_beams]

    # create new and old stfts
    # chandat_stft_new = copy(chandat_stft)
    # chandat_stft_new['stft'] = new_stft
    chandat_stft['stft'] = new_stft
    chandat_new = istft(chandat_stft)
    # chandat_new = non_iter_ls_inv_stft(chandat_stft)
    # what = istft(chandat_stft, N, window=window, hop_length=2, center=False, onesided=False, normalized=False, pad_mode='constant', length=y)

    chandat_new[-3:-1, :, :] = 0

    chandat_dnn_object = {
        'chandat_dnn': chandat_new,
        'beam_position_x': beam_position_x,
        'depth': depth,
        'f0': f0,
    }

    if is_saving_chandat_dnn:
        savemat(os_path_join(target_dirname, CHANDAT_DNN_SAVE_FNAME),
                chandat_dnn_object)

    LOGGER.info('{}: r4: Done'.format(target_dirname))
    return chandat_dnn_object
Example #6
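The forward pass of an RNN decoder. torch_zeros initializes the hidden state
h_t_dec and the output buffer h_j_dec, which are moved to the GPU when cuDNN
is available.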
    def forward(self, h_enc):
        """The forward pass.

        :param h_enc: The output of the RNN encoder.
        :type h_enc: torch.autograd.variable.Variable
        :return: The output of the RNN decoder (h_j_dec).
        :rtype: torch.autograd.variable.Variable
        """
        batch_size = h_enc.size()[0]
        seq_length = h_enc.size()[1]
        h_t_dec = Variable(torch_zeros(batch_size, self._input_dim))
        h_j_dec = Variable(torch_zeros(batch_size, seq_length,
                                       self._input_dim))

        if not self._debug and torch_has_cudnn:
            h_t_dec = h_t_dec.cuda()
            h_j_dec = h_j_dec.cuda()

        for ts in range(seq_length):
            h_t_dec = self.gru_dec(h_enc[:, ts, :], h_t_dec)
            h_j_dec[:, ts, :] = h_t_dec

        return h_j_dec
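Example #7
The forward pass of the matching bi-directional RNN encoder:
zeros-initialized forward and backward hidden states plus an output buffer,
with residual connections to the input inside the time loop.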
    def forward(self, v_in):
        """Forward pass.

        :param v_in: The input to the RNN encoder of the Masker.
        :type v_in: numpy.core.multiarray.ndarray
        :return: The output of the RNN encoder of the Masker.
        :rtype: torch.autograd.variable.Variable
        """
        batch_size = v_in.size()[0]
        seq_length = v_in.size()[1]

        h_t_f = Variable(torch_zeros(batch_size, self._input_dim))
        h_t_b = Variable(torch_zeros(batch_size, self._input_dim))
        h_enc = Variable(
            torch_zeros(batch_size, seq_length - (2 * self._context_length),
                        2 * self._input_dim))
        v_tr = v_in[:, :, :self._input_dim]

        if not self._debug and torch_has_cudnn:
            h_t_f = h_t_f.cuda()
            h_t_b = h_t_b.cuda()
            h_enc = h_enc.cuda()

        for t in range(seq_length):
            h_t_f = self.gru_enc_f((v_tr[:, t, :]), h_t_f)
            h_t_b = self.gru_enc_b((v_tr[:, seq_length - t - 1, :]), h_t_b)

            if self._context_length <= t < seq_length - self._context_length:
                h_t = torch_cat([
                    h_t_f + v_tr[:, t, :],
                    h_t_b + v_tr[:, seq_length - t - 1, :]
                ],
                                dim=1)
                h_enc[:, t - self._context_length, :] = h_t

        return h_enc
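Example #8
The constructor of a Transformer-style positional encoding. A torch_zeros
tensor is filled with interleaved sinusoids, PE(pos, 2i) = sin(pos /
10000^(2i/d)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d)), then registered as
a non-trainable buffer.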
    def __init__(self, token_representation_dimension: int,
                 dropout_prob: float, max_sequence_length: int) -> None:
        super(PositionalEncoding, self).__init__()
        self.dropout_layer = Dropout(p=dropout_prob)

        # defining positional signals added to embeddings:

        # initialization:
        positional_signals = torch_zeros(
            (max_sequence_length, token_representation_dimension),
            requires_grad=False
        )

        positions = torch_arange(
            start=0,
            end=max_sequence_length,
            requires_grad=False
        ).unsqueeze(dim=1)

        wave_inputs = positions * torch_exp(
            torch_arange(
                start=0, end=token_representation_dimension, step=2
            ) * (-log(10000.0) / token_representation_dimension)
        )  # i.e. positions / 10000^(2i/d), written in exponential form

        # interleaving sinusoidal and cosinusoidal components along feature
        # dimension (starting with sine), yielding positional signals for
        # all the allowed positions (for sequences up to the maximum allowed
        # length):
        positional_signals[:, 0::2] = torch_sin(wave_inputs)
        positional_signals[:, 1::2] = torch_cos(wave_inputs)
        positional_signals = positional_signals.unsqueeze(dim=0)

        # parameters not requiring backpropagation (i.e. gradient computation
        # and update):
        self.register_buffer('positional_signals', positional_signals)
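The snippet above shows only the constructor; a plausible forward pass,
sketched under the assumption that the module receives batched token
embeddings and should add the fixed signals before dropout:

    def forward(self, token_embeddings: Tensor) -> Tensor:
        # add the (fixed, non-trainable) positional signals for the actual
        # sequence length to the embeddings, then apply dropout
        return self.dropout_layer(
            token_embeddings
            + self.positional_signals[:, :token_embeddings.size(1), :])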
Example #9
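Layer-normalization parameters: the gain is initialized with torch_ones and
the bias with torch_zeros, so the layer starts as an identity transform.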
    def __init__(self, feature_dimension: int, epsilon: float = 1e-6) -> None:
        super(LayerNorm, self).__init__()
        self.alpha = Parameter(data=torch_ones(feature_dimension))
        self.beta = Parameter(data=torch_zeros(feature_dimension))
        self.epsilon = epsilon
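Only the constructor is shown; a minimal sketch of the forward pass these
parameters imply, assuming standard layer normalization over the last
dimension:

    def forward(self, x: Tensor) -> Tensor:
        # normalize over the feature dimension, then rescale and shift
        mean = x.mean(dim=-1, keepdim=True)
        std = x.std(dim=-1, keepdim=True)
        return self.alpha * (x - mean) / (std + self.epsilon) + self.beta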
Example #10
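Building YOLO training targets. torch_zeros and torch_ones initialize the
object/no-object masks and the per-anchor regression targets, which are then
filled image by image from the ground-truth data; priors whose IoU with any
object exceeds ignore_threshold are excluded from the no-object mask.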
    def _build_targets(self, predictions, target_data, feature_map_width,
                       feature_map_height):
        batch_size = target_data.size(0)
        number_of_pixels = feature_map_height * feature_map_width
        anchors_over_pixels = self.num_anchors * number_of_pixels

        default_size = (batch_size, self.num_anchors, feature_map_height,
                        feature_map_width)

        _1obj = torch_zeros(*default_size)
        _1noobj = torch_ones(*default_size)

        target_center_x_values = torch_zeros(*default_size)
        target_center_y_values = torch_zeros(*default_size)
        target_width_values = torch_zeros(*default_size)
        target_height_values = torch_zeros(*default_size)
        target_confidence_score_values = torch_zeros(*default_size)
        target_class_values = torch_zeros(*default_size)

        for image_index in range(batch_size):
            start_index = image_index * anchors_over_pixels
            end_index = (image_index + 1) * anchors_over_pixels
            predicted_bounding_boxes = predictions[start_index:end_index].t()
            ious = torch_zeros(anchors_over_pixels)

            for t in range(self.max_object):
                if target_data[image_index][t * 5 + 1] == -1:
                    break

                ground_truth_center_x = target_data[image_index][
                    t * 5 + 1] * feature_map_width
                ground_truth_center_y = target_data[image_index][
                    t * 5 + 2] * feature_map_height
                ground_truth_width = target_data[image_index][
                    t * 5 + 3] * feature_map_width
                ground_truth_height = target_data[image_index][
                    t * 5 + 4] * feature_map_height
                ground_truth_bounding_boxes = FloatTensor([
                    ground_truth_center_x, ground_truth_center_y,
                    ground_truth_width, ground_truth_height
                ])
                ground_truth_bounding_boxes = ground_truth_bounding_boxes.repeat(
                    anchors_over_pixels, 1).t()
                ious = torch_max(
                    ious,
                    intersection_over_union(True,
                                            predicted_bounding_boxes,
                                            ground_truth_bounding_boxes,
                                            is_corner_coordinates=False))
            # https://github.com/marvis/pytorch-yolo2/issues/121#issuecomment-436388664
            _1noobj[image_index][torch_reshape(ious, (
                self.num_anchors, feature_map_height,
                feature_map_width)) > self.ignore_threshold] = 0

        for image_index in range(batch_size):
            for t in range(self.max_object):
                if target_data[image_index][t * 5 + 1] == -1:
                    break

                anchor_index, ground_truth_width, ground_truth_height = self._find_most_matching_anchor(
                    feature_map_width, feature_map_height, image_index, t,
                    target_data)

                ground_truth_center_x_pixel, ground_truth_center_y_pixel, ground_truth_bounding_box = \
                    self._compose_ground_truth_data(feature_map_width, feature_map_height, ground_truth_height,
                                                    ground_truth_width, image_index, t, target_data)

                predicted_bounding_box = predictions[
                    image_index * anchors_over_pixels +
                    anchor_index * number_of_pixels +
                    ground_truth_center_y_pixel * feature_map_width +
                    ground_truth_center_x_pixel]

                iou = intersection_over_union(False,
                                              ground_truth_bounding_box,
                                              predicted_bounding_box,
                                              is_corner_coordinates=False)

                _1obj[image_index][anchor_index][ground_truth_center_y_pixel][
                    ground_truth_center_x_pixel] = 1
                _1noobj[image_index][anchor_index][
                    ground_truth_center_y_pixel][
                        ground_truth_center_x_pixel] = 0

                target_center_x_values, target_center_y_values, target_width_values, target_height_values, \
                target_confidence_score_values, target_class_values = self._set_target_values(
                    feature_map_width, feature_map_height, image_index, t, target_data, anchor_index, iou,
                    ground_truth_center_x_pixel, ground_truth_center_y_pixel, ground_truth_height, ground_truth_width,
                    target_center_x_values, target_center_y_values, target_class_values,
                    target_confidence_score_values, target_height_values, target_width_values)

        return _1obj, _1noobj, target_center_x_values, target_center_y_values, target_width_values, \
               target_height_values, target_confidence_score_values, target_class_values
Example #11
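The matching YOLO loss. MSE terms for the coordinates and confidence scores
are masked by _1obj/_1noobj, and the class term uses BCE (multilabel) or
cross-entropy; in the multilabel branch, torch_zeros builds the binary target
vector that index_fill_ then sets to 1 at the ground-truth classes.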
    def _calculate_loss(self, predicted, target, _1obj, _1noobj,
                        divide_by_mask, class_loss_reduction,
                        location_confidence_loss_reduction):
        mse_loss = MSELoss(reduction=location_confidence_loss_reduction)

        if divide_by_mask:
            total_objects = _1obj.sum()
            loss_x = self.coord_scale * (
                mse_loss(predicted['x'] * _1obj, target['x'] * _1obj) /
                total_objects) / 2.0
            loss_y = self.coord_scale * (
                mse_loss(predicted['y'] * _1obj, target['y'] * _1obj) /
                total_objects) / 2.0
            loss_w = self.coord_scale * (
                mse_loss(predicted['w'] * _1obj, target['w'] * _1obj) /
                total_objects) / 2.0
            loss_h = self.coord_scale * (
                mse_loss(predicted['h'] * _1obj, target['h'] * _1obj) /
                total_objects) / 2.0
            coordinates_loss = loss_x + loss_y + loss_w + loss_h

            object_loss = self.object_scale * (
                mse_loss(predicted['C'] * _1obj, target['C'] * _1obj) /
                total_objects) / 2.0
            no_object_loss = self.noobject_scale * (
                mse_loss(predicted['C'] * _1noobj, target['C'] * _1noobj) /
                _1noobj.sum()) / 2.0
            confidence_score_loss = object_loss + no_object_loss

            try:
                if self.is_multilabel:
                    class_loss = BCELoss(reduction=class_loss_reduction)(
                        predicted['p(c'],
                        self._to_cuda(torch_zeros(
                            predicted['p(c'].shape)).index_fill_(
                                1, target['p(c'].data.cpu().long(), 1.0))
                else:
                    class_loss = CrossEntropyLoss(
                        reduction=class_loss_reduction)(predicted['p(c)'],
                                                        target['p(c)'])
                class_loss = self.class_scale * class_loss / total_objects
            except Exception:
                class_loss = 0
        else:
            loss_x = self.coord_scale * mse_loss(predicted['x'] * _1obj,
                                                 target['x'] * _1obj) / 2.0
            loss_y = self.coord_scale * mse_loss(predicted['y'] * _1obj,
                                                 target['y'] * _1obj) / 2.0
            loss_w = self.coord_scale * mse_loss(predicted['w'] * _1obj,
                                                 target['w'] * _1obj) / 2.0
            loss_h = self.coord_scale * mse_loss(predicted['h'] * _1obj,
                                                 target['h'] * _1obj) / 2.0
            coordinates_loss = loss_x + loss_y + loss_w + loss_h

            object_loss = self.object_scale * mse_loss(
                predicted['C'] * _1obj, target['C'] * _1obj) / 2.0
            no_object_loss = self.noobject_scale * mse_loss(
                predicted['C'] * _1noobj, target['C'] * _1noobj) / 2.0
            confidence_score_loss = object_loss + no_object_loss

            try:
                if self.is_multilabel:
                    class_loss = BCELoss(reduction=class_loss_reduction)(
                        predicted['p(c'],
                        self._to_cuda(torch_zeros(
                            predicted['p(c'].shape)).index_fill_(
                                1, target['p(c'].data.cpu().long(), 1.0))
                else:
                    class_loss = CrossEntropyLoss(
                        reduction=class_loss_reduction)(predicted['p(c)'],
                                                        target['p(c)'])
                class_loss = self.class_scale * class_loss
            except Exception:
                class_loss = 0

        # Divided by 3 (number of predictors across scales - Large, Medium, Small) to give equivalent weight for each
        # predictor.
        return (coordinates_loss + confidence_score_loss + class_loss) / 3
Example #12
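Discrete detection metrics for SSD: per-class 11-point interpolated average
precision (the mean of the best precision at recalls 0, 0.1, ..., 1), recall,
and precision. torch_zeros tracks the per-class results and which
ground-truth boxes have already been detected; here a detection counts as a
true positive when its center lies inside a not-yet-matched ground-truth box.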
    def _ssd_discrete_metrics(self, predictions, targets, is_cuda=False, *unused_args, **unused_kwargs):
        def __to_cuda(obj):
            if is_cuda:
                obj = obj.cuda()
            return obj

        predicted_boxes = predictions['boxes']
        predicted_labels = predictions['labels']
        predicted_class_scores = predictions['scores']

        target_boxes = targets['boxes']
        target_labels = targets['labels']

        assert len(predicted_boxes) == len(predicted_labels) == len(predicted_class_scores) == len(
            target_boxes) == len(target_labels)

        target_images = list()
        for i in range(len(target_labels)):
            target_images.extend([i] * target_labels[i].size(0))
        target_images = __to_cuda(LongTensor(target_images))
        target_boxes = torch_cat(target_boxes, dim=0)
        target_labels = torch_cat(target_labels, dim=0)

        assert target_images.size(0) == target_boxes.size(0) == target_labels.size(0)

        predicted_images = list()
        for i in range(len(predicted_labels)):
            predicted_images.extend([i] * predicted_labels[i].size(0))
        predicted_images = __to_cuda(LongTensor(predicted_images))
        predicted_boxes = torch_cat(predicted_boxes, dim=0)
        predicted_labels = torch_cat(predicted_labels, dim=0)
        predicted_class_scores = torch_cat(predicted_class_scores, dim=0)

        assert predicted_images.size(0) == predicted_boxes.size(0) == predicted_labels.size(
            0) == predicted_class_scores.size(0)

        average_precisions = torch_zeros(self.num_classes, dtype=torch_float)
        recalls = torch_zeros(self.num_classes, dtype=torch_float)
        precisions = torch_zeros(self.num_classes, dtype=torch_float)
        for c in range(self.num_classes):
            target_class_images = target_images[target_labels == c]
            target_class_boxes = target_boxes[target_labels == c]

            total_objects = target_class_boxes.size(0)

            target_class_boxes_detected = __to_cuda(torch_zeros(total_objects, dtype=torch_uint8))

            class_c_predicted_images = predicted_images[predicted_labels == c]
            class_c_predicted_boxes = predicted_boxes[predicted_labels == c]
            class_c_predicted_class_scores = predicted_class_scores[predicted_labels == c]
            class_c_num_detections = class_c_predicted_boxes.size(0)
            if class_c_num_detections == 0:
                continue

            class_c_predicted_class_scores, sort_ind = torch_sort(class_c_predicted_class_scores, dim=0,
                                                                  descending=True)
            class_c_predicted_images = class_c_predicted_images[sort_ind]
            class_c_predicted_boxes = class_c_predicted_boxes[sort_ind]

            true_positives = __to_cuda(torch_zeros(class_c_num_detections, dtype=torch_float))
            false_positives = __to_cuda(torch_zeros(class_c_num_detections, dtype=torch_float))
            for d in range(class_c_num_detections):
                this_detection_box = shapely_box(*class_c_predicted_boxes[d].data)
                this_image = class_c_predicted_images[d]

                object_boxes = target_class_boxes[target_class_images == this_image]
                if object_boxes.size(0) == 0:
                    false_positives[d] = 1
                    continue

                ground_truth_contains_prediction_center = [
                    shapely_box(*box.data).contains(this_detection_box.centroid) for box in object_boxes]
                for ind, prediction_center_in_ground_truth in enumerate(ground_truth_contains_prediction_center):
                    original_ind = LongTensor(range(target_class_boxes.size(0)))[target_class_images == this_image][ind]

                    if prediction_center_in_ground_truth:
                        if target_class_boxes_detected[original_ind] == 0:
                            true_positives[d] = 1
                            target_class_boxes_detected[original_ind] = 1
                        else:
                            false_positives[d] = 1
                    else:
                        false_positives[d] = 1

            cumul_true_positives = torch_cumsum(true_positives, dim=0)
            cumul_false_positives = torch_cumsum(false_positives, dim=0)
            cumul_precision = cumul_true_positives / (cumul_true_positives + cumul_false_positives + 1e-10)
            cumul_recall = cumul_true_positives / total_objects

            recall_thresholds = [x / 10 for x in range(11)]
            interpolated_precisions = __to_cuda(torch_zeros((len(recall_thresholds)), dtype=torch_float))
            for i, threshold in enumerate(recall_thresholds):
                recalls_above_threshold = cumul_recall >= threshold
                if recalls_above_threshold.any():
                    interpolated_precisions[i] = cumul_precision[recalls_above_threshold].max()
                else:
                    interpolated_precisions[i] = 0.
            average_precisions[c] = interpolated_precisions.mean()

            total_true_positives = torch_sum(true_positives)
            recalls[c] = total_true_positives / max(float(total_objects), 1e-10)
            precisions[c] = total_true_positives / max(
                total_true_positives + torch_sum(false_positives), torch_tensor(1e-10))
        return average_precisions.tolist(), recalls.tolist(), precisions.tolist()
Example #13
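A variant of the previous metrics in which matching uses Jaccard overlap
against an iou_threshold instead of the center-containment test.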
    def _ssd_discrete_metrics(self, predictions, targets, iou_threshold=0.5, is_cuda=False):
        def __to_cuda(obj):
            if is_cuda:
                obj = obj.cuda()
            return obj

        predicted_boxes = predictions['boxes']
        predicted_labels = predictions['labels']
        predicted_class_scores = predictions['scores']

        target_boxes = targets['boxes']
        target_labels = targets['labels']

        assert len(predicted_boxes) == len(predicted_labels) == len(predicted_class_scores) == len(
            target_boxes) == len(target_labels)

        target_images = list()
        for i in range(len(target_labels)):
            target_images.extend([i] * target_labels[i].size(0))
        target_images = __to_cuda(LongTensor(target_images))
        target_boxes = torch_cat(target_boxes, dim=0)
        target_labels = torch_cat(target_labels, dim=0)

        assert target_images.size(0) == target_boxes.size(0) == target_labels.size(0)

        predicted_images = list()
        for i in range(len(predicted_labels)):
            predicted_images.extend([i] * predicted_labels[i].size(0))
        predicted_images = __to_cuda(LongTensor(predicted_images))
        predicted_boxes = torch_cat(predicted_boxes, dim=0)
        predicted_labels = torch_cat(predicted_labels, dim=0)
        predicted_class_scores = torch_cat(predicted_class_scores, dim=0)

        assert predicted_images.size(0) == predicted_boxes.size(0) == predicted_labels.size(
            0) == predicted_class_scores.size(0)

        average_precisions = torch_zeros(self.num_classes, dtype=torch_float)
        recalls = torch_zeros(self.num_classes, dtype=torch_float)
        precisions = torch_zeros(self.num_classes, dtype=torch_float)
        for c in range(self.num_classes):
            target_class_images = target_images[target_labels == c]
            target_class_boxes = target_boxes[target_labels == c]

            total_objects = target_class_boxes.size(0)

            target_class_boxes_detected = __to_cuda(torch_zeros(total_objects, dtype=torch_uint8))

            class_c_predicted_images = predicted_images[predicted_labels == c]
            class_c_predicted_boxes = predicted_boxes[predicted_labels == c]
            class_c_predicted_class_scores = predicted_class_scores[predicted_labels == c]
            class_c_num_detections = class_c_predicted_boxes.size(0)
            if class_c_num_detections == 0:
                continue

            class_c_predicted_class_scores, sort_ind = torch_sort(class_c_predicted_class_scores, dim=0,
                                                                  descending=True)
            class_c_predicted_images = class_c_predicted_images[sort_ind]
            class_c_predicted_boxes = class_c_predicted_boxes[sort_ind]

            true_positives = __to_cuda(torch_zeros(class_c_num_detections, dtype=torch_float))
            false_positives = __to_cuda(torch_zeros(class_c_num_detections, dtype=torch_float))
            for d in range(class_c_num_detections):
                this_detection_box = class_c_predicted_boxes[d].unsqueeze(0)
                this_image = class_c_predicted_images[d]

                object_boxes = target_class_boxes[target_class_images == this_image]
                if object_boxes.size(0) == 0:
                    false_positives[d] = 1
                    continue

                overlaps = find_jaccard_overlap(this_detection_box, object_boxes)
                max_overlap, ind = torch_max(overlaps.squeeze(0), dim=0)

                original_ind = LongTensor(range(target_class_boxes.size(0)))[target_class_images == this_image][ind]

                if max_overlap.item() > iou_threshold:
                    if target_class_boxes_detected[original_ind] == 0:
                        true_positives[d] = 1
                        target_class_boxes_detected[original_ind] = 1
                    else:
                        false_positives[d] = 1
                else:
                    false_positives[d] = 1

            cumul_true_positives = torch_cumsum(true_positives, dim=0)
            cumul_false_positives = torch_cumsum(false_positives, dim=0)
            cumul_precision = cumul_true_positives / (cumul_true_positives + cumul_false_positives + 1e-10)
            cumul_recall = cumul_true_positives / total_objects

            recall_thresholds = [x / 10 for x in range(11)]
            interpolated_precisions = __to_cuda(torch_zeros((len(recall_thresholds)), dtype=torch_float))
            for i, threshold in enumerate(recall_thresholds):
                recalls_above_threshold = cumul_recall >= threshold
                if recalls_above_threshold.any():
                    interpolated_precisions[i] = cumul_precision[recalls_above_threshold].max()
                else:
                    interpolated_precisions[i] = 0.
            average_precisions[c] = interpolated_precisions.mean()

            total_true_positives = torch_sum(true_positives)
            recalls[c] = total_true_positives / max(float(total_objects), 1e-10)
            precisions[c] = total_true_positives / max(
                total_true_positives + torch_sum(false_positives), torch_tensor(1e-10))
        return average_precisions.tolist(), recalls.tolist(), precisions.tolist()
Example #14
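A multivariate normal CDF with Monte Carlo and Genz-Bretz backends.
torch_zeros supplies the default zero mean and the degenerate error/info
outputs when both integration bounds are infinite.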
def multivariate_normal_cdf(lower=None,
                            upper=None,
                            loc=None,
                            covariance_matrix=None,
                            scale_tril=None,
                            method="GenzBretz",
                            nmc=200,
                            maxpts=25000,
                            abseps=0.001,
                            releps=0,
                            error_info=False):
    """Compute rectangle probabilities for a multivariate normal random vector Z  ``P(l_i < Z_i < u_i, i = 1,...,d)``. Probability values can be returned with closed-form backward derivatives.
    

    Parameters
    ----------
    lower : torch.Tensor, optional
        Lower integration limits.  Can have batch shape. The
        last dimension is the dimension of the random vector.
        Default is ``None``, which is understood as minus infinity
        for all components. Values of ``-numpy.Inf`` are supported, e.g.
        if only a few components have an infinite boundary.
    upper : torch.Tensor, optional
        Upper integration limits. See `lower`.
    loc : torch.Tensor, optional
        Mean of the Gaussian vector. Default is zeros.
    covariance_matrix : torch.Tensor, optional
        Covariance matrix of the Gaussian vector. Must be provided if 
        `scale_tril` is not.
    scale_tril : torch.Tensor, optional
        A lower-triangular root of the covariance matrix of the Gaussian
        vector (e.g. a Cholesky factor). Must be provided if
        `covariance_matrix` is not. The method ``'GenzBretz'`` needs the
        covariance matrix, which in that case is computed from `scale_tril`.
    method : str, optional
        Method deployed for the integration. Either ``'MonteCarlo'`` or
        ``'GenzBretz'``.
    nmc : int, optional
        Number of Monte Carlo samples.
    maxpts : int, optional
        Maximum number of integration points in the Fortran routine.
    abseps : float, optional
        Absolute error tolerance.
    releps : float, optional
        Relative error tolerance.
    error_info : bool, optional
        Whether an estimate of the integration error should be returned.
        Not compatible with autograd.

    Returns
    -------
    value : torch.Tensor
        The probability of the event ``lower < Y < upper``, with ``Y`` a
        Gaussian vector defined by `loc` and `covariance_matrix` (or
        `scale_tril`). Closed-form derivatives are implemented if `lower`,
        `upper`, `loc`, `covariance_matrix` or `scale_tril` require a
        gradient.
    error : torch.Tensor
        The estimated error for each component of `value`. **Returned only 
        if** `error_info` is ``True``.
    info : torch.Tensor
        Tensor of type ``int32`` informing on the execution for each 
        component.
            - If ``0``, normal completion with ``error < abseps``
            - If ``1``, completion with ``error > abseps``; for
              ``method = 'GenzBretz'``, the whole `maxpts` evaluation
              budget was depleted.
            - If ``2``, ``N > 1000`` or ``N < 1`` (only for
              ``method = 'GenzBretz'``).
            - If ``3``, `covariance_matrix` is not positive semi-definite
              (only for ``method = 'GenzBretz'``).
        **Returned only if** `error_info` is ``True``.


    Notes
    -------
    Parameters `lower`, `upper` and `covariance_matrix` (or `scale_tril`), as 
    well as the returns `value`, `error` and `info` are broadcasted to their
    common batch shape. See PyTorch's `broadcasting semantics
    <https://pytorch.org/docs/stable/notes/broadcasting.html#broadcasting-semantics>`_.

    If any component of ``upper - lower`` is nonpositive, the function returns
    a zero tensor with consistent shape.

    Method ``'MonteCarlo'`` uses Monte Carlo sampling for estimating `value`,
    whereas ``method = 'GenzBretz'`` calls a Fortran routine [1]_.

    If `method` is ``'MonteCarlo'``, a Cholesky decomposition of the
    covariance matrix is performed. If it is ``'GenzBretz'``, only the
    correlation matrix is computed and passed to the Fortran routine.

    The parameter `maxpts` can be used to limit the computation time. A
    suggested calibration strategy is to start with 1000 times the
    integration dimension, and then increase it if the returned `error` is
    too large.

    Partial derivatives are computed using a non-trivial closed-form formula;
    see e.g. Marmin et al. [2]_, p. 13.


    References
    ----------

    .. [1] Alan Genz and Frank Bretz, "Comparison of Methods for the Computation of Multivariate 
       t-Probabilities", Journal of Computational and Graphical Statistics 11, pp. 950-971, 2002. `Source code <http://www.math.wsu.edu/faculty/genz/software/fort77/mvtdstpack.f>`_.

    .. [2] Sébastien Marmin, Clément Chevalier and David Ginsbourger, "Differentiating the multipoint Expected Improvement for optimal batch design", International Workshop on Machine learning, Optimization and big Data, Taormina, Italy, 2015. `PDF <https://hal.archives-ouvertes.fr/hal-01133220v4/document>`_.


    Examples
    --------

    >>> import torch
    >>> from torch.autograd import grad
    >>> n = 4
    >>> x = 1 + torch.randn(n)
    >>> x.requires_grad = True
    >>> # Make a positive semi-definite matrix
    >>> A = torch.randn(n,n)
    >>> C = 1/n*torch.matmul(A,A.t())
    >>> p = mvnorm.multivariate_normal_cdf(upper=x,covariance_matrix=C)
    >>> p
    tensor(0.3721, grad_fn=<MultivariateNormalCDFBackward>)
    >>> grad(p, (x,))
    (tensor([0.0085, 0.2510, 0.1272, 0.0332]),)
    """
    if (covariance_matrix is not None) + (scale_tril is not None) != 1:
        raise ValueError(
            "Exactly one of covariance_matrix or scale_tril may be specified.")
    mat = scale_tril if covariance_matrix is None else covariance_matrix
    device, dtype = mat.device, mat.dtype
    d = mat.size(-1)
    if isinstance(lower, (int, float)):
        # a scalar bound is broadcast to all d components; new_full must be
        # called on a tensor instance (mat), not on the Tensor class
        lower = mat.new_full((d, ), float(lower))
    if isinstance(upper, (int, float)):
        upper = mat.new_full((d, ), float(upper))
    lnone = lower is None
    unone = upper is None
    if not lnone and lower.max() == -Inf:
        lower = None
        lnone = True
    if not unone and upper.min() == Inf:
        upper = None
        unone = True

    if method == "MonteCarlo":  # Monte Carlo estimation
        if loc is None:
            loc = torch_zeros(d, device=device, dtype=dtype)
        p = MultivariateNormal(loc=loc,
                               scale_tril=scale_tril,
                               covariance_matrix=covariance_matrix)
        r = nmc % 5
        N = nmc if r == 0 else nmc + 5 - r  # round up to a multiple of 5
        Y = p.sample(Size([N]))
        if lnone and unone:
            error = torch_zeros(p.batch_shape, device=device,
                                dtype=dtype) if error_info else -1
            info = torch_zeros(p.batch_shape, device=device,
                               dtype=int32) if error_info else -1
            value = torch_ones(p.batch_shape, device=device, dtype=dtype)

        else:
            if lnone:
                Z = (Y < upper).prod(-1)
            else:
                Z = (Y > lower).prod(-1) if unone else (Y < upper).prod(-1) * (
                    Y > lower).prod(-1)
            if error_info:  # does not slow things down significantly
                booleans = Z.view(
                    N // 5, 5, *Z.shape[1:]
                )  # divide in 5 groups to have an idea of the precision
                values = ((booleans.sum(0).type(dtype))) / N * 5
                value = values.mean(0)
                std = values.var(0).sqrt()
                error = 1.96 * std / sqrt5  # at 95 %
                info = (error > abseps).type(int32)
            else:
                value = Z.sum(0).type(dtype) / N
                error = info = -1
    elif method == "GenzBretz":  # Fortran routine
        if (d > 1000):
            raise ValueError("Only dimensions below 1000 are allowed. Got " +
                             str(d) + ".")
        # centralize the problem
        uppe = upper if loc is None else None if unone else upper - loc
        lowe = lower if loc is None else None if lnone else lower - loc

        c = matmul(scale_tril, scale_tril.transpose(
            -1, -2)) if covariance_matrix is None else covariance_matrix
        if (not unone and uppe.requires_grad) or (
                not lnone and lowe.requires_grad) or mat.requires_grad:
            if error_info:
                raise ValueError(
                    "Option 'error_info' is True, and one of x, loc, covariance_matrix or scale_tril requires gradient. With option 'GenzBretz', the estimation of CDF error is not compatible with autograd."
                )
            error = info = -1
            if lnone:
                upp = uppe
            elif unone:
                upp = -lowe
            else:
                raise ValueError(
                    "For autograd with option 'GenzBretz', at least lower or upper should be None (or with all components infinite)."
                )
            value = CDFapp(upp, c, maxpts, abseps, releps)
        else:
            if lnone and unone:
                value = torch_ones(c.shape[:-2], device=device, dtype=dtype)
                error = torch_zeros(c.shape[:-2], device=device,
                                    dtype=dtype) if error_info else -1
                info = torch_zeros(c.shape[:-2], device=device,
                                   dtype=int32) if error_info else -1
            else:
                stds = diagonal(c, dim1=-2, dim2=-1).sqrt()
                low, upp, corr = _cov2cor(lowe, uppe, c, stds)
                res = _hyperrectangle_integration(low,
                                                  upp,
                                                  corr,
                                                  maxpts,
                                                  abseps,
                                                  releps,
                                                  info=error_info)
                value, error, info = (res if error_info else (res, -1, -1))
    else:
        raise ValueError(
            "The 'method=' should be either 'GenzBretz' or 'MonteCarlo'.")

    #if error_info and error > abseps:
    #        warn("Estimated error is higher than abseps. Consider raising the computation budget (nmc for method='MonteCarlo' or maxpts for 'GenzBretz'). Switch 'error_info' to False to ignore.")
    if error_info:
        return value, error, info
    else:
        return value
Example #15
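SSD inference with per-class non-maximum suppression. A zeros uint8 tensor
accumulates the suppression mask while iterating over score-sorted boxes, and
images with no detections fall back to a single background box (label 120).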
    def detect_objects(self, image_as_tensor, min_score, max_overlap, top_k):
        predicted_locs, predicted_scores = self.forward(image_as_tensor)
        batch_size = predicted_locs.size(0)
        n_priors = self.priors_cxcy.size(0)
        predicted_scores = F.softmax(predicted_scores, dim=2)

        all_images_boxes = list()
        all_images_labels = list()
        all_images_scores = list()

        assert n_priors == predicted_locs.size(1) == predicted_scores.size(1)

        for i in range(batch_size):
            decoded_locs = cxcy_to_xy(
                gcxgcy_to_cxcy(predicted_locs[i], self.priors_cxcy))

            image_boxes = list()
            image_labels = list()
            image_scores = list()

            for c in range(self.num_classes - 1):
                class_scores = predicted_scores[i][:, c]
                score_above_min_score = class_scores > min_score
                n_above_min_score = score_above_min_score.sum().item()
                if n_above_min_score == 0:
                    continue
                class_scores = class_scores[score_above_min_score]
                class_decoded_locs = decoded_locs[score_above_min_score]

                class_scores, sort_ind = class_scores.sort(dim=0,
                                                           descending=True)
                class_decoded_locs = class_decoded_locs[sort_ind]

                overlap = find_jaccard_overlap(class_decoded_locs,
                                               class_decoded_locs)

                suppress = self._to_cuda(
                    torch_zeros((n_above_min_score), dtype=torch_uint8))
                for box in range(class_decoded_locs.size(0)):
                    if suppress[box] == 1:
                        continue

                    suppress = torch_max(
                        suppress,
                        (overlap[box] > max_overlap).type(torch_uint8))
                    suppress[box] = 0

                kept_indices = self._to_cuda(
                    suppress.type(BoolTensor).logical_not())
                locs = class_decoded_locs[kept_indices].tolist()
                for loc_index, loc in enumerate(locs):
                    locs[loc_index] = [
                        max(loc[0], 0.),
                        max(loc[1], 0.),
                        min(loc[2], 1.),
                        min(loc[3], 1.)
                    ]
                image_boxes.append(self._to_cuda(FloatTensor(locs)))
                image_labels.append(
                    self._to_cuda(LongTensor(kept_indices.sum().item() * [c])))
                image_scores.append(self._to_cuda(class_scores[kept_indices]))

            if len(image_boxes) == 0:
                image_boxes.append(
                    self._to_cuda(FloatTensor([[0., 0., 0., 0.]])))
                image_labels.append(self._to_cuda(LongTensor([120])))
                image_scores.append(self._to_cuda(FloatTensor([0.])))

            image_boxes = self._to_cuda(torch_cat(image_boxes, dim=0))
            image_labels = self._to_cuda(torch_cat(image_labels, dim=0))
            image_scores = self._to_cuda(torch_cat(image_scores, dim=0))
            n_objects = image_scores.size(0)

            if n_objects > top_k:
                image_scores, sort_ind = image_scores.sort(dim=0,
                                                           descending=True)
                image_scores = image_scores[:top_k]
                image_boxes = image_boxes[sort_ind][:top_k]
                image_labels = image_labels[sort_ind][:top_k]

            all_images_boxes.append(image_boxes)
            all_images_labels.append(image_labels)
            all_images_scores.append(image_scores)

        return all_images_boxes, all_images_labels, all_images_scores