def get_sequence_from_user(max_sequence_length: int) -> Tuple[Tensor, Tensor]:
    """
    Prompt the user for a source sequence of token ids and build the source
    token tensor and the source mask tensor fed to the model.

    The typed sequence is cut to ``max_sequence_length`` tokens when it is
    longer and right-padded with zeros when it is shorter. The returned token
    tensor has shape ``(1, max_sequence_length)``; the returned mask has shape
    ``(1, 1, max_sequence_length)`` and holds ones on real tokens and zeros
    on padding. Both tensors are of integer (long) type.
    """
    prompt = (
        "\nEnter the desired source sequence token ids separated by spaces: ")

    # reading the whitespace-separated ids typed by the user:
    token_ids = [int(piece) for piece in input(prompt).split()]

    # keeping at most the allowed number of tokens:
    n_tokens = min(len(token_ids), max_sequence_length)
    token_ids = token_ids[:max_sequence_length]
    n_padding = max_sequence_length - n_tokens

    # real tokens followed by zero padding, with a leading batch dimension:
    real_part = tensor(token_ids, dtype=torch_long)  # noqa: E501 pylint: disable=not-callable
    padding_part = torch_zeros(n_padding, dtype=torch_long)
    src_seq = torch_unsqueeze(
        input=torch_cat((real_part, padding_part), dim=-1), dim=0)

    # mask marking which positions hold real tokens:
    attended = torch_ones((1, 1, n_tokens), dtype=torch_long)
    ignored = torch_zeros((1, 1, n_padding), dtype=torch_long)
    src_seq_mask = torch_cat((attended, ignored), dim=-1)

    return src_seq, src_seq_mask
def forward(self, predicted_locations, predicted_scores, boxes, labels):
    """Compute the multibox loss (confidence loss + weighted localization loss).

    Every prior is matched to the ground-truth object it overlaps most, every
    object is guaranteed at least one prior, and priors whose overlap is below
    ``self.threshold`` receive the background label (120 in this code).
    The confidence loss uses all positive priors plus the hardest negatives,
    ``self.neg_pos_ratio`` negatives per positive, per image.

    :param predicted_locations: indexed as (batch, prior, 4) by this method.
    :param predicted_scores: indexed as (batch, prior, class) by this method.
    :param boxes: per-image ground-truth boxes (one tensor per image).
    :param labels: per-image ground-truth labels (one tensor per image).
    :return: ``confidence_loss + self.alpha * localization_loss``.
    """
    background_label = 120
    device = self._get_device()

    batch_size = predicted_locations.size(0)
    n_priors = self.priors_cxcy.size(0)
    n_classes = predicted_scores.size(2)
    assert n_priors == predicted_locations.size(
        1) == predicted_scores.size(1)

    target_locations = torch_zeros(
        (batch_size, n_priors, 4), dtype=torch_float).to(device)
    target_classes = torch_zeros(
        (batch_size, n_priors), dtype=torch_long).to(device)

    for image_index in range(batch_size):
        image_boxes = boxes[image_index]
        n_objects = image_boxes.size(0)

        # rows: objects, columns: priors
        iou = find_jaccard_overlap(image_boxes, self.priors_xy)
        best_iou_per_prior, best_object_per_prior = iou.max(dim=0)
        _, best_prior_per_object = iou.max(dim=1)

        # make sure each object owns the prior that fits it best, and give
        # that pairing an overlap of 1 so it survives the threshold below
        best_object_per_prior[best_prior_per_object] = LongTensor(
            range(n_objects)).to(device)
        best_iou_per_prior[best_prior_per_object] = 1.

        prior_labels = labels[image_index][best_object_per_prior]
        prior_labels[best_iou_per_prior < self.threshold] = background_label

        target_classes[image_index] = prior_labels
        target_locations[image_index] = cxcy_to_gcxgcy(
            xy_to_cxcy(image_boxes[best_object_per_prior]),
            self.priors_cxcy)

    is_positive = target_classes != background_label

    # localization loss is computed over positive priors only
    localization_loss = self.smooth_l1(
        predicted_locations[is_positive], target_locations[is_positive])

    n_positives = is_positive.sum(dim=1)
    n_hard_negatives = self.neg_pos_ratio * n_positives

    per_prior_conf_loss = self.cross_entropy(
        predicted_scores.view(-1, n_classes),
        target_classes.view(-1)).view(batch_size, n_priors)
    positive_conf_loss = per_prior_conf_loss[is_positive]

    # hard negative mining: zero out positives, rank the rest by loss and
    # keep the top ``n_hard_negatives`` of each image
    negative_conf_loss = per_prior_conf_loss.clone()
    negative_conf_loss[is_positive] = 0.
    negative_conf_loss, _ = negative_conf_loss.sort(dim=1, descending=True)
    ranks = LongTensor(range(n_priors)).unsqueeze(0).expand_as(
        negative_conf_loss).to(device)
    is_hard_negative = ranks < n_hard_negatives.unsqueeze(1)
    hard_negative_conf_loss = negative_conf_loss[is_hard_negative]

    confidence_loss = (
        hard_negative_conf_loss.sum() + positive_conf_loss.sum()
    ) / n_positives.sum().float()

    return confidence_loss + self.alpha * localization_loss
def _ssd_continuous_metrics(self, predictions, targets, is_cuda=False):
    """Area-based ("continuous") recall and precision for SSD outputs.

    For each image the union of all predicted boxes is intersected with the
    union of all target boxes; recall is that intersection area over the
    target-union area and precision is the same area over the
    prediction-union area. Box coordinates are multiplied by 300 before the
    polygons are built. Images with no targets or no predictions keep a
    score of 0.

    :param predictions: mapping whose ``'boxes'`` entry holds one box tensor
        per image.
    :param targets: mapping whose ``'boxes'`` entry holds one box tensor per
        image.
    :param is_cuda: when true, the scaling tensor is moved to CUDA.
    :return: ``(overall_recall, overall_precision, per_image_recalls,
        per_image_precisions)``; the overall values weight each image by its
        number of targets / predictions respectively.
    """

    def move_if_cuda(obj):
        return obj.cuda() if is_cuda else obj

    def merge(polygons):
        # union of a non-empty list of shapely geometries
        merged = polygons[0]
        for polygon in polygons[1:]:
            merged = merged.union(polygon)
        return merged

    predicted_boxes = predictions['boxes']
    target_boxes = targets['boxes']
    assert len(predicted_boxes) == len(target_boxes)

    total_images = len(target_boxes)
    image_ground_truths = LongTensor(
        [target.size(0) for target in target_boxes])
    image_predictions = LongTensor(
        [prediction.size(0) for prediction in predicted_boxes])
    continuous_recalls = torch_zeros(total_images, dtype=torch_float)
    continuous_precisions = torch_zeros(total_images, dtype=torch_float)
    image_dimensions = move_if_cuda(LongTensor([300, 300, 300, 300]))

    for image_index in range(total_images):
        if len(target_boxes[image_index]) == 0:
            continue

        scaled_predictions = (
            predicted_boxes[image_index] * image_dimensions).tolist()
        scaled_targets = (
            target_boxes[image_index] * image_dimensions).tolist()
        predicted_polygons = [shapely_box(*box) for box in scaled_predictions]
        target_polygons = [shapely_box(*box) for box in scaled_targets]
        if len(predicted_polygons) == 0 or len(target_polygons) == 0:
            continue

        target_region = merge(target_polygons)
        predicted_region = merge(predicted_polygons)
        overlap_area = predicted_region.intersection(target_region).area
        target_area = target_region.area
        predicted_area = predicted_region.area

        # the 1e-10 floor avoids a division by zero on degenerate regions
        continuous_recalls[image_index] = torch_tensor(
            overlap_area / max(target_area, 1e-10))
        continuous_precisions[image_index] = torch_tensor(
            overlap_area / max(predicted_area, 1e-10))

    overall_recall = (image_ground_truths * continuous_recalls).sum() / max(
        image_ground_truths.sum(), torch_tensor(1e-10))
    overall_precision = (
        image_predictions * continuous_precisions).sum() / max(
        image_predictions.sum(), torch_tensor(1e-10))

    return overall_recall.item(), overall_precision.item(), \
        continuous_recalls.tolist(), continuous_precisions.tolist()
def _yolo_continuous_metrics(self, predictions, targets):
    """Area-based ("continuous") recall and precision for YOLO outputs.

    Predicted rows drop their first two entries and target rows drop their
    first entry before being turned into boxes (``box[2:]`` / ``box[1:]``).
    For each image the union of predicted boxes is intersected with the
    union of target boxes; recall and precision are that intersection area
    divided by the target-union and prediction-union areas respectively.
    Images with no targets or no predictions keep a score of 0.

    :param predictions: mapping whose ``'values'`` entry holds, per image, a
        list of prediction rows.
    :param targets: mapping whose ``'values'`` entry holds, per image, a list
        of target rows.
    :return: ``(overall_recall, overall_precision, per_image_recalls,
        per_image_precisions)``.
    """

    def merge(polygons):
        # union of a non-empty list of shapely geometries
        merged = polygons[0]
        for polygon in polygons[1:]:
            merged = merged.union(polygon)
        return merged

    predicted_boxes = [
        Tensor([row[2:] for row in image]) for image in predictions['values']
    ]
    target_boxes = [
        Tensor([row[1:] for row in image]) for image in targets['values']
    ]
    assert len(predicted_boxes) == len(target_boxes)

    total_images = len(target_boxes)
    image_ground_truths = LongTensor(
        [target.size(0) for target in target_boxes])
    image_predictions = LongTensor(
        [prediction.size(0) for prediction in predicted_boxes])
    continuous_recalls = torch_zeros(total_images, dtype=torch_float)
    continuous_precisions = torch_zeros(total_images, dtype=torch_float)

    for image_index in range(total_images):
        predicted_polygons = [
            shapely_box(*box)
            for box in (predicted_boxes[image_index]).tolist()
        ]
        target_polygons = [
            shapely_box(*box)
            for box in (target_boxes[image_index]).tolist()
        ]
        if len(predicted_polygons) == 0 or len(target_polygons) == 0:
            continue

        target_region = merge(target_polygons)
        predicted_region = merge(predicted_polygons)
        overlap_area = predicted_region.intersection(target_region).area
        target_area = target_region.area
        predicted_area = predicted_region.area

        # the 1e-10 floor avoids a division by zero on degenerate regions
        continuous_recalls[image_index] = torch_tensor(
            overlap_area / max(target_area, 1e-10))
        continuous_precisions[image_index] = torch_tensor(
            overlap_area / max(predicted_area, 1e-10))

    overall_recall = (image_ground_truths * continuous_recalls).sum() / max(
        image_ground_truths.sum(), torch_tensor(1e-10))
    overall_precision = (
        image_predictions * continuous_precisions).sum() / max(
        image_predictions.sum(), torch_tensor(1e-10))

    return overall_recall.item(), overall_precision.item(), \
        continuous_recalls.tolist(), continuous_precisions.tolist()
def r4_dnn_istft(target_dirname, chandat_obj=None, new_stft_object=None,
                 is_saving_chandat_dnn=True):
    """Invert the denoised STFT of a target directory back to channel data.

    The original channel-data file is only used for its shape and metadata
    (``beam_position_x``, ``depth`` or ``t``/``fs``/``c``, ``f0``). A zero
    signal of that shape is pushed through ``stft`` to obtain a correctly
    structured STFT container, whose ``'stft'`` entry is then replaced by the
    denoised real/imaginary data before calling ``istft``.

    :param target_dirname: existing directory holding the input files.
    :param chandat_obj: already-loaded channel data mapping; loaded from
        ``CHANDAT_FNAME`` in ``target_dirname`` when ``None``.
    :param new_stft_object: already-loaded mapping with ``'new_stft_real'``
        and ``'new_stft_imag'``; loaded from ``NEW_STFT_FNAME`` when ``None``.
    :param is_saving_chandat_dnn: the result is written to
        ``CHANDAT_DNN_SAVE_FNAME`` only when this is exactly ``True``.
    :return: dict with keys ``'chandat_dnn'``, ``'beam_position_x'``,
        ``'depth'`` and ``'f0'``.
    """
    LOGGER.info(f'{target_dirname}: r4: Doing istft on denoised stft...')
    assert os_path_isdir(target_dirname)

    # --- metadata and shape of the original channel data ---
    if chandat_obj is None:
        chandat_obj = loadmat(os_path_join(target_dirname, CHANDAT_FNAME))
    num_rows, num_elements, num_beams = chandat_obj['chandat'].shape
    beam_position_x = chandat_obj['beam_position_x']
    depth = (
        chandat_obj['depth'] if 'depth' in chandat_obj
        else chandat_obj['t'] / chandat_obj['fs'] * chandat_obj['c'] / 2
    )
    f0 = chandat_obj['f0']
    del chandat_obj

    # --- denoised STFT, real and imaginary parts stacked on a last axis ---
    if new_stft_object is None:
        new_stft_object = loadmat(
            os_path_join(target_dirname, NEW_STFT_FNAME))
    real_part = torch_from_numpy(new_stft_object['new_stft_real'])
    imag_part = torch_from_numpy(new_stft_object['new_stft_imag'])
    new_stft = torch_stack((real_part, imag_part), axis=-1)
    del new_stft_object

    # --- STFT container built from a zero signal, then filled in ---
    placeholder_signal = torch_zeros(
        num_rows, num_elements, num_beams, dtype=torch_float64)
    chandat_stft = stft(
        placeholder_signal, LEN_EACH_SECTION, FRAC_OVERLAP, PADDING)
    chandat_stft['origSigSize'] = [num_rows, num_elements, num_beams]
    chandat_stft['stft'] = new_stft

    chandat_new = istft(chandat_stft)
    # rows -3 and -2 are zeroed; the very last row is left untouched
    chandat_new[-3:-1, :, :] = 0

    chandat_dnn_object = {
        'chandat_dnn': chandat_new,
        'beam_position_x': beam_position_x,
        'depth': depth,
        'f0': f0,
    }
    if is_saving_chandat_dnn is True:
        savemat(os_path_join(target_dirname, CHANDAT_DNN_SAVE_FNAME),
                chandat_dnn_object)

    LOGGER.info(f'{target_dirname}: r4: Done')
    return chandat_dnn_object
def forward(self, h_enc):
    """The forward pass.

    The recurrent state and the output buffer are allocated on the same
    device and with the same dtype as ``h_enc``, so the method works on CPU
    and GPU alike regardless of how PyTorch was built.

    :param h_enc: The output of the RNN encoder, indexed by this method as
                  (batch, time step, features).
    :type h_enc: torch.Tensor
    :return: The output of the RNN dec (h_j_dec), with shape
             (batch, time steps, ``self._input_dim``).
    :rtype: torch.Tensor
    """
    batch_size = h_enc.size(0)
    seq_length = h_enc.size(1)

    # ``new_zeros`` follows the device/dtype of the input tensor.
    h_t_dec = h_enc.new_zeros(batch_size, self._input_dim)
    h_j_dec = h_enc.new_zeros(batch_size, seq_length, self._input_dim)

    for ts in range(seq_length):
        h_t_dec = self.gru_dec(h_enc[:, ts, :], h_t_dec)
        h_j_dec[:, ts, :] = h_t_dec

    return h_j_dec
def forward(self, v_in):
    """Forward pass.

    A forward and a backward GRU cell are stepped over the sequence in the
    same loop: at step ``t`` the forward cell reads frame ``t`` and the
    backward cell reads frame ``seq_length - t - 1``. For the steps that are
    at least ``self._context_length`` away from both ends, the two hidden
    states (each with its input frame added as a residual) are concatenated
    and stored at index ``t - self._context_length`` of the output.

    All buffers are allocated on the same device and with the same dtype as
    ``v_in``, so the method works on CPU and GPU alike.

    :param v_in: The input to the RNN encoder of the Masker, indexed by this
                 method as (batch, time step, features); only the first
                 ``self._input_dim`` features are used.
    :type v_in: torch.Tensor
    :return: The output of the RNN encoder of the Masker, with shape
             (batch, time steps - 2 * context length, 2 * ``self._input_dim``).
    :rtype: torch.Tensor
    """
    batch_size = v_in.size(0)
    seq_length = v_in.size(1)

    # ``new_zeros`` follows the device/dtype of the input tensor.
    h_t_f = v_in.new_zeros(batch_size, self._input_dim)
    h_t_b = v_in.new_zeros(batch_size, self._input_dim)
    h_enc = v_in.new_zeros(
        batch_size,
        seq_length - (2 * self._context_length),
        2 * self._input_dim)

    v_tr = v_in[:, :, :self._input_dim]

    for t in range(seq_length):
        frame_f = v_tr[:, t, :]
        frame_b = v_tr[:, seq_length - t - 1, :]
        h_t_f = self.gru_enc_f(frame_f, h_t_f)
        h_t_b = self.gru_enc_b(frame_b, h_t_b)

        if self._context_length <= t < seq_length - self._context_length:
            h_t = torch_cat([h_t_f + frame_f, h_t_b + frame_b], dim=1)
            h_enc[:, t - self._context_length, :] = h_t

    return h_enc
def __init__(self, token_representation_dimension: int,
             dropout_prob: float, max_sequence_length: int) -> None:
    """Precompute sinusoidal positional signals and set up dropout.

    The signals are stored in the non-trainable buffer
    ``positional_signals`` with shape
    ``(1, max_sequence_length, token_representation_dimension)``: even
    feature indices hold sines and odd feature indices hold cosines.

    Odd values of ``token_representation_dimension`` are supported: in that
    case there is one more sine feature than cosine features.

    :param token_representation_dimension: size of each token embedding.
    :param dropout_prob: probability given to the dropout layer.
    :param max_sequence_length: number of positions to precompute.
    """
    super(PositionalEncoding, self).__init__()
    self.dropout_layer = Dropout(p=dropout_prob)

    # defining positional signals added to embeddings:

    # initialization:
    positional_signals = torch_zeros(
        (max_sequence_length, token_representation_dimension),
        requires_grad=False
    )

    # column vector of positions, shape (max_sequence_length, 1):
    positions = torch_arange(
        start=0,
        end=max_sequence_length,
        requires_grad=False
    ).unsqueeze(dim=1)

    # exp(-2i * ln(10000) / d) == 10000 ** (-2i / d), i.e. one frequency per
    # sine/cosine pair; broadcasting against the positions yields a
    # (max_sequence_length, ceil(d / 2)) matrix of wave arguments:
    wave_inputs = positions * torch_exp(
        torch_arange(
            start=0,
            end=token_representation_dimension,
            step=2
        ) * (-log(10000.0) / token_representation_dimension)
    )

    # interleaving sinusoidal and cosinusoidal components along feature
    # dimension (starting with sine), yielding positional signals for
    # all the allowed positions (for sequences up to the maximum allowed
    # length):
    positional_signals[:, 0::2] = torch_sin(wave_inputs)
    # there are only floor(d / 2) odd feature indices, one fewer than the
    # number of frequencies when d is odd, hence the slice:
    n_cosine_features = token_representation_dimension // 2
    positional_signals[:, 1::2] = torch_cos(
        wave_inputs[:, :n_cosine_features])

    # adding the leading batch dimension:
    positional_signals = positional_signals.unsqueeze(dim=0)

    # parameters not requiring backpropagation (i.e. gradient computation
    # and update):
    self.register_buffer('positional_signals', positional_signals)
def __init__(self, feature_dimension: int, epsilon: float = 1e-6) -> None:
    """Create the two learnable vectors of the layer and store epsilon.

    :param feature_dimension: length of the learnable vectors ``alpha``
        (initialized to ones) and ``beta`` (initialized to zeros).
    :param epsilon: small constant stored on the instance as ``epsilon``.
    """
    super(LayerNorm, self).__init__()

    initial_alpha = torch_ones(feature_dimension)
    initial_beta = torch_zeros(feature_dimension)

    # registration order (alpha, then beta) is kept on purpose, since it
    # determines the order in which the parameters are enumerated
    self.alpha = Parameter(data=initial_alpha)
    self.beta = Parameter(data=initial_beta)

    self.epsilon = epsilon
def _build_targets(self, predictions, target_data, feature_map_width,
                   feature_map_height):
    """Build object / no-object masks and regression targets for the loss.

    Two passes are made over the batch:

    1. every predicted box of an image is compared with every ground-truth
       box of that image; cells whose best IoU exceeds
       ``self.ignore_threshold`` are removed from the no-object mask;
    2. for every ground-truth box, the best matching anchor and the cell
       containing the box centre are marked in the object mask (and removed
       from the no-object mask), and the target tensors are filled through
       ``self._set_target_values``.

    Each row of ``target_data`` is read in groups of five values per object;
    offsets 1..4 of a group are scaled as centre x, centre y, width and
    height. A value of -1 at offset 1 terminates the object list.

    :param predictions: predicted boxes, flattened so that the boxes of image
        ``b`` occupy rows ``b * A * H * W`` to ``(b + 1) * A * H * W``.
    :param target_data: ground-truth tensor, one row per image.
    :param feature_map_width: width of the prediction grid.
    :param feature_map_height: height of the prediction grid.
    :return: ``(_1obj, _1noobj, target_x, target_y, target_w, target_h,
        target_confidence, target_class)``, each of shape
        ``(batch, num_anchors, feature_map_height, feature_map_width)``.
    """
    batch_size = target_data.size(0)
    number_of_pixels = feature_map_height * feature_map_width
    anchors_over_pixels = self.num_anchors * number_of_pixels
    grid_shape = (batch_size, self.num_anchors, feature_map_height,
                  feature_map_width)

    _1obj = torch_zeros(*grid_shape)
    _1noobj = torch_ones(*grid_shape)
    target_center_x_values = torch_zeros(*grid_shape)
    target_center_y_values = torch_zeros(*grid_shape)
    target_width_values = torch_zeros(*grid_shape)
    target_height_values = torch_zeros(*grid_shape)
    target_confidence_score_values = torch_zeros(*grid_shape)
    target_class_values = torch_zeros(*grid_shape)

    # ---- pass 1: drop well-overlapping predictions from the no-object mask
    for image_index in range(batch_size):
        first_row = image_index * anchors_over_pixels
        last_row = (image_index + 1) * anchors_over_pixels
        image_predictions = predictions[first_row:last_row].t()
        best_ious = torch_zeros(anchors_over_pixels)

        for t in range(self.max_object):
            offset = t * 5
            if target_data[image_index][offset + 1] == -1:
                break
            box_center_x = target_data[image_index][
                offset + 1] * feature_map_width
            box_center_y = target_data[image_index][
                offset + 2] * feature_map_height
            box_width = target_data[image_index][
                offset + 3] * feature_map_width
            box_height = target_data[image_index][
                offset + 4] * feature_map_height

            # the same ground-truth box repeated once per predicted box
            tiled_ground_truth = FloatTensor([
                box_center_x, box_center_y, box_width, box_height
            ]).repeat(anchors_over_pixels, 1).t()

            best_ious = torch_max(
                best_ious,
                intersection_over_union(True,
                                        image_predictions,
                                        tiled_ground_truth,
                                        is_corner_coordinates=False))

        # https://github.com/marvis/pytorch-yolo2/issues/121#issuecomment-436388664
        overlaps_enough = torch_reshape(best_ious, (
            self.num_anchors, feature_map_height,
            feature_map_width)) > self.ignore_threshold
        _1noobj[image_index][overlaps_enough] = 0

    # ---- pass 2: mark responsible cells and fill in the targets
    for image_index in range(batch_size):
        for t in range(self.max_object):
            if target_data[image_index][t * 5 + 1] == -1:
                break

            anchor_index, ground_truth_width, ground_truth_height = \
                self._find_most_matching_anchor(
                    feature_map_width, feature_map_height, image_index, t,
                    target_data)
            cell_x, cell_y, ground_truth_bounding_box = \
                self._compose_ground_truth_data(
                    feature_map_width, feature_map_height,
                    ground_truth_height, ground_truth_width, image_index, t,
                    target_data)

            flat_index = (image_index * anchors_over_pixels
                          + anchor_index * number_of_pixels
                          + cell_y * feature_map_width
                          + cell_x)
            predicted_bounding_box = predictions[flat_index]
            iou = intersection_over_union(False,
                                          ground_truth_bounding_box,
                                          predicted_bounding_box,
                                          is_corner_coordinates=False)

            _1obj[image_index][anchor_index][cell_y][cell_x] = 1
            _1noobj[image_index][anchor_index][cell_y][cell_x] = 0

            (target_center_x_values, target_center_y_values,
             target_width_values, target_height_values,
             target_confidence_score_values,
             target_class_values) = self._set_target_values(
                feature_map_width, feature_map_height, image_index, t,
                target_data, anchor_index, iou, cell_x, cell_y,
                ground_truth_height, ground_truth_width,
                target_center_x_values, target_center_y_values,
                target_class_values, target_confidence_score_values,
                target_height_values, target_width_values)

    return _1obj, _1noobj, target_center_x_values, target_center_y_values, \
        target_width_values, target_height_values, \
        target_confidence_score_values, target_class_values
def _calculate_loss(self, predicted, target, _1obj, _1noobj, divide_by_mask,
                    class_loss_reduction, location_confidence_loss_reduction):
    """Combine coordinate, confidence and class losses for one predictor.

    Every coordinate / confidence term is a masked MSE, scaled by the
    matching ``self.*_scale`` factor and halved. With ``divide_by_mask`` the
    object-masked terms and the class term are additionally divided by
    ``_1obj.sum()`` and the no-object term by ``_1noobj.sum()``.

    The class term is best-effort: if computing it raises, it contributes 0
    to the result (the failure is logged at debug level).

    :param predicted: mapping with entries ``'x'``, ``'y'``, ``'w'``, ``'h'``,
        ``'C'`` and the class scores.
    :param target: mapping with the same keys as ``predicted``.
    :param _1obj: mask selecting the cells responsible for an object.
    :param _1noobj: mask selecting the cells without an object.
    :param divide_by_mask: whether to normalize by the mask sums.
    :param class_loss_reduction: ``reduction`` argument of the class loss.
    :param location_confidence_loss_reduction: ``reduction`` argument of the
        MSE loss used for coordinates and confidence.
    :return: ``(coordinates + confidence + class) / 3``.
    """
    import logging

    mse_loss = MSELoss(reduction=location_confidence_loss_reduction)
    total_objects = _1obj.sum() if divide_by_mask else None
    total_no_objects = _1noobj.sum() if divide_by_mask else None

    def scaled_half_mse(key, mask, scale, normaliser):
        # masked MSE for one entry, optionally normalised, scaled and halved
        loss = mse_loss(predicted[key] * mask, target[key] * mask)
        if divide_by_mask:
            loss = loss / normaliser
        return scale * loss / 2.0

    loss_x = scaled_half_mse('x', _1obj, self.coord_scale, total_objects)
    loss_y = scaled_half_mse('y', _1obj, self.coord_scale, total_objects)
    loss_w = scaled_half_mse('w', _1obj, self.coord_scale, total_objects)
    loss_h = scaled_half_mse('h', _1obj, self.coord_scale, total_objects)
    coordinates_loss = loss_x + loss_y + loss_w + loss_h

    object_loss = scaled_half_mse(
        'C', _1obj, self.object_scale, total_objects)
    no_object_loss = scaled_half_mse(
        'C', _1noobj, self.noobject_scale, total_no_objects)
    confidence_score_loss = object_loss + no_object_loss

    try:
        if self.is_multilabel:
            # NOTE(review): this branch reads the key 'p(c' (no closing
            # parenthesis) while the single-label branch reads 'p(c)'. If
            # the mappings only contain 'p(c)', this raises KeyError and the
            # class loss silently becomes 0 - confirm which key is intended.
            class_loss = BCELoss(reduction=class_loss_reduction)(
                predicted['p(c'],
                self._to_cuda(torch_zeros(
                    predicted['p(c'].shape)).index_fill_(
                    1, target['p(c'].data.cpu().long(), 1.0))
        else:
            class_loss = CrossEntropyLoss(
                reduction=class_loss_reduction)(predicted['p(c)'],
                                                target['p(c)'])
        class_loss = self.class_scale * class_loss
        if divide_by_mask:
            class_loss = class_loss / total_objects
    except Exception:  # deliberately broad, but no longer a bare ``except``
        logging.getLogger(__name__).debug(
            "class loss could not be computed; using 0", exc_info=True)
        class_loss = 0

    # Divided by 3 (number of predictors across scales - Large, Medium,
    # Small) to give equivalent weight for each predictor.
    return (coordinates_loss + confidence_score_loss + class_loss) / 3
def _ssd_discrete_metrics(self, predictions, targets, is_cuda=False,
                          *unused_args, **unused_kwargs):
    """Per-class AP, recall and precision using a centre-in-box criterion.

    A detection counts as a true positive when the centroid of its box lies
    inside a ground-truth box of the same class and image that has not been
    claimed by an earlier (higher-scored) detection. Otherwise it counts as
    a false positive. Each detection is exactly one of the two.

    Average precision is the mean of the interpolated precision at the 11
    recall thresholds 0.0, 0.1, ..., 1.0.

    :param predictions: mapping with per-image lists under ``'boxes'``,
        ``'labels'`` and ``'scores'``.
    :param targets: mapping with per-image lists under ``'boxes'`` and
        ``'labels'``.
    :param is_cuda: when true, helper tensors are moved to CUDA.
    :return: ``(average_precisions, recalls, precisions)``, three lists with
        one entry per class in ``range(self.num_classes)``. Classes without
        detections keep 0 in all three.
    """

    def __to_cuda(obj):
        if is_cuda:
            obj = obj.cuda()
        return obj

    predicted_boxes = predictions['boxes']
    predicted_labels = predictions['labels']
    predicted_class_scores = predictions['scores']
    target_boxes = targets['boxes']
    target_labels = targets['labels']
    assert len(predicted_boxes) == len(predicted_labels) == len(
        predicted_class_scores) == len(target_boxes) == len(target_labels)

    # flatten the per-image lists, remembering the image of every object
    target_images = list()
    for i in range(len(target_labels)):
        target_images.extend([i] * target_labels[i].size(0))
    target_images = __to_cuda(LongTensor(target_images))
    target_boxes = torch_cat(target_boxes, dim=0)
    target_labels = torch_cat(target_labels, dim=0)
    assert target_images.size(0) == target_boxes.size(
        0) == target_labels.size(0)

    # same flattening for the detections
    predicted_images = list()
    for i in range(len(predicted_labels)):
        predicted_images.extend([i] * predicted_labels[i].size(0))
    predicted_images = __to_cuda(LongTensor(predicted_images))
    predicted_boxes = torch_cat(predicted_boxes, dim=0)
    predicted_labels = torch_cat(predicted_labels, dim=0)
    predicted_class_scores = torch_cat(predicted_class_scores, dim=0)
    assert predicted_images.size(0) == predicted_boxes.size(
        0) == predicted_labels.size(0) == predicted_class_scores.size(0)

    average_precisions = torch_zeros(self.num_classes, dtype=torch_float)
    recalls = torch_zeros(self.num_classes, dtype=torch_float)
    precisions = torch_zeros(self.num_classes, dtype=torch_float)

    for c in range(self.num_classes):
        target_class_images = target_images[target_labels == c]
        target_class_boxes = target_boxes[target_labels == c]
        total_objects = target_class_boxes.size(0)
        # 1 once a ground-truth object has been claimed by a detection
        target_class_boxes_detected = __to_cuda(
            torch_zeros(total_objects, dtype=torch_uint8))

        class_c_predicted_images = predicted_images[predicted_labels == c]
        class_c_predicted_boxes = predicted_boxes[predicted_labels == c]
        class_c_predicted_class_scores = predicted_class_scores[
            predicted_labels == c]
        class_c_num_detections = class_c_predicted_boxes.size(0)
        if class_c_num_detections == 0:
            continue

        # process detections from the most to the least confident
        class_c_predicted_class_scores, sort_ind = torch_sort(
            class_c_predicted_class_scores, dim=0, descending=True)
        class_c_predicted_images = class_c_predicted_images[sort_ind]
        class_c_predicted_boxes = class_c_predicted_boxes[sort_ind]

        true_positives = __to_cuda(
            torch_zeros(class_c_num_detections, dtype=torch_float))
        false_positives = __to_cuda(
            torch_zeros(class_c_num_detections, dtype=torch_float))

        # positions of this class' objects, used to map a per-image index
        # back to an index into ``target_class_boxes_detected``
        all_target_indices = LongTensor(range(target_class_boxes.size(0)))

        for d in range(class_c_num_detections):
            this_detection_box = shapely_box(
                *class_c_predicted_boxes[d].data)
            this_image = class_c_predicted_images[d]
            in_this_image = target_class_images == this_image
            object_boxes = target_class_boxes[in_this_image]
            if object_boxes.size(0) == 0:
                false_positives[d] = 1
                continue

            original_indices = all_target_indices[in_this_image]
            detection_center = this_detection_box.centroid

            # claim the first not-yet-detected object containing the centre
            matched = False
            for ind, box in enumerate(object_boxes):
                if not shapely_box(*box.data).contains(detection_center):
                    continue
                original_ind = original_indices[ind]
                if target_class_boxes_detected[original_ind] == 0:
                    target_class_boxes_detected[original_ind] = 1
                    matched = True
                    break

            if matched:
                true_positives[d] = 1
            else:
                false_positives[d] = 1

        cumul_true_positives = torch_cumsum(true_positives, dim=0)
        cumul_false_positives = torch_cumsum(false_positives, dim=0)
        cumul_precision = cumul_true_positives / (
            cumul_true_positives + cumul_false_positives + 1e-10)
        cumul_recall = cumul_true_positives / total_objects

        # 11-point interpolated average precision
        recall_thresholds = [x / 10 for x in range(11)]
        interpolated_precisions = __to_cuda(
            torch_zeros((len(recall_thresholds)), dtype=torch_float))
        for i, threshold in enumerate(recall_thresholds):
            recalls_above_threshold = cumul_recall >= threshold
            if recalls_above_threshold.any():
                interpolated_precisions[i] = cumul_precision[
                    recalls_above_threshold].max()
            else:
                interpolated_precisions[i] = 0.
        average_precisions[c] = interpolated_precisions.mean()

        total_true_positives = torch_sum(true_positives)
        recalls[c] = total_true_positives / max(float(total_objects), 1e-10)
        precisions[c] = total_true_positives / max(
            total_true_positives + torch_sum(false_positives),
            torch_tensor(1e-10))

    return average_precisions.tolist(), recalls.tolist(), precisions.tolist()
def _ssd_discrete_metrics(self, predictions, targets, iou_threshold=0.5,
                          is_cuda=False):
    """Per-class AP, recall and precision using an IoU matching criterion.

    Detections of a class are visited from the highest to the lowest score.
    A detection is a true positive when its best-overlapping ground-truth
    box of the same class and image has an IoU above ``iou_threshold`` and
    has not already been claimed; otherwise it is a false positive.
    Average precision is the mean of the interpolated precision at the 11
    recall thresholds 0.0, 0.1, ..., 1.0.

    :param predictions: mapping with per-image lists under ``'boxes'``,
        ``'labels'`` and ``'scores'``.
    :param targets: mapping with per-image lists under ``'boxes'`` and
        ``'labels'``.
    :param iou_threshold: minimum IoU for a detection to match an object.
    :param is_cuda: when true, helper tensors are moved to CUDA.
    :return: ``(average_precisions, recalls, precisions)``, three lists with
        one entry per class in ``range(self.num_classes)``.
    """

    def on_device(obj):
        return obj.cuda() if is_cuda else obj

    def owner_indices(per_image_labels):
        # image index repeated once per entry of that image
        owners = list()
        for image_number in range(len(per_image_labels)):
            owners.extend(
                [image_number] * per_image_labels[image_number].size(0))
        return owners

    predicted_boxes = predictions['boxes']
    predicted_labels = predictions['labels']
    predicted_class_scores = predictions['scores']
    target_boxes = targets['boxes']
    target_labels = targets['labels']
    assert len(predicted_boxes) == len(predicted_labels) == len(
        predicted_class_scores) == len(target_boxes) == len(target_labels)

    # flatten ground truth
    target_images = on_device(LongTensor(owner_indices(target_labels)))
    target_boxes = torch_cat(target_boxes, dim=0)
    target_labels = torch_cat(target_labels, dim=0)
    assert target_images.size(0) == target_boxes.size(
        0) == target_labels.size(0)

    # flatten detections
    predicted_images = on_device(LongTensor(owner_indices(predicted_labels)))
    predicted_boxes = torch_cat(predicted_boxes, dim=0)
    predicted_labels = torch_cat(predicted_labels, dim=0)
    predicted_class_scores = torch_cat(predicted_class_scores, dim=0)
    assert predicted_images.size(0) == predicted_boxes.size(
        0) == predicted_labels.size(0) == predicted_class_scores.size(0)

    average_precisions = torch_zeros(self.num_classes, dtype=torch_float)
    recalls = torch_zeros(self.num_classes, dtype=torch_float)
    precisions = torch_zeros(self.num_classes, dtype=torch_float)
    recall_thresholds = [x / 10 for x in range(11)]

    for c in range(self.num_classes):
        is_target_c = target_labels == c
        target_class_images = target_images[is_target_c]
        target_class_boxes = target_boxes[is_target_c]
        total_objects = target_class_boxes.size(0)
        claimed = on_device(torch_zeros(total_objects, dtype=torch_uint8))

        is_predicted_c = predicted_labels == c
        detection_images = predicted_images[is_predicted_c]
        detection_boxes = predicted_boxes[is_predicted_c]
        detection_scores = predicted_class_scores[is_predicted_c]
        n_detections = detection_boxes.size(0)
        if n_detections == 0:
            continue

        # most confident detections first
        detection_scores, order = torch_sort(
            detection_scores, dim=0, descending=True)
        detection_images = detection_images[order]
        detection_boxes = detection_boxes[order]

        true_positives = on_device(
            torch_zeros(n_detections, dtype=torch_float))
        false_positives = on_device(
            torch_zeros(n_detections, dtype=torch_float))
        object_positions = LongTensor(range(target_class_boxes.size(0)))

        for d in range(n_detections):
            candidate = detection_boxes[d].unsqueeze(0)
            same_image = target_class_images == detection_images[d]
            object_boxes = target_class_boxes[same_image]
            if object_boxes.size(0) == 0:
                false_positives[d] = 1
                continue

            overlaps = find_jaccard_overlap(candidate, object_boxes)
            max_overlap, ind = torch_max(overlaps.squeeze(0), dim=0)
            original_ind = object_positions[same_image][ind]

            is_match = max_overlap.item() > iou_threshold
            if is_match and claimed[original_ind] == 0:
                true_positives[d] = 1
                claimed[original_ind] = 1
            else:
                false_positives[d] = 1

        cumul_true_positives = torch_cumsum(true_positives, dim=0)
        cumul_false_positives = torch_cumsum(false_positives, dim=0)
        cumul_precision = cumul_true_positives / (
            cumul_true_positives + cumul_false_positives + 1e-10)
        cumul_recall = cumul_true_positives / total_objects

        interpolated_precisions = on_device(
            torch_zeros((len(recall_thresholds)), dtype=torch_float))
        for i, threshold in enumerate(recall_thresholds):
            reached = cumul_recall >= threshold
            interpolated_precisions[i] = (
                cumul_precision[reached].max() if reached.any() else 0.)
        average_precisions[c] = interpolated_precisions.mean()

        total_true_positives = torch_sum(true_positives)
        recalls[c] = total_true_positives / max(float(total_objects), 1e-10)
        precisions[c] = total_true_positives / max(
            total_true_positives + torch_sum(false_positives),
            torch_tensor(1e-10))

    return average_precisions.tolist(), recalls.tolist(), precisions.tolist()
def multivariate_normal_cdf(lower=None,
                            upper=None,
                            loc=None,
                            covariance_matrix=None,
                            scale_tril=None,
                            method="GenzBretz",
                            nmc=200,
                            maxpts=25000,
                            abseps=0.001,
                            releps=0,
                            error_info=False):
    """Compute rectangle probabilities for a multivariate normal random vector Z
    ``P(l_i < Z_i < u_i, i = 1,...,d)``.

    Probability values can be returned with closed-form backward derivatives.

    Parameters
    ----------
    lower : torch.Tensor or float, optional
        Lower integration limits. Can have batch shape. The last dimension
        is the dimension of the random vector. Default is ``None`` which is
        understood as minus infinity for all components. Infinite values
        (``-inf``) are supported, e.g. if only few components have an
        infinite boundary. A Python scalar is expanded to a vector of
        length ``d`` holding that value for every component.
    upper : torch.Tensor or float, optional
        Upper integration limits. See `lower`.
    loc : torch.Tensor, optional
        Mean of the Gaussian vector. Default is zeros.
    covariance_matrix : torch.Tensor, optional
        Covariance matrix of the Gaussian vector. Must be provided if
        `scale_tril` is not.
    scale_tril : torch.Tensor, optional
        A lower triangular root of the covariance matrix of the Gaussian
        vector (e.g. a Cholesky factor). Must be provided if
        `covariance_matrix` is not. The method ``'GenzBretz'`` needs the
        covariance matrix, which is then computed from `scale_tril`.
    method : :obj:`str`, optional
        Method deployed for the integration. Either ``'MonteCarlo'`` or
        ``'GenzBretz'``.
    nmc : :obj:`int`, optional
        Number of Monte Carlo samples (rounded up to a multiple of 5).
    maxpts : :obj:`int`, optional
        Maximum number of integration points in the Fortran routine.
    abseps : :obj:`float`, optional
        Absolute error tolerance.
    releps : :obj:`float`, optional
        Relative error tolerance.
    error_info : :obj:`bool`, optional
        Should an estimation of the integration error be returned. Not
        compatible with autograd.

    Returns
    -------
    value : torch.Tensor
        The probability of the event ``lower < Y < upper``, with ``Y`` a
        Gaussian vector defined by `loc` and `covariance_matrix` (or
        `scale_tril`). Closed form derivatives are implemented if `lower`,
        `upper`, `loc`, `covariance_matrix` or `scale_tril` require a
        gradient.
    error : torch.Tensor
        The estimated error for each component of `value`.
        **Returned only if** `error_info` is ``True``.
    info : torch.Tensor
        Tensor of type ``int32`` informing on the execution for each
        component.

        - If ``0``, normal completion with ``error < abseps``
        - If ``1``, completion with ``error > abseps`` and (for
          ``method = 'GenzBretz'``) all maxpts evaluation budget is
          depleted.
        - If ``2``, N > 1000 or N < 1 (only for ``method = 'GenzBretz'``)
        - If ``3``, `covariance_matrix` is not positive semi-definite (only
          for ``method = 'GenzBretz'``)

        **Returned only if** `error_info` is ``True``.

    Raises
    ------
    ValueError
        If not exactly one of `covariance_matrix` and `scale_tril` is given,
        if `method` is unknown, if the dimension exceeds 1000 with
        ``'GenzBretz'``, or if ``'GenzBretz'`` is used with autograd together
        with `error_info` or with both limits finite.

    Notes
    -------
    Parameters `lower`, `upper` and `covariance_matrix` (or `scale_tril`),
    as well as the returns `value`, `error` and `info` are broadcasted to
    their common batch shape. See PyTorch'
    `broadcasting semantics <https://pytorch.org/docs/stable/notes/broadcasting.html#broadcasting-semantics>`_.
    If any component of ``upper - lower`` is nonpositive the integration
    domain is empty, so the expected result is a null tensor with consistent
    shape (this function does not check it explicitly; it is left to the
    underlying integration).
    Method ``MonteCarlo`` uses Monte Carlo sampling for estimating `value`,
    whereas ``method = 'GenzBretz'`` calls a Fortran routine [1]_. If
    `method` is ``MonteCarlo`` a Cholesky decomposition of the covariance
    matrix will be performed. Else if ``GenzBretz``, only the correlation
    matrix is computed and passed to the Fortran routine.
    The parameter `maxpts` can be used to limit the time. A suggested
    calibration strategy is to start with 1000 times the integration
    dimension, and then increase it if the returned `error` is too large.
    Partial derivatives are computed using non-trivial closed form formula,
    see e.g. Marmin et al. [2]_, p 13.

    References
    ----------
    .. [1] Alan Genz and Frank Bretz, "Comparison of Methods for the
       Computation of Multivariate t-Probabilities", Journal of
       Computational and Graphical Statistics 11, pp. 950-971, 2002.
       `Source code <http://www.math.wsu.edu/faculty/genz/software/fort77/mvtdstpack.f>`_.
    .. [2] Sébastien Marmin, Clément Chevalier and David Ginsbourger,
       "Differentiating the multipoint Expected Improvement for optimal
       batch design", International Workshop on Machine learning,
       Optimization and big Data, Taormina, Italy, 2015.
       `PDF <https://hal.archives-ouvertes.fr/hal-01133220v4/document>`_.

    Examples
    --------
    >>> import torch
    >>> from torch.autograd import grad
    >>> n = 4
    >>> x = 1 + torch.randn(n)
    >>> x.requires_grad = True
    >>> # Make a positive semi-definite matrix
    >>> A = torch.randn(n,n)
    >>> C = 1/n*torch.matmul(A,A.t())
    >>> p = mvnorm.multivariate_normal_cdf(upper=x,covariance_matrix=C)
    >>> p
    tensor(0.3721, grad_fn=<MultivariateNormalCDFBackward>)
    >>> grad(p,(x,))
    (tensor([0.0085, 0.2510, 0.1272, 0.0332]),)
    """
    if (covariance_matrix is not None) + (scale_tril is not None) != 1:
        raise ValueError(
            "Exactly one of covariance_matrix or scale_tril may be specified.")

    mat = scale_tril if covariance_matrix is None else covariance_matrix
    device, dtype = mat.device, mat.dtype
    d = mat.size(-1)

    # Scalar limits are expanded to a length-d vector. ``new_full`` is called
    # on the matrix instance so the result shares its dtype and device.
    if isinstance(lower, (int, float)):
        lower = mat.new_full((d, ), float(lower))
    if isinstance(upper, (int, float)):
        upper = mat.new_full((d, ), float(upper))

    # Limits that are infinite in every component are treated as absent.
    lnone = lower is None
    unone = upper is None
    if not lnone and lower.max() == -Inf:
        lower = None
        lnone = True
    if not unone and upper.min() == Inf:
        upper = None
        unone = True

    if method == "MonteCarlo":  # Monte Carlo estimation
        if loc is None:
            loc = torch_zeros(d, device=device, dtype=dtype)
        p = MultivariateNormal(loc=loc,
                               scale_tril=scale_tril,
                               covariance_matrix=covariance_matrix)
        r = nmc % 5
        N = nmc if r == 0 else nmc + 5 - r  # rounded to the upper multiple of 5
        Y = p.sample(Size([N]))
        if lnone and unone:
            # whole space: probability one, no estimation error
            error = torch_zeros(p.batch_shape, device=device,
                                dtype=dtype) if error_info else -1
            info = torch_zeros(p.batch_shape, device=device,
                               dtype=int32) if error_info else -1
            value = torch_ones(p.batch_shape, device=device, dtype=dtype)
        else:
            # Z is 1 for the samples that fall inside the rectangle
            if lnone:
                Z = (Y < upper).prod(-1)
            else:
                Z = (Y > lower).prod(-1) if unone else (Y < upper).prod(-1) * (
                    Y > lower).prod(-1)
            if error_info:  # Does NOT slow down significantly
                booleans = Z.view(
                    N // 5, 5, *Z.shape[1:]
                )  # divide in 5 groups to have an idea of the precision
                values = ((booleans.sum(0).type(dtype))) / N * 5
                value = values.mean(0)
                std = values.var(0).sqrt()
                error = 1.96 * std / sqrt5  # at 95 %
                info = (error > abseps).type(int32)
            else:
                value = Z.sum(0).type(dtype) / N
                error = info = -1
    elif method == "GenzBretz":  # Fortran routine
        if (d > 1000):
            raise ValueError("Only dimensions below 1000 are allowed. Got " +
                             str(d) + ".")
        # centralize the problem
        uppe = upper if loc is None else None if unone else upper - loc
        lowe = lower if loc is None else None if lnone else lower - loc
        c = matmul(scale_tril, scale_tril.transpose(
            -1, -2)) if covariance_matrix is None else covariance_matrix
        if (not unone and uppe.requires_grad) or (
                not lnone and lowe.requires_grad) or mat.requires_grad:
            # differentiable path: only one-sided limits are supported
            if error_info:
                raise ValueError(
                    "Option 'error_info' is True, and one of x, loc, covariance_matrix or scale_tril requires gradient. With option 'GenzBretz', the estimation of CDF error is not compatible with autograd."
                )
            error = info = -1
            if lnone:
                upp = uppe
            elif unone:
                # P(Z > l) equals P(Z < -l) for a centred Gaussian vector
                upp = -lowe
            else:
                raise ValueError(
                    "For autograd with option 'GenzBretz', at least lower or upper should be None (or with all components infinite)."
                )
            value = CDFapp(upp, c, maxpts, abseps, releps)
        else:
            if lnone and unone:
                # whole space: probability one, no estimation error
                value = torch_ones(c.shape[:-2], device=device, dtype=dtype)
                error = torch_zeros(c.shape[:-2], device=device,
                                    dtype=dtype) if error_info else -1
                info = torch_zeros(c.shape[:-2], device=device,
                                   dtype=int32) if error_info else -1
            else:
                stds = diagonal(c, dim1=-2, dim2=-1).sqrt()
                low, upp, corr = _cov2cor(lowe, uppe, c, stds)
                res = _hyperrectangle_integration(low,
                                                  upp,
                                                  corr,
                                                  maxpts,
                                                  abseps,
                                                  releps,
                                                  info=error_info)
                value, error, info = (res if error_info else (res, -1, -1))
    else:
        raise ValueError(
            "The 'method=' should be either 'GenzBretz' or 'MonteCarlo'.")

    if error_info:
        return value, error, info
    else:
        return value
def detect_objects(self, image_as_tensor, min_score, max_overlap, top_k):
    """
    Run the model on the given input and convert its raw predictions into
    per-image detections.

    The scores returned by ``self.forward`` are soft-maxed over dim 2 and
    the predicted locations are decoded against ``self.priors_cxcy``.
    Then, for every image and every class index in
    ``range(self.num_classes - 1)``:

    - only the priors whose class score is strictly greater than
      ``min_score`` are considered;
    - these candidates are visited by decreasing score, and every
      candidate that is not already suppressed suppresses all the
      candidates whose overlap with it (as computed by
      ``find_jaccard_overlap``) is greater than ``max_overlap``;
    - the surviving boxes have their first two coordinates floored at 0
      and their last two coordinates capped at 1.

    If nothing survives for an image, a single placeholder detection is
    stored instead: an all-zero box with label 120 and score 0. If an
    image ends up with more than ``top_k`` detections, only the ``top_k``
    highest-scoring ones are kept.

    NOTE(review): the last class index is never scanned and 120 is used as
    the placeholder label - presumably both refer to the background class;
    confirm against the label map.

    Args:
        image_as_tensor: input forwarded as-is to ``self.forward``
            (presumably a batch of images - confirm).
        min_score: score threshold a box must exceed to be a candidate
            for a given class.
        max_overlap: overlap above which a lower-scored candidate is
            suppressed by a higher-scored kept one.
        top_k: maximum number of detections kept per image.

    Returns:
        Three lists (boxes, labels, scores), each containing one tensor
        per image in the batch.

    Raises:
        AssertionError: if the number of priors does not match dim 1 of
            the predicted locations and scores.
    """
    raw_locs, raw_scores = self.forward(image_as_tensor)
    n_images = raw_locs.size(0)
    n_priors = self.priors_cxcy.size(0)

    # normalizing the raw class scores along the class dimension:
    class_probs = F.softmax(raw_scores, dim=2)

    boxes_per_image, labels_per_image, scores_per_image = [], [], []

    assert n_priors == raw_locs.size(1) == class_probs.size(1)

    for image_idx in range(n_images):
        # decoding the predicted locations with respect to the priors:
        xy_locs = cxcy_to_xy(
            gcxgcy_to_cxcy(raw_locs[image_idx], self.priors_cxcy))

        found_boxes, found_labels, found_scores = [], [], []

        for class_idx in range(self.num_classes - 1):
            # selecting only the priors scoring above the threshold for
            # this class:
            candidate_scores = class_probs[image_idx][:, class_idx]
            is_candidate = candidate_scores > min_score
            n_candidates = is_candidate.sum().item()
            if n_candidates == 0:
                continue

            # ordering the candidates by decreasing score:
            candidate_scores, order = candidate_scores[is_candidate].sort(
                dim=0, descending=True)
            candidate_locs = xy_locs[is_candidate][order]

            # overlap of every candidate with every other candidate:
            pairwise_overlap = find_jaccard_overlap(candidate_locs,
                                                    candidate_locs)

            # non-maximum suppression - 1 marks a suppressed candidate:
            suppressed = self._to_cuda(
                torch_zeros(n_candidates, dtype=torch_uint8))
            for candidate in range(candidate_locs.size(0)):
                if suppressed[candidate] == 1:
                    continue
                too_close = (pairwise_overlap[candidate]
                             > max_overlap).type(torch_uint8)
                suppressed = torch_max(suppressed, too_close)
                # a candidate always overlaps with itself, but it must
                # not be suppressed because of that:
                suppressed[candidate] = 0

            keep = self._to_cuda(suppressed.type(BoolTensor).logical_not())

            # clipping the kept boxes: lower corner floored at 0, upper
            # corner capped at 1:
            clipped = [
                [max(row[0], 0.), max(row[1], 0.),
                 min(row[2], 1.), min(row[3], 1.)]
                for row in candidate_locs[keep].tolist()
            ]

            found_boxes.append(self._to_cuda(FloatTensor(clipped)))
            found_labels.append(
                self._to_cuda(LongTensor([class_idx] * keep.sum().item())))
            found_scores.append(self._to_cuda(candidate_scores[keep]))

        # storing a placeholder detection if no class yielded any box:
        if not found_boxes:
            found_boxes.append(
                self._to_cuda(FloatTensor([[0., 0., 0., 0.]])))
            found_labels.append(self._to_cuda(LongTensor([120])))
            found_scores.append(self._to_cuda(FloatTensor([0.])))

        # merging the detections of all classes into single tensors:
        image_boxes = self._to_cuda(torch_cat(found_boxes, dim=0))
        image_labels = self._to_cuda(torch_cat(found_labels, dim=0))
        image_scores = self._to_cuda(torch_cat(found_scores, dim=0))

        # keeping only the top-k highest-scoring detections, if exceeding:
        if image_scores.size(0) > top_k:
            image_scores, order = image_scores.sort(dim=0, descending=True)
            image_scores = image_scores[:top_k]
            image_boxes = image_boxes[order][:top_k]
            image_labels = image_labels[order][:top_k]

        boxes_per_image.append(image_boxes)
        labels_per_image.append(image_labels)
        scores_per_image.append(image_scores)

    return boxes_per_image, labels_per_image, scores_per_image