def forward(self, input):
    x, low_level_features = self.xception_features(input)
    x1 = self.aspp1(x)
    x2 = self.aspp2(x)
    x3 = self.aspp3(x)
    x4 = self.aspp4(x)
    x5 = self.global_avg_pool(x)
    x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)
    x = torch.cat((x1, x2, x3, x4, x5), dim=1)
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = F.interpolate(x, size=(int(math.ceil(input.size()[-2] / 4)),
                               int(math.ceil(input.size()[-1] / 4))),
                      mode='bilinear', align_corners=True)
    low_level_features = self.conv2(low_level_features)
    low_level_features = self.bn2(low_level_features)
    low_level_features = self.relu(low_level_features)
    x = torch.cat((x, low_level_features), dim=1)
    x = self.last_conv(x)
    x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)
    return x
def forward(self, x):
    x, skip = x
    x = F.interpolate(x, scale_factor=2, mode='nearest')
    if skip is not None:
        x = torch.cat([x, skip], dim=1)
    x = self.block(x)
    return x
def forward(self, x): """ Arguments: x (list[Tensor]): feature maps for each feature level. Returns: results (tuple[Tensor]): feature maps after FPN layers. They are ordered from highest resolution first. """ last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) results = [] results.append(getattr(self, self.layer_blocks[-1])(last_inner)) for feature, inner_block, layer_block in zip( x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] ): inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") inner_lateral = getattr(self, inner_block)(feature) # TODO use size instead of scale to make it robust to different sizes # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], # mode='bilinear', align_corners=False) last_inner = inner_lateral + inner_top_down results.insert(0, getattr(self, layer_block)(last_inner)) if self.top_blocks is not None: last_results = self.top_blocks(results[-1]) results.extend(last_results) return tuple(results)
def make_score_map(self, img, mode='sigmoid'):
    """Computes the correlation score map between the stored reference
    embedding and the given search image."""
    img = img / 255
    # The offset is inserted so that the final size of the score map matches
    # the search image. To know more see "How to overlay the search img with
    # the score map" in Trello/Report. It is half of the dimension of the
    # Smallest Class Equivalent of the Ref image.
    offset = (((self.ref.shape[0] + 1) // 4) * 4 - 1) // 2
    img_mean = img.mean()
    img_padded = np.pad(img, ((offset, offset), (offset, offset), (0, 0)),
                        mode='constant', constant_values=img_mean)
    img_padded = numpy_to_torch_var(img_padded, device)
    srch_emb = self.net.get_embedding(img_padded)
    score_map = self.net.match_corr(self.ref_emb, srch_emb)
    dimx = score_map.shape[-1]
    dimy = score_map.shape[-2]
    score_map = score_map.view(-1, dimy, dimx)
    if mode == 'sigmoid':
        score_map = sigmoid(score_map)
    elif mode == 'norm':
        score_map = score_map - score_map.min()
        score_map = score_map / score_map.max()
    score_map = score_map.unsqueeze(0)
    # We upscale 4 times, because the total stride of the network is 4
    score_map = F.interpolate(score_map, scale_factor=4, mode='bilinear',
                              align_corners=False)
    score_map = score_map.cpu()
    score_map = torch_var_to_numpy(score_map)
    return score_map
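# Illustrative check of the offset formula above (the 127-pixel reference side is a
# hypothetical value, not taken from the source): the padding is half of the
# "Smallest Class Equivalent" of the reference image dimension.
ref_side = 127
offset = (((ref_side + 1) // 4) * 4 - 1) // 2   # ((128 // 4) * 4 - 1) // 2 = 127 // 2 = 63
print(offset)  # 63 pixels of constant padding on each side of the search image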
def forward(self, input):
    x, low_level_feat = self.backbone(input)
    x = self.aspp(x)
    x = self.decoder(x, low_level_feat)
    x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)
    return x
def forward(self, x):
    x, skip = x
    x = F.interpolate(x, scale_factor=2, mode='nearest')
    skip = self.skip_conv(skip)
    x = x + skip
    return x
def forward(self, x, low_level_feat):
    low_level_feat = self.conv1(low_level_feat)
    low_level_feat = self.bn1(low_level_feat)
    low_level_feat = self.relu(low_level_feat)
    x = F.interpolate(x, size=low_level_feat.size()[2:], mode='bilinear', align_corners=True)
    x = torch.cat((x, low_level_feat), dim=1)
    x = self.last_conv(x)
    return x
def forward(self, x):
    x1 = self.aspp1(x)
    x2 = self.aspp2(x)
    x3 = self.aspp3(x)
    x4 = self.aspp4(x)
    x5 = self.global_avg_pool(x)
    x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)
    x = torch.cat((x1, x2, x3, x4, x5), dim=1)
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    return x
def generate(self, target_layer):
    fmaps = self._find(self.fmap_pool, target_layer)
    grads = self._find(self.grad_pool, target_layer)
    weights = self._compute_grad_weights(grads)
    gcam = torch.mul(fmaps, weights).sum(dim=1, keepdim=True)
    gcam = F.relu(gcam)
    gcam = F.interpolate(
        gcam, self.image_shape, mode="bilinear", align_corners=False
    )
    B, C, H, W = gcam.shape
    gcam = gcam.view(B, -1)
    gcam -= gcam.min(dim=1, keepdim=True)[0]
    gcam /= gcam.max(dim=1, keepdim=True)[0]
    gcam = gcam.view(B, C, H, W)
    return gcam
def forward(self, x):
    c5, c4, c3, c2, _ = x
    p5 = self.conv1(c5)
    p4 = self.p4([p5, c4])
    p3 = self.p3([p4, c3])
    p2 = self.p2([p3, c2])
    s5 = self.s5(p5)
    s4 = self.s4(p4)
    s3 = self.s3(p3)
    s2 = self.s2(p2)
    x = s5 + s4 + s3 + s2
    x = self.dropout(x)
    x = self.final_conv(x)
    x = F.interpolate(x, scale_factor=4, mode='bilinear', align_corners=True)
    return x
def forward(self, x):
    features = self._get(x)
    x = self.psp(features)
    x = self.conv(x)
    if self.dropout_factor:
        x = self.dropout(x)
    x = self.final_conv(x)
    x = F.interpolate(
        x,
        scale_factor=self.downsample_factor,
        mode='bilinear',
        align_corners=True
    )
    if self.training and self.aux_output:
        aux = self.aux(features)
        x = [x, aux]
    return x
import numpy as np
import scipy.misc
import torch
import matplotlib.pyplot as plt
from torchvision import transforms
# import torch.functional as F
import torch.nn.functional as F

eps = np.finfo(np.float64).eps
plt.rcParams['figure.figsize'] = 10, 10

''' Affine crop testing '''
img_t = F.interpolate(transforms.ToTensor()(scipy.misc.face()).unsqueeze(0),
                      size=(768, 768), mode='bilinear')
theta = torch.from_numpy(np.array([[[0.2, 0.0, 0.2],
                                    [0.0, 0.2, 0.3]]]))
grid = F.affine_grid(theta, torch.Size((1, 3, 768, 768)))
grid.size()

plt.rcParams['figure.figsize'] = 8, 8
fig, axis = plt.subplots(nrows=1, ncols=2)
axis[0].imshow(grid[0, :, :, 0])
axis[0].set_title('x')
axis[1].imshow(grid[0, :, :, 1])
axis[1].set_title('y')
def forward(self, x):
    return F.interpolate(x, size=self.size, scale_factor=self.scale_factor,
                         mode=self.mode, align_corners=self.align_corners)
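# Minimal sketch of the module around the forward() above, assuming a constructor that
# simply stores the interpolation arguments (this __init__ and the class name
# `Interpolate` are assumptions for illustration, not taken from the source).
import torch
import torch.nn as nn
import torch.nn.functional as F

class Interpolate(nn.Module):
    def __init__(self, size=None, scale_factor=None, mode='nearest', align_corners=None):
        super().__init__()
        self.size = size
        self.scale_factor = scale_factor
        self.mode = mode
        self.align_corners = align_corners

    def forward(self, x):
        return F.interpolate(x, size=self.size, scale_factor=self.scale_factor,
                             mode=self.mode, align_corners=self.align_corners)

# Example: upsample a feature map by 2x with bilinear interpolation.
up = Interpolate(scale_factor=2, mode='bilinear', align_corners=False)
print(up(torch.randn(1, 8, 16, 16)).shape)  # torch.Size([1, 8, 32, 32])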
def random_resize(images, min_size=288, max_size=448):
    new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0]
    images = F.interpolate(images, size=new_size, mode="nearest")
    return images
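# Usage sketch for random_resize (the batch shape is hypothetical): every call picks a
# new side length from {288, 320, ..., 448} and resizes the whole batch with
# nearest-neighbour interpolation.
import torch

batch = torch.rand(4, 3, 416, 416)
resized = random_resize(batch)
print(resized.shape[-2:])  # e.g. torch.Size([352, 352]), depending on the sampled size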
def add_overlay(self, tag, embed, img, alpha=0.8, cmap='inferno', add_ref=None):
    """ Adds to the summary the images of the input image (ref or search)
    overlaid with the corresponding embedding or correlation map. It expects
    tensors with dimensions [C x H x W] or [B x C x H x W]; if the tensor has
    a batch dimension it takes the FIRST ELEMENT of the batch. The image is
    displayed as a fusion of the input image in grayscale and the overlay in
    the chosen color map; this fusion is controlled by the alpha factor. In
    the case of the embeddings, since there are multiple feature channels, we
    show each of them individually in a grid.
    OBS: The colors represent relative values, where the peak color corresponds
    to the maximum value in any given channel, so no direct value comparisons
    can be made between epochs, only the relative distribution of neighboring
    pixel values (which should be enough, since we are mostly interested in
    finding the maximum of a given correlation map).

    Args:
        tag: (str) The string identifying the image in tensorboard; images
            with the same tag are grouped together with a slider, and are
            indexed by epoch.
        embed: (torch.Tensor) The tensor containing the embedding of an input
            (ref or search image) or a correlation map (the final output). The
            shape should be [B, C, H, W] or [B, H, W] for the case of the
            correlation map.
        img: (torch.Tensor) The image on top of which the embed is going to be
            overlaid. Reference image embeddings should be overlaid on top of
            reference images, and search image embeddings as well as the
            correlation maps should be overlaid on top of the search images.
        alpha: (float) A mixing variable, it controls how much of the final
            embedding corresponds to the grayscale input image and how much
            corresponds to the overlay. Alpha = 0 means there is no overlay in
            the final image, only the input image. Conversely, Alpha = 1 means
            there is only overlay. Adjust this value so you can distinctly see
            the overlay details while still seeing where it is in relation to
            the original image.
        cmap: (str) The name of the colormap to be used with the overlay. The
            colormaps are defined in the colormaps.py module, but values
            include 'viridis' (greenish blue) and 'inferno' (yellowish red).
        add_ref: (torch.Tensor) Optional. An additional reference image that
            will be plotted to the side of the other images. Useful when
            plotting correlation maps, because it lets the user see both the
            search image and the reference that is used as the target.

    ``Example``
        >>> summ_maker = SummaryMaker(os.path.join(exp_dir, 'tensorboard'), params,
                                      model.upscale_factor)
        ...
        >>> embed_ref = model.get_embedding(ref_img_batch)
        >>> embed_srch = model.get_embedding(search_batch)
        >>> output_batch = model.match_corr(embed_ref, embed_srch)
        >>> batch_index = 0
        >>> summ_maker.add_overlay("Ref_image_{}".format(tbx_index), embed_ref[batch_index], ref_img_batch[batch_index], cmap='inferno')
        >>> summ_maker.add_overlay("Search_image_{}".format(tbx_index), embed_srch[batch_index], search_batch[batch_index], cmap='inferno')
        >>> summ_maker.add_overlay("Correlation_map_{}".format(tbx_index), output_batch[batch_index], search_batch[batch_index], cmap='inferno')
    """
    # TODO Add numbers in the final image to the feature channels.
    # TODO Add the color bar showing the progression of values.
    # If minibatch is given, take only the first image
    # TODO let the user select the image? Loop on all images?
    if len(embed.shape) == 4:
        embed = embed[0]
    if len(img.shape) == 4:
        img = img[0]
    # Normalize the image.
    img = img - img.min()
    img = img / img.max()
    embed = cm.apply_cmap(embed, cmap=cmap)
    # Get grayscale version of image by taking the weighted average of the channels
    # as described in https://www.cs.virginia.edu/~vicente/recognition/notebooks/image_processing_lab.html#2.-Converting-to-Grayscale
    R, G, B = img
    img_gray = 0.21 * R + 0.72 * G + 0.07 * B
    # Get the upscaled size of the embedding, so as to take into account
    # the network's downscale caused by the stride.
    upsc_size = (embed.shape[-1] - 1) * self.up_factor + 1
    embed = F.interpolate(embed, upsc_size, mode='bilinear', align_corners=False)
    # Pad the embedding with zeros to match the image dimensions. We pad
    # all 4 corners equally to keep the embedding centered.
    tot_pad = img.shape[-1] - upsc_size
    # Sanity check 1. The amount of padding must be equal on all sides, so
    # the total padding on any dimension must be an even integer.
    assert tot_pad % 2 == 0, "The embed or image dimensions are incorrect."
    pad = int(tot_pad / 2)
    embed = F.pad(embed, (pad, pad, pad, pad), 'constant', 0)
    # Sanity check 2, the size of the embedding in the (H, W) dimensions
    # matches the size of the image.
    assert embed.shape[-2:] == img.shape[-2:], ("The embedding overlay "
                                                "and image dimensions "
                                                "do not agree.")
    final_imgs = alpha * embed + (1 - alpha) * img_gray
    # The embedding_channel (or feature channel) dimension is treated like
    # a batch dimension, so the grid shows each individual embedding
    # overlaid with the input image. Plus the original image is also shown.
    # If add_ref is used the ref image is the first to be shown.
    img = img.unsqueeze(0)
    final_imgs = torch.cat((img, final_imgs))
    if add_ref is not None:
        # Pads the image if necessary
        pad = int((img.shape[-1] - add_ref.shape[-1]) // 2)
        add_ref = F.pad(add_ref, (pad, pad, pad, pad), 'constant', 0)
        add_ref = add_ref.unsqueeze(0)
        final_imgs = torch.cat((add_ref, final_imgs))
    final_imgs = make_grid(final_imgs, nrow=6)
    self.writer_val.add_image(tag, final_imgs, self.epoch)
def forward(self, x):
    x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
    x = self.conv(x)
    return x
def forward(self, x_small, x_big):
    x_small = self.up(x_small)
    x_small = F.interpolate(x_small, size=x_big.size()[2:],
                            mode='bilinear', align_corners=True)
    x = torch.cat([x_big, x_small], dim=1)
    x = self.conv(x)
    return x
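# Interpolating to x_big's exact spatial size (rather than by a fixed scale factor)
# keeps the concatenation valid even when the encoder produced odd spatial sizes.
# A small sketch with hypothetical shapes:
import torch
import torch.nn.functional as F

x_small = torch.randn(1, 128, 13, 13)  # bottleneck feature
x_big = torch.randn(1, 64, 25, 25)     # skip connection with odd spatial size
x_up = F.interpolate(x_small, size=x_big.size()[2:], mode='bilinear', align_corners=True)
print(torch.cat([x_big, x_up], dim=1).shape)  # torch.Size([1, 192, 25, 25])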
def loss(self, cls_scores, bbox_preds, centernesses, cof_preds, feat_masks, track_feats, track_feats_ref, gt_bboxes, gt_labels, img_metas, cfg, gt_bboxes_ignore=None, gt_masks_list=None, ref_bboxes_list=None, gt_pids_list=None): assert len(cls_scores) == len(bbox_preds) == len(centernesses) featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] all_level_points = self.get_points(featmap_sizes, bbox_preds[0].dtype, bbox_preds[0].device) labels, bbox_targets, label_list, bbox_targets_list, gt_inds = self.fcos_target( all_level_points, gt_bboxes, gt_labels) # decode detection and groundtruth det_bboxes = [] det_targets = [] num_levels = len(bbox_preds) for img_id in range(len(img_metas)): bbox_pred_list = [ bbox_preds[i][img_id].permute(1, 2, 0).reshape(-1, 4).detach() for i in range(num_levels) ] bbox_target_list = bbox_targets_list[img_id] bboxes = [] targets = [] for i in range(len(bbox_pred_list)): bbox_pred = bbox_pred_list[i] bbox_target = bbox_target_list[i] points = all_level_points[i] bboxes.append(distance2bbox(points, bbox_pred)) targets.append(distance2bbox(points, bbox_target)) bboxes = torch.cat(bboxes, dim=0) targets = torch.cat(targets, dim=0) det_bboxes.append(bboxes) det_targets.append(targets) gt_masks = [] for i in range(len(gt_labels)): gt_label = gt_labels[i] gt_masks.append( torch.from_numpy( np.array(gt_masks_list[i][:gt_label.shape[0]], dtype=np.float32)).to(gt_label.device)) num_imgs = cls_scores[0].size(0) # flatten cls_scores, bbox_preds and centerness flatten_cls_scores = [ cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels) for cls_score in cls_scores ] flatten_bbox_preds = [ bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4) for bbox_pred in bbox_preds ] flatten_centerness = [ centerness.permute(0, 2, 3, 1).reshape(-1) for centerness in centernesses ] flatten_cls_scores = torch.cat(flatten_cls_scores) flatten_bbox_preds = torch.cat(flatten_bbox_preds) flatten_centerness = torch.cat(flatten_centerness) flatten_labels = torch.cat(labels) flatten_bbox_targets = torch.cat(bbox_targets) # repeat points to align with bbox_preds flatten_points = torch.cat( [points.repeat(num_imgs, 1) for points in all_level_points]) pos_inds = flatten_labels.nonzero().reshape(-1) num_pos = len(pos_inds) loss_cls = self.loss_cls(flatten_cls_scores, flatten_labels, avg_factor=num_pos + num_imgs) # avoid num_pos is 0 pos_bbox_preds = flatten_bbox_preds[pos_inds] pos_centerness = flatten_centerness[pos_inds] if num_pos > 0: pos_bbox_targets = flatten_bbox_targets[pos_inds] pos_centerness_targets = self.centerness_target(pos_bbox_targets) pos_points = flatten_points[pos_inds] pos_decoded_bbox_preds = distance2bbox(pos_points, pos_bbox_preds) pos_decoded_target_preds = distance2bbox(pos_points, pos_bbox_targets) # centerness weighted iou loss loss_bbox = self.loss_bbox(pos_decoded_bbox_preds, pos_decoded_target_preds, weight=pos_centerness_targets, avg_factor=pos_centerness_targets.sum()) loss_centerness = self.loss_centerness(pos_centerness, pos_centerness_targets) else: loss_bbox = pos_bbox_preds.sum() loss_centerness = pos_centerness.sum() ##########mask loss################# flatten_cls_scores1 = [ cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.cls_out_channels) for cls_score in cls_scores ] flatten_cls_scores1 = torch.cat(flatten_cls_scores1, dim=1) flatten_cof_preds = [ cof_pred.permute(0, 2, 3, 1).reshape(cof_pred.shape[0], -1, 32 * 4) for cof_pred in cof_preds ] loss_mask = 0 loss_match = 0 match_acc = 0 n_total = 0 flatten_cof_preds = 
torch.cat(flatten_cof_preds, dim=1) for i in range(num_imgs): labels = torch.cat( [labels_level.flatten() for labels_level in label_list[i]]) bbox_dt = det_bboxes[i] / 2 bbox_dt = bbox_dt.detach() pos_inds = (labels > 0).nonzero().view(-1) cof_pred = flatten_cof_preds[i][pos_inds] img_mask = feat_masks[i] mask_h = img_mask.shape[1] mask_w = img_mask.shape[2] idx_gt = gt_inds[i] bbox_dt = bbox_dt[pos_inds, :4] area = (bbox_dt[:, 2] - bbox_dt[:, 0]) * (bbox_dt[:, 3] - bbox_dt[:, 1]) bbox_dt = bbox_dt[area > 1.0, :] idx_gt = idx_gt[area > 1.0] cof_pred = cof_pred[area > 1.0] if bbox_dt.shape[0] == 0: loss_mask += area.sum() * 0 continue bbox_gt = gt_bboxes[i] cls_score = flatten_cls_scores1[i, pos_inds, labels[pos_inds] - 1].sigmoid().detach() cls_score = cls_score[area > 1.0] ious = bbox_overlaps(bbox_gt[idx_gt] / 2, bbox_dt, is_aligned=True) weighting = cls_score * ious weighting = weighting / (torch.sum(weighting) + 0.0001) * len(weighting) ###################track#################### bboxes = ref_bboxes_list[i] amplitude = 0.05 random_offsets = bboxes.new_empty(bboxes.shape[0], 4).uniform_( -amplitude, amplitude) # before jittering cxcy = (bboxes[:, 2:4] + bboxes[:, :2]) / 2 wh = (bboxes[:, 2:4] - bboxes[:, :2]).abs() # after jittering new_cxcy = cxcy + wh * random_offsets[:, :2] new_wh = wh * (1 + random_offsets[:, 2:]) # xywh to xyxy new_x1y1 = (new_cxcy - new_wh / 2) new_x2y2 = (new_cxcy + new_wh / 2) new_bboxes = torch.cat([new_x1y1, new_x2y2], dim=1) # clip bboxes # print(bbox_dt.shape) track_feat_i = self.extract_box_feature_center_single( track_feats[i], bbox_dt * 2) track_box_ref = self.extract_box_feature_center_single( track_feats_ref[i], new_bboxes) gt_pids = gt_pids_list[i] cur_ids = gt_pids[idx_gt] prod = torch.mm(track_feat_i, torch.transpose(track_box_ref, 0, 1)) m = prod.size(0) dummy = torch.zeros(m, 1, device=torch.cuda.current_device()) prod_ext = torch.cat([dummy, prod], dim=1) loss_match += cross_entropy(prod_ext, cur_ids) n_total += len(idx_gt) match_acc += accuracy(prod_ext, cur_ids) * len(idx_gt) gt_mask = F.interpolate(gt_masks[i].unsqueeze(0), scale_factor=0.5, mode='bilinear', align_corners=False).squeeze(0) shape = np.minimum(feat_masks[i].shape, gt_mask.shape) gt_mask_new = gt_mask.new_zeros(gt_mask.shape[0], mask_h, mask_w) gt_mask_new[:gt_mask.shape[0], :shape[1], : shape[2]] = gt_mask[:gt_mask. 
shape[0], :shape[1], :shape[2]] gt_mask_new = gt_mask_new.gt(0.5).float() gt_mask_new = torch.index_select(gt_mask_new, 0, idx_gt).permute(1, 2, 0).contiguous() #######spp########################### img_mask1 = img_mask.permute(1, 2, 0) pos_masks00 = torch.sigmoid(img_mask1 @ cof_pred[:, 0:32].t()) pos_masks01 = torch.sigmoid(img_mask1 @ cof_pred[:, 32:64].t()) pos_masks10 = torch.sigmoid(img_mask1 @ cof_pred[:, 64:96].t()) pos_masks11 = torch.sigmoid(img_mask1 @ cof_pred[:, 96:128].t()) pred_masks = torch.stack( [pos_masks00, pos_masks01, pos_masks10, pos_masks11], dim=0) pred_masks = self.crop_cuda(pred_masks, bbox_dt) gt_mask_crop = self.crop_gt_cuda(gt_mask_new, bbox_dt) # pred_masks, gt_mask_crop = crop_split(pos_masks00, pos_masks01, pos_masks10, pos_masks11, bbox_dt, # gt_mask_new) pre_loss = F.binary_cross_entropy(pred_masks, gt_mask_crop, reduction='none') pos_get_csize = center_size(bbox_dt) gt_box_width = pos_get_csize[:, 2] gt_box_height = pos_get_csize[:, 3] pre_loss = pre_loss.sum(dim=( 0, 1)) / gt_box_width / gt_box_height / pos_get_csize.shape[0] loss_mask += torch.sum(pre_loss * weighting.detach()) loss_mask = loss_mask / num_imgs loss_match = loss_match / num_imgs match_acc = match_acc / n_total if loss_mask == 0: loss_mask = bbox_dt[:, 0].sum() * 0 return dict(loss_cls=loss_cls, loss_bbox=loss_bbox, loss_centerness=loss_centerness, loss_mask=loss_mask, loss_match=loss_match, match_acc=match_acc)
def forward(self, feats, feats_x, flag_train=True):
    # return multi_apply(self.forward_single, feats, self.scales)
    cls_scores = []
    bbox_preds = []
    centernesses = []
    cof_preds = []
    feat_masks = []
    track_feats = []
    track_feats_ref = []
    count = 0
    for x, x_f, scale, stride in zip(feats, feats_x, self.scales, self.strides):
        cls_feat = x
        reg_feat = x
        track_feat = x
        track_feat_f = x_f
        for cls_layer in self.cls_convs:
            cls_feat = cls_layer(cls_feat)
        for reg_layer in self.reg_convs:
            reg_feat = reg_layer(reg_feat)
        if count < 3:
            for track_layer in self.track_convs:
                track_feat = track_layer(track_feat)
            track_feat = F.interpolate(track_feat, scale_factor=(2 ** count),
                                       mode='bilinear', align_corners=False)
            track_feats.append(track_feat)
            if flag_train:
                for track_layer in self.track_convs:
                    track_feat_f = track_layer(track_feat_f)
                track_feat_f = F.interpolate(track_feat_f, scale_factor=(2 ** count),
                                             mode='bilinear', align_corners=False)
                track_feats_ref.append(track_feat_f)
        # scale the bbox_pred of different level
        # float to avoid overflow when enabling FP16
        bbox_pred = scale(self.fcos_reg(reg_feat))
        cls_feat = self.feat_align(cls_feat, bbox_pred)
        cls_score = self.fcos_cls(cls_feat)
        centerness = self.fcos_centerness(reg_feat)
        centernesses.append(centerness)
        cls_scores.append(cls_score)
        bbox_preds.append(bbox_pred.float() * stride)
        ######## coefficients ###############
        cof_pred = self.sip_cof(cls_feat)
        cof_preds.append(cof_pred)
        ############ contextual #######################
        if count < 3:
            feat_up = F.interpolate(reg_feat, scale_factor=(2 ** count),
                                    mode='bilinear', align_corners=False)
            feat_masks.append(feat_up)
        count = count + 1
    # ################ contextual enhanced ##################
    feat_masks = torch.cat(feat_masks, dim=1)
    feat_masks = self.relu(
        self.sip_mask_lat(self.relu(self.sip_mask_lat0(feat_masks))))
    feat_masks = F.interpolate(feat_masks, scale_factor=4,
                               mode='bilinear', align_corners=False)
    track_feats = torch.cat(track_feats, dim=1)
    track_feats = self.sipmask_track(track_feats)
    if flag_train:
        track_feats_ref = torch.cat(track_feats_ref, dim=1)
        track_feats_ref = self.sipmask_track(track_feats_ref)
        return (cls_scores, bbox_preds, centernesses, cof_preds,
                feat_masks, track_feats, track_feats_ref)
    else:
        return (cls_scores, bbox_preds, centernesses, cof_preds,
                feat_masks, track_feats, track_feats)
def validation(model, val_loader, epoch, writer):
    # set evaluate mode
    model.eval()

    total_correct, total_label = 0, 0
    total_correct_hb, total_label_hb = 0, 0
    total_correct_fb, total_label_fb = 0, 0
    hist = np.zeros((args.num_classes, args.num_classes))
    hist_hb = np.zeros((args.hbody_cls, args.hbody_cls))
    hist_fb = np.zeros((args.fbody_cls, args.fbody_cls))

    # Iterate over data.
    bar = Bar('Processing {}'.format('val'), max=len(val_loader))
    bar.check_tty = False
    for idx, batch in enumerate(val_loader):
        image, target, hlabel, flabel, _ = batch
        image, target, hlabel, flabel = image.cuda(), target.cuda(), hlabel.cuda(), flabel.cuda()
        with torch.no_grad():
            h, w = target.size(1), target.size(2)
            outputs = model(image)
            outputs = gather(outputs, 0, dim=0)
            preds = F.interpolate(input=outputs[0][-1], size=(h, w),
                                  mode='bilinear', align_corners=True)
            preds_hb = F.interpolate(input=outputs[1][-1], size=(h, w),
                                     mode='bilinear', align_corners=True)
            preds_fb = F.interpolate(input=outputs[2][-1], size=(h, w),
                                     mode='bilinear', align_corners=True)
            if idx % 50 == 0:
                img_vis = inv_preprocess(image, num_images=args.save_num)
                label_vis = decode_predictions(target.int(), num_images=args.save_num,
                                               num_classes=args.num_classes)
                pred_vis = decode_predictions(torch.argmax(preds, dim=1),
                                              num_images=args.save_num,
                                              num_classes=args.num_classes)
                # visual grids
                img_grid = torchvision.utils.make_grid(
                    torch.from_numpy(img_vis.transpose(0, 3, 1, 2)))
                label_grid = torchvision.utils.make_grid(
                    torch.from_numpy(label_vis.transpose(0, 3, 1, 2)))
                pred_grid = torchvision.utils.make_grid(
                    torch.from_numpy(pred_vis.transpose(0, 3, 1, 2)))
                writer.add_image('val_images', img_grid, epoch * len(val_loader) + idx + 1)
                writer.add_image('val_labels', label_grid, epoch * len(val_loader) + idx + 1)
                writer.add_image('val_preds', pred_grid, epoch * len(val_loader) + idx + 1)

            # pixelAcc
            correct, labeled = batch_pix_accuracy(preds.data, target)
            correct_hb, labeled_hb = batch_pix_accuracy(preds_hb.data, hlabel)
            correct_fb, labeled_fb = batch_pix_accuracy(preds_fb.data, flabel)
            # mIoU
            hist += fast_hist(preds, target, args.num_classes)
            hist_hb += fast_hist(preds_hb, hlabel, args.hbody_cls)
            hist_fb += fast_hist(preds_fb, flabel, args.fbody_cls)

            total_correct += correct
            total_correct_hb += correct_hb
            total_correct_fb += correct_fb
            total_label += labeled
            total_label_hb += labeled_hb
            total_label_fb += labeled_fb
            pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
            IoU = round(np.nanmean(per_class_iu(hist)) * 100, 2)
            pixAcc_hb = 1.0 * total_correct_hb / (np.spacing(1) + total_label_hb)
            IoU_hb = round(np.nanmean(per_class_iu(hist_hb)) * 100, 2)
            pixAcc_fb = 1.0 * total_correct_fb / (np.spacing(1) + total_label_fb)
            IoU_fb = round(np.nanmean(per_class_iu(hist_fb)) * 100, 2)
            # plot progress
            bar.suffix = '{} / {} | pixAcc: {pixAcc:.4f}, mIoU: {IoU:.4f} |' \
                         'pixAcc_hb: {pixAcc_hb:.4f}, mIoU_hb: {IoU_hb:.4f} |' \
                         'pixAcc_fb: {pixAcc_fb:.4f}, mIoU_fb: {IoU_fb:.4f}'.format(
                             idx + 1, len(val_loader), pixAcc=pixAcc, IoU=IoU,
                             pixAcc_hb=pixAcc_hb, IoU_hb=IoU_hb,
                             pixAcc_fb=pixAcc_fb, IoU_fb=IoU_fb)
            bar.next()

    print('\n per class iou part: {}'.format(per_class_iu(hist) * 100))
    print('per class iou hb: {}'.format(per_class_iu(hist_hb) * 100))
    print('per class iou fb: {}'.format(per_class_iu(hist_fb) * 100))

    mIoU = round(np.nanmean(per_class_iu(hist)) * 100, 2)
    mIoU_hb = round(np.nanmean(per_class_iu(hist_hb)) * 100, 2)
    mIoU_fb = round(np.nanmean(per_class_iu(hist_fb)) * 100, 2)

    writer.add_scalar('val_pixAcc', pixAcc, epoch)
    writer.add_scalar('val_mIoU', mIoU, epoch)
    writer.add_scalar('val_pixAcc_hb', pixAcc_hb, epoch)
    writer.add_scalar('val_mIoU_hb', mIoU_hb, epoch)
    writer.add_scalar('val_pixAcc_fb', pixAcc_fb, epoch)
    writer.add_scalar('val_mIoU_fb', mIoU_fb, epoch)
    bar.finish()

    return pixAcc, mIoU
def _shortcut(self, x):
    if self.upsample:
        x = F.interpolate(x, scale_factor=2, mode='nearest')
    if self.learned_sc:
        x = self.conv1x1(x)
    return x
def __unpool(self, input):
    # Note: H and W are unpacked but not used; the upsampling is driven by scale_factor.
    _, _, H, W = input.shape
    return F.interpolate(input, mode='bilinear', scale_factor=2, align_corners=True)
def train(hyp, opt, device, tb_writer=None, wandb=None): logger.info(f'Hyperparameters {hyp}') save_dir, epochs, batch_size, total_batch_size, weights, rank = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # Directories wdir = save_dir / 'weights' wdir.mkdir(parents=True, exist_ok=True) # make dir last = wdir / 'last.pt' best = wdir / 'best.pt' results_file = save_dir / 'results.txt' # Save run settings with open(save_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(save_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure plots = not opt.evolve # create plots cuda = device.type != 'cpu' init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict with torch_distributed_zero_first(rank): check_dataset(data_dict) # check train_path = data_dict['train'] test_path = data_dict['val'] nc = 1 if opt.single_cls else int(data_dict['nc']) # number of classes names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check # Model pretrained = weights.endswith('.pt') if pretrained: with torch_distributed_zero_first(rank): attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location=device) # load checkpoint if hyp.get('anchors'): ckpt['model'].yaml['anchors'] = round(hyp['anchors']) # force autoanchor model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [] # exclude keys state_dict = ckpt['model'].float().state_dict() # to FP32 state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create # Freeze freeze = [] # parameter names to freeze (full or partial) for k, v in model.named_parameters(): v.requires_grad = True # train all layers if any(x in k for x in freeze): print('freezing %s' % k) v.requires_grad = False # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay logger.info(f"Scaled weight_decay = {hyp['weight_decay']}") pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_modules(): if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): pg2.append(v.bias) # biases if isinstance(v, nn.BatchNorm2d): pg0.append(v.weight) # no decay elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): pg1.append(v.weight) # apply decay if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = 
one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # Logging if rank in [-1, 0] and wandb and wandb.run is None: opt.hyp = hyp # add hyperparameters wandb_run = wandb.init(config=opt, resume="allow", project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem, name=save_dir.stem, id=ckpt.get('wandb_id') if 'ckpt' in locals() else None) loggers = {'wandb': wandb} # loggers dict # Resume start_epoch, best_fitness = 0, 0.0 if pretrained: # Optimizer if ckpt['optimizer'] is not None: optimizer.load_state_dict(ckpt['optimizer']) best_fitness = ckpt['best_fitness'] # Results if ckpt.get('training_results') is not None: with open(results_file, 'w') as file: file.write(ckpt['training_results']) # write results.txt # Epochs start_epoch = ckpt['epoch'] + 1 if opt.resume: assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs) if epochs < start_epoch: logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % (weights, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt, state_dict # Image sizes gs = int(model.stride.max()) # grid size (max stride) nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj']) imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info('Using SyncBatchNorm()') # EMA ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, world_size=opt.world_size, workers=opt.workers, image_weights=opt.image_weights, quad=opt.quad) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: ema.updates = start_epoch * nb // accumulate # set EMA updates testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, # testloader hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers, pad=0.5)[0] if not opt.resume: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency # model._initialize_biases(cf.to(device)) if plots: plot_labels(labels, save_dir, loggers) if tb_writer: tb_writer.add_histogram('classes', c, 0) # Anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Model parameters hyp['box'] *= 3. / nl # scale to layers hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers hyp['obj'] *= (imgsz / 640) ** 2 * 3. 
/ nl # scale to image size and layers model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0) # P, R, [email protected], [email protected], val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) logger.info('Image sizes %g train, %g test\n' 'Using %g dataloader workers\nLogging results to %s\n' 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs)) for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if opt.image_weights: # Generate indices if rank in [-1, 0]: cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() # Update mosaic border # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(4, device=device) # mean losses if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) if 'momentum' in x: x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward loss, loss_items = compute_loss(pred, targets.to(device), model) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode if opt.quad: loss *= 4. 
# Backward scaler.scale(loss).backward() # Optimize if ni % accumulate == 0: scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) # Print if rank in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6) % ( '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if plots and ni < 3: f = save_dir / f'train_batch{ni}.jpg' # filename Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() # if tb_writer: # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard elif plots and ni == 3 and wandb: wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]}) # end batch ------------------------------------------------------------------------------------------------ # end epoch ---------------------------------------------------------------------------------------------------- # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP if ema: ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights']) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP results, maps, times = test.test(opt.data, batch_size=total_batch_size, imgsz=imgsz_test, model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=save_dir, plots=plots and final_epoch, log_imgs=opt.log_imgs if wandb else 0) # Write with open(results_file, 'a') as f: f.write(s + '%10.4g' * 7 % results + '\n') # P, R, [email protected], [email protected], val_loss(box, obj, cls) if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) # Log tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss 'x/lr0', 'x/lr1', 'x/lr2'] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): if tb_writer: tb_writer.add_scalar(tag, x, epoch) # tensorboard if wandb: wandb.log({tag: x}) # W&B # Update best mAP fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, [email protected], [email protected]] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, 'r') as f: # create checkpoint ckpt = {'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema, 'optimizer': None if final_epoch else optimizer.state_dict(), 'wandb_id': wandb_run.id if wandb else None} # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers final = best if best.exists() else last # final model for f in [last, best]: if f.exists(): strip_optimizer(f) # strip optimizers if opt.bucket: os.system(f'gsutil cp {final} gs://{opt.bucket}/weights') # upload # Plots if plots: plot_results(save_dir=save_dir) # save as results.png if wandb: files = 
['results.png', 'precision_recall_curve.png', 'confusion_matrix.png'] wandb.log({"Results": [wandb.Image(str(save_dir / f), caption=f) for f in files if (save_dir / f).exists()]}) if opt.log_artifacts: wandb.log_artifact(artifact_or_path=str(final), type='model', name=save_dir.stem) # Test best.pt logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) if opt.data.endswith('coco.yaml') and nc == 80: # if COCO for conf, iou, save_json in ([0.25, 0.45, False], [0.001, 0.65, True]): # speed, mAP tests results, _, _ = test.test(opt.data, batch_size=total_batch_size, imgsz=imgsz_test, conf_thres=conf, iou_thres=iou, model=attempt_load(final, device).half(), single_cls=opt.single_cls, dataloader=testloader, save_dir=save_dir, save_json=save_json, plots=False) else: dist.destroy_process_group() wandb.run.finish() if wandb and wandb.run else None torch.cuda.empty_cache() return results
def geo_lng(dataloader, args):
    mappings = pickle.load(open('util_files/country_lang_mappings.pkl', 'rb'))
    iso3_to_lang = mappings['iso3_to_lang']
    # Country to iso3 mappings that are missing
    missing = {
        'South+Korea': 'KOR',
        'North+Korea': 'PRK',
        'Laos': 'LAO',
        'Caribbean+Netherlands': 'BES',
        'St.+Lucia': 'LCA',
        'East+Timor': 'TLS',
        'Democratic+Republic+of+Congo': 'COD',
        'Swaziland': 'SWZ',
        'Cape+Verde': 'CPV',
        'C%C3%B4te+d%C2%B4Ivoire': 'CIV',
        'Ivory+Coast': 'CIV',
        'Channel+Islands': 'GBR'
    }

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = models.alexnet(pretrained=True).to(device)
    new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
    model.classifier = new_classifier

    with_country = dataloader.dataset.with_country
    country_with_langs = {}
    country_with_imgs = {}  # for each country, first list is tourist second is local
    lang_counts = {}
    detecter = fasttext.load_model('util_files/lid.176.bin')
    lang_dict = {}
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    for i, (data, target) in enumerate(tqdm(dataloader)):
        if data is None:
            continue
        this_tags = [tag['label'] for tag in target[0] if len(tag['label']) >= 3]
        if len(this_tags) > 0:
            srcz = []
            conf = []
            for tag in this_tags:
                classify = detecter.predict(tag)
                srcz.append(classify[0][0][9:])
                conf.append(classify[1][0])
            # Pick out the most common language
            commons = Counter(srcz).most_common()
            the_src = commons[0][0]
            # If the most common language is English, look at the second most common language
            # since people oftentimes use English even when it's not their native language
            if the_src == 'en' and len(commons) > 1:
                the_src_maybe = commons[1][0]
                words = [i for i in range(len(srcz)) if srcz[i] == the_src_maybe]
                # If this second most common language has been classified with more than .5
                # probability, then choose this language for the image
                for word in words:
                    if conf[word] > .5:
                        the_src = the_src_maybe
            if the_src in lang_counts.keys():
                lang_counts[the_src] += 1
            else:
                lang_counts[the_src] = 1

            country = target[2][0]
            iso3 = None
            local = None
            try:
                iso3 = pycountry.countries.search_fuzzy(country.replace('+', ' '))[0].alpha_3
            except LookupError:
                iso3 = missing[country]
            try:
                country_info = CountryInfo(country.replace('+', ' ')).info()
            except KeyError:
                country_info = {}
            country_name = country.split('+')
            if 'name' in country_info.keys():
                country_name += country_info['name']
            if 'nativeName' in country_info.keys():
                country_name += country_info['nativeName']

            # When comparing images to distinguish between tourist and local, we further look
            # into the content of the tags, allowing some images to be categorized as 'unknown'
            # if we are not that sure if it's tourist or local
            # Local: in a local language, country's name isn't a tag, and 'travel' isn't a tag
            # Tourist: in a non-local language, or 'travel' is a tag
            try:
                if the_src in iso3_to_lang[iso3] and len(
                        set(country_name) & set(this_tags)) == 0 and 'travel' not in this_tags:
                    local = 1
                elif the_src not in iso3_to_lang[iso3] or 'travel' in this_tags:
                    local = 0
            except KeyError:
                print("This iso3 can't be found in iso3_to_lang: {}".format(iso3))

            if country not in country_with_langs.keys():
                country_with_langs[country] = []
                country_with_imgs[country] = [[], []]
            country_with_langs[country].append(the_src)
            if local is not None:
                if len(country_with_imgs[country][local]) < 500:
                    data = normalize(data).to(device)
                    big_data = F.interpolate(data.unsqueeze(0), size=224,
                                             mode='bilinear').to(device)
                    this_features = model.forward(big_data)
                    country_with_imgs[country][local].append(
                        (this_features.data.cpu().numpy(), target[3]))

    info = {}
    info['lang_counts'] = lang_counts
    info['country_with_langs'] = country_with_langs
    info['country_with_imgs'] = country_with_imgs
    pickle.dump(info, open("results/{}/geo_lng.pkl".format(args.folder), "wb"))
def geo_tag_region(dataloader, args):
    # map from a region name to a list whose value at index i represents count of category i
    region_tags = {}
    tag_to_region_features = {}
    categories = dataloader.dataset.categories

    if not os.path.exists("results/{}/geo_ctr.pkl".format(args.folder)):
        print('running geo_ctr_region() first to get necessary info...')
        geo_ctr_region(dataloader, args)
    counts = pickle.load(open("results/{}/geo_ctr.pkl".format(args.folder), "rb"))
    id_to_region = counts['id_to_region']
    # get name of regions
    unique_regions = list(set(id_to_region.values()))

    # Extracts features from model pretrained on ImageNet
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = models.alexnet(pretrained=True).to(device)
    new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
    model.classifier = new_classifier
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    region_features = {}
    for region in unique_regions:
        region_features[region] = []
    for cat in range(len(categories)):
        tag_to_region_features[cat] = copy.deepcopy(region_features)

    for i, (data, target) in enumerate(tqdm(dataloader)):
        if data is None:
            continue
        region_name = id_to_region[target[3]]
        anns = target[0]
        filepath = target[3]
        this_categories = list(set([categories.index(ann['label']) for ann in anns]))
        if region_name not in region_tags.keys():
            region_tags[region_name] = np.zeros(len(categories))
        this_features = None
        for cat in this_categories:
            if len(tag_to_region_features[cat][region_name]) < 500:
                data = normalize(data).to(device)
                big_data = F.interpolate(data.unsqueeze(0), size=224,
                                         mode='bilinear').to(device)
                this_features = model.forward(big_data)
                break
        for cat in this_categories:
            if this_features is not None and len(tag_to_region_features[cat][region_name]) < 500:
                tag_to_region_features[cat][region_name].append(
                    (this_features.data.cpu().numpy(), filepath))
        for ann in anns:
            region_tags[region_name][categories.index(ann['label'])] += 1

    info_stats = {}
    info_stats['region_tags'] = region_tags
    info_stats['tag_to_region_features'] = tag_to_region_features
    pickle.dump(info_stats, open("results/{}/geo_tag.pkl".format(args.folder), "wb"))
def get_bboxes_single(self,
                      cls_scores,
                      bbox_preds,
                      centernesses,
                      cof_preds,
                      feat_mask,
                      mlvl_points,
                      img_shape,
                      ori_shape,
                      scale_factor,
                      cfg,
                      rescale=False):
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
    mlvl_bboxes = []
    mlvl_scores = []
    mlvl_centerness = []
    mlvl_cofs = []
    for cls_score, bbox_pred, cof_pred, centerness, points in zip(
            cls_scores, bbox_preds, cof_preds, centernesses, mlvl_points):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        scores = cls_score.permute(1, 2, 0).reshape(-1, self.cls_out_channels).sigmoid()
        centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        cof_pred = cof_pred.permute(1, 2, 0).reshape(-1, 32 * 4)

        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            max_scores, _ = (scores * centerness[:, None]).max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            points = points[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            cof_pred = cof_pred[topk_inds, :]
            scores = scores[topk_inds, :]
            centerness = centerness[topk_inds]
        bboxes = distance2bbox(points, bbox_pred, max_shape=img_shape)
        mlvl_cofs.append(cof_pred)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        mlvl_centerness.append(centerness)
    mlvl_bboxes = torch.cat(mlvl_bboxes)
    mlvl_cofs = torch.cat(mlvl_cofs)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    mlvl_scores = torch.cat(mlvl_scores)
    padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
    mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
    mlvl_centerness = torch.cat(mlvl_centerness)
    mlvl_scores = mlvl_scores * mlvl_centerness.view(-1, 1)

    det_bboxes, det_labels, det_cofs = self.fast_nms(
        mlvl_bboxes, mlvl_scores[:, 1:].transpose(1, 0).contiguous(),
        mlvl_cofs, cfg, iou_threshold=0.5)

    masks = []
    if det_bboxes.shape[0] > 0:
        scale = 2
        ##### spp ########################
        img_mask1 = feat_mask.permute(1, 2, 0)
        pos_masks00 = torch.sigmoid(img_mask1 @ det_cofs[:, 0:32].t())
        pos_masks01 = torch.sigmoid(img_mask1 @ det_cofs[:, 32:64].t())
        pos_masks10 = torch.sigmoid(img_mask1 @ det_cofs[:, 64:96].t())
        pos_masks11 = torch.sigmoid(img_mask1 @ det_cofs[:, 96:128].t())
        if rescale:
            pos_masks = torch.stack(
                [pos_masks00, pos_masks01, pos_masks10, pos_masks11], dim=0)
            pos_masks = self.crop_cuda(
                pos_masks,
                det_bboxes[:, :4] * det_bboxes.new_tensor(scale_factor) / scale)
            # pos_masks = crop_split(pos_masks00, pos_masks01, pos_masks10, pos_masks11,
            #                        det_bboxes * det_bboxes.new_tensor(scale_factor) / scale)
        else:
            pos_masks = torch.stack(
                [pos_masks00, pos_masks01, pos_masks10, pos_masks11], dim=0)
            pos_masks = self.crop_cuda(pos_masks, det_bboxes[:, :4] / scale)
            # pos_masks = crop_split(pos_masks00, pos_masks01, pos_masks10, pos_masks11,
            #                        det_bboxes / scale)
        pos_masks = pos_masks.permute(2, 0, 1)
        if rescale:
            masks = F.interpolate(pos_masks.unsqueeze(0),
                                  scale_factor=scale / scale_factor,
                                  mode='bilinear',
                                  align_corners=False).squeeze(0)
        else:
            masks = F.interpolate(pos_masks.unsqueeze(0),
                                  scale_factor=scale,
                                  mode='bilinear',
                                  align_corners=False).squeeze(0)
        masks.gt_(0.5)
    return det_bboxes, det_labels, masks
def __upsample_cat(self, p2, p3, p4, p5):
    h, w = p2.size()[2:]
    p3 = F.interpolate(p3, size=(h, w), mode='bilinear', align_corners=False)
    p4 = F.interpolate(p4, size=(h, w), mode='bilinear', align_corners=False)
    p5 = F.interpolate(p5, size=(h, w), mode='bilinear', align_corners=False)
    return torch.cat([p2, p3, p4, p5], dim=1)
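# Sketch of what __upsample_cat does, with hypothetical 64-channel feature maps:
# every level is resized to p2's resolution and stacked along the channel axis.
import torch
import torch.nn.functional as F

p2 = torch.randn(1, 64, 80, 80)
p3 = torch.randn(1, 64, 40, 40)
p4 = torch.randn(1, 64, 20, 20)
p5 = torch.randn(1, 64, 10, 10)
p3 = F.interpolate(p3, size=(80, 80), mode='bilinear', align_corners=False)
p4 = F.interpolate(p4, size=(80, 80), mode='bilinear', align_corners=False)
p5 = F.interpolate(p5, size=(80, 80), mode='bilinear', align_corners=False)
print(torch.cat([p2, p3, p4, p5], dim=1).shape)  # torch.Size([1, 256, 80, 80])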
def resize(
    img: Tensor,
    size: List[int],
    interpolation: str = "bilinear",
    max_size: Optional[int] = None,
    antialias: Optional[bool] = None,
) -> Tensor:
    _assert_image_tensor(img)

    if not isinstance(size, (int, tuple, list)):
        raise TypeError("Got inappropriate size arg")
    if not isinstance(interpolation, str):
        raise TypeError("Got inappropriate interpolation arg")

    if interpolation not in ["nearest", "bilinear", "bicubic"]:
        raise ValueError("This interpolation mode is unsupported with Tensor input")

    if isinstance(size, tuple):
        size = list(size)

    if isinstance(size, list):
        if len(size) not in [1, 2]:
            raise ValueError(
                f"Size must be an int or a 1 or 2 element tuple/list, not a {len(size)} element tuple/list"
            )
        if max_size is not None and len(size) != 1:
            raise ValueError(
                "max_size should only be passed if size specifies the length of the smaller edge, "
                "i.e. size should be an int or a sequence of length 1 in torchscript mode."
            )

    if antialias is None:
        antialias = False

    if antialias and interpolation not in ["bilinear", "bicubic"]:
        raise ValueError("Antialias option is supported for bilinear and bicubic interpolation modes only")

    _, h, w = get_dimensions(img)

    if isinstance(size, int) or len(size) == 1:  # specified size only for the smallest edge
        short, long = (w, h) if w <= h else (h, w)
        requested_new_short = size if isinstance(size, int) else size[0]

        new_short, new_long = requested_new_short, int(requested_new_short * long / short)

        if max_size is not None:
            if max_size <= requested_new_short:
                raise ValueError(
                    f"max_size = {max_size} must be strictly greater than the requested "
                    f"size for the smaller edge size = {size}"
                )
            if new_long > max_size:
                new_short, new_long = int(max_size * new_short / new_long), max_size

        new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)

        if (w, h) == (new_w, new_h):
            return img
    else:  # specified both h and w
        new_w, new_h = size[1], size[0]

    img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, [torch.float32, torch.float64])

    # Define align_corners to avoid warnings
    align_corners = False if interpolation in ["bilinear", "bicubic"] else None

    img = interpolate(img, size=[new_h, new_w], mode=interpolation,
                      align_corners=align_corners, antialias=antialias)

    if interpolation == "bicubic" and out_dtype == torch.uint8:
        img = img.clamp(min=0, max=255)

    img = _cast_squeeze_out(img, need_cast=need_cast, need_squeeze=need_squeeze, out_dtype=out_dtype)

    return img
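# Usage sketch for the smaller-edge behaviour of resize() above, assuming the helper
# functions it references (_assert_image_tensor, get_dimensions, interpolate and the
# cast/squeeze helpers) are available; the input shape is hypothetical.
import torch

img = torch.rand(3, 400, 600)
print(resize(img, size=[200]).shape)                # torch.Size([3, 200, 300]) - aspect ratio kept
print(resize(img, size=[200], max_size=250).shape)  # torch.Size([3, 166, 250]) - long edge capped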
def forward(self, x):
    return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
import os
from os import listdir
from os.path import join

import torch.nn.functional as F
import torchvision.transforms as T
from PIL import Image

# `is_image_file` and `opt` (with its blur helpers) come from the surrounding project.
test_dir = '/workspace/test_dir'
filenames = [join(test_dir, x) for x in listdir(test_dir) if is_image_file(x)]
filenames.sort()
output_dir = join(test_dir, "outputs")
if not os.path.exists(output_dir):
    os.mkdir(output_dir)

bg_scale = 2
for fn in filenames:
    img = Image.open(fn).convert("RGB")
    input_data = T.ToTensor()(img).unsqueeze(0).cuda()
    input_data = opt.bg_valid_blur(input_data)
    bg_upblur = F.interpolate(input_data, scale_factor=bg_scale,
                              mode='bilinear', align_corners=False)
    bg_upblur = opt.bg_base_blur(bg_upblur)

    b, c, h, w = input_data.shape
    ph0, ph1, pw0, pw1 = [0] * 4
    min_sz = 256
    if h < min_sz or w < min_sz:
        ph0 = 0 if h > min_sz else (min_sz - h) // 2
        pw0 = 0 if w > min_sz else (min_sz - w) // 2
        ph1 = ph0
        pw1 = pw0
        if h + ph0 + ph1 < min_sz:
            ph1 += min_sz - h - ph0 - ph1
def _upsample(x, size):
    return F.interpolate(x, size=size, mode='bilinear', align_corners=True)
)
print(len(dataloader.dataset))
data = iter(dataloader)
for i in range(len(dataloader.dataset)):
    """Saves a generated sample from the validation set"""
    img = next(data)
    # print(img.unsqueeze(0).size())
    X1 = img.repeat(opt.style_dim, 1, 1, 1)
    X1 = Variable(X1.type(Tensor))
    # Get random style codes
    s_code = np.random.uniform(-1, 1, (opt.style_dim, opt.style_dim))
    s_code = Variable(Tensor(s_code))
    # Generate samples
    c_code_1, _ = Enc1(X1)
    X12 = Dec2(c_code_1, s_code)
    # Save each generated sample under the source file's name plus a style index
    name = dataloader.dataset.files[i].split("/")[-1]
    for j, sample in enumerate(X12):
        tmp_name = name.split(".")[0] + "_" + str(j) + "." + name.split(".")[-1]
        sample = F.interpolate(sample.unsqueeze(0), size=(480, 640), mode='bicubic')
        # sample = transform.resize(sample.unsqueeze(0), (480, 640))
        save_image(sample, opt.output_location + "/" + tmp_name, normalize=True)
def _calculate_localization_map(self, inputs, labels=None):
    """
    Calculate localization map for all inputs with Grad-CAM.
    Args:
        inputs (list of tensor(s)): the input clips.
        labels (Optional[tensor]): labels of the current input clips.
    Returns:
        localization_maps (list of ndarray(s)): the localization map for
            each corresponding input.
        preds (tensor): shape (n_instances, n_class). Model predictions for `inputs`.
    """
    assert len(inputs) == len(self.target_layers), (
        "Must register the same number of target layers as the number of input pathways.")
    input_clone = [inp.clone() for inp in inputs]
    preds = self.model(input_clone)

    if labels is None:
        score = torch.max(preds, dim=-1)[0]
    else:
        if labels.ndim == 1:
            labels = labels.unsqueeze(-1)
        score = torch.gather(preds, dim=1, index=labels)

    self.model.zero_grad()
    score = torch.sum(score)
    score.backward()
    localization_maps = []
    for i, inp in enumerate(inputs):
        _, _, T, H, W = inp.size()

        gradients = self.gradients[self.target_layers[i]]
        activations = self.activations[self.target_layers[i]]
        B, C, Tg, _, _ = gradients.size()

        weights = torch.mean(gradients.view(B, C, Tg, -1), dim=3)
        weights = weights.view(B, C, Tg, 1, 1)
        localization_map = torch.sum(weights * activations, dim=1, keepdim=True)
        localization_map = F.relu(localization_map)
        localization_map = F.interpolate(
            localization_map,
            size=(T, H, W),
            mode="trilinear",
            align_corners=False,
        )
        localization_map_min, localization_map_max = (
            torch.min(localization_map.view(B, -1), dim=-1, keepdim=True)[0],
            torch.max(localization_map.view(B, -1), dim=-1, keepdim=True)[0],
        )
        localization_map_min = torch.reshape(localization_map_min, shape=(B, 1, 1, 1, 1))
        localization_map_max = torch.reshape(localization_map_max, shape=(B, 1, 1, 1, 1))
        # Normalize the localization map.
        localization_map = (localization_map - localization_map_min) / (
            localization_map_max - localization_map_min + 1e-6
        )
        localization_map = localization_map.data

        localization_maps.append(localization_map)

    return localization_maps, preds
def forward(self, input, label, step=0, alpha=-1, content_input=None,
            style_layer_begin=0, style_layer_end=-1):
    out_act = lambda x: x
    if style_layer_end == -1:
        # content_input layer IDs go from 0 to (step). The local numbering is reversed
        # so that, at 128x128 when step=5, id=0 <==> (step-5), id=1 <==> (step-4) etc.
        style_layer_end = step + 1
    style_layer_end = min(step + 1, style_layer_end)
    if style_layer_begin == -1 or style_layer_begin >= style_layer_end:
        return content_input

    assert (not input is None)

    # Label is reserved for future use. Make None if not in use.
    # label = self.label_embed(label)
    if not label is None:
        input = torch.cat([input, label], 1)

    batchN = input.size()[0]

    if args.stylefc > 0:
        input = self.z_preprocess[0](input)

    for anb in self.adanorm_blocks:
        anb.update(input)

    # For 3 levels of coarseness, for the 2 resnet-block layers, in both the generator
    # and generator_running. Since the layers are just added to the global list without
    # specific indices, the resulting index numbers are ad hoc but atm they are
    # deterministically like here:
    def layers_for_block_depth(d, holder):
        # Generator layers start from 0 and running-Generator from 17, or vice versa.
        # For both, do the same styling.
        network_offset = int(len(holder.adanorm_blocks) / 2)  # 17
        return [d * 2, d * 2 + 1]  # , d*2+network_offset, d*2+network_offset+1]

    # The first conv call will start from a constant content_input defined as a
    # class-level var in AdaNorm
    if content_input is None:
        out = torch.ones(512, 4, 4).to(device=input.device).repeat(batchN, 1, 1, 1)
    else:
        out = content_input

    block_offset = 0

    for i in range(style_layer_begin, style_layer_end):
        if i > 0 and not i in Generator.supportBlockPoints:
            if args.upsampling != 'bilinear':
                upsample = F.upsample(out, scale_factor=2)
            else:
                upsample = F.interpolate(out, align_corners=False, scale_factor=2,
                                         mode='bilinear')
        else:
            upsample = out
        out = upsample

        if i == 0 or not self.use_layer_noise:
            out = self.progression[i](out)
        else:
            out = self.progression[i].conv[0][0](out)
            out = self.progression[i].conv[0][1](out)
            out = self.noise[i][0](out)
            out = self.progression[i].conv[0][2](out)  # act
            if args.upsampling != 'bilinear':
                out = self.progression[i].conv[0][3](out)  # Blur
            out = self.progression[i].conv[1](out)
            out = self.noise[i][1](out)
            out = self.progression[i].conv[2](out)  # act
            out = self.progression[i].conv[3](out)

    if style_layer_end == step + 1:
        # The final layer is ALWAYS either a to_rgb layer, or a mixture of 2 to_rgb layers!
        out = out_act(self.to_rgb[step](out))

        if style_layer_end > 1 and 0 <= alpha < 1:
            skip_rgb = out_act(self.to_rgb[step - 1](upsample))
            if args.gnn:
                channelwise_std = skip_rgb.std((0, 2, 3), keepdim=True)
                channelwise_mean = skip_rgb.mean((0, 2, 3), keepdim=True)
                out_std = out.std(dim=(0, 2, 3), keepdim=True)
                out_mean = out.mean(dim=(0, 2, 3), keepdim=True)
                skip_rgb = (skip_rgb - channelwise_mean) * (out_std / channelwise_std) + out_mean
            out = (1 - alpha) * skip_rgb + alpha * out

    return out
def resize(image, size):
    image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
    return image
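# Minimal sanity check for the helper above (illustrative only); mode="nearest" keeps hard
# label boundaries, which is why it is commonly preferred for segmentation targets.
import torch

img = torch.rand(3, 100, 150)                     # (C, H, W)
out = resize(img, (64, 64))                       # -> torch.Size([3, 64, 64])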
def forward(self, x):
    b, t, c, h, w = x.size()
    if self.hr_in:
        assert h % (self.upscale**2) == 0 and w % (self.upscale**2) == 0, (
            'The height and width must be multiple of {}.'.format(self.upscale**2))
    else:
        assert h % self.upscale == 0 and w % self.upscale == 0, (
            'The height and width must be multiple of {}.'.format(self.upscale))

    x_center = x[:, self.center_frame_idx, :, :, :].contiguous()

    # extract features for each frame
    # L1
    if self.with_predeblur:
        feat_l1 = self.conv_1x1(self.predeblur(x.view(-1, c, h, w)))
        if self.hr_in:
            h, w = h // self.upscale, w // self.upscale
    else:
        feat_l1 = self.lrelu(self.conv_first(x.view(-1, c, h, w)))

    feat_l1 = self.feature_extraction(feat_l1)
    # L2
    feat_l2 = self.lrelu(self.conv_l2_1(feat_l1))
    feat_l2 = self.lrelu(self.conv_l2_2(feat_l2))
    # L3
    feat_l3 = self.lrelu(self.conv_l3_1(feat_l2))
    feat_l3 = self.lrelu(self.conv_l3_2(feat_l3))

    feat_l1 = feat_l1.view(b, t, -1, h, w)
    feat_l2 = feat_l2.view(b, t, -1, h // 2, w // 2)
    feat_l3 = feat_l3.view(b, t, -1, h // 4, w // 4)

    # PCD alignment
    ref_feat_l = [  # reference feature list
        feat_l1[:, self.center_frame_idx, :, :, :].clone(),
        feat_l2[:, self.center_frame_idx, :, :, :].clone(),
        feat_l3[:, self.center_frame_idx, :, :, :].clone()
    ]
    aligned_feat = []
    for i in range(t):
        nbr_feat_l = [  # neighboring feature list
            feat_l1[:, i, :, :, :].clone(),
            feat_l2[:, i, :, :, :].clone(),
            feat_l3[:, i, :, :, :].clone()
        ]
        aligned_feat.append(self.pcd_align(nbr_feat_l, ref_feat_l))
    aligned_feat = torch.stack(aligned_feat, dim=1)  # (b, t, c, h, w)

    if not self.with_tsa:
        aligned_feat = aligned_feat.view(b, -1, h, w)
    feat = self.fusion(aligned_feat)

    out = self.reconstruction(feat)
    if self.add_rrdb:
        out = self.RRDB(out)
    for i in range(self.n_upscale):
        upconv = getattr(self, f'upconv{i+1}')
        out = self.lrelu(self.pixel_shuffle(upconv(out)))
    out = self.lrelu(self.conv_hr(out))
    out = self.conv_last(out)
    if self.hr_in:
        base = x_center
    else:
        base = F.interpolate(
            x_center, scale_factor=self.upscale, mode='bilinear', align_corners=False)
    out += base
    return out
def forward(self, conv_out, output_switch=None, seg_size=None):
    output_dict = {k: None for k in output_switch.keys()}

    conv5 = conv_out[-1]
    input_size = conv5.size()
    roi = []  # fake rois, just used for pooling
    for i in range(input_size[0]):  # batch size
        roi.append(torch.Tensor([i, 0, 0, input_size[3], input_size[2]]).view(1, -1))  # b, x0, y0, x1, y1
    roi = torch.cat(roi, dim=0).type_as(conv5)

    ppm_out = [conv5]
    for pool_scale, pool_conv in zip(self.ppm_pooling, self.ppm_conv):
        ppm_out.append(pool_conv(F.interpolate(
            pool_scale(conv5, roi.detach()),
            (input_size[2], input_size[3]),
            mode='bilinear', align_corners=False)))
    ppm_out = torch.cat(ppm_out, 1)
    f = self.ppm_last_conv(ppm_out)

    if output_switch['scene']:  # scene
        output_dict['scene'] = self.scene_head(f)

    if output_switch['object'] or output_switch['part'] or output_switch['material']:
        fpn_feature_list = [f]
        for i in reversed(range(len(conv_out) - 1)):
            conv_x = conv_out[i]
            conv_x = self.fpn_in[i](conv_x)  # lateral branch
            f = F.interpolate(
                f, size=conv_x.size()[2:], mode='bilinear', align_corners=False)  # top-down branch
            f = conv_x + f
            fpn_feature_list.append(self.fpn_out[i](f))
        fpn_feature_list.reverse()  # [P2 - P5]

        # material
        if output_switch['material']:
            output_dict['material'] = self.material_head(fpn_feature_list[0])

        if output_switch['object'] or output_switch['part']:
            output_size = fpn_feature_list[0].size()[2:]
            fusion_list = [fpn_feature_list[0]]
            for i in range(1, len(fpn_feature_list)):
                fusion_list.append(F.interpolate(
                    fpn_feature_list[i],
                    output_size,
                    mode='bilinear', align_corners=False))
            fusion_out = torch.cat(fusion_list, 1)
            x = self.conv_fusion(fusion_out)

            if output_switch['object']:  # object
                output_dict['object'] = self.object_head(x)
            if output_switch['part']:
                output_dict['part'] = self.part_head(x)

    if self.use_softmax:  # is True during inference
        # inference scene
        x = output_dict['scene']
        x = x.squeeze(3).squeeze(2)
        x = F.softmax(x, dim=1)
        output_dict['scene'] = x

        # inference object, material
        for k in ['object', 'material']:
            x = output_dict[k]
            x = F.interpolate(x, size=seg_size, mode='bilinear', align_corners=False)
            x = F.softmax(x, dim=1)
            output_dict[k] = x

        # inference part
        x = output_dict['part']
        x = F.interpolate(x, size=seg_size, mode='bilinear', align_corners=False)
        part_pred_list, head = [], 0
        for idx_part, object_label in enumerate(broden_dataset.object_with_part):
            n_part = len(broden_dataset.object_part[object_label])
            _x = F.interpolate(x[:, head: head + n_part], size=seg_size,
                               mode='bilinear', align_corners=False)
            _x = F.softmax(_x, dim=1)
            part_pred_list.append(_x)
            head += n_part
        output_dict['part'] = part_pred_list

    else:  # Training
        # object, scene, material
        for k in ['object', 'scene', 'material']:
            if output_dict[k] is None:
                continue
            x = output_dict[k]
            x = F.log_softmax(x, dim=1)
            if k == "scene":  # for scene
                x = x.squeeze(3).squeeze(2)
            output_dict[k] = x

        if output_dict['part'] is not None:
            part_pred_list, head = [], 0
            for idx_part, object_label in enumerate(broden_dataset.object_with_part):
                n_part = len(broden_dataset.object_part[object_label])
                x = output_dict['part'][:, head: head + n_part]
                x = F.log_softmax(x, dim=1)
                part_pred_list.append(x)
                head += n_part
            output_dict['part'] = part_pred_list

    return output_dict
def geo_tag(dataloader, args):
    # redirect to geo_tag_gps if dataset is of gps form:
    if dataloader.dataset.geography_info_type == "GPS_LABEL":
        print("redirecting to geo_tag_gps()...")
        return geo_tag_gps(dataloader, args)
    elif (dataloader.dataset.geography_info_type == "STRING_FORMATTED_LABEL"
          and dataloader.dataset.geography_label_string_type == "REGION_LABEL"):
        print("redirecting to geo_tag_region()...")
        return geo_tag_region(dataloader, args)

    country_tags = {}
    tag_to_subregion_features = {}
    categories = dataloader.dataset.categories
    iso3_to_subregion = pickle.load(
        open('util_files/iso3_to_subregion_mappings.pkl', 'rb'))
    unique_subregions = set(list(iso3_to_subregion.values()))

    # Extracts features from model pretrained on ImageNet
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = models.alexnet(pretrained=True).to(device)
    new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
    model.classifier = new_classifier
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    subregion_features = {}
    for subregion in unique_subregions:
        subregion_features[subregion] = []
    for cat in range(len(categories)):
        tag_to_subregion_features[cat] = copy.deepcopy(subregion_features)

    for i, (data, target) in enumerate(tqdm(dataloader)):
        if data is None:
            continue
        country = target[2][0]
        anns = target[0]
        filepath = target[3]
        this_categories = list(set([categories.index(ann['label']) for ann in anns]))
        subregion = iso3_to_subregion[country_to_iso3(country)]
        if country not in country_tags.keys():
            country_tags[country] = np.zeros(len(categories))

        this_features = None
        for cat in this_categories:
            if len(tag_to_subregion_features[cat][subregion]) < 500:
                data = normalize(data).to(device)
                big_data = F.interpolate(data.unsqueeze(0), size=224, mode='bilinear').to(device)
                this_features = model.forward(big_data)
                break
        for cat in this_categories:
            country_tags[country][cat] += 1
            if this_features is not None and len(tag_to_subregion_features[cat][subregion]) < 500:
                tag_to_subregion_features[cat][subregion].append(
                    (this_features.data.cpu().numpy(), filepath))

    info_stats = {}
    info_stats['country_tags'] = country_tags
    info_stats['tag_to_subregion_features'] = tag_to_subregion_features
    pickle.dump(info_stats, open("results/{}/geo_tag.pkl".format(args.folder), "wb"))
def forward(self, x):
    x = self.block(x)
    if self.upsample:
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
    return x
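# Standalone sketch of the optional 2x upsampling step used above (illustrative shapes only).
import torch
import torch.nn.functional as F

x = torch.randn(1, 128, 28, 28)
y = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)   # -> (1, 128, 56, 56)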
def downsample(masks):
    masks = F.interpolate(masks, scale_factor=1/2, mode="bilinear",
                          align_corners=True, recompute_scale_factor=True)
    m = masks >= 0  # .5
    masks[m] = 1
    masks[~m] = 0
    return masks
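# Illustrative call for the mask downsampler above (assumed input: a float mask batch of
# shape (N, 1, H, W)); only the spatial size is checked here, the thresholding follows the code above.
import torch

masks = torch.zeros(4, 1, 64, 64)
masks[:, :, 16:48, 16:48] = 1.0
small = downsample(masks)                         # -> torch.Size([4, 1, 32, 32]), values in {0, 1}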
def plot_heatmap(image, gt, label, model, layer):
    """
    Plot the heat map for `label` over the original transformed input image.
    Args:
        image: the 1x3x512x512 pytorch tensor that represents the NIH CXR;
            should be consistent with cfg.CROP_SIZE
        gt: ground-truth label tensor for the image
        label: user-supplied label you wish to get the class activation map for;
            must be in the FINDINGS list
        model: densenet121 trained on NIH CXR data
        layer: which layer's heat map to extract
    Returns:
        preds: dataframe of per-finding predicted probabilities and ground truth
    """
    label_index = -1
    for i in range(14):
        if CLASS_NAMES[i] == label:
            label_index = i
            break
    assert label_index != -1

    res, fea = model(image)
    chosen = fea[layer]
    chosen = torch.sum(chosen, dim=1).unsqueeze(0)
    size = chosen.size()[2:]
    # bilinearly resize the summed activation map (here `size` is its own spatial size)
    chosen = F.interpolate(chosen, list(size), mode="bilinear")
    raw_cam = chosen.detach()

    # create predictions for label of interest and all labels
    list_res = res[1][0].data.tolist()
    predx = ['%.3f' % elem for elem in list_res]

    fig, (showcxr, heatmap) = plt.subplots(ncols=2, figsize=(14, 5))
    hmap = sns.heatmap(raw_cam.squeeze(),
                       cmap='viridis',
                       alpha=0.3,  # whole heatmap is translucent
                       zorder=2,
                       square=True,
                       vmin=-5,
                       vmax=5)
    cxr = image.squeeze(0).permute(1, 2, 0).cpu().numpy()
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # cxr = cxr * std + mean
    cxr = np.clip(cxr, 0, 1)
    LABEL = label
    # print cxr.size()
    hmap.imshow(cxr,
                aspect=hmap.get_aspect(),
                extent=hmap.get_xlim() + hmap.get_ylim(),
                zorder=1)  # put the image under the heatmap
    hmap.axis('off')
    hmap.set_title("P(" + LABEL + ")=" + str(predx[label_index]))

    showcxr.imshow(cxr)
    showcxr.axis('off')
    showcxr.set_title("Heat Map")
    plt.show()

    preds_concat = pd.concat([pd.Series(CLASS_NAMES),
                              pd.Series(predx),
                              pd.Series(gt.numpy().astype(bool)[0])], axis=1)
    preds = pd.DataFrame(data=preds_concat)
    preds.columns = ["Finding", "Predicted Probability", "Ground Truth"]
    preds.set_index("Finding", inplace=True)
    preds.sort_values(by='Predicted Probability', inplace=True, ascending=False)
    return preds
def __upsample_add(self, x, y):
    return F.interpolate(x, size=y.size()[2:], mode='bilinear', align_corners=False) + y
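# Standalone sketch of the same top-down FPN merge (shapes are illustrative): the coarse map is
# resized to the lateral map's spatial size before the element-wise sum.
import torch
import torch.nn.functional as F

top_down = torch.randn(1, 256, 13, 13)
lateral = torch.randn(1, 256, 25, 25)
merged = F.interpolate(top_down, size=lateral.size()[2:], mode='bilinear',
                       align_corners=False) + lateral    # -> (1, 256, 25, 25)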
def lincomb_mask_loss(self, pos, idx_t, loc_data, mask_data, priors, proto_data,
                      masks, gt_box_t, inst_data, interpolation_mode='bilinear'):
    mask_h = proto_data.size(1)
    mask_w = proto_data.size(2)

    process_gt_bboxes = cfg.mask_proto_normalize_emulate_roi_pooling or cfg.mask_proto_crop

    if cfg.mask_proto_remove_empty_masks:
        # Make sure to store a copy of this because we edit it to get rid of all-zero masks
        pos = pos.clone()

    loss_m = 0
    loss_d = 0  # Coefficient diversity loss

    for idx in range(mask_data.size(0)):
        with torch.no_grad():
            downsampled_masks = F.interpolate(masks[idx].unsqueeze(0), (mask_h, mask_w),
                                              mode=interpolation_mode,
                                              align_corners=False).squeeze(0)
            downsampled_masks = downsampled_masks.permute(1, 2, 0).contiguous()

            if cfg.mask_proto_binarize_downsampled_gt:
                downsampled_masks = downsampled_masks.gt(0.5).float()

            if cfg.mask_proto_remove_empty_masks:
                # Get rid of gt masks that are so small they get downsampled away
                very_small_masks = (downsampled_masks.sum(dim=(0, 1)) <= 0.0001)
                for i in range(very_small_masks.size(0)):
                    if very_small_masks[i]:
                        pos[idx, idx_t[idx] == i] = 0

            if cfg.mask_proto_reweight_mask_loss:
                # Ensure that the gt is binary
                if not cfg.mask_proto_binarize_downsampled_gt:
                    bin_gt = downsampled_masks.gt(0.5).float()
                else:
                    bin_gt = downsampled_masks

                gt_foreground_norm = bin_gt / (
                    torch.sum(bin_gt, dim=(0, 1), keepdim=True) + 0.0001)
                gt_background_norm = (1 - bin_gt) / (
                    torch.sum(1 - bin_gt, dim=(0, 1), keepdim=True) + 0.0001)

                mask_reweighting = gt_foreground_norm * cfg.mask_proto_reweight_coeff + gt_background_norm
                mask_reweighting *= mask_h * mask_w

        cur_pos = pos[idx]
        pos_idx_t = idx_t[idx, cur_pos]

        if process_gt_bboxes:
            # Note: this is in point-form
            pos_gt_box_t = gt_box_t[idx, cur_pos]

        if pos_idx_t.size(0) == 0:
            continue

        proto_masks = proto_data[idx]
        proto_coef = mask_data[idx, cur_pos, :]

        if cfg.mask_proto_coeff_diversity_loss:
            if inst_data is not None:
                div_coeffs = inst_data[idx, cur_pos, :]
            else:
                div_coeffs = proto_coef

            loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t)

        # If we have over the allowed number of masks, select a random sample
        old_num_pos = proto_coef.size(0)
        if old_num_pos > cfg.masks_to_train:
            perm = torch.randperm(proto_coef.size(0))
            select = perm[:cfg.masks_to_train]

            proto_coef = proto_coef[select, :]
            pos_idx_t = pos_idx_t[select]

            if process_gt_bboxes:
                pos_gt_box_t = pos_gt_box_t[select, :]

        num_pos = proto_coef.size(0)
        mask_t = downsampled_masks[:, :, pos_idx_t]  # Size: [mask_h, mask_w, num_pos]

        pred_masks = proto_masks @ proto_coef.t()
        pred_masks = cfg.mask_proto_mask_activation(pred_masks)

        if cfg.mask_proto_double_loss:
            if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1), mask_t,
                                                  reduction='sum')
            else:
                pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction='sum')

            loss_m += cfg.mask_proto_double_loss_alpha * pre_loss

        if cfg.mask_proto_crop:
            pred_masks = crop(pred_masks, pos_gt_box_t)

        if cfg.mask_proto_mask_activation == activation_func.sigmoid:
            pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1), mask_t,
                                              reduction='none')
        else:
            pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction='none')

        if cfg.mask_proto_normalize_mask_loss_by_sqrt_area:
            gt_area = torch.sum(mask_t, dim=(0, 1), keepdim=True)
            pre_loss = pre_loss / (torch.sqrt(gt_area) + 0.0001)

        if cfg.mask_proto_reweight_mask_loss:
            pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t]

        if cfg.mask_proto_normalize_emulate_roi_pooling:
            weight = mask_h * mask_w if cfg.mask_proto_crop else 1
            pos_gt_csize = center_size(pos_gt_box_t)
            gt_box_width = pos_gt_csize[:, 2] * mask_w
            gt_box_height = pos_gt_csize[:, 3] * mask_h
            pre_loss = pre_loss.sum(dim=(0, 1)) / gt_box_width / gt_box_height * weight

        # If the number of masks was limited, scale the loss accordingly
        if old_num_pos > num_pos:
            pre_loss *= old_num_pos / num_pos

        loss_m += torch.sum(pre_loss)

    losses = {'M': loss_m * cfg.mask_alpha / mask_h / mask_w}

    if cfg.mask_proto_coeff_diversity_loss:
        losses['D'] = loss_d

    return losses
def upsample(x):
    """Upsample input tensor by a factor of 2."""
    return F.interpolate(x, scale_factor=2, mode="nearest")
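# Quick shape check for the factor-2 upsampling helper above (illustrative only).
import torch

x = torch.randn(2, 64, 32, 32)
print(upsample(x).shape)                          # torch.Size([2, 64, 64, 64])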
def train(args, train_loader, disp_net, pose_exp_net, optimizer, epoch_size, logger, train_writer):
    global n_iter, device
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter(precision=4)
    w1, w2, w3 = args.photo_loss_weight, args.mask_loss_weight, args.smooth_loss_weight

    # switch to train mode
    disp_net.train()
    pose_exp_net.train()

    end = time.time()
    logger.train_bar.update(0)

    for i, (tgt_img, ref_imgs, intrinsics, intrinsics_inv) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        tgt_img = tgt_img.to(device)
        ref_imgs = [img.to(device) for img in ref_imgs]
        intrinsics = intrinsics.to(device)
        intrinsics_inv = intrinsics_inv.to(device)

        # compute output
        disparities = disp_net(tgt_img)
        depth = [1/disp for disp in disparities]
        explainability_mask, pose = pose_exp_net(tgt_img, ref_imgs)

        loss_1 = photometric_reconstruction_loss(tgt_img, ref_imgs,
                                                 intrinsics, intrinsics_inv,
                                                 depth, explainability_mask, pose,
                                                 args.rotation_mode, args.padding_mode)
        if w2 > 0:
            loss_2 = explainability_loss(explainability_mask)
        else:
            loss_2 = 0
        loss_3 = smooth_loss(depth)

        loss = w1*loss_1 + w2*loss_2 + w3*loss_3

        if i > 0 and n_iter % args.print_freq == 0:
            train_writer.add_scalar('photometric_error', loss_1.item(), n_iter)
            if w2 > 0:
                train_writer.add_scalar('explainability_loss', loss_2.item(), n_iter)
            train_writer.add_scalar('disparity_smoothness_loss', loss_3.item(), n_iter)
            train_writer.add_scalar('total_loss', loss.item(), n_iter)

        if args.training_output_freq > 0 and n_iter % args.training_output_freq == 0:
            train_writer.add_image('train Input', tensor2array(tgt_img[0]), n_iter)

            for k, scaled_depth in enumerate(depth):
                train_writer.add_image('train Dispnet Output Normalized {}'.format(k),
                                       tensor2array(disparities[k][0], max_value=None, colormap='bone'),
                                       n_iter)
                train_writer.add_image('train Depth Output Normalized {}'.format(k),
                                       tensor2array(1/disparities[k][0], max_value=None),
                                       n_iter)
                b, _, h, w = scaled_depth.size()
                downscale = tgt_img.size(2)/h

                tgt_img_scaled = F.interpolate(tgt_img, (h, w), mode='area')
                ref_imgs_scaled = [F.interpolate(ref_img, (h, w), mode='area') for ref_img in ref_imgs]

                intrinsics_scaled = torch.cat((intrinsics[:, 0:2]/downscale, intrinsics[:, 2:]), dim=1)
                intrinsics_scaled_inv = torch.cat((intrinsics_inv[:, :, 0:2]*downscale,
                                                   intrinsics_inv[:, :, 2:]), dim=2)

                # log warped images along with explainability mask
                for j, ref in enumerate(ref_imgs_scaled):
                    ref_warped = inverse_warp(ref, scaled_depth[:, 0], pose[:, j],
                                              intrinsics_scaled, intrinsics_scaled_inv,
                                              rotation_mode=args.rotation_mode,
                                              padding_mode=args.padding_mode)[0]
                    train_writer.add_image('train Warped Outputs {} {}'.format(k, j),
                                           tensor2array(ref_warped), n_iter)
                    train_writer.add_image('train Diff Outputs {} {}'.format(k, j),
                                           tensor2array(0.5*(tgt_img_scaled[0] - ref_warped).abs()),
                                           n_iter)
                    if explainability_mask[k] is not None:
                        train_writer.add_image('train Exp mask Outputs {} {}'.format(k, j),
                                               tensor2array(explainability_mask[k][0, j],
                                                            max_value=1, colormap='bone'),
                                               n_iter)

        # record loss and EPE
        losses.update(loss.item(), args.batch_size)

        # compute gradient and do Adam step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        with open(args.save_path/args.log_full, 'a') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t')
            writer.writerow([loss.item(), loss_1.item(),
                             loss_2.item() if w2 > 0 else 0,
                             loss_3.item()])
        logger.train_bar.update(i+1)
        if i % args.print_freq == 0:
            logger.train_writer.write('Train: Time {} Data {} Loss {}'.format(batch_time, data_time, losses))
        if i >= epoch_size - 1:
            break

        n_iter += 1

    return losses.avg[0]
def split_feats(self, feats):
    return (F.interpolate(feats['p2'], scale_factor=0.5, mode='bilinear'),
            feats['p3'],
            feats['p4'],
            feats['p5'],
            F.interpolate(feats['p6'], size=feats['p5'].shape[-2:], mode='bilinear'))
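# Illustrative shape walk-through for the pyramid re-splitting above (assumed FPN sizes):
# 'p2' is halved and 'p6' is resized to match 'p5', while 'p3'/'p4'/'p5' pass through unchanged.
import torch
import torch.nn.functional as F

feats = {'p2': torch.randn(1, 256, 200, 272),
         'p3': torch.randn(1, 256, 100, 136),
         'p4': torch.randn(1, 256, 50, 68),
         'p5': torch.randn(1, 256, 25, 34),
         'p6': torch.randn(1, 256, 13, 17)}
p2_small = F.interpolate(feats['p2'], scale_factor=0.5, mode='bilinear')           # (1, 256, 100, 136)
p6_up = F.interpolate(feats['p6'], size=feats['p5'].shape[-2:], mode='bilinear')   # (1, 256, 25, 34)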