import math
import random

import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import wandb
from PIL import Image
from torchvision import transforms
from torchvision.transforms import ToTensor

# `img_size`, `box_coder`, and the project-local helpers (`resize`, `pad`,
# `random_distort`, `random_crop`, `random_flip`, `random_paste`,
# `rescale_bboxes`) are assumed to be defined elsewhere in the codebase.


def transform(img, boxes, labels):
    # Resize image and boxes to a square input, then normalize with the
    # standard ImageNet statistics.
    img, boxes = resize(img, boxes, size=(img_size, img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])(img)
    return img, boxes, labels
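# `resize` is not shown in this snippet. A minimal sketch of what it is assumed
# to do (torchcv-style: scale the PIL image and rescale the boxes to match);
# the real helper may also support `max_size` and `random_interpolation`.
# `_resize_sketch` is a hypothetical name used for illustration only.
def _resize_sketch(img, boxes, size):
    # img: PIL.Image; boxes: Tensor[N, 4] as (xmin, ymin, xmax, ymax) in pixels.
    w, h = img.size
    ow, oh = size
    sw, sh = ow / w, oh / h
    img = img.resize((ow, oh), Image.BILINEAR)
    boxes = boxes * torch.tensor([sw, sh, sw, sh])
    return img, boxes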
def transform_test(img, boxes, labels):
    # Deterministic eval-time transform: square resize, then normalize with
    # dataset-specific channel statistics.
    img, boxes = resize(img, boxes, size=(img_size, img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4140, 0.4265, 0.4172), (0.2646, 0.2683, 0.2751)),
    ])(img)
    # Encode ground-truth boxes/labels into the model's anchor targets.
    boxes, labels = box_coder.encode(boxes, labels)
    return img, boxes, labels
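# The (0.4140, 0.4265, 0.4172) / (0.2646, 0.2683, 0.2751) constants above look
# like dataset-specific statistics. A sketch of how such per-channel mean/std
# could be computed, assuming a hypothetical `dataset` iterable that yields
# CxHxW float tensors in [0, 1]:
def _channel_stats(dataset):
    n_pixels = 0
    s = torch.zeros(3)
    s2 = torch.zeros(3)
    for img in dataset:
        n_pixels += img.shape[1] * img.shape[2]
        s += img.sum(dim=(1, 2))
        s2 += (img ** 2).sum(dim=(1, 2))
    mean = s / n_pixels
    std = (s2 / n_pixels - mean ** 2).sqrt()  # Var[X] = E[X^2] - E[X]^2
    return mean, std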
def transform_test(img, boxes, labels):
    # Alternative eval-time transform that preserves aspect ratio: resize so
    # the longer side is img_size, then pad to a square canvas. Note: if kept
    # in the same module as the definition above, this one shadows it.
    img, boxes = resize(img, boxes, size=img_size, max_size=img_size)
    img = pad(img, (img_size, img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])(img)
    boxes, labels = box_coder.encode(boxes, labels)
    return img, boxes, labels
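# `pad` is likewise project-local. A minimal sketch under the assumption that
# it pastes the resized image onto a blank square canvas, top-left aligned, so
# box coordinates remain valid. `_pad_sketch` is a hypothetical name:
def _pad_sketch(img, size):
    canvas = Image.new('RGB', size)  # black background
    canvas.paste(img, (0, 0))
    return canvas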
def transform_train(img, boxes, labels):
    # Photometric jitter (brightness/contrast/saturation/hue).
    img = random_distort(img)
    # Optional zoom-out augmentation, currently disabled:
    # if random.random() < 0.5:
    #     img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123, 116, 103))
    img, boxes, labels = random_crop(img, boxes, labels)
    img, boxes = resize(img, boxes, size=(img_size, img_size),
                        random_interpolation=True)
    img, boxes = random_flip(img, boxes)
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4140, 0.4265, 0.4172), (0.2646, 0.2683, 0.2751)),
    ])(img)
    boxes, labels = box_coder.encode(boxes, labels)
    return img, boxes, labels
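# Usage sketch: these transforms are typically handed to a detection dataset
# whose __getitem__ applies them to (img, boxes, labels). `ListDataset` is a
# hypothetical stand-in for the project's dataset class:
# train_ds = ListDataset(root='images/', list_file='train.txt',
#                        transform=transform_train)
# train_loader = torch.utils.data.DataLoader(train_ds, batch_size=16,
#                                            shuffle=True, num_workers=4)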
def create_wandb_img(classes, img_path, target, preds, att_map, f_map, dec_att):
    prob = F.softmax(preds["pred_logits"], -1)
    scores, labels = prob[..., :-1].max(-1)  # drop the no-object class
    img = Image.open(img_path)
    # Resize for logging purposes. Note: this `resize` is the DETR-style
    # helper with an (img, target) signature, unlike the one used above.
    tensor_img = ToTensor()(resize(img, size=(1500, 1333), target=None)[0])

    # Predicted boxes (W&B expects normalized cxcywh under "middle").
    boxes_data = []
    for sc, cl, (cx, cy, width, height) in zip(scores.tolist(), labels.tolist(),
                                               preds["pred_boxes"].tolist()):
        boxes_data.append({
            "position": {"middle": (cx, cy), "width": width, "height": height},
            "box_caption": f"{classes[cl]}: {sc:0.2f}",
            "class_id": cl,
            "scores": {"score": sc},
        })

    # Ground-truth boxes.
    gt_data = []
    for cl, (cx, cy, width, height) in zip(target["labels"].tolist(),
                                           target["boxes"].tolist()):
        gt_data.append({
            "position": {"middle": (cx, cy), "width": width, "height": height},
            "box_caption": f"{classes[cl]}",
            "class_id": cl,
            "scores": {"score": 1.0},
        })

    boxes = {
        "predictions": {"box_data": boxes_data, "class_labels": classes},
        "ground_truth": {"box_data": gt_data, "class_labels": classes},
    }
    wimg = wandb.Image(tensor_img, boxes=boxes,
                       caption="Image: " + str(target["image_id"].item()))

    # Resize to the feed-forward size used by the model.
    tensor_img = ToTensor()(resize(img, size=800, target=None, max_size=1333)[0])

    # Adapted from
    # https://colab.research.google.com/github/facebookresearch/detr/blob/colab/notebooks/detr_attention.ipynb
    # Visualize encoder self-attention at ground-truth box centers.
    # `fact` is the downsampling factor between the model input and the
    # attention-map resolution.
    fact = 2 ** round(math.log2(tensor_img.shape[-1] / att_map.shape[-1]))
    scale_y = img.height / tensor_img.shape[-2]
    scale_x = img.width / tensor_img.shape[-1]

    sample = random.sample(gt_data, min(4, len(gt_data)))  # guard against <4 GT boxes
    idxs = [(int(data["position"]["middle"][1] * tensor_img.shape[-2]),
             int(data["position"]["middle"][0] * tensor_img.shape[-1]))
            for data in sample]
    captions = [data["box_caption"] for data in sample]
    colors = ['lime', 'deepskyblue', 'orange', 'red']

    fig = plt.figure(constrained_layout=True, figsize=(25 * 0.7, 8.5 * 0.7))
    gs = fig.add_gridspec(2, 4)
    axs = [
        fig.add_subplot(gs[0, 0]),
        fig.add_subplot(gs[1, 0]),
        fig.add_subplot(gs[0, -1]),
        fig.add_subplot(gs[1, -1]),
    ]
    for idx_o, ax, col, caption in zip(idxs, axs, colors, captions):
        idx = (idx_o[0] // fact, idx_o[1] // fact)
        ax.imshow(att_map[..., idx[0], idx[1]], cmap='cividis',
                  interpolation='nearest')
        ax.axis('off')
        ax.set_title(f'self-attention: {col} ({caption})')
    fcenter_ax = fig.add_subplot(gs[:, 1:-1])
    fcenter_ax.imshow(img)
    for (y, x), col in zip(idxs, colors):
        # Snap the query point to the center of its attention-map cell.
        x = ((x // fact) + 0.5) * fact
        y = ((y // fact) + 0.5) * fact
        fcenter_ax.add_patch(plt.Circle((x * scale_x, y * scale_y),
                                        fact // 4, color=col))
    fcenter_ax.axis('off')
    self_att = wandb.Image(fig, caption="Image: " + str(target["image_id"].item()))

    # Visualize decoder attention for the four highest-scoring predictions.
    h, w = f_map.shape[-2:]
    keep = torch.sort(scores, 0, descending=True)[1][:4]
    bboxes_scaled = rescale_bboxes(preds["pred_boxes"][keep].cpu(),
                                   (img.width, img.height))
    fig = plt.figure(constrained_layout=True, figsize=(25 * 0.7, 8.5 * 0.7))
    gs = fig.add_gridspec(2, 4)
    axs = [
        fig.add_subplot(gs[0, 0]),
        fig.add_subplot(gs[1, 0]),
        fig.add_subplot(gs[0, -1]),
        fig.add_subplot(gs[1, -1]),
    ]
    for idx, ax, col in zip(keep, axs, colors):
        ax.imshow(dec_att[0, idx].view(h, w))
        ax.axis('off')
        ax.set_title(f'Attention: {col} ({classes[labels[idx].item()]})')
    fcenter_ax = fig.add_subplot(gs[:, 1:-1])
    fcenter_ax.imshow(img)
    for col, (xmin, ymin, xmax, ymax) in zip(colors, bboxes_scaled):
        fcenter_ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin,
                                           ymax - ymin, fill=False,
                                           color=col, linewidth=2))
    # Pass the figure (not the pyplot module) and rename the result so it no
    # longer shadows the `att_map` argument.
    dec_att_img = wandb.Image(fig, caption="Image: " + str(target["image_id"].item()))
    plt.close('all')  # close both attention figures
    return wimg, self_att, dec_att_img
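# Usage sketch: log the three panels to an active W&B run. Assumes
# wandb.init() has been called and that forward hooks captured the encoder
# attention (`enc_att`), the backbone feature map (`f_map`), and the decoder
# attention (`dec_att`); those variable names are illustrative only.
# wimg, self_att, dec_att_img = create_wandb_img(classes, img_path, target,
#                                                preds, enc_att, f_map, dec_att)
# wandb.log({"predictions": wimg,
#            "encoder self-attention": self_att,
#            "decoder attention": dec_att_img})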