def __getitem__(self, idx):
    """Load an image and its boxes/labels, apply augmentation, and return the sample."""
    fname = self.fnames[idx]
    img = Image.open(os.path.join(self.root, fname))
    if img.mode != 'RGB':
        img = img.convert('RGB')

    boxes = self.boxes[idx].clone()
    # Boxes are stored as ratios of the image size; scale to pixel coordinates.
    w, h = img.size
    boxes = boxes * torch.Tensor([w, h, w, h])
    labels = self.labels[idx].clone()
    size = self.input_size

    # Data augmentation (train only).
    if self.train:
        img, boxes = random_flip(img, boxes)
        img, boxes = random_crop(img, boxes)
        img, boxes = resize(img, boxes, (size, size))
    else:
        img, boxes = resize(img, boxes, (size, size))
        # img, boxes = center_crop(img, boxes, (size, size))

    img = self.transform(img)
    return img, boxes, labels
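# A minimal usage sketch (not from the original repo): because each sample's
# boxes/labels have a different length, the default DataLoader collate cannot
# stack them, so a custom collate_fn keeps them as lists. The `dataset` name
# and loader arguments below are assumptions for illustration.
import torch
from torch.utils.data import DataLoader

def detection_collate(batch):
    # Stack fixed-size images into one tensor; leave variable-length
    # boxes/labels as Python lists.
    imgs, boxes, labels = zip(*batch)
    return torch.stack(imgs, 0), list(boxes), list(labels)

loader = DataLoader(dataset, batch_size=16, shuffle=True,
                    num_workers=4, collate_fn=detection_collate)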
def transform(img, boxes, labels):
    # img_size is expected to come from the enclosing scope (e.g. a module-level constant).
    img, boxes = resize(img, boxes, size=(img_size, img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])(img)
    return img, boxes, labels
def train_transform(img, boxes, labels):
    img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123, 116, 103))
    img, boxes, labels = random_crop(img, boxes, labels)
    img, boxes = resize(img, boxes, size=600, random_interpolation=True)
    img, boxes = random_flip(img, boxes)
    img = transforms.ToTensor()(img)
    return img, boxes, labels
def transform_test(self, img, boxes, labels):
    img, boxes = resize(img, boxes, size=(self.img_size, self.img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])(img)
    boxes, labels = self.box_coder.encode(boxes, labels)
    return img, boxes, labels
def transform_image_w_bbox(img, boxes, labels, img_size=224):
    assert torch.is_tensor(boxes), 'type(boxes): {}'.format(type(boxes))
    img = random_distort(img)
    # This slows down learning too much...
    # if random.random() < 0.5:
    #     img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123, 116, 103))
    img, boxes, labels = random_crop(img, boxes, labels)
    img, boxes = resize(img, boxes, size=(img_size, img_size), random_interpolation=True)
    img, boxes = random_flip(img, boxes)
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])(img)
    return img, boxes, labels
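# A hypothetical smoke test for transform_image_w_bbox (not from the source):
# a blank PIL image with one dummy box in (xmin, ymin, xmax, ymax) pixel
# coordinates. Assumes the repo's augmentation helpers (random_distort,
# random_crop, resize, random_flip) are in scope.
import torch
from PIL import Image

img = Image.new('RGB', (640, 480))
boxes = torch.tensor([[10.0, 20.0, 200.0, 220.0]])
labels = torch.tensor([1])
img, boxes, labels = transform_image_w_bbox(img, boxes, labels, img_size=224)
print(img.shape, boxes.shape)  # e.g. torch.Size([3, 224, 224]), torch.Size([n, 4])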
def transform_train(self, img, boxes, labels):
    img = random_distort(img)
    if random.random() < 0.5:
        img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123, 116, 103))
    img, boxes, labels = random_crop(img, boxes, labels)
    img, boxes = resize(img, boxes, size=(self.img_size, self.img_size),
                        random_interpolation=False)
    img, boxes = random_flip(img, boxes)
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])(img)
    boxes, labels = self.box_coder.encode(boxes, labels)
    return img, boxes, labels
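# transform_train/transform_test read self.img_size and self.box_coder, so they
# presumably live on a small wrapper object. A minimal sketch of such a wrapper,
# assuming the two functions above are in scope; the class name and constructor
# are illustrative guesses, not the repo's actual API.
class DetectionTransform:
    transform_train = transform_train  # bind the functions above as methods
    transform_test = transform_test

    def __init__(self, box_coder, img_size=512, train=True):
        self.box_coder = box_coder
        self.img_size = img_size
        self.train = train

    def __call__(self, img, boxes, labels):
        fn = self.transform_train if self.train else self.transform_test
        return fn(img, boxes, labels)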
def main(args):
    print('Loading fpnssd model...')
    net = FPNSSD512(num_classes=21).cuda(3)
    box_coder = SSDBboxCoder(net)
    net = nn.DataParallel(net, [3])
    net.load_state_dict(torch.load('./checkpoint/ckpt.pt')['net'])
    # net.load_state_dict(torch.load('./fpnssd512_20_trained.pth'))
    # net.load_state_dict(torch.load(args[1])['net'])
    net.eval()

    start = time.time()
    print('Loading image...')
    # img = Image.open('/home/yhuangcc/data/voc(07+12)/JPEGImages/000002.jpg')
    # img = Image.open('/home/yhuangcc/data/coco/images/val2017/000000000285.jpg')
    img = Image.open(args[1])
    w, h = img.size
    img = img.resize((512, 512))

    print('Predicting...')
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    x = transform(img).cuda(3)
    loc_preds, cls_preds = net(x.unsqueeze(0))

    print('Decoding...')
    loc_preds = loc_preds.squeeze().cpu()
    cls_preds = F.softmax(cls_preds.squeeze().cpu(), dim=1)
    boxes, labels, scores = box_coder.decode(loc_preds, cls_preds)
    print(f'Detection done; took {time.time() - start:.4f}s')

    label_names = np.loadtxt(
        '/home/yhuangcc/ObjectDetection/datasets/voc/labels.txt', dtype=str).tolist()
    # label_names = np.loadtxt(args[3], dtype=str).tolist()
    img, boxes = resize(img, boxes.cpu(), (w, h))
    vis_image_bbox(img, boxes, [label_names[label] for label in labels], scores)
    plt.close()
def main(
    fast=False,
    batch_size=None,
    **kwargs,
):
    # CONFIG
    batch_size = batch_size or (4 if fast else 32)
    energy_loss = get_energy_loss(config="consistency_two_path", mode="standard", **kwargs)

    # LOGGING
    logger = VisdomLogger("train", env=JOB)

    # DATA LOADING
    video_dataset = ImageDataset(
        files=sorted(
            glob.glob("mount/taskonomy_house_tour/original/image*.png"),
            key=lambda x: int(os.path.basename(x)[5:-4])),
        return_tuple=True,
        resize=720,
    )
    video = RealityTask("video", video_dataset, [tasks.rgb],
                        batch_size=batch_size, shuffle=False)

    # GRAPHS
    graph_baseline = TaskGraph(tasks=energy_loss.tasks + [video], finetuned=False)
    graph_baseline.compile(torch.optim.Adam, lr=3e-5, weight_decay=2e-6, amsgrad=True)
    graph_finetuned = TaskGraph(tasks=energy_loss.tasks + [video], finetuned=True)
    graph_finetuned.compile(torch.optim.Adam, lr=3e-5, weight_decay=2e-6, amsgrad=True)
    graph_conservative = TaskGraph(tasks=energy_loss.tasks + [video], finetuned=True)
    graph_conservative.compile(torch.optim.Adam, lr=3e-5, weight_decay=2e-6, amsgrad=True)
    graph_conservative.load_weights(f"{MODELS_DIR}/conservative/conservative.pth")
    graph_ood_conservative = TaskGraph(tasks=energy_loss.tasks + [video], finetuned=True)
    graph_ood_conservative.compile(torch.optim.Adam, lr=3e-5, weight_decay=2e-6, amsgrad=True)
    graph_ood_conservative.load_weights(
        f"{SHARED_DIR}/results_2F_grounded_1percent_gt_twopath_512_256_crop_7/graph_grounded_1percent_gt_twopath.pth"
    )
    graphs = {
        "baseline": graph_baseline,
        "finetuned": graph_finetuned,
        "conservative": graph_conservative,
        "ood_conservative": graph_ood_conservative,
    }

    inv_transform = transforms.ToPILImage()
    data = {key: {"losses": [], "zooms": []} for key in graphs}
    size = 256
    for batch in range(0, 700):
        if batch * batch_size > len(video_dataset.files):
            break
        # Zoom schedule: shrink to 128px over the first 30% of the video,
        # grow back to 256px by the halfway point, then zoom out to 720px.
        frac = (batch * batch_size * 1.0) / len(video_dataset.files)
        if frac < 0.3:
            size = int(256.0 - 128 * frac / 0.3)
        elif frac < 0.5:
            size = int(128.0 + 128 * (frac - 0.3) / 0.2)
        else:
            size = int(256.0 + (720 - 256) * (frac - 0.5) / 0.5)
        # video.reload()
        size = (size // 32) * 32  # keep sizes divisible by 32 for the networks
        print(size)

        video.step()
        video.task_data[tasks.rgb] = resize(
            video.task_data[tasks.rgb].to(DEVICE), size).data
        print(video.task_data[tasks.rgb].shape)

        with torch.no_grad():
            for i, img in enumerate(video.task_data[tasks.rgb]):
                inv_transform(img.clamp(min=0, max=1.0).data.cpu()).save(
                    f"mount/taskonomy_house_tour/distorted/image{batch*batch_size + i}.png")
            for name, graph in graphs.items():
                # Direct path (rgb -> normal) vs. two-hop path
                # (rgb -> curvature -> normal); their disagreement is the
                # consistency energy recorded below.
                normals = graph.sample_path([tasks.rgb, tasks.normal], reality=video)
                normals2 = graph.sample_path(
                    [tasks.rgb, tasks.principal_curvature, tasks.normal], reality=video)
                for i, img in enumerate(normals):
                    energy, _ = tasks.normal.norm(normals[i:(i + 1)], normals2[i:(i + 1)])
                    data[name]["losses"] += [energy.data.cpu().numpy().mean()]
                    data[name]["zooms"] += [size]
                    inv_transform(img.clamp(min=0, max=1.0).data.cpu()).save(
                        f"mount/taskonomy_house_tour/normals_{name}/image{batch*batch_size + i}.png")
                for i, img in enumerate(normals2):
                    inv_transform(img.clamp(min=0, max=1.0).data.cpu()).save(
                        f"mount/taskonomy_house_tour/path2_{name}/image{batch*batch_size + i}.png")

    pickle.dump(data, open("mount/taskonomy_house_tour/data.pkl", 'wb'))
    os.system("bash ~/scaling/scripts/create_vids.sh")
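# A follow-up sketch (not part of the original script) for inspecting the
# dumped data.pkl: plot per-frame consistency energy against the zoom schedule
# for each graph variant. The output filename is an assumption.
import pickle
import matplotlib.pyplot as plt

data = pickle.load(open("mount/taskonomy_house_tour/data.pkl", "rb"))
fig, (ax0, ax1) = plt.subplots(2, 1, sharex=True)
for name, d in data.items():
    ax0.plot(d["losses"], label=name)
# The zoom schedule is identical across variants, so plot it once.
ax1.plot(next(iter(data.values()))["zooms"], color="gray")
ax0.set_ylabel("consistency energy")
ax0.legend()
ax1.set_ylabel("zoom (px)")
ax1.set_xlabel("frame")
plt.savefig("mount/taskonomy_house_tour/energy_vs_zoom.png")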
def forward(self, x):
    return resize(x, val=dest_task.resize)
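# The one-line forward above reads `dest_task` as a free variable, so it
# presumably sits inside a small wrapper module. A guess at that enclosing
# module (class and attribute names are assumptions; `resize` is the repo's
# helper):
import torch.nn as nn

class ResizeToTask(nn.Module):
    """Resizes inputs to the resolution the destination task expects."""

    def __init__(self, dest_task):
        super().__init__()
        self.dest_task = dest_task

    def forward(self, x):
        return resize(x, val=self.dest_task.resize)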
# img = Image.open(args[2])
w, h = img.size
img = img.resize((512, 512))

print('Predicting...')
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
x = transform(img).cuda(3)
loc_preds, cls_preds = net(x.unsqueeze(0))

print('Decoding...')
box_coder = SSDBboxCoder(net)
box_coder.anchor_boxes = box_coder.anchor_boxes.cuda(3)
loc_preds = loc_preds.squeeze()
cls_preds = F.softmax(cls_preds.squeeze(), dim=1)
boxes, labels, scores = box_coder.decode(loc_preds, cls_preds)
print(f'Detection done; took {time.time() - start:.4f}s')

label_names = np.loadtxt(
    '/home/yhuangcc/ObjectDetection/datasets/voc/labels.txt', dtype=str).tolist()
# label_names = np.loadtxt(args[3], dtype=str).tolist()
img, boxes = resize(img, boxes.cpu(), (w, h))
vis_image_bbox(img, boxes, [label_names[label] for label in labels], scores)
plt.close()

# if __name__ == '__main__':
#     main(sys.argv)
ct = c.copy()
dt = d.copy()
ce = e.copy()
ct, dt, ce = enchancement_transform(ct, dt, ce)
# if len(dt.squeeze()) != 0:
if dt.numel() != 0:
    # Scale normalized box coordinates back to pixel coordinates.
    dt[:, [0, 2]] *= ct.shape[1]
    dt[:, [1, 3]] *= ct.shape[0]
show_detection(ct, dt)

if True:
    print('test resize')
    for i in range(1):
        ct = c.copy()
        dt = d.copy()
        ct = resize(ct)
        show_detection(ct, dt)

if True:
    print('test enchancement_transform (many images)')
    num_test = 10
    for idx, (c, d, e) in enumerate(dataset):
        if idx >= num_test:  # only test the first num_test samples
            break
        ct = c.copy()
        dt = d.copy()
        ce = e.copy()
        ct, dt, ce = enchancement_transform(ct, dt, ce)
        print(dt)
        # if len(dt.squeeze()) != 0:
        if dt.numel() != 0:
            dt[:, [0, 2]] *= ct.shape[1]
            dt[:, [1, 3]] *= ct.shape[0]