def make_vcoco_transforms(image_set):
    """Return the data-augmentation pipeline for a V-COCO split.

    Args:
        image_set: 'train' for the augmented training pipeline, 'val' for
            the deterministic evaluation pipeline.

    Returns:
        A ``T.Compose`` transform.

    Raises:
        ValueError: if ``image_set`` is neither 'train' nor 'val'.
    """
    normalize = T.Compose([
        T.ToTensor(),
        # ImageNet mean/std normalization.
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    if image_set == 'val':
        # Evaluation: single fixed scale, no augmentation.
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])

    if image_set == 'train':
        multi_scale = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
        # Either resize directly to a random scale...
        resize_directly = T.RandomResize(multi_scale, max_size=1333)
        # ...or resize, take a random crop, then resize again.
        crop_then_resize = T.Compose([
            T.RandomResize([400, 500, 600]),
            T.RandomSizeCrop(384, 600),
            T.RandomResize(multi_scale, max_size=1333),
        ])
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.ColorJitter(.4, .4, .4),
            T.RandomSelect(resize_directly, crop_then_resize),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
def initialise(args):
    """Build the DETR model, loss, post-processor and HICO-DET dataset.

    Args:
        args: namespace with at least ``pretrained``, ``resume``,
            ``partition`` ('train2015' or 'test2015') and ``data_root``.

    Returns:
        Tuple ``(detr, criterion, bbox_postprocessor, dataset)``.

    Raises:
        ValueError: if ``args.partition`` is not a known HICO-DET partition.
    """
    # Load model and loss function.
    detr, criterion, postprocessors = build_model(args)

    # Replace the classification head: keep only the 81 logits relevant here
    # (presumably the 80 COCO classes plus no-object, dropping DETR's unused
    # category slots — confirm against the pre-trained checkpoint layout).
    class_embed = torch.nn.Linear(256, 81, bias=True)
    if os.path.exists(args.pretrained):
        print(f"Load pre-trained model from {args.pretrained}")
        detr.load_state_dict(torch.load(args.pretrained)['model_state_dict'])
    w, b = detr.class_embed.state_dict().values()
    keep = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
        21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
        59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79,
        80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91
    ]
    class_embed.load_state_dict(dict(weight=w[keep], bias=b[keep]))
    detr.class_embed = class_embed

    # Resuming must happen after the head swap so the checkpoint's 81-way
    # head loads into the matching module.
    if os.path.exists(args.resume):
        print(f"Resume from model at {args.resume}")
        detr.load_state_dict(torch.load(args.resume)['model_state_dict'])

    # Prepare dataset transforms.
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]

    if args.partition == 'train2015':
        transforms = T.Compose([
            T.RandomHorizontalFlip(),
            T.ColorJitter(.4, .4, .4),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=1333),
                ])),
            normalize,
        ])
    elif args.partition == 'test2015':
        transforms = T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])
    else:
        # Previously an unknown partition fell through and crashed later with
        # an opaque NameError on `transforms`; fail fast instead.
        raise ValueError(f'unknown partition {args.partition}')

    # Load dataset.
    dataset = HICODetObject(
        pocket.data.HICODet(
            root=os.path.join(
                args.data_root,
                f'hico_20160224_det/images/{args.partition}'),
            anno_file=os.path.join(
                args.data_root, f'instances_{args.partition}.json'),
            target_transform=pocket.ops.ToTensor(input_format='dict')),
        transforms)

    return detr, criterion, postprocessors['bbox'], dataset
def make_coco_transforms(image_set, args):
    """Return the data-augmentation pipeline for a COCO-style split.

    Args:
        image_set: 'train' or 'val'.
        args: namespace with an ``eval`` flag; when truthy, the fixed
            evaluation pipeline is returned regardless of ``image_set``.

    Returns:
        A ``T.Compose`` transform.

    Raises:
        ValueError: if not in eval mode and ``image_set`` is unknown.
    """
    normalize = T.Compose([
        T.ToTensor(),
        # NOTE(review): these statistics differ from the ImageNet values used
        # elsewhere in this file — presumably dataset-specific; confirm.
        T.Normalize([0.538, 0.494, 0.453], [0.257, 0.263, 0.273])
    ])

    scales = [
        480, 512, 544, 576, 608, 640, 672, 680, 690, 704, 736, 768, 788, 800
    ]
    test_size = 1100
    max_size = 1333  # renamed from `max`, which shadowed the builtin

    if args.eval:
        return T.Compose([
            T.RandomResize([test_size], max_size=max_size),
            normalize,
        ])

    if image_set == 'train':
        return T.Compose([
            # Exactly one of horizontal/vertical flip is applied per sample.
            T.RandomSelect(
                T.RandomHorizontalFlip(),
                T.RandomVerticalFlip(),
            ),
            T.RandomSelect(
                T.RandomResize(scales, max_size=max_size),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=max_size),
                ])),
            T.ColorJitter(),
            normalize,
        ])
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([test_size], max_size=max_size),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
class MyDataloader(data.Dataset):
    """Base dataset over ``.h5`` files laid out as ``root/<class>/**/*.h5``.

    Each file is decoded by ``loader`` into an ``(rgb, depth)`` pair.
    Subclasses must override ``train_transform`` and ``val_transform``;
    ``__getitem__`` returns ``(input_tensor, depth_tensor)``.
    """

    modality_names = ["rgb"]

    # Shared augmentation object intended for subclasses' train_transform.
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def is_image_file(self, filename):
        """Return True if *filename* has a supported extension (currently .h5)."""
        IMG_EXTENSIONS = [".h5"]
        return any(
            filename.endswith(extension) for extension in IMG_EXTENSIONS)

    def find_classes(self, dir):
        """Enumerate the immediate subdirectories of *dir* as class names.

        Returns:
            (classes, class_to_idx): sorted class names and a
            name -> integer-index mapping.
        """
        classes = [
            d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))
        ]
        classes.sort()
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        return classes, class_to_idx

    def make_dataset(self, dir, class_to_idx):
        """Collect ``(path, class_index)`` pairs for every .h5 file under *dir*.

        Walks each class subdirectory recursively; results are sorted for
        deterministic ordering across runs.
        """
        images = []
        dir = os.path.expanduser(dir)
        for target in sorted(os.listdir(dir)):
            d = os.path.join(dir, target)
            if not os.path.isdir(d):
                continue
            for root, _, fnames in sorted(os.walk(d)):
                for fname in sorted(fnames):
                    if self.is_image_file(fname):
                        path = os.path.join(root, fname)
                        item = (path, class_to_idx[target])
                        images.append(item)
        return images

    def __init__(self, root, split, modality="rgb", loader=h5_loader):
        """Index *root* and select the transform for *split*.

        Args:
            root: dataset root directory containing one folder per class.
            split: 'train', 'holdout' or 'val'.
            modality: must be one of ``modality_names`` (only 'rgb').
            loader: callable mapping a file path to ``(rgb, depth)``.
        """
        classes, class_to_idx = self.find_classes(root)
        imgs = self.make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        if split == "train":
            self.transform = self.train_transform
        elif split == "holdout":
            self.transform = self.val_transform
        elif split == "val":
            self.transform = self.val_transform
        else:
            raise RuntimeError("Invalid dataset split: " + split + "\n"
                               "Supported dataset splits are: train, val")
        self.loader = loader

        assert modality in self.modality_names, (
            "Invalid modality split: " + modality + "\n" +
            "Supported dataset splits are: " + "".join(self.modality_names))
        self.modality = modality

    def train_transform(self, rgb, depth):
        """Subclass hook: augment a training sample; must return (rgb, depth)."""
        # Fixed: the original message contained a raw line break that split
        # the string literal across source lines.
        raise RuntimeError("train_transform() is not implemented.")

    def val_transform(self, rgb, depth):
        """Subclass hook: prepare an eval sample; must return (rgb, depth)."""
        # Fixed: the original signature was missing `self`, so the bound-method
        # call self.transform(rgb, depth) passed the instance as `rgb`.
        raise RuntimeError("val_transform() is not implemented.")

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        """Load, transform and tensorize the sample at *index*."""
        rgb, depth = self.__getraw__(index)
        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise RuntimeError("transform not defined")

        # `modality` is validated in __init__, so 'rgb' is the only case.
        if self.modality == "rgb":
            input_np = rgb_np

        to_tensor = transforms.ToTensor()
        input_tensor = to_tensor(input_np)
        # Guarantee a CHW layout even for single-channel inputs.
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def __len__(self):
        """Number of indexed samples."""
        return len(self.imgs)