def test_Yolov1Loss():
    """Smoke test: push one VOC sample through Yolo and compute Yolov1Loss.

    Fixed: the original built the network with ``num_boxes=3`` while the loss
    was configured for ``num_boxes=2``; the prediction tensor layout must
    match the loss layout, so both now use the same ``num_boxes``.
    """
    img_size, grid_size = (448, 448), (7, 7)
    num_boxes, num_classes = 2, 20
    v1_loss = Yolov1Loss(weight_coord=1., weight_noobject=1.,
                         num_boxes=num_boxes, num_classes=num_classes,
                         grid_size=grid_size)
    print(v1_loss)
    # The Resize instance is shared between the image and box pipelines so
    # both sides see the same target size.
    rs_ = Resize(size=img_size)
    tt = ToTensor()
    gco = ToGridCellOffset(img_size=img_size, grid_size=grid_size)
    img_trans = Compose([rs_, tt])
    box_trans = Compose([rs_, gco])
    voc = VOCDataset(config, phase='train',
                     img_transform=img_trans, box_transform=box_trans)
    img, boxes = voc[4]
    # Add the batch dimension expected by the network and the loss.
    img = img.unsqueeze(0)
    grid = boxes[0].unsqueeze(0)
    box = boxes[1].unsqueeze(0)
    net = Yolo(num_boxes=num_boxes, num_classes=num_classes,
               grid_size=grid_size)
    out = net(img)
    loss = v1_loss(out, (grid, box))
    print(loss)
def test_transform():
    """Visual check of the full augmentation pipeline on one VOC sample.

    Decodes the normalized (class, cx, cy, w, h) boxes back to pixel corners
    and draws them on the augmented image with OpenCV.
    """
    # Random transforms are single instances shared between the image and the
    # box pipelines, so both apply the same random decision per sample.
    rhf = RandomHorizontalFlip(p=0.5)
    rc = RandomCrop(ratio=0.8)
    cj = ColorJitter(brightness=0.4, saturation=0.4, hue=0.4)
    rb = RandomBlur(p=0.5, r=(2, 3))
    rs = RandomShift(p=0.5, ratio=0.1)
    rs_ = Resize(size=(448, 448))
    tt = ToTensor()
    img_trans = Compose([rhf, rc, cj, rb, rs, rs_, tt])
    box_trans = Compose([rhf, rc, rs, rs_, tt])
    voc = VOCDataset(config, phase='train',
                     img_transform=img_trans, box_transform=box_trans)
    img, boxes = voc[4]
    print(img.size(), type(boxes), boxes, boxes.size())
    # CHW tensor -> HWC array -> BGR image for OpenCV.
    img = img.permute(1, 2, 0)
    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    print(img.shape)
    img_h, img_w = img.shape[:2]
    for box in list(boxes):
        # box layout: (class, cx, cy, w, h), all coords normalized to [0, 1].
        cx = float(box[1]) * img_w
        cy = float(box[2]) * img_h
        bw = float(box[3]) * img_w
        bh = float(box[4]) * img_h
        top_left = (int(cx - bw / 2), int(cy - bh / 2))
        bottom_right = (int(cx + bw / 2), int(cy + bh / 2))
        cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)
    cv2.imshow('transform', img)
    cv2.waitKey(0)
def test_Resize():
    """Visual check of a Resize-only pipeline.

    Boxes here expose pixel corners directly via ``.points()`` (no
    normalization step in the pipeline), so they can be drawn as-is.
    """
    resize = Resize(size=(448, 448))
    img_trans = Compose([resize])
    box_trans = Compose([resize])
    voc = VOCDataset(config, phase='train',
                     img_transform=img_trans, box_transform=box_trans)
    img, boxes = voc[2]
    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    print(img.shape)
    for bbox in boxes:
        corner_a, corner_b = bbox.points()
        cv2.rectangle(img, corner_a, corner_b, (0, 255, 0), 2)
    cv2.imshow('transform', img)
    cv2.waitKey(0)
def test_ToGridCellOffset():
    """Visual check of ToGridCellOffset.

    Decodes each (cell i, cell j, ?, x-offset, y-offset, w, h) target row
    back to a pixel box, draws it plus its center, and overlays the grid.

    Fixed: the vertical grid lines used color (0, 0, 225) while the
    horizontal ones used (0, 0, 255) — clearly a typo; both are now red.
    The hard-coded 448 / 7 constants are also hoisted into named locals.
    """
    size, cells = 448, 7  # square image side and grid side
    rs_ = Resize(size=(size, size))
    tt = ToTensor()
    gco = ToGridCellOffset(img_size=(size, size), grid_size=(cells, cells))
    img_trans = Compose([rs_, tt])
    box_trans = Compose([rs_, gco])
    voc = VOCDataset(config, phase='train',
                     img_transform=img_trans, box_transform=box_trans)
    img, boxes = voc[4]
    print(img.size(), type(boxes), len(boxes))
    print(boxes[0].size(), boxes[1].size())
    print(boxes[0])
    print(boxes[1])
    # CHW tensor -> HWC array -> BGR image for OpenCV.
    img = img.permute(1, 2, 0)
    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    cell = size / float(cells)  # cell side in pixels (448 / 7 = 64)
    for box in boxes[1]:
        # Row layout: (i, j, <unused — presumably class id, confirm>,
        #              o_x, o_y, w, h); w/h normalized to the image size.
        i, j = int(box[0]), int(box[1])
        o_x, o_y = float(box[3]), float(box[4])
        w, h = float(box[5]) * size, float(box[6]) * size
        # Cell index + in-cell offset gives the box center; shift to corner.
        x = (i + o_x) * cell - w / 2
        y = (j + o_y) * cell - h / 2
        cv2.rectangle(img, (int(x), int(y)), (int(x + w), int(y + h)),
                      (0, 255, 0), 2)
        # Mark the box center with a thick blue dot (zero-length line).
        cx, cy = int(x + w / 2), int(y + h / 2)
        cv2.line(img, (cx, cy), (cx, cy), (255, 0, 0), 10)
    # Overlay the grid in red.
    step = size // cells
    for k in range(cells):
        cv2.line(img, (0, step * k), (size, step * k), (0, 0, 255), 2)
        cv2.line(img, (step * k, 0), (step * k, size), (0, 0, 255), 2)
    cv2.imshow('transform', img)
    cv2.waitKey(0)
# Script-level setup: resolve output dir, echo config, build transforms and
# datasets. NOTE(review): this chunk ends mid-`if` (the final `else:` body is
# outside this view), so only comments are added here.
result_dir = os.path.join(os.getcwd(), result_dir)
if cfg["debug"]:
    print("Running in debug mode.")
# In multi-process (distributed) mode only rank 0 prints the config dump.
if cfg["train"]["multi_process"] and cfg["train"]["local_rank"] != 0:
    pass
else:
    print(json.dumps(cfg, indent=2))
# Per-image tensor transform; Normalize with std (1,1,1) only centers values.
image_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (1, 1, 1))
])
# Geometric transforms: training adds a horizontal flip on top of the resize.
train_transform = Compose([
    Resize(cfg["dataset"]["resize"]),
    FlipLeftRight(),
])
val_transform = Compose([
    Resize(cfg["dataset"]["resize"]),
])
train_data = cfg["dataset"]["train_data"]
train_dataset = COCODataset(train_data["root"], train_data["annFile"],
                            train_transform, debug=cfg["debug"])
# Class count is derived from the training annotations.
num_classes = len(train_dataset.classes.keys())
val_data = cfg["dataset"]["val_data"]
val_dataset = COCODataset(val_data["root"], val_data["annFile"],
                          val_transform, debug=cfg["debug"])
# DataParallel-style multi-GPU (list of devices, single process): scale the
# batch size by the number of devices.
if isinstance(device, list) and not cfg["train"]["multi_process"]:
    batch_size = cfg["dataset"]["batch_size"] * len(device)
else:
def dataloader(dataset, batch_size, cuda, conditionnal=False):
    """Build (train, valid, test) DataLoaders for CIFAR10 or MNIST variants.

    dataset: one of
        'CIFAR10'    — train set doubled with a horizontally flipped copy,
        'MNIST'      — plain MNIST,
        'MNIST32'    — MNIST resized (Resize transform prepended),
        'MNIST<d>'   — single digit d (e.g. 'MNIST3'),
        'MNIST32<d>' — single digit d, resized.
    batch_size: loader batch size.
    cuda: device index; > -1 enables pinned memory.
    conditionnal: unused here, kept for interface compatibility.

    Returns (train_loader, valid_loader, test_loader); exits the process on
    an unknown dataset name.

    Fixed: the unknown-dataset branch printed ``args.net`` — an unrelated
    (and inside this function likely undefined) global — which would raise a
    NameError instead of reporting the problem; it now prints the offending
    dataset name.
    """
    def _mnist(train, resize):
        # Shared MNIST pipeline; Resize() is prepended for the 32px variants.
        steps = ([Resize()] if resize else []) + [AddUniformNoise(), ToTensor()]
        return datasets.MNIST('./MNIST', train=train, download=True,
                              transform=transforms.Compose(steps))

    if dataset == 'CIFAR10':
        base = transforms.Compose([AddUniformNoise(0.05), Transpose(),
                                   ToTensor()])
        flip = transforms.Compose([HorizontalFlip(), AddUniformNoise(0.05),
                                   Transpose(), ToTensor()])
        data = datasets.CIFAR10('./CIFAR10', train=True, download=True,
                                transform=base)
        data_hflip = datasets.CIFAR10('./CIFAR10', train=True, download=True,
                                      transform=flip)
        # Double the training set with a flipped copy: 100k samples total.
        data = torch.utils.data.ConcatDataset([data, data_hflip])
        train_data, valid_data = torch.utils.data.random_split(
            data, [90000, 10000])
        test_data = datasets.CIFAR10('./CIFAR10', train=False, download=True,
                                     transform=base)
    elif dataset in ('MNIST', 'MNIST32'):
        resize = dataset == 'MNIST32'
        data = _mnist(True, resize)
        train_data, valid_data = torch.utils.data.random_split(
            data, [50000, 10000])
        test_data = _mnist(False, resize)
    elif ((len(dataset) == 6 and dataset[:5] == 'MNIST') or
          (len(dataset) == 8 and dataset[:7] == 'MNIST32')):
        # Single-digit variants: keep only samples of one label.
        resize = len(dataset) == 8
        label = int(dataset[-1])
        data = _mnist(True, resize)
        # NOTE(review): train_labels/train_data (and test_*) are deprecated
        # torchvision attributes (renamed targets/data) — kept as-is for the
        # pinned version; verify before upgrading torchvision.
        idx = data.train_labels == label
        data.train_labels = data.train_labels[idx]
        data.train_data = data.train_data[idx]
        train_data, valid_data = torch.utils.data.random_split(
            data, [5000, idx.sum() - 5000])
        test_data = _mnist(False, resize)
        idx = test_data.test_labels == label
        test_data.test_labels = test_data.test_labels[idx]
        test_data.test_data = test_data.test_data[idx]
    else:
        print('unknown dataset ?', dataset)
        sys.exit(1)

    # Pinned memory only makes sense when a CUDA device is in use.
    kwargs = {'num_workers': 0, 'pin_memory': True} if cuda > -1 else {}
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size, shuffle=True, **kwargs)
    valid_loader = torch.utils.data.DataLoader(
        valid_data, batch_size=batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=batch_size, shuffle=True, **kwargs)
    return train_loader, valid_loader, test_loader
if __name__ == '__main__':
    # Parse the YAML experiment config.
    with open(args.config) as rptr:
        # Fixed: yaml.load() without an explicit Loader is deprecated
        # (PyYAML >= 5.1) and can construct arbitrary Python objects;
        # safe_load is the correct call for a plain config file.
        config = EasyDict(yaml.safe_load(rptr))
    config = parser_config(config)
    # Seed every RNG source for reproducibility.
    torch.manual_seed(config.SEED)
    torch.cuda.manual_seed(config.SEED)
    np.random.seed(config.SEED)
    random.seed(config.SEED)
    # Augmentations: each random transform is a single instance shared
    # between the image and box pipelines so both apply the same random
    # decision to a given sample.
    rhf = RandomHorizontalFlip(p=0.5)
    rc_ = RandomCrop(ratio=0.75)
    cj_ = ColorJitter(brightness=0.4, saturation=0.4, hue=0.4)
    rb_ = RandomBlur(p=0.5, r=(2, 3))
    rsf = RandomShift(p=0.5, ratio=0.15)
    rs_ = Resize(size=(448, 448))
    tt_ = ToTensor()
    gco = ToGridCellOffset((448, 448), (7, 7))
    img_trans = Compose([rhf, rc_, cj_, rb_, rsf, rs_, tt_])
    box_trans = Compose([rhf, rc_, rsf, rs_, gco])
    dataloader = MakeDataLoader(
        dataset=VOCDataset(config, phase='train',
                           img_transform=img_trans,
                           box_transform=box_trans),
        batch_size=config.TRAIN.BATCH_SIZE,
        shuffle=True)
    exe = Execute(config=config, dataloader=dataloader)
    exe.train()