import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import Subset, ConcatDataset
# GaussianNoise is a project-defined transform (a sketch follows this function).


def get_voc_dataset(root):
    from dataset.voc_dataset import VOCDataset, custom_collate_fn

    dset_name = 'voc2012'

    transform_og = transforms.Compose([transforms.Resize((416, 416)),
                                       transforms.ToTensor()])
    transform_norm = transforms.Compose([transforms.Resize((416, 416)),
                                         transforms.ToTensor(),
                                         transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
    transform_noise = transforms.Compose([transforms.Resize((416, 416)),
                                          transforms.ToTensor(),
                                          GaussianNoise(mean=0, std=.2)])
    transform_norm_noise = transforms.Compose([transforms.Resize((416, 416)),
                                               transforms.ToTensor(),
                                               transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
                                               GaussianNoise(mean=0, std=.2)])
    transform_rotate = transforms.Compose([transforms.Resize((416, 416)),
                                           transforms.RandomRotation((-60, 60)),
                                           transforms.ToTensor(),
                                           transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
    transform_vflip = transforms.Compose([transforms.Resize((416, 416)),
                                          transforms.RandomVerticalFlip(1),
                                          transforms.ToTensor(),
                                          transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
    transform_hflip = transforms.Compose([transforms.Resize((416, 416)),
                                          transforms.RandomHorizontalFlip(1),
                                          transforms.ToTensor(),
                                          transforms.Normalize([.485, .456, .406], [.229, .224, .225])])

    dset_og = VOCDataset(root, img_size=(416, 416), transforms=transform_og, is_categorical=True)
    dset_norm = VOCDataset(root, img_size=(416, 416), transforms=transform_norm, is_categorical=True)
    dset_noise = VOCDataset(root, img_size=(416, 416), transforms=transform_noise, is_categorical=True)
    dset_norm_noise = VOCDataset(root, img_size=(416, 416), transforms=transform_norm_noise, is_categorical=True)
    # dset_rotate = VOCDataset(root, img_size=(416, 416), transforms=transform_rotate, is_categorical=True)
    # dset_vflip = VOCDataset(root, img_size=(416, 416), transforms=transform_vflip, is_categorical=True)
    # dset_hflip = VOCDataset(root, img_size=(416, 416), transforms=transform_hflip, is_categorical=True)

    num_classes = dset_og.num_classes

    # Shuffle indices once and share the same 70/30 split across every
    # augmented dataset, so each variant's train subset covers the same
    # underlying images.
    n_data = len(dset_og)
    n_train_data = int(n_data * .7)
    indices = list(range(n_data))
    np.random.shuffle(indices)
    train_idx, val_idx = indices[:n_train_data], indices[n_train_data:]

    train_dset_og = Subset(dset_og, indices=train_idx)
    train_dset_norm = Subset(dset_norm, indices=train_idx)
    train_dset_noise = Subset(dset_noise, indices=train_idx)
    train_dset_norm_noise = Subset(dset_norm_noise, indices=train_idx)
    # train_dset_rotate = Subset(dset_rotate, indices=train_idx)
    # train_dset_vflip = Subset(dset_vflip, indices=train_idx)
    # train_dset_hflip = Subset(dset_hflip, indices=train_idx)

    # Concatenate the train subsets (not the full datasets, which would leak
    # validation images into training) to enable the augmented variants:
    # train_dset = ConcatDataset([train_dset_og, train_dset_norm, train_dset_noise, train_dset_norm_noise])
    train_dset = train_dset_og
    val_dset = Subset(dset_og, indices=val_idx)

    collate_fn = custom_collate_fn

    return dset_name, train_dset, val_dset, collate_fn
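import torch

# `GaussianNoise` is not a torchvision transform, so it must be defined in
# this project. A minimal sketch of such a tensor transform (the repository's
# actual version may differ): it assumes it runs after ToTensor, i.e. on a
# float CHW tensor, and deliberately does not clamp so that it also works
# when applied after Normalize.
class GaussianNoise:
    def __init__(self, mean=0., std=1.):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        # Element-wise additive Gaussian noise.
        return tensor + torch.randn_like(tensor) * self.std + self.mean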
        feed_dict = {
            self.input_img: img,
            self.input_roi: roi,
            self.input_labels: labels
        }
        tests = session.run(self.bbox_logit, feed_dict=feed_dict)
        return tests


if __name__ == '__main__':
    import numpy as np
    from dataset.voc_dataset import VOCDataset, bbox_visualization, bbox_transform_inv, bbox_transform, bb_norm, bb_denorm
    from tool.tool import array_img_save

    dataset = VOCDataset()
    network = FastRCNN('FastRCNN', 21, 64)
    network.build()
    network.complete()

    # Restore the pretrained VGG-16 backbone weights into the current session.
    restorer_fc = tf.train.Saver(tf.trainable_variables(scope='vgg_16'))
    session = tf.get_default_session()
    restorer_fc.restore(session, "./VGG/vgg_16.ckpt")

    for step in range(100000):
        images, labels, rois = dataset.get_minbatch(1, step % 3, selectivesearch=True)
        labels = np.reshape(labels, (-1, 5))
        # Map the ground-truth boxes through the ROI-relative bbox transform,
        # then normalize the ROI coordinates by the image width and height.
        labels[:, 0:4] = bbox_transform_inv(rois[0], labels[:, 0:4])
        rois[0] = bb_norm(images[0].shape[1], images[0].shape[0], rois[0])
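# `bb_norm` and `bb_denorm` are project helpers from dataset.voc_dataset.
# A plausible sketch of the pair, assuming boxes are ndarrays of
# (x1, y1, x2, y2) in pixel coordinates (the repository's versions may differ):
import numpy as np

def bb_norm(width, height, boxes):
    # Scale pixel coordinates into [0, 1] relative to the image size.
    boxes = np.array(boxes, dtype=np.float32)
    boxes[:, [0, 2]] /= width
    boxes[:, [1, 3]] /= height
    return boxes

def bb_denorm(width, height, boxes):
    # Inverse of bb_norm: map normalized coordinates back to pixels.
    boxes = np.array(boxes, dtype=np.float32)
    boxes[:, [0, 2]] *= width
    boxes[:, [1, 3]] *= height
    return boxes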
                                         transforms.ToTensor(),
                                         transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
transform_rotate = transforms.Compose([transforms.Resize((224, 224)),
                                       transforms.RandomRotation((-30, 30)),
                                       transforms.ToTensor(),
                                       transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
transform_vertical_flip = transforms.Compose([transforms.Resize((224, 224)),
                                              transforms.RandomVerticalFlip(1),
                                              transforms.ToTensor(),
                                              transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
transform_horizontal_flip = transforms.Compose([transforms.Resize((224, 224)),
                                                transforms.RandomHorizontalFlip(1),
                                                transforms.ToTensor(),
                                                transforms.Normalize([.485, .456, .406], [.229, .224, .225])])

dset_original = VOCDataset(root, img_size=img_size, transforms=transform_original, is_categorical=True)
dset_rotate = VOCDataset(root, img_size=img_size, transforms=transform_rotate, is_categorical=True)
dset_vertical_flip = VOCDataset(root, img_size=img_size, transforms=transform_vertical_flip, is_categorical=True)
dset_horizontal_flip = VOCDataset(root, img_size=img_size, transforms=transform_horizontal_flip, is_categorical=True)

# 70/30 train/validation split. Note that each random_split call below draws
# its own permutation; see the seeded_split sketch after this block for a way
# to keep the four splits aligned.
n_val_data = int(len(dset_original) * .3)
train_val_ratio = [len(dset_original) - n_val_data, n_val_data]
dset_original, dset_val = random_split(dset_original, train_val_ratio)
dset_rotate, _ = random_split(dset_rotate, train_val_ratio)
dset_vertical_flip, _ = random_split(dset_vertical_flip, train_val_ratio)
dset_horizontal_flip, _ = random_split(dset_horizontal_flip, train_val_ratio)

dset_train = ConcatDataset([dset_original, dset_rotate, dset_vertical_flip, dset_horizontal_flip])
print('Train: {} Validation: {}'.format(len(dset_train), len(dset_val)))
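import torch
from torch.utils.data import random_split

# The four random_split calls above each draw an independent permutation, so
# the rotated/flipped "train" halves are not guaranteed to cover the same
# images as dset_original's train half, and augmented copies of validation
# images can leak into training. One way to keep the splits aligned is to
# seed every call identically. A sketch, assuming PyTorch >= 1.6 where
# random_split accepts a generator:
def seeded_split(dset, lengths, seed=0):
    g = torch.Generator().manual_seed(seed)
    return random_split(dset, lengths, generator=g)

# dset_original, dset_val = seeded_split(dset_original, train_val_ratio)
# dset_rotate, _ = seeded_split(dset_rotate, train_val_ratio)
# dset_vertical_flip, _ = seeded_split(dset_vertical_flip, train_val_ratio)
# dset_horizontal_flip, _ = seeded_split(dset_horizontal_flip, train_val_ratio)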
])
horizontal_flip_transforms = transforms.Compose([
    transforms.RandomCrop((224, 224), pad_if_needed=True),
    transforms.RandomHorizontalFlip(1),
    transforms.ToTensor(),
    transforms.Normalize([.485, .456, .406], [.229, .224, .225])
])
vertical_flip_transforms = transforms.Compose([
    transforms.RandomCrop((224, 224), pad_if_needed=True),
    transforms.RandomVerticalFlip(1),
    transforms.ToTensor(),
    transforms.Normalize([.485, .456, .406], [.229, .224, .225])
])

original_dset = VOCDataset(root, img_size=(224, 224), transforms=original_transforms, is_categorical=True)
rotate_dset = VOCDataset(root, img_size=(224, 224), transforms=rotate_transforms, is_categorical=True)
horizontal_flip_dset = VOCDataset(root, img_size=(224, 224), transforms=horizontal_flip_transforms, is_categorical=True)
vertical_flip_dset = VOCDataset(root, img_size=(224, 224), transforms=vertical_flip_transforms, is_categorical=True)

n_class = original_dset.num_classes
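# A usage sketch following the same pattern as the other training scripts in
# this section: concatenate the augmented variants into one training set and
# batch it with a DataLoader. batch_size=16 is an example value, not one
# taken from this file.
from torch.utils.data import ConcatDataset, DataLoader

train_dset = ConcatDataset([original_dset, rotate_dset, horizontal_flip_dset, vertical_flip_dset])
train_loader = DataLoader(train_dset, batch_size=16, shuffle=True)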
# root_val = os.path.join(root, 'images', 'val')
# ann_train = os.path.join(root, 'annotations', 'instances_train2017.json')
# ann_val = os.path.join(root, 'annotations', 'instances_val2017.json')
#
# dset_train = COCODataset(root_train, ann_train, transforms.Compose([transforms.ToTensor()]))
# dset_val = COCODataset(root_val, ann_val, transforms.Compose([transforms.ToTensor()]))

#################### VOC Dataset ####################
root = 'C://DeepLearningData/VOC2012/'
# Note: this rebinding shadows the torchvision.transforms module imported above.
transforms = transforms.Compose([transforms.Resize((224, 224)),
                                 transforms.ToTensor()])

dset_train = VOCDataset(root, img_size=(224, 224), is_validation=False, transforms=transforms, is_categorical=False)
dset_val = VOCDataset(root, img_size=(224, 224), is_validation=True, transforms=transforms, is_categorical=False)

train_data_loader = DataLoader(dset_train, batch_size, shuffle=True, collate_fn=collate_fn)
val_data_loader = DataLoader(dset_val, batch_size, shuffle=True, collate_fn=collate_fn)
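import torch

# `collate_fn` is used above but defined elsewhere in the repository. Because
# detection targets vary in length per image, the default collate cannot
# stack them; a common minimal sketch of such a function (the project's
# actual collate function may differ):
def custom_collate_fn(batch):
    images, targets = zip(*batch)
    # Stack the fixed-size image tensors; keep the variable-length
    # per-image targets as a plain list.
    return torch.stack(images, dim=0), list(targets)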
# ann_train = os.path.join(root, 'annotations', 'instances_train2017.json')
# ann_val = os.path.join(root, 'annotations', 'instances_val2017.json')
#
# dset_train = COCODataset(root_train, ann_train, transforms.Compose([transforms.ToTensor()]))
# dset_val = COCODataset(root_val, ann_val, transforms.Compose([transforms.ToTensor()]))

#################### VOC Dataset ####################
root = 'C://DeepLearningData/VOC2012/'
transforms = transforms.Compose([transforms.Resize((224, 224)),
                                 transforms.ToTensor()])

# dset_train = VOCDataset(root, is_validation=False, transforms=transforms, is_categorical=True)
# dset_val = VOCDataset(root, is_validation=True, transforms=transforms, is_categorical=True)
dset = VOCDataset(root, img_size, transforms=transforms, is_categorical=True)

# 70/30 train/validation split.
n_data = len(dset)
n_train_data = int(n_data * .7)
n_val_data = n_data - n_train_data
dset_train, dset_val = random_split(dset, [n_train_data, n_val_data])

train_data_loader = DataLoader(dset_train, batch_size, shuffle=True, collate_fn=collate_fn)
val_data_loader = DataLoader(dset_val, batch_size, shuffle=True, collate_fn=collate_fn)
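# A quick smoke test of the loaders, assuming each batch is the
# (images, targets) pair produced by a collate function like the sketch above:
images, targets = next(iter(train_data_loader))
print(images.shape)   # expected: torch.Size([batch_size, 3, 224, 224])
print(len(targets))   # one target entry per image in the batch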