Example 1
 def get_dataset(self, set_name, sub_dir=None):
     with redirect_stdout(None):
         training_dataset = CocoDataset(root_dir=self.root_dir,
                                        set_name=set_name,
                                        transform=None,
                                        sub_dir=sub_dir)
     [min_w, min_h] = self.get_min_size(training_dataset)
     if set_name == 'val':
         _transforms = transforms.Compose([Normalizer(), Resizer()])
     else:
         _transforms = transforms.Compose([
             Normalizer(),
             Augmenter(),
             RandomCropOrScale(min_w=min_w, min_h=min_h)
         ])
     training_dataset.transform = _transforms
     return training_dataset
Example 2
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        default='coco',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument(
        '--coco_path',
        default='/home/hao.wyh/jupyter/黑边/smart_reverse_label/coco/',
        help='Path to COCO directory')
    #parser.add_argument('--coco_path', default='/home/hao.wyh/jupyter/黑边/评估任务/3k_imgs/coco/', help='Path to COCO directory')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)
    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()
    coco_eval.evaluate_coco(dataset_val, retinanet)
Example 3
def main(args=None):

	parser     = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

	parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default="coco")
	parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)', default="mscoco_sampled_0.1131.csv")
	parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)', default="coco_class_labels.csv")
	parser.add_argument('--coco_path', help='Path to COCO directory',
						default="/default/path/to/COCO2017/")

	parser = parser.parse_args(args)

	dataset_train = CocoDataset(parser.coco_path, set_name='train2017')
	dataset_csv= CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes)

	keys = []
	# get all keys in coco train set, total image count!
	for k, v in dataset_train.coco.imgToAnns.items():
		keys.append(k)

	main_dict = {}
	annots = []
	imgs = []

	# select the first N images
	for i in dataset_csv.image_names:
		im_id = int(i[:-4])
		for ann in dataset_train.coco.imgToAnns[im_id]:
			annots.append(ann)
		imgs.append(dataset_train.coco.imgs[im_id])

	main_dict['images'] = imgs
	main_dict['annotations'] = annots
	main_dict['categories'] = dataset_train.coco.dataset['categories']
	main_dict['info'] = dataset_train.coco.dataset['info']
	main_dict['licenses'] = dataset_train.coco.dataset['licenses']

	# dump to json
	with open('mini_coco_sampled.json', 'w') as fp:
		json.dump(main_dict, fp)
Example 4
def main(args):
    global C, H, W
    coco_labels = json.load(open(args.coco_labels))
    num_classes = coco_labels['num_classes']
    if args.model == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')

    elif args.model == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')

    elif args.model == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')

    else:
        print("doesn't support %s" % (args['model']))

    load_image_fn = utils.LoadTransformImage(model)
    dim_feats = model.last_linear.in_features
    model = MILModel(model, dim_feats, num_classes)
    model = model.cuda()
    dataset = CocoDataset(coco_labels)
    dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=args.learning_rate_decay_every,
        gamma=args.learning_rate_decay_rate)

    crit = nn.MultiLabelSoftMarginLoss()
    if not os.path.isdir(args.checkpoint_path):
        os.mkdir(args.checkpoint_path)
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, load_image_fn,
          args)
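This main() expects an already-parsed argparse namespace rather than raw argv; a hedged sketch of driving it directly (the field names are the ones read above, the values are placeholders, and any additional fields consumed inside train() are not shown here):

from argparse import Namespace

# Hypothetical namespace mirroring the attributes accessed in main() above.
args = Namespace(coco_labels='coco_labels.json', model='resnet152',
                 batch_size=32, learning_rate=1e-4, weight_decay=0.0,
                 learning_rate_decay_every=10, learning_rate_decay_rate=0.9,
                 checkpoint_path='./checkpoints')
main(args)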
Example 5
def main(args=None):

	parser = argparse.ArgumentParser(description="Evaluate network parameters pf RetinaNet")

	parser.add_argument("--dataset")
	parser.add_argument("--coco_path")
	parser.add_argument("--saved_weights")

	parser = parser.parse_args(args)


	if parser.dataset=="coco":

		dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()]))

	#Load the network
	retinanet = torch.load(parser.saved_weights)
	retinanet.eval()


	# Evaluate the network on COCO
	coco_eval.evaluate_coco(dataset_val, retinanet)
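Because parse_args(args) receives whatever list main() is given, the script can also be driven programmatically; a hedged sketch (the COCO path and weights file name are placeholders, not taken from the source):

# Hypothetical call; equivalent to passing the same flags on the command line.
main(['--dataset', 'coco',
      '--coco_path', '/path/to/COCO',
      '--saved_weights', 'retinanet_final.pt'])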
Example 6
 def get_training_dataloader(self,
                             set_name='train'
                             ):  # this can be used for entire sets
     with redirect_stdout(None):
         self.training_dataset = CocoDataset(root_dir=self.root_dir,
                                             set_name=set_name,
                                             transform=None)
     [min_w, min_h] = self.get_min_size(self.training_dataset)
     self.training_dataset.transform = transforms.Compose(
         [Normalizer(), Augmenter(), Resizer()])
     # RandomCropOrScale(min_w, min_h)])
     # training_dataset = self.get_dataset(set_name=set_name)
     sampler_train = AspectRatioBasedSampler(self.training_dataset,
                                             batch_size=self.batch_size,
                                             shuffle=True)
     self.training_dataloader = DataLoader(dataset=self.training_dataset,
                                           num_workers=self.workers,
                                           collate_fn=collater,
                                           batch_sampler=sampler_train,
                                           pin_memory=True)
     self.print_data_statistics(data_loader=self.training_dataloader,
                                set_type='Training')
Example 7
 def get_validation_dataloader(self,
                               sub_dir=None,
                               sort=False,
                               set_name='val'):
     with redirect_stdout(None):
         self.validation_dataset = CocoDataset(
             root_dir=self.root_dir,
             set_name=set_name,
             sub_dir=sub_dir,
             transform=transforms.Compose([Normalizer(),
                                           Resizer()]),
             categories=self.categories,
             sort=sort)
         # validation_dataset = self.get_dataset(set_name=set_name, sub_dir=sub_dir)
     sampler_val = AspectRatioBasedSampler(self.validation_dataset,
                                           batch_size=1,
                                           shuffle=False)
     self.validation_dataloader = DataLoader(self.validation_dataset,
                                             num_workers=self.workers,
                                             collate_fn=collater,
                                             batch_sampler=sampler_val,
                                             pin_memory=True)
     self.print_data_statistics(data_loader=self.validation_dataloader,
                                set_type='Validation')
Example 8
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--coco_path',
                        help='Path to COCO directory',
                        type=str,
                        default='./data/coco')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--checkpoint',
                        help='The path to the checkpoint.',
                        type=str,
                        default=None)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--batch_size',
                        help='Number of batch',
                        type=int,
                        default=16)
    parser.add_argument('--gpu_ids',
                        help='Gpu parallel',
                        type=str,
                        default='1, 2')

    parser = parser.parse_args(args)

    # Create the data loaders
    dataset_train = CocoDataset(parser.coco_path,
                                set_name='train2017',
                                transform=transforms.Compose(
                                    [Normalizer(),
                                     Augmenter(),
                                     Resizer()]))
    dataset_val = CocoDataset(parser.coco_path,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=4,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=16,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=3,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()
    gpu_ids = parser.gpu_ids.split(',')
    device = torch.device("cuda:" + gpu_ids[0])
    torch.cuda.set_device(device)
    gpu_ids = list(map(int, gpu_ids))
    retinanet = torch.nn.DataParallel(retinanet, device_ids=gpu_ids).to(device)

    if parser.checkpoint:
        pretrained = torch.load(parser.checkpoint).state_dict()
        retinanet.module.load_state_dict(pretrained)

    # add tensorboard to record train log
    retinanet.training = True
    writer = SummaryWriter('./log')
    # writer.add_graph(retinanet, input_to_model=[images, labels])

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['ann'].to(device)])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                writer.add_scalar('Loss/train', loss, iter_num)
                writer.add_scalar('Loss/reg_loss', regression_loss, iter_num)
                writer.add_scalar('Loss/cls_loss', classification_loss,
                                  iter_num)

                epoch_loss.append(float(loss))

                if (iter_num + 1) % 1000 == 0:
                    print('Save model')
                    torch.save(
                        retinanet.module,
                        'COCO_retinanet_epoch{}_iter{}.pt'.format(
                            epoch_num, iter_num))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print('Evaluating dataset')

        coco_eval.evaluate_coco(dataset_val, retinanet, writer)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module, 'COCO_retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
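Because this script pickles the whole module (retinanet.module) rather than a state dict, a saved per-epoch checkpoint can later be restored for inference roughly as follows; a sketch only, assuming a checkpoint name produced by the save calls above and that the original model code is importable when unpickling:

import torch

# Hedged sketch: reload a full-module checkpoint written by the training loop above.
retinanet = torch.load('COCO_retinanet_0.pt')  # returns the nn.Module itself
retinanet = retinanet.cuda()
retinanet.eval()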
Example 9
    else:
        raise Exception('args.dataset not in ["coco", "csv"]')
    
    return args

if __name__ == "__main__":
    args = parse_args()
    json.dump(vars(args), open(args.config_path, 'w'))
    
    # Create the data loaders
    if args.dataset == 'coco':
        dataset_train = CocoDataset(
            root_dir=args.coco_path,
            set_name='train2017', 
            transform=transforms.Compose([
                Normalizer(),
                Augmenter(),
                Resizer()
            ])
        )
        
        dataset_val = CocoDataset(
            root_dir=args.coco_path,
            set_name='val2017', 
            transform=transforms.Compose([
                Normalizer(),
                Resizer()
            ])
        )
        
    elif args.dataset == 'csv':
Example 10
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--optimizer',
                        help='[SGD | Adam]',
                        type=str,
                        default='SGD')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    # Create the data loaders
    print("\n[Phase 1]: Creating DataLoader for {} dataset".format(
        parser.dataset))
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2014',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2014',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=16,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    print('| Num training images: {}'.format(len(dataset_train)))
    print('| Num test images : {}'.format(len(dataset_val)))

    print("\n[Phase 2]: Preparing RetinaNet Detection Model...")
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        device = torch.device('cuda')
        retinanet = retinanet.to(device)

    retinanet = torch.nn.DataParallel(retinanet,
                                      device_ids=range(
                                          torch.cuda.device_count()))
    print("| Using %d GPUs for Train/Validation!" % torch.cuda.device_count())
    retinanet.training = True

    if parser.optimizer == 'Adam':
        optimizer = optim.Adam(retinanet.parameters(),
                               lr=1e-5)  # not mentioned
        print("| Adam Optimizer with Learning Rate = {}".format(1e-5))
    elif parser.optimizer == 'SGD':
        optimizer = optim.SGD(retinanet.parameters(),
                              lr=1e-2,
                              momentum=0.9,
                              weight_decay=1e-4)
        print("| SGD Optimizer with Learning Rate = {}".format(1e-2))
    else:
        raise ValueError('Unsupported Optimizer, must be one of [SGD | Adam]')

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn(
    )  # Freeze the BN parameters to ImageNet configuration

    # Check if there is a 'checkpoints' path
    if not osp.exists('./checkpoints/'):
        os.makedirs('./checkpoints/')

    print("\n[Phase 3]: Training Model on {} dataset...".format(
        parser.dataset))
    for epoch_num in range(parser.epochs):
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.001)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                sys.stdout.write('\r')
                sys.stdout.write(
                    '| Epoch: {} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num + 1, iter_num + 1, len(dataloader_train),
                            float(classification_loss), float(regression_loss),
                            np.mean(loss_hist)))
                sys.stdout.flush()

                del classification_loss
                del regression_loss

            except Exception as e:
                print(e)
                continue

        print("\n| Saving current best model at epoch {}...".format(epoch_num +
                                                                    1))
        torch.save(
            retinanet.state_dict(),
            './checkpoints/{}_retinanet_{}.pt'.format(parser.dataset,
                                                      epoch_num + 1))

        if parser.dataset == 'coco':
            #print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet, device)

        elif parser.dataset == 'csv' and parser.csv_val is not None:
            #print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet, device)

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()
    torch.save(retinanet.state_dict(), './checkpoints/model_final.pt')
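The checkpoints written above are state dicts taken from a DataParallel-wrapped model, so their keys carry a 'module.' prefix; restoring one therefore needs the same wrapping (or prefix stripping). A hedged sketch, assuming the model module used by this example, ResNet-50 depth, 80 COCO classes, and the checkpoint name produced by the per-epoch save above:

import torch

# Hedged sketch: rebuild the network, wrap it the same way, then load the state dict.
retinanet = model.resnet50(num_classes=80, pretrained=False)
retinanet = torch.nn.DataParallel(retinanet).cuda()
retinanet.load_state_dict(torch.load('./checkpoints/coco_retinanet_1.pt'))
retinanet.eval()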
Example 11
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default='coco')
    parser.add_argument('--coco_path',
                        help='Path to COCO directory',
                        default='coco')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--model',
                        help='Path to model (.pt) file.',
                        default='pretrained/coco_resnet_50_map_0_335.pt')
    parser.add_argument('--state_dict',
                        help='Path to state dict (.pth) file.',
                        default='pretrained/coco_resnet_50_map_0_335.pt')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_train,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    video_list = [
        'camera_1.mp4', 'camera_2.mp4', 'camera_5.mp4', 'camera_6.mp4'
    ]
    for video in video_list:
        vidcap = cv2.VideoCapture(video)
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        success, image = vidcap.read()
        count = 0
        total_time = 0
        out = cv2.VideoWriter(
            "final_vid/{}_detected.avi".format(video.split(".")[0]),
            cv2.VideoWriter_fourcc(*'XVID'), fps, (image.shape[1], image.shape[0]))

        with torch.no_grad():
            retinanet.eval()
            while success:
                image_2 = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                transform = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.485, 0.456, 0.406),
                                         (0.229, 0.224, 0.225))
                ])

                image_2 = transform(image_2)
                image_2 = image_2.view((-1, image_2.size()[0],
                                        image_2.size()[1], image_2.size()[2]))
                st = time.time()
                scores, classification, transformed_anchors = retinanet(
                    image_2.cuda().float())
                total_time += time.time() - st
                # print('Elapsed time: {}'.format(time.time()-st))
                idxs = np.where(scores > 0.5)
                # print(torch.max(image_2),torch.min(image_2))
                # print(np.max(image),np.min(image))

                for j in range(idxs[0].shape[0]):
                    bbox = transformed_anchors[idxs[0][j], :]
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    label_name = dataset_val.labels[int(
                        classification[idxs[0][j]])]
                    draw_caption(image, (x1, y1, x2, y2), label_name)

                    cv2.rectangle(image, (x1, y1), (x2, y2),
                                  color=(0, 0, 255),
                                  thickness=2)
                    # print(label_name)

                # cv2.imwrite('final_vid/frame_{}.jpg'.format(count), image)

                out.write(image)
                success, image = vidcap.read()
                count += 1
                # if(count == 100):
                # 	break
            out.release()
        print("Total time taken is = {}".format(
            str(timedelta(seconds=total_time))))
Example 12
import torch
from torchvision import datasets, models, transforms
import new_model
from dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer


dataset_train = CocoDataset("./data", set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
retinanet = new_model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)

retinanet.load_state_dict(torch.load("./saved_models/model_final_0.pth", map_location='cuda:0'))

print(retinanet)
print(retinanet.anchors)
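The load above assumes the checkpoint keys match the bare model exactly. If the .pth had instead been saved from a DataParallel-wrapped model, every key would carry a 'module.' prefix and the strict load would fail; a hedged workaround sketch, assuming the file holds a plain state dict:

state = torch.load("./saved_models/model_final_0.pth", map_location='cuda:0')
# Strip a possible 'module.' prefix so the keys match the unwrapped model.
state = {k.replace('module.', '', 1): v for k, v in state.items()}
retinanet.load_state_dict(state)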
Example 13
def main(args=None):
    parser = argparse.ArgumentParser(
        description=
        'Simple visualizing script for visualize a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--ROI_model', help='Path to ROI model (.pt) file.')
    parser.add_argument('--QRCode_model',
                        help="path to QRcode model(.pt) file")

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose([
                                     Normalizer(ROI_mean, ROI_std),
                                     Resizer()
                                 ]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=None,
                                sampler=None)

    ROI_net = torch.load(parser.ROI_model)
    QRCode_net = torch.load(parser.QRCode_model)

    use_gpu = True

    if use_gpu:
        ROI_net = ROI_net.cuda()
        QRCode_net = QRCode_net.cuda(0)

    ROI_net.eval()
    QRCode_net.eval()

    unnormalize = UnNormalizer(ROI_mean, ROI_std)

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = ROI_net(
                data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            # If batch_size is 1 and both batch_sampler and sampler are None, the loader
            # iterates sequentially (no shuffling), so get_image_name(idx) matches the batch index.
            # Otherwise, this lookup would fail.
            fn = dataset_val.get_image_name(idx)
            print('fn of image:', fn)
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            print("image shape when drawcaption:", img.shape)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(
                    classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)

            if idxs[0].shape[0] == 1:
                origin_img = cv2.imread(fn)
                ph, pw, _ = img.shape
                ret = convert_predict_to_origin_bbox(origin_img, pw, ph, x1,
                                                     y1, x2, y2)
                if ret is None:
                    print("ERROR: convert predicted origin bbox error")
                    continue

                x1p, y1p, x2p, y2p = ret
                print("ROI predicted:", x1p, y1p, x2p, y2p)
                output_file.write(fn + ',' + str(x1p) + ',' + str(y1p) + ',' +
                                  str(x2p) + ',' + str(y2p) + ',ROI\n')
                print("!!!! FN {} saved!!!".format(fn))
                ROI = origin_img[y1p:y2p, x1p:x2p]
                cv2.rectangle(origin_img, (x1p, y1p), (x2p, y2p),
                              color=(0, 0, 255),
                              thickness=8)
                #import pdb
                #pdb.set_trace()
                ROI = ROI.astype(np.float32) / 255.0
                # normalize it
                ROI_normalized = (ROI - QRCode_mean) / QRCode_std
                #resize it
                rows, cols, cns = ROI_normalized.shape
                smallest_side = min(rows, cols)
                #rescale the image so the smallest side is min_side
                min_side = 600.0
                max_side = 900.0
                scale = min_side / smallest_side
                #check if the largest side is now greater than max_side, which can happen
                # when images have a large aspect ratio
                largest_side = max(rows, cols)
                if largest_side * scale > 900:
                    scale = max_side / largest_side

                # resize the image with the computed scale
                ROI_scale = skimage.transform.resize(
                    ROI_normalized,
                    (int(round(rows * scale)), int(round((cols * scale)))))
                rows, cols, cns = ROI_scale.shape

                pad_w = 32 - rows % 32
                pad_h = 32 - cols % 32

                ROI_padded = np.zeros(
                    (rows + pad_w, cols + pad_h, cns)).astype(np.float32)
                ROI_padded[:rows, :cols, :] = ROI_scale.astype(np.float32)
                x = torch.from_numpy(ROI_padded)
                print('x.shape:', x.shape)
                x = torch.unsqueeze(x, dim=0)
                print('x.shape after unsqueeze:', x.shape)
                x = x.permute(0, 3, 1, 2)
                print('x.shape after permute:', x.shape)

                scores, classification, transformed_anchors = QRCode_net(
                    x.cuda().float())
                print('scores:', scores)
                print('classification:', classification)
                print('transformed_anchors:', transformed_anchors)
                idxs = np.where(scores.cpu() > 0.5)
                predict_height, predict_width, _ = ROI_padded.shape

                for j in range(idxs[0].shape[0]):
                    bbox = transformed_anchors[idxs[0][j], :]
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    print("!!QRCode predicted bbox inside ROI:", x1, y1, x2,
                          y2)

                    ret = convert_predict_to_origin_bbox(
                        ROI, predict_width, predict_height, x1, y1, x2, y2)
                    if ret is None:
                        continue

                    qrcode_x1, qrcode_y1, qrcode_x2, qrcode_y2 = ret
                    print('qrcode(bbox):', qrcode_x1, qrcode_y1, qrcode_x2,
                          qrcode_y2)

                    qrcode_img_x1 = x1p + qrcode_x1
                    qrcode_img_y1 = y1p + qrcode_y1
                    qrcode_img_x2 = x1p + qrcode_x2
                    qrcode_img_y2 = y1p + qrcode_y2
                    print('!!!QRCode in image:', qrcode_img_x1, qrcode_img_y1,
                          qrcode_img_x2, qrcode_img_y2)
                    cv2.rectangle(origin_img, (qrcode_img_x1, qrcode_img_y1),
                                  (qrcode_img_x2, qrcode_img_y2),
                                  color=(255, 0, 0),
                                  thickness=8)
                    cv2.imwrite('origin_img_qrcode.png', origin_img)
                    resized = cv2.resize(origin_img, (800, 600))
                    cv2.imshow('result', resized)
            else:
                not_processed_file.write(fn + ",,,,,\n")

            if debug:
                cv2.imshow('img', img)
                cv2.setWindowTitle('img', fn)
                key = cv2.waitKey(0)
                if 'q' == chr(key & 255):
                    exit(0)

    output_file.close()
    not_processed_file.close()
Example 14
def main(args=None):
#def main(epoch):
	parser     = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

	parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
	parser.add_argument('--coco_path', help='Path to COCO directory')
	parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
	parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
	parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

	parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
	parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)

	#parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
	parser.add_argument('--start-epoch', default=0, type=int, help='manual epoch number (useful on restarts)')

	parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')

	parser = parser.parse_args(args)
	#args = parser.parse_args()        
	#parser = parser.parse_args(epoch)

	# Create the data loaders
	if parser.dataset == 'coco':

		if parser.coco_path is None:
			raise ValueError('Must provide --coco_path when training on COCO,')

		dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
		dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()]))

	elif parser.dataset == 'csv':

		if parser.csv_train is None:
			raise ValueError('Must provide --csv_train when training on CSV.')

		if parser.csv_classes is None:
			raise ValueError('Must provide --csv_classes when training on CSV.')


		dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

		if parser.csv_val is None:
			dataset_val = None
			print('No validation annotations provided.')
		else:
			dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

	else:
		raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

	sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
	dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

	if dataset_val is not None:
		sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
		dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

	# Create the model
	if parser.depth == 18:
		retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 34:
		retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 50:
		retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 101:
		retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 152:
		retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
	else:
		raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')		

	use_gpu = True

	if use_gpu:
		retinanet = retinanet.cuda()

	#retinanet().load_state_dict(torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/resnet50-19c8e357.pth'))
       
	#if True:
           #print('==> Resuming from checkpoint..')
           #checkpoint = torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/coco_retinanet_2.pt')
           #retinanet().load_state_dict(checkpoint)
           #best_loss = checkpoint['loss']
           #start_epoch = checkpoint['epoch']
        
	
	retinanet = torch.nn.DataParallel(retinanet).cuda()

	retinanet.training = True

	#optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
	optimizer = optim.SGD(retinanet.parameters(), lr=1e-5)

	scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

	loss_hist = collections.deque(maxlen=500)

	retinanet.train()
	#retinanet.freeze_bn()               #for train from a middle state
	retinanet.module.freeze_bn()       #for train from the very beginning

	print('Num training images: {}'.format(len(dataset_train)))

	for epoch_num in range(parser.start_epoch, parser.epochs):

		if parser.resume:
			if os.path.isfile(parser.resume):
				print("=> loading checkpoint '{}'".format(parser.resume))
				checkpoint = torch.load(parser.resume)
				print(parser.start_epoch)
				#parser.start_epoch = checkpoint['epoch']
				#retinanet.load_state_dict(checkpoint['state_dict'])
				retinanet = checkpoint
				#retinanet.load_state_dict(checkpoint)
				print(retinanet)
				#optimizer.load_state_dict(checkpoint)
				print("=> loaded checkpoint '{}' (epoch {})".format(parser.resume, checkpoint))
			else:
				print("=> no checkpoint found at '{}'".format(parser.resume))

		retinanet.train()
		retinanet.freeze_bn()
		#retinanet.module.freeze_bn()

		if parser.dataset == 'coco':

			print('Evaluating dataset')

			coco_eval.evaluate_coco(dataset_val, retinanet)

		elif parser.dataset == 'csv' and parser.csv_val is not None:

			print('Evaluating dataset')

			mAP = csv_eval.evaluate(dataset_val, retinanet)
		
		epoch_loss = []
		
		for iter_num, data in enumerate(dataloader_train):
			try:
				optimizer.zero_grad()

				classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda()])

				classification_loss = classification_loss.mean()
				regression_loss = regression_loss.mean()

				loss = classification_loss + regression_loss
				
				if bool(loss == 0):
					continue

				loss.backward()

				torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

				optimizer.step()

				loss_hist.append(float(loss))

				epoch_loss.append(float(loss))

				print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
				
				del classification_loss
				del regression_loss
			except Exception as e:
				print(e)
				continue

		if parser.dataset == 'coco':

			print('Evaluating dataset')

			coco_eval.evaluate_coco(dataset_val, retinanet)

		elif parser.dataset == 'csv' and parser.csv_val is not None:

			print('Evaluating dataset')

			mAP = csv_eval.evaluate(dataset_val, retinanet)

		
		scheduler.step(np.mean(epoch_loss))	

		#torch.save(retinanet.module, '{}_retinanet_101_{}.pt'.format(parser.dataset, epoch_num))
		torch.save(retinanet, '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num))
		name = '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num)
		parser.resume = '/users/wenchi/ghwwc/pytorch-retinanet-master_new/' + name

	retinanet.eval()

	torch.save(retinanet, 'model_final_dilation_experiment1.pt')
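As in the other training scripts, parse_args(args) consumes the list handed to main(), so a resumed run can be launched programmatically; a hedged sketch (the paths are placeholders and the checkpoint name only follows the naming pattern used in the save call above):

# Hypothetical resumed run: start from epoch 5 using an earlier checkpoint.
main(['--dataset', 'coco',
      '--coco_path', '/path/to/COCO',
      '--depth', '50',
      '--epochs', '20',
      '--start-epoch', '5',
      '--resume', 'coco_retinanet_dilation_experiment1_4.pt'])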
Example 15
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    #retinanet = torch.load(parser.model)
    retinanet = model.resnet50(num_classes=80, pretrained=True)
    retinanet.load_state_dict(torch.load(parser.model))

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    if not os.path.isdir("./detection_files"):
        os.makedirs("./detection_files")

    for idx, data in enumerate(dataloader_val):

        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(
                data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores > 0.35)
            img_name = data['img_name'].split('.')[0]
            with open("./detection_files/" + img_name + '.txt', 'w') as f:
                for j in range(idxs[0].shape[0]):
                    bbox = transformed_anchors[idxs[0][j], :]
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    label_name = dataset_val.labels[int(
                        classification[idxs[0][j]])]
                    f.write('{},{},{},{},{}'.format(
                        x1, y1, x2, y2, label_name))
                    if j < idxs[0].shape[0] - 1:
                        f.write('\n')
Example 16
def main(args=None):

    parser     = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',default="csv", help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path',default="/home/mayank-s/PycharmProjects/Datasets/coco",help='Path to COCO directory')
    parser.add_argument('--csv_train',default="berkely_ready_to_train_for_retinanet_pytorch.csv", help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',default="berkely_class.csv", help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=200)
    # parser.add_argument('--resume', default=0, help='resume from checkpoint')
    parser = parser.parse_args(args)
    # print(args.resume)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path, set_name='train2014', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2014', transform=transforms.Compose([Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')


        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=0, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    # if use_gpu:
    if torch.cuda.is_available():
        retinanet = retinanet.cuda()

        retinanet = torch.nn.DataParallel(retinanet).cuda()

        retinanet.training = True
    #####################################################################################
    # # args.resume=0
    # Resume_model = False
    # start_epoch=0
    # if Resume_model:
    #     print('==> Resuming from checkpoint..')
    #     checkpoint = torch.load('./checkpoint/saved_with_epochs/retina_fpn_1')
    #     retinanet.load_state_dict(checkpoint['net'])
    #     best_loss = checkpoint['loss']
    #     start_epoch = checkpoint['epoch']
    #     print('Resuming from epoch:{ep}  loss:{lp}'.format(ep=start_epoch, lp=best_loss))
    #####################################################################################
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    retinanet = torch.load("./checkpoint/retina_fpn_1")

    # epoch_num=start_epoch
    for epoch_num in range(parser.epochs):

        # retinanet.train()
        # retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        # print("Saving model...")
        # name = "./checkpoint/retina_fpn_" + str(epoch_num)
        # torch.save(retinanet, name)
        # ###################################################################333
        print('Saving..')
        state = {
            'net': retinanet.module.state_dict(),
            'loss': loss_hist,
            'epoch': epoch_num,
        }
        if not os.path.isdir('checkpoint/saved_with_epochs'):
            os.mkdir('checkpoint/saved_with_epochs')
        # checkpoint_path="./checkpoint/Ckpt_"+
        name = "./checkpoint/saved_with_epochs/retina_fpn_" + str(epoch_num)
        torch.save(state, name)
        # torch.save(state, './checkpoint/retinanet.pth')
        #####################################################################

Esempio n. 17
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        default="csv",
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        default="./data/train_only.csv",
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        default="./data/classes.csv",
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        default="./data/train_only.csv",
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--voc_train',
                        default="./data/voc_train",
                        help='Path to directory containing images and annotations')
    parser.add_argument('--voc_val',
                        default="./data/bov_train",
                        help='Path to directory containing images and annotations')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=101)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=40)

    parser = parser.parse_args(args)
    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))
    elif parser.dataset == 'voc':
        if parser.voc_train is None:
            raise ValueError(
                'Must provide --voc_train when training on PASCAL VOC.')
        dataset_train = XML_VOCDataset(
            img_path=os.path.join(parser.voc_train, 'JPEGImages/'),
            xml_path=os.path.join(parser.voc_train, 'Annotations/'),
            class_list=class_list,
            transform=transforms.Compose(
                [Normalizer(), Augmenter(),
                 ResizerMultiScale()]))

        if parser.voc_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = XML_VOCDataset(
                img_path=os.path.join(parser.voc_val, 'JPEGImages/'),
                xml_path=os.path.join(parser.voc_val, 'Annotations/'),
                class_list=class_list,
                transform=transforms.Compose([Normalizer(),
                                              Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv, coco or voc), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=2,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=2,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=15,
                                                     verbose=True,
                                                     mode="max")
    #scheduler = optim.lr_scheduler.StepLR(optimizer,8)
    loss_hist = collections.deque(maxlen=1024)

    retinanet.train()
    retinanet.module.freeze_bn()
    if not os.path.exists("./logs"):
        os.mkdir("./logs")
    log_file = open("./logs/log.txt", "w")
    print('Num training images: {}'.format(len(dataset_train)))
    best_map = 0
    print("Training models...")
    for epoch_num in range(parser.epochs):

        #scheduler.step(epoch_num)
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            #print('iter num is: ', iter_num)
            try:
                #print(csv_eval.evaluate(dataset_val[:20], retinanet)[0])
                #print(type(csv_eval.evaluate(dataset_val, retinanet)))
                #print('iter num is: ', iter_num % 10 == 0)
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss
                #print(loss)

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                if iter_num % 50 == 0:
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))
                    log_file.write(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f} \n'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)
        elif parser.dataset == 'voc' and parser.voc_val is not None:

            print('Evaluating dataset')

            mAP = voc_eval.evaluate(dataset_val, retinanet)

        is_best_map = False
        try:
            is_best_map = mAP[0][0] > best_map
            best_map = max(mAP[0][0], best_map)
        except Exception:
            pass
        if is_best_map:
            print("Get better map: ", best_map)

            torch.save(retinanet.module,
                       './logs/{}_scale15_{}.pt'.format(epoch_num, best_map))
            shutil.copyfile(
                './logs/{}_scale15_{}.pt'.format(epoch_num, best_map),
                "./best_models/model.pt")
        else:
            print("Current map: ", best_map)
        scheduler.step(best_map)
    retinanet.eval()

    torch.save(retinanet, './logs/model_final.pt')
Esempio n. 18
0
from torch.utils.data import DataLoader
from dataloader import CocoDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, Normalizer

from example import eval, model

if __name__ == '__main__':

    data_type = 'coco'
    data_root_dir = '/home/ubuntu/data'
    # model_depth = 50
    epoch_max = 100
    batch_size = 1

    dataset_train = CocoDataset(data_root_dir,
                                set_name='train2017',
                                transform=transforms.Compose(
                                    [Normalizer(),
                                     Augmenter(),
                                     Resizer()]))
    dataset_val = CocoDataset(data_root_dir,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=True)
    train_data_loader = DataLoader(dataset_train,
                                   num_workers=8,
                                   collate_fn=collater,
                                   batch_sampler=sampler)
    sampler_val = AspectRatioBasedSampler(dataset_val,
def train(batch_size=2, learning_rate=1e-2, train_epoch=100):
    # Normalizer(), Augmenter(), Resizer() are applied sequentially, in this order
    transform = transforms.Compose([Normalizer(), Augmenter(), Resizer()])
    dataset = CocoDataset('./data/coco/', 'train2017', transform)
    data_loader = Data.DataLoader(dataset, batch_size, num_workers=2, shuffle=True,
                                  collate_fn=collater, pin_memory=True)
    dataset_size = len(dataset)
    print('sample number:', dataset_size)
    print('epoch size:', dataset_size / batch_size)

    retinanet = RetinaNet()
    anchor = Anchor()
    focal_loss = FocalLoss()

    if cuda:
        retinanet = torch.nn.DataParallel(retinanet).cuda()
        anchor = anchor.cuda()
        focal_loss = focal_loss.cuda()
    retinanet.module.freeze_bn()

    optimizer = torch.optim.SGD(retinanet.parameters(),
                                lr=learning_rate,
                                momentum=0.9,
                                weight_decay=1e-4)
    '''
    class torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
        factor=0.1, patience=10, verbose=False, threshold=0.0001,
        threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)
    :param optimizer: the optimizer whose learning rate will be adjusted
    :param mode: (str) one of 'min' or 'max'; 'min' lowers the LR when the monitored
                 quantity stops decreasing, 'max' when it stops increasing. Default: 'min'
    :param factor: how much the LR is reduced each time, new_lr = old_lr * factor
    :param patience=10: number of epochs with no improvement tolerated before the LR is reduced
    :param verbose: (bool) if True, print a message to stdout on every update. Default: False
    :param threshold: (float) threshold for measuring a new best value, so only significant changes count. Default: 1e-4
    :param cooldown: number of epochs to wait after an LR reduction before resuming normal operation. Default: 0
    :param min_lr: lower bound on the learning rate
    :param eps: minimal decay applied to the LR; if the difference between the old and new LR is smaller than eps, the update is ignored. Default: 1e-8
    (Translated from the original Chinese notes by CSDN blogger 张叫张大卫,
    https://blog.csdn.net/weixin_40100431/article/details/84311430)
    A small stand-alone sketch of this scheduler's behaviour follows this example.
    '''
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    for epoch_num in range(train_epoch):
        epoch_loss = []

        for iter_num, data in enumerate(data_loader):
            iter_time = time.time()
            images, annots, scales = data
            if cuda:
                images = images.cuda()
                annots = annots.cuda()
                scales = scales.cuda()

            total_anchors = anchor(images)
            classification, localization = retinanet(images)

            cls_loss, loc_loss = \
                focal_loss(classification, localization, total_anchors, annots)
            loss = cls_loss + loc_loss
            epoch_loss.append(float(loss))

            optimizer.zero_grad()
            loss.backward()
            '''
            About torch.nn.utils.clip_grad_norm_():
            In some cases you may find that each layer of your net amplifies the
            gradient it receives. This causes a problem because the lower layers of
            the net then get huge gradients and their updates will be far too large
            to allow the model to learn anything.

            This function 'clips' the norm of the gradients by scaling the gradients
            down by the same amount in order to reduce the norm to an acceptable
            level. In practice this places a limit on the size of the parameter
            updates.

            The hope is that this will ensure that your model gets reasonably
            sized gradients and that the corresponding updates will allow the
            model to learn.
            Quoted from https://discuss.pytorch.org/t/about-torch-nn-utils-clip-grad-norm/13873
            (Worth reading the rather intense back-and-forth in that PyTorch discussion thread.)
            A tiny numeric sketch of the clipping follows this example.
            '''
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()

            print('|', 'Epoch:', epoch_num + 1, '|', 'Iter:', iter_num + 1,
                  '|', 'cls loss:', float(cls_loss), '|', 'loc loss:',
                  float(loc_loss), '|', 'loss:', float(loss), '|', 'lr:',
                  optimizer.param_groups[0]['lr'], '|', 'time:',
                  time.time() - iter_time)

        scheduler.step(np.mean(epoch_loss))

        print('Saving parameters in model on epoch', epoch_num + 1)
        torch.save(
            retinanet.state_dict(),
            './param/param_epoch' + str(epoch_num + 1).zfill(3) + '.pkl')
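
The translated ReduceLROnPlateau notes in the example above say the learning rate only drops after the monitored value has stagnated for more than `patience` epochs. The following stand-alone sketch (not part of the example; the loss values and the 1e-2 starting LR are made up for illustration) shows this for patience=3.

import torch

# Dummy parameter and optimizer, purely to observe the scheduler; values are illustrative.
param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=1e-2)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.1, patience=3)

fake_epoch_losses = [1.0, 0.9, 0.9, 0.9, 0.9, 0.9]  # improvement stops after the 2nd value
for epoch, loss in enumerate(fake_epoch_losses):
    scheduler.step(loss)
    print(epoch, optimizer.param_groups[0]['lr'])
# Prints lr=0.01 for epochs 0-4 and lr=0.001 at epoch 5: the reduction fires only
# once the loss has failed to improve for more than `patience` consecutive steps.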
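
The clip_grad_norm_ docstring in the example above describes gradient clipping in words; this tiny stand-alone sketch (toy tensors, not from the script) shows the rescaling numerically: a gradient of norm 5.0 is scaled down so its total norm becomes the max_norm of 0.1 used in the training loop.

import torch

# A single parameter whose gradient norm (5.0) exceeds the limit.
p = torch.nn.Parameter(torch.zeros(2))
p.grad = torch.tensor([3.0, 4.0])                       # L2 norm = 5.0
total_norm = torch.nn.utils.clip_grad_norm_([p], max_norm=0.1)
print(float(total_norm))                                # 5.0 -> the norm before clipping
print(p.grad)                                           # tensor([0.0600, 0.0800]), norm ~= 0.1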
Esempio n. 20
0
def main(args=None):

    parser = argparse.ArgumentParser(description='Training script for training a EfficientDet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--phi', help='EfficientNet scaling coefficient.', type=int, default=0)
    parser.add_argument('--batch-size', help='Batch size', type=int, default=8)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer(img_size=512)]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer(img_size=512)]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV')

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV')


        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    efficientdet = model.efficientdet(num_classes=dataset_train.num_classes(), pretrained=True, phi=parser.phi)      

    use_gpu = True

    if use_gpu:
        efficientdet = efficientdet.cuda()
    
    efficientdet = torch.nn.DataParallel(efficientdet).cuda()

    efficientdet.training = True

    optimizer = optim.Adam(efficientdet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    efficientdet.train()
    efficientdet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        efficientdet.train()
        efficientdet.module.freeze_bn()
        
        epoch_loss = []
        
        print(('\n' + '%10s' * 5) % ('Epoch', 'gpu_mem', 'Loss', 'reg', 'cls'))
        
        pbar = tqdm(enumerate(dataloader_train), total=len(dataloader_train))
        for iter_num, data in pbar:
            try:
                optimizer.zero_grad()
                
                classification_loss, regression_loss = efficientdet([data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss
                
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(efficientdet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                
                loss = (loss * iter_num) / (iter_num + 1)  # update mean losses
                mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
                s = ('%10s' * 2 + '%10.3g' * 3) % (
                    '%g/%g' % (epoch_num, parser.epochs - 1), '%.3gG' % mem, np.mean(loss_hist), float(regression_loss), float(classification_loss))
                pbar.set_description(s)
                
                del classification_loss
                del regression_loss
            except Exception as e:
                raise e

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, efficientdet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, efficientdet)

        
        scheduler.step(np.mean(epoch_loss))    

        torch.save(efficientdet.module, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    efficientdet.eval()

    torch.save(efficientdet, 'model_final.pt')
Esempio n. 21
0
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple visualizing script for visualize a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(mean, std), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    #sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    #dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=None, sampler=None)

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer(mean, std)

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            # With batch_size=1 and both batch_sampler and sampler left as None, the
            # DataLoader iterates sequentially (no shuffling), so idx matches the dataset
            # order and get_image_name(idx) returns the right file.
            # Otherwise this lookup would fail. See the sketch after this example.
            fn = dataset_val.get_image_name(idx)
            print('fn of image:', fn)
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            print("image shape when drawcaption:", img.shape)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

            if idxs[0].shape[0] == 1:
                origin_img = cv2.imread(fn)
                ret = convert_predict_to_origin_bbox(origin_img, img, x1, y1, x2, y2)
                if ret is None:
                    continue

                x1p, y1p, x2p, y2p = ret
                output_file.write(fn+','+str(x1p)+','+str(y1p)+','+str(x2p)+','+str(y2p)+',ROI\n')
                print("!!!! FN {} saved!!!".format(fn))
            else:
                not_processed_file.write(fn+",,,,,\n")

            if debug:
                cv2.imshow('img', img)
                cv2.setWindowTitle('img', fn)
                key = cv2.waitKey(0)
                if 'q'==chr(key & 255):
                    exit(0)

    output_file.close()
    not_processed_file.close()
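
The comment in the loop above relies on the DataLoader visiting samples in dataset order when no sampler or batch_sampler is given and shuffle is left at its default. A minimal stand-alone check of that assumption (toy dataset, hypothetical names, not part of the script):

from torch.utils.data import DataLoader, Dataset

class ToyDataset(Dataset):
    # Each item is simply its own index, which makes the visiting order easy to verify.
    def __len__(self):
        return 5
    def __getitem__(self, i):
        return i

loader = DataLoader(ToyDataset(), batch_size=1)  # sampler and batch_sampler default to None
for idx, batch in enumerate(loader):
    assert batch.item() == idx                   # sequential order: idx is the dataset index
print('loader visits items in dataset order')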
Esempio n. 22
0
    retinanet.load_state_dict(
        torch.load(weight_file_path, map_location=device),
        strict=False)  # Initialisng Model with loaded weights
    print('model initialized..')

    return retinanet, device


if __name__ == '__main__':

    # Hyperparameters
    val_batch_size = 4
    num_workers = 3
    threshold = 0.05

    model_name = 'retinanet'

    images_folder = '/content/data/val2017'
    test_json_file = '/content/data/test_coco_dataset.json'

    # Load test image folder with corresponding coco json file to test_dataset
    dataset_val = CocoDataset(images_folder,
                              test_json_file,
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    num_classes = dataset_val.num_classes()

    # Run test function
    main()
Esempio n. 23
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default="coco")
    parser.add_argument('--coco_path',
                        help='Path to COCO directory',
                        default="/default/path/to/COCO2017/")

    parser = parser.parse_args(args)

    dataset_train = CocoDataset(parser.coco_path, set_name='train2017')

    annot_dict = {}

    for k, v in dataset_train.coco.catToImgs.items():
        # aa = Counter(v)
        annot_dict[k] = len(v)

    print(annot_dict)

    # fig = plt.figure()
    # axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # left, bottom, width, height (range 0 to 1)
    # axes.plot(np.arange(0,80,1), np.divide(annot_dict.values(), float(len(dataset_train.image_ids)) ), 'r')
    # axes.plot(np.arange(0,80,1), np.divide(annot_dict.values(), float(len(dataset_train.image_ids)) ), 'r')
    # axes.set_xlabel('Class Id')
    # axes.set_ylabel('Annot Count')
    # axes.set_title('MiniCoco vs Coco 2017 train set')
    # fig.show()
    # fig.savefig("minicoco_vs_coco_train2017_annot.png")

    # here extract object sizes.
    size_dict = {}

    for k, v in dataset_train.coco.anns.items():
        # aa = Counter(v)
        area = v['bbox'][2] * v['bbox'][3]
        cat = v['category_id']
        if area < areaRng[0]:
            kk = str(cat) + "_S"
        elif area < areaRng[1]:
            kk = str(cat) + "_M"
        else:
            kk = str(cat) + "_L"

        # update counts
        if kk in size_dict:
            size_dict[kk] += 1
        else:
            size_dict[kk] = 1

    print(size_dict)

    # now sample!!
    imgs_list = []
    ratio_list = []
    best_run_index = 0
    best_diff = 10000
    keys = []
    # get all keys in coco train set, total image count!
    for k, v in dataset_train.coco.imgToAnns.items():
        keys.append(k)

    for rr in range(RUN_COUNT):
        imgs = {}

        # shuffle keys
        shuffle(keys)

        # select first N image
        for i in keys[:MAX_IMG_COUNT]:
            imgs[i] = dataset_train.coco.imgToAnns[i]

        imgs_list.append(imgs)

        # now check for category based annotations
        # annot_sampled = np.zeros(90, int)
        annot_sampled = {}
        for k, v in imgs.items():
            for it in v:
                area = it['bbox'][2] * it['bbox'][3]
                cat = it['category_id']
                if area < areaRng[0]:
                    kk = str(cat) + "_S"
                elif area < areaRng[1]:
                    kk = str(cat) + "_M"
                else:
                    kk = str(cat) + "_L"

                if kk in annot_sampled:
                    annot_sampled[kk] += 1
                else:
                    annot_sampled[kk] = 1

        print(annot_sampled)

        # calculate ratios
        ratios_obj_count = {}
        ratios_obj_size = {}

        failed_run = False
        for k, v in size_dict.items():
            if k not in annot_sampled:
                failed_run = True
                break

            ratios_obj_count[k] = annot_sampled[k] / float(v)
        if failed_run:
            continue

        ratio_list.append(ratios_obj_count)

        min_ratio = min(ratios_obj_count.values())
        max_ratio = max(ratios_obj_count.values())

        diff = max_ratio - min_ratio

        if diff < best_diff:
            best_diff = diff
            best_run_index = rr

        print(best_diff, best_run_index)

    # print imgs_list[best_run_index]

    # now write to csv file
    csv_file = open("mscoco_sampled.csv", 'w')
    write_str = ""

    for k, v in imgs_list[best_run_index].items():
        f_name = dataset_train.coco.imgs[k]['file_name']
        for ann in v:
            bbox = ann['bbox']
            class_id = ann['category_id']
            write_str = f_name+','+str(bbox[0])+','+str(bbox[1])+','+str(bbox[2])+','+str(bbox[3])+','+ \
               str(dataset_train.labels[dataset_train.coco_labels_inverse[class_id]]) + ',' + '0' + '\n'

            csv_file.write(write_str)

    csv_file.close()
Esempio n. 24
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            '{}_retinanet_dilation_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final_dilation.pt')
Esempio n. 25
0
def main():
    data_type = 'coco'
    data_root_dir = '/data/data_coco/'
    # model_depth = 50
    epoch_max = 100
    batch_size = 8

    if data_type == 'coco':
        dataset_train = CocoDataset(data_root_dir,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(data_root_dir,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    else:
        print('Dataset type not supported yet')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=True)
    loader_train = DataLoader(dataset_train,
                              num_workers=8,
                              collate_fn=collater,
                              batch_sampler=sampler)
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=batch_size,
                                          drop_last=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=8,
                            collate_fn=collater,
                            batch_sampler=sampler_val)

    retinanet = model.retinanet_50(dataset_train.num_classes(),
                                   pretrained=True)

    retinanet = retinanet.cuda()
    optimizer = torch.optim.Adam(retinanet.parameters(), lr=1e-4)
    # optimizer = torch.optim.SGD(retinanet.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True,
                                                           factor=0.5)

    model_pretrain_dir = './model/model_final.pt'
    if os.path.exists(model_pretrain_dir):
        print('pretrain model exist!')
        retinanet = torch.load(model_pretrain_dir)

    print('train images num: {}'.format(len(loader_train) * batch_size))
    for epoch_num in range(epoch_max):
        retinanet.train()
        epoch_loss = []
        for iter_num, data in enumerate(loader_train):
            optimizer.zero_grad()
            input_tensor = [data['img'].cuda().float(), data['annot']]
            classification_loss, regression_loss = retinanet(input_tensor)
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss

            epoch_loss.append(float(loss))

            if loss.item() == 0:
                continue

            loss.backward()
            optimizer.step()

            print(
                'Epoch:{}/{} | Iters:{}/{} | C loss:{:.4f} | R loss:{:.4f} | Current loss:{:.4f} | Current LR:{:.7f}'
                .format(epoch_num + 1, epoch_max, iter_num + 1,
                        len(loader_train), float(classification_loss),
                        float(regression_loss), np.mean(epoch_loss),
                        optimizer.param_groups[0]['lr']))
            del classification_loss
            del regression_loss

        # Run validation once per epoch
        eval.eval_coco(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(
            retinanet,
            './model/{}_retinanet_{}.pt'.format(data_type, epoch_num + 1))
    retinanet.eval()
    torch.save(retinanet, './model/model_final.pt')
Esempio n. 26
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

    #	dataset_train = CocoDataset(parser.coco_path, set_name='trainval35k', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val5k',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    #sampler = AspectRatioBasedSampler(dataset_train, batch_size=16, drop_last=False)
    #dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_val.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_val.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_val.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_val.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    retinanet.load_state_dict(
        torch.load("coco_resnet_50_map_0_335_state_dict.pt",
                   encoding='latin1'))
    if use_gpu:
        retinanet = retinanet.cuda()


    # retinanet = torch.nn.DataParallel(retinanet).cuda()
    # retinanet.training = True
    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # loss_hist = collections.deque(maxlen=500)

    retinanet.eval()
    # retinanet.module.freeze_bn()
    # print('Num training images: {}'.format(len(dataset_train)))

    if parser.dataset == 'coco':

        print('Evaluating dataset')

        coco_eval.evaluate_coco(dataset_val, retinanet)

    elif parser.dataset == 'csv' and parser.csv_val is not None:

        print('Evaluating dataset')

        mAP = csv_eval.evaluate(dataset_val, retinanet)
Esempio n. 27
0
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):

        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):

        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)

                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)

            cv2.imshow('img', img)
            cv2.waitKey(0)
Esempio n. 28
0
def main(args=None):
    """
	todo s:
		################## ToDo ########################
		1. download more images using image_utils and isic-arhive. Also, use more online resources for data.
		2. Use Augmentations fromPytorchSSD using pascal voc data format.
		3. use pair augmentation, random erase
		4. download more images for each classes.
		5. preprocessing and feature extraction
		6. bigger 500 px image size. big image tends to make
		7. use ResNet-152 for better peromance.
		8. adversarial training, use crosssentropy, focal loss
		9. use similar optimizatio adam and learning rate schedule like wider face pedestrian dataset.
		10.BGR to RGB
		11. multi scale testing.
		12. soft nms
		13. save model and load from previous epoch
		14. https://github.com/uoguelph-mlrg/Cutout
	"""

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        default="wider_pedestrain",
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument(
        '--coco_path',
        default=
        "/media/milton/ssd1/research/competitions/data_wider_pedestrian/",
        help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=152)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=200)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train_wider_pedestrian',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val_wider_pedestrian',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'wider_pedestrain':
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train_wider_pedestrian',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val_wider_pedestrian',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

        # dataset_test = CocoDataset(parser.coco_path, set_name='test_wider_pedestrian',
        # 						  transform=transforms.Compose([Normalizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv, coco or wider_pedestrain), exiting.')
    batch_size = 4
    num_classes = 1
    print("Total Train:{}".format(len(dataset_train)))
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=4,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    print("Total Validation:{}".format(len(dataset_val)))
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=batch_size,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=1,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)
    best_saved_model_name = "checkpoint/resnet{}_{}_best_model.pth".format(
        parser.depth, parser.dataset)
    best_mAP = 0
    start_epoch = 0

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=num_classes, pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=num_classes, pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=num_classes, pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=num_classes, pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=num_classes, pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    use_gpu = True

    optimizer = optim.Adam(retinanet.parameters(), lr=0.001)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    if use_gpu:
        retinanet_sk = copy.deepcopy(
            retinanet.cpu()
        )  # holds a raw copy of the model; later it is loaded with updated weights to run evaluation on a separate GPU.
        retinanet = retinanet.cuda()
        retinanet = torch.nn.DataParallel(retinanet)

    try:
        print("Loading model and optimizer from checkpoint '{}'".format(
            best_saved_model_name))
        checkpoint = torch.load(best_saved_model_name)
        retinanet.load_state_dict(checkpoint['model'].state_dict())
        best_mAP = checkpoint['map']
        start_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer'].state_dict())

        # optimizer.load_state_dict(checkpoint['optimizer_state'])
        print("Loaded checkpoint '{}' (epoch {})".format(
            best_saved_model_name, checkpoint['epoch']))
        start_epoch = checkpoint['epoch']
        print(
            '==> Resuming Sucessfully from checkpoint from epoch {} with mAP {:.7f}..'
            .format(start_epoch, best_mAP))

    except Exception as e:
        print("\nExcpetion: {}".format(repr(e)))
        print('\n==> Resume Failed...')
    retinanet.training = True

    total_loss = losses.loss

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    freeze_bn(retinanet)

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(start_epoch, parser.epochs):

        retinanet.train()
        freeze_bn(retinanet)

        epoch_loss = []
        # threshold=0.05
        for iter_num, data in enumerate(dataloader_train):
            iter_per_epoch = len(dataset_train) // batch_size

            step = epoch_num * iter_per_epoch + iter_num

            if iter_num == 0:
                print('Iterations per epoch: {}'.format(iter_per_epoch))

            try:
                optimizer.zero_grad()

                classification, regression, anchors = retinanet(
                    data['img'].cuda().float())

                classification_loss, regression_loss = total_loss(
                    classification, regression, anchors, data['annot'])

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                writer.add_scalar('Classification loss', classification_loss,
                                  step)
                writer.add_scalar('Regression loss', regression_loss, step)
                writer.add_scalar("Running Loss", np.mean(loss_hist), step)

                msg = 'Epoch:{}, Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, float(classification_loss),
                    float(regression_loss), np.mean(loss_hist))
                progress_bar(iter_num, iter_per_epoch, msg)
                # print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                # break
                # if iter_num==0:
                # 	break
                if iter_num % 500 == 0:
                    save_model(retinanet, optimizer, best_saved_model_name,
                               epoch_num + 1, epoch_num)
                if False:
                    retinanet.eval()
                    test_data = get_test_loader_for_upload(1)

                    # coco_eval.evaluate_wider_pedestrian_for_upload(epoch_num, test_data, retinanet, retinanet_sk)

                    new_map = coco_eval.evaluate_wider_pedestrian(
                        epoch_num, dataset_val, retinanet, retinanet_sk,
                        threshold)  # to validate

                    epoch_saved_model_name = "checkpoint/resnet{}_{}_epoch_{}.pth".format(
                        parser.depth, parser.dataset, epoch_num)
                    save_model(retinanet, optimizer, epoch_saved_model_name,
                               new_map, epoch_num)
                    # print("\nepoch:{}, validation average precision score:{}".format(epoch_num, new_map))
                    if new_map is None:
                        continue
                    writer.add_scalar('validation mAP', new_map, epoch_num)
                    scheduler.step(np.mean(epoch_loss))

                    if new_map > best_mAP:
                        print(
                            "Found new best model with mAP:{:.7f}, over {:.7f}"
                            .format(new_map, best_mAP))
                        save_model(retinanet, optimizer, best_saved_model_name,
                                   new_map, epoch_num)
                        best_mAP = new_map
                        coco_eval.evaluate_wider_pedestrian_for_upload(
                            parser.depth, epoch_num, test_data, retinanet,
                            retinanet_sk)
                    retinanet.train()

            except Exception as e:
                print(e)

        if parser.dataset == 'coco':

            print('\n==>Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet, threshold=0.2)
            save_model(retinanet, optimizer, best_saved_model_name, 0.5,
                       epoch_num)
            continue
        elif parser.dataset == 'wider_pedestrian':
            for threshold in range(16, 90, 10):
                threshold = threshold / 100
                test_data = get_test_loader_for_upload(1)

                # coco_eval.evaluate_wider_pedestrian_for_upload(epoch_num, test_data, retinanet, retinanet_sk)

                new_map = coco_eval.evaluate_wider_pedestrian(
                    epoch_num, dataset_val, retinanet, retinanet_sk,
                    threshold)  # to validate

                epoch_saved_model_name = "checkpoint/resnet{}_{}_epoch_{}.pth".format(
                    parser.depth, parser.dataset, epoch_num)
                save_model(retinanet, optimizer, epoch_saved_model_name,
                           new_map, epoch_num)
                # print("\nepoch:{}, validation average precision score:{}".format(epoch_num, new_map))
                if new_map is None:
                    continue
                writer.add_scalar('validation mAP', new_map, epoch_num)
                scheduler.step(np.mean(epoch_loss))

                # if new_map>best_mAP:
                print(
                    "Found new best model with mAP:{:.7f}, over {:.7f}".format(
                        new_map, best_mAP))
                save_model(retinanet, optimizer, best_saved_model_name,
                           new_map, epoch_num)
                best_mAP = new_map
                coco_eval.evaluate_wider_pedestrian_for_upload(
                    epoch_num, test_data, retinanet, retinanet_sk, threshold,
                    new_map)

        retinanet.train()
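
# The script above calls several helpers (save_model, freeze_bn, progress_bar) that are not
# included in the snippet. Below is a minimal sketch of what they might look like, assuming the
# checkpoint layout implied by the resume block ('model', 'optimizer', 'map', 'epoch' keys) and
# a simple in-place console progress line; names and behaviour are assumptions, not the original code.
import sys
import torch
import torch.nn as nn


def save_model(model, optimizer, path, mAP, epoch):
    # Checkpoint layout assumed from the resume block above, which reads
    # checkpoint['model'].state_dict(), checkpoint['optimizer'].state_dict(),
    # checkpoint['map'] and checkpoint['epoch'].
    torch.save({'model': model, 'optimizer': optimizer, 'map': mAP, 'epoch': epoch}, path)


def freeze_bn(model):
    # Keep BatchNorm layers in eval mode so their running statistics are not
    # updated while fine-tuning with small batches.
    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d):
            module.eval()


def progress_bar(current, total, msg=''):
    # Overwrite a single console line with the current progress and message.
    percent = 100.0 * (current + 1) / max(total, 1)
    sys.stdout.write('\r[{:5.1f}%] {}'.format(percent, msg))
    sys.stdout.flush()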
Esempio n. 29
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=20)
    parser.add_argument('--resume',
                        '-r',
                        action='store_true',
                        help='resume from checkpoint')
    parser.add_argument('--batch_size',
                        type=int,
                        default=4,
                        help='set the batch size')
    parser.add_argument('--learning_rate',
                        '-lr',
                        type=float,
                        default=1e-3,
                        help='set the learning rate')
    parser = parser.parse_args(args)

    batch_size = parser.batch_size
    learning_rate = parser.learning_rate
    start_epoch = 0
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    best_acc = 0  # best test accuracy

    # mean & std for ROI extraction
    #mean = (0.1146, 0.1147, 0.1148)
    #std = (0.1089, 0.1090, 0.1090)

    # mean & std for QRCode extraction
    mean = (0.2405, 0.2416, 0.2427)
    std = (0.2194, 0.2208, 0.2223)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(
            train_file=parser.csv_train,
            class_list=parser.csv_classes,
            transform=transforms.Compose([
                Normalizer(mean=mean, std=std),
                Augmenter(),
                #YFlipAugmenter(),
                #CropAugmenter(),
                #Rot180Augmenter(),
                Resizer()
            ]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([
                                         Normalizer(mean=mean, std=std),
                                         Resizer()
                                     ]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=4,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=4,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.to(device)

    retinanet = torch.nn.DataParallel(retinanet)
    if parser.resume:
        # Load checkpoint
        print("==> Resuming from checkpoint")
        checkpoint = torch.load('./checkpoint/ckpt.pth')
        retinanet.load_state_dict(checkpoint['net'])
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
        print('resume training from epoch:', start_epoch, " with accuracy:",
              best_acc)

    retinanet.training = True

    #optimizer = optim.Adam(retinanet.parameters(), lr=1e-3)
    optimizer = optim.SGD(retinanet.parameters(),
                          lr=learning_rate,
                          momentum=0.9)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(start_epoch, start_epoch + parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue
        writer.add_scalar('train_loss', np.mean(epoch_loss), epoch_num)

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val,
                                    retinanet,
                                    iou_threshold=0.7,
                                    max_detections=5,
                                    score_threshold=0.2,
                                    epoch=epoch_num)
            print('mapROI:', mAP[0])
            AP, num_annotations = mAP[0]
            acc = 100. * AP
            if acc > best_acc:
                print('Saving... acc:', acc)
                state = {
                    'net': retinanet.state_dict(),
                    'acc': acc,
                    'epoch': epoch_num,
                }
                if not os.path.isdir('checkpoint'):
                    os.mkdir('checkpoint')
                torch.save(state, './checkpoint/ckpt.pth')
                torch.save(retinanet, './checkpoint/best.pth')
                best_acc = acc
            writer.add_scalar('test_acc', acc, epoch_num)

        scheduler.step(np.mean(epoch_loss))

        #torch.save(retinanet.module, osp.join('checkpoints','{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
    writer.close()
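
# The scripts above log scalars through a global `writer` that is never defined in the snippet;
# presumably it is a TensorBoard SummaryWriter created at module level, roughly as sketched here
# (the log directory name is an assumption).
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/retinanet')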
Esempio n. 30
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--efficientdet',
                        help='Use EfficientDet.',
                        action="store_true")
    parser.add_argument('--scaling-compound',
                        help='EfficientDet scaling compound phi.',
                        type=int,
                        default=0)
    parser.add_argument('--batch-size', help='Batchsize.', type=int, default=6)
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--print-model-complexity',
                        help='Print model complexity.',
                        action="store_true")

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=None)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    img_size = parser.scaling_compound * 128 + 512

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose([
                                        Normalizer(),
                                        Augmenter(),
                                        Resizer(img_size=img_size)
                                    ]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose([
                                      Normalizer(),
                                      Resizer(img_size=img_size)
                                  ]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose([
                                       Normalizer(),
                                       Augmenter(),
                                       Resizer(img_size=img_size)
                                   ]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([
                                         Normalizer(),
                                         Resizer(img_size=img_size)
                                     ]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        model = retinanet.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        model = retinanet.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        model = retinanet.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        model = retinanet.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        model = retinanet.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.efficientdet:
        model = efficientdet.efficientdet(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            phi=parser.scaling_compound)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152, or specify --efficientdet'
        )

    use_gpu = True

    if use_gpu:
        model = model.cuda()

    model = torch.nn.DataParallel(model).cuda()

    if parser.print_model_complexity:
        flops, params = get_model_complexity_info(model,
                                                  (3, img_size, img_size),
                                                  as_strings=True,
                                                  print_per_layer_stat=True)
        print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))

    model.training = True

    optimizer = optim.SGD(model.parameters(), lr=4e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    model.train()
    model.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        model.train()
        model.module.freeze_bn()

        freeze_layer(model.module.efficientnet)

        epoch_loss = []
        pbar = tqdm(enumerate(dataloader_train), total=len(dataloader_train))
        for iter_num, data in pbar:
            optimizer.zero_grad()

            classification_loss, regression_loss = model(
                [data['img'].cuda().float(), data['annot']])

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss))

            epoch_loss.append(float(loss))

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available(
            ) else 0
            pbar.set_description(
                f'{mem:.3g}G | {float(classification_loss):1.5f} | {float(regression_loss):1.5f} | {np.mean(loss_hist):1.5f}'
            )
            #print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, model)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, model)

        scheduler.step(np.mean(epoch_loss))

        torch.save(model.module,
                   '{}_model_{}.pt'.format(parser.dataset, epoch_num))

    model.eval()

    torch.save(model, 'model_final.pt')
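
# `freeze_layer`, used above to freeze the EfficientNet backbone, is also not shown in the snippet;
# a minimal sketch, assuming it simply disables gradient updates for the given sub-module.
def freeze_layer(layer):
    # Stop gradient updates for every parameter of the sub-module
    # (called above as freeze_layer(model.module.efficientnet)).
    for param in layer.parameters():
        param.requires_grad = False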