Example #1
def get_data(dataset,
             dataroot,
             augment,
             resize=608,
             split=0.15,
             split_idx=0,
             multinode=False,
             target_lb=-1):

    transform_train = transforms.Compose(
        [Normalizer(), Augmenter(),
         Resizer(min_side=resize)])
    transform_test = transforms.Compose(
        [Normalizer(), Resizer(min_side=resize)])

    if isinstance(C.get().aug, list):
        logger.debug('augmentation provided.')
        policies = policy_decoder(augment, augment['num_policy'],
                                  augment['num_op'])
        transform_train.transforms.insert(
            0, Augmentation(policies, detection=True))

    if dataset == 'coco':
        total_trainset = CocoDataset(dataroot,
                                     set_name='train',
                                     transform=transform_train)
        testset = CocoDataset(dataroot,
                              set_name='val',
                              transform=transform_test)
    else:
        raise ValueError('Dataset type not understood (only coco is supported here).')

    return total_trainset, testset
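A minimal usage sketch (an assumption, not part of the snippet above): with a COCO-style layout under dataroot and C.get().aug not set to a policy list, the call reduces to the plain train/test transforms.

trainset, testset = get_data(dataset='coco', dataroot='/data/coco', augment=None, resize=608)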
Example #2
def get_dataset(self, set_name, sub_dir=None):
    # Build the dataset without transforms first so get_min_size can inspect
    # the raw image sizes; redirect_stdout silences pycocotools' loading output.
    with redirect_stdout(None):
        training_dataset = CocoDataset(root_dir=self.root_dir,
                                       set_name=set_name,
                                       transform=None,
                                       sub_dir=sub_dir)
    [min_w, min_h] = self.get_min_size(training_dataset)
    if set_name == 'val':
        _transforms = transforms.Compose([Normalizer(), Resizer()])
    else:
        _transforms = transforms.Compose([
            Normalizer(),
            Augmenter(),
            RandomCropOrScale(min_w=min_w, min_h=min_h)
        ])
    training_dataset.transform = _transforms
    return training_dataset
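A hypothetical call (the builder object and its attributes are assumptions, not shown in the snippet): the method lives on a data-builder class constructed with root_dir and a get_min_size helper.

val_set = builder.get_dataset(set_name='val')
train_set = builder.get_dataset(set_name='train', sub_dir='images')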
Example #3
def init_data(parser, verb_orders):
	dataset_train = CSVDataset(train_file=parser.train_file, class_list=parser.classes_file, verb_info=verb_orders, is_training=True,
							   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer(True)]))

	if parser.val_file is None:
		dataset_val = None
		dataloader_val = None  # keep the name bound so the return below never fails
		print('No validation annotations provided.')
	else:
		dataset_val = CSVDataset(train_file=parser.val_file, class_list=parser.classes_file, verb_info=verb_orders, is_training=False,
								 transform=transforms.Compose([Normalizer(), Resizer(False)]))

	sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=True)
	dataloader_train = DataLoader(dataset_train, num_workers=64, collate_fn=collater, batch_sampler=sampler)

	if dataset_val is not None:
		sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=parser.batch_size, drop_last=True)
		dataloader_val = DataLoader(dataset_val, num_workers=64, collate_fn=collater, batch_sampler=sampler_val)
	return dataloader_train, dataset_train, dataloader_val, dataset_val
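Each example so far hands the DataLoader a batch_sampler built by AspectRatioBasedSampler, which groups images of similar aspect ratio into the same batch so collater pads less. A minimal sketch of such a sampler, assuming the dataset exposes an image_aspect_ratio(index) method as the CSV/COCO datasets in these snippets do:

import random
from torch.utils.data import Sampler

class SimpleAspectRatioSampler(Sampler):
    # Sort indices by aspect ratio, then batch neighbours together.
    def __init__(self, data_source, batch_size, drop_last=False):
        order = sorted(range(len(data_source)),
                       key=lambda i: data_source.image_aspect_ratio(i))
        self.groups = [order[i:i + batch_size]
                       for i in range(0, len(order), batch_size)]
        if drop_last and self.groups and len(self.groups[-1]) < batch_size:
            self.groups.pop()

    def __iter__(self):
        random.shuffle(self.groups)  # shuffle batch order, keep the grouping
        return iter(self.groups)

    def __len__(self):
        return len(self.groups)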
Example #4
def get_training_dataloader(self,
                            set_name='train'
                            ):  # this can be used for entire sets
    with redirect_stdout(None):
        self.training_dataset = CocoDataset(root_dir=self.root_dir,
                                            set_name=set_name,
                                            transform=None)
    [min_w, min_h] = self.get_min_size(self.training_dataset)
    self.training_dataset.transform = transforms.Compose(
        [Normalizer(), Augmenter(), Resizer()])
    # RandomCropOrScale(min_w, min_h)])
    # training_dataset = self.get_dataset(set_name=set_name)
    sampler_train = AspectRatioBasedSampler(self.training_dataset,
                                            batch_size=self.batch_size,
                                            shuffle=True)
    self.training_dataloader = DataLoader(dataset=self.training_dataset,
                                          num_workers=self.workers,
                                          collate_fn=collater,
                                          batch_sampler=sampler_train,
                                          pin_memory=True)
    self.print_data_statistics(data_loader=self.training_dataloader,
                               set_type='Training')
Example #5
def main(args=None):

    parser = argparse.ArgumentParser(description='Training script for training an EfficientDet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--phi', help='EfficientNet scaling coefficient.', type=int, default=0)
    parser.add_argument('--batch-size', help='Batch size', type=int, default=8)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer(img_size=512)]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer(img_size=512)]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV data.')

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV data.')


        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    efficientdet = model.efficientdet(num_classes=dataset_train.num_classes(), pretrained=True, phi=parser.phi)      

    use_gpu = True

    if use_gpu:
        efficientdet = efficientdet.cuda()
    
    efficientdet = torch.nn.DataParallel(efficientdet).cuda()

    efficientdet.training = True

    optimizer = optim.Adam(efficientdet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    efficientdet.train()
    efficientdet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        efficientdet.train()
        efficientdet.module.freeze_bn()
        
        epoch_loss = []
        
        print(('\n' + '%10s' * 5) % ('Epoch', 'gpu_mem', 'Loss', 'reg', 'cls'))
        
        pbar = tqdm(enumerate(dataloader_train), total=len(dataloader_train))
        for iter_num, data in pbar:
            try:
                optimizer.zero_grad()
                
                classification_loss, regression_loss = efficientdet([data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss
                
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(efficientdet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                
                mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0  # (GB); memory_reserved() replaces the deprecated memory_cached()
                s = ('%10s' * 2 + '%10.3g' * 3) % (
                    '%g/%g' % (epoch_num, parser.epochs - 1), '%.3gG' % mem, np.mean(loss_hist), float(regression_loss), float(classification_loss))
                pbar.set_description(s)
                
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, efficientdet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, efficientdet)

        
        scheduler.step(np.mean(epoch_loss))    

        torch.save(efficientdet.module, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    efficientdet.eval()

    torch.save(efficientdet, 'model_final.pt')
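The repeated freeze_bn() calls in this and the following examples keep BatchNorm layers in eval mode, so their running statistics are not disturbed by the small detection batches. A sketch of what such a helper typically looks like (an assumption about the repo's implementation, not a verbatim copy):

import torch.nn as nn

def freeze_bn(model):
    # BatchNorm weights stay trainable; only the running mean/var updates stop.
    for layer in model.modules():
        if isinstance(layer, nn.BatchNorm2d):
            layer.eval()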
Example #6
def main(args=None):
    """
    In current implementation, if test csv is provided, we use that as validation set and combine the val and train csv's 
    as the csv for training.

    If train_all_labeled_data flag is use, then we combine all 3 (if test is provided) for training and use a prespecified learning rate step schedule.
    """

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)',
        default=None)
    parser.add_argument(
        '--csv_test',
        help=
        'Path to file containing test annotations (optional, if provided, train & val will be combined for training and test will be used for evaluation)',
        default=None)
    parser.add_argument('--lr', type=float, default=2e-5)
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=101)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=25)
    parser.add_argument('--model_output_dir', type=str, default='models')
    parser.add_argument(
        '--train_all_labeled_data',
        help=
        'Combine train, val, and test into 1 training set. Will use prespecified learning rate scheduler steps',
        action='store_true')
    parser.add_argument('--resnet-backbone-normalization',
                        choices=['batch_norm', 'group_norm'],
                        type=str,
                        default='batch_norm')

    parser = parser.parse_args(args)

    print('Learning Rate: {}'.format(parser.lr))
    print("Normalization: ", parser.resnet_backbone_normalization)

    # Create output folder - fail fast if it already exists so models are never overwritten
    assert not os.path.exists(parser.model_output_dir)
    os.mkdir(parser.model_output_dir)

    if parser.csv_train is None:
        raise ValueError('Must provide --csv_train when training.')

    if parser.csv_classes is None:
        raise ValueError('Must provide --csv_classes when training.')

    if not parser.csv_val and parser.csv_test:
        raise ValueError(
            "Cannot specify test set without specifying validation set")

    if parser.train_all_labeled_data:
        csv_paths = [parser.csv_train, parser.csv_val, parser.csv_test]
        train_csv = []
        for path in csv_paths:
            if isinstance(path, str):
                train_csv.append(path)
        val_csv = None
    else:
        if parser.csv_train and parser.csv_val and parser.csv_test:
            train_csv = [parser.csv_train, parser.csv_val
                         ]  # Combine train and val sets for training
            val_csv = parser.csv_test
        else:
            train_csv = parser.csv_train
            val_csv = parser.csv_val

    print('loading train data')
    print(train_csv)
    dataset_train = CSVDataset(train_file=train_csv,
                               class_list=parser.csv_classes,
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    print(dataset_train.__len__())

    if val_csv is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=val_csv,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))

    print('putting data into loader')
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    print('creating model')
    if parser.depth == 18:
        retinanet = model.resnet18(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 34:
        retinanet = model.resnet34(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 50:
        retinanet = model.resnet50(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 101:
        retinanet = model.resnet101(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 152:
        retinanet = model.resnet152(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)

    lr_factor = 0.3
    if not parser.train_all_labeled_data:
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         patience=3,
                                                         factor=lr_factor,
                                                         verbose=True)
    else:
        # these milestones are for when using the lung masks - not for unmasked lung data
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[12, 16, 20,
                                   24], gamma=lr_factor)  # masked training
        #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[14, 18, 22, 26], gamma=lr_factor)
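        # With the default lr of 2e-5 and gamma=0.3, this schedule yields
        # 6e-6 after epoch 12, 1.8e-6 after 16, 5.4e-7 after 20, 1.62e-7 after 24.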

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    #initialize tensorboard
    writer = SummaryWriter(comment=parser.model_output_dir)

    # Augmentation
    seq = iaa.Sequential([
        iaa.Fliplr(0.5),
        iaa.Flipud(0.5),
        iaa.Affine(scale={
            "x": (1.0, 1.2),
            "y": (1.0, 1.2)
        },
                   rotate=(-20, 20),
                   shear=(-4, 4))
    ],
                         random_order=True)

    def augment(data, seq):
        for n, img in enumerate(data['img']):
            # imgaug needs dim in format (H, W, C)
            image = data['img'][n].permute(1, 2, 0).numpy()

            bbs_array = []
            for ann in data['annot'][n]:
                x1, y1, x2, y2, _ = ann
                bbs_array.append(BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2))

            bbs = BoundingBoxesOnImage(bbs_array, shape=image.shape)
            image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)

            # save augmented image and chage dims to (C, H, W)
            data['img'][n] = torch.tensor(image_aug.copy()).permute(2, 0, 1)

            # save augmented annotations
            for i, bbox in enumerate(bbs_aug.bounding_boxes):
                x1, y1, x2, y2 = bbox.x1, bbox.y1, bbox.x2, bbox.y2
                obj_class = data['annot'][n][i][-1]
                data['annot'][n][i] = torch.tensor([x1, y1, x2, y2, obj_class])

        return data

    print('Num training images: {}'.format(len(dataset_train)))
    dir_training_images = os.path.join(os.getcwd(), writer.log_dir,
                                       'training_images')
    os.mkdir(dir_training_images)

    best_validation_loss = None
    best_validation_map = None

    for epoch_num in range(parser.epochs):

        writer.add_scalar('Train/LR', optimizer.param_groups[0]['lr'],
                          epoch_num)

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                data = augment(data, seq)

                # save a few training images to see what augmentation looks like
                if iter_num % 100 == 0 and epoch_num == 0:
                    x1, y1, x2, y2, _ = data['annot'][0][0]

                    fig, ax = plt.subplots(1)
                    ax.imshow(data['img'][0][1])
                    rect = patches.Rectangle((x1, y1),
                                             x2 - x1,
                                             y2 - y1,
                                             linewidth=1,
                                             edgecolor='r',
                                             facecolor='none',
                                             alpha=1)
                    ax.add_patch(rect)
                    fig.savefig(
                        os.path.join(dir_training_images,
                                     '{}.png'.format(iter_num)))
                    plt.close()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                if parser.resnet_backbone_normalization == 'batch_norm':
                    torch.nn.utils.clip_grad_norm_(
                        parameters=retinanet.parameters(), max_norm=0.1)
                else:
                    torch.nn.utils.clip_grad_norm_(
                        parameters=retinanet.parameters(), max_norm=0.01
                    )  # Decrease norm to reduce risk of exploding gradients

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        writer.add_scalar('Train/Loss', np.mean(epoch_loss), epoch_num)

        if not parser.train_all_labeled_data:
            print('Evaluating Validation Loss...')
            with torch.no_grad():
                retinanet.train()
                val_losses, val_class_losses, val_reg_losses = [], [], []
                for val_iter_num, val_data in enumerate(dataloader_val):
                    try:
                        val_classification_loss, val_regression_loss = retinanet(
                            [
                                val_data['img'].cuda().float(),
                                val_data['annot']
                            ])
                        val_losses.append(
                            float(val_classification_loss) +
                            float(val_regression_loss))
                        val_class_losses.append(float(val_classification_loss))
                        val_reg_losses.append(float(val_regression_loss))
                        del val_classification_loss, val_regression_loss
                    except Exception as e:
                        print(e)
                        continue
                print(
                    'VALIDATION Epoch: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Total loss: {:1.5f}'
                    .format(epoch_num, np.mean(val_class_losses),
                            np.mean(val_reg_losses), np.mean(val_losses)))

                # Save model with best validation loss
                if best_validation_loss is None:
                    best_validation_loss = np.mean(val_losses)
                if best_validation_loss >= np.mean(val_losses):
                    best_validation_loss = np.mean(val_losses)
                    torch.save(
                        retinanet.module,
                        parser.model_output_dir + '/best_result_valloss.pt')

                writer.add_scalar('Validation/Loss', np.mean(val_losses),
                                  epoch_num)

                # Calculate Validation mAP
                print('Evaluating validation mAP')
                mAP = csv_eval.evaluate(dataset_val, retinanet)
                print("Validation mAP: " + str(mAP[0][0]))
                if best_validation_map is None or best_validation_map < mAP[0][0]:
                    best_validation_map = mAP[0][0]
                    torch.save(
                        retinanet.module,
                        parser.model_output_dir + '/best_result_valmAP.pt')

                writer.add_scalar('Validation/mAP', mAP[0][0], epoch_num)

        if not parser.train_all_labeled_data:
            scheduler.step(np.mean(val_losses))
        else:
            scheduler.step()

        torch.save(
            retinanet.module,
            parser.model_output_dir + '/retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()

    torch.save(retinanet, parser.model_output_dir + '/model_final.pt')
Example #7
def train(csv_train=None, csv_classes=None, csv_val=None, epochs=12, depth=50, batch_size=2):

	dataset = "csv"

	# Create the data loaders
	if dataset == 'csv':

		if csv_train is None:
			raise ValueError('Must provide csv_train when training on CSV data.')

		if csv_classes is None:
			raise ValueError('Must provide csv_classes when training on CSV data.')


		dataset_train = CSVDataset(train_file=csv_train, class_list=csv_classes, transform=transforms.Compose([RandomHorizontalFlip(0.3),RandomRotation(6),Gamma_Correction(0.2), Image_Noise(0.2), Blur(0.2) , Normalizer(), Augmenter(), Resizer()]))

		if csv_val is None:
			dataset_val = None
			print('No validation annotations provided.')
		else:
			dataset_val = CSVDataset(train_file=csv_val, class_list=csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

	else:
		raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

	sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=False)
	dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

	if dataset_val is not None:
		sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
		dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

	# Create the model
	if depth == 18:
		retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
	elif depth == 34:
		retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
	elif depth == 50:
		retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
	elif depth == 101:
		retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
	elif depth == 152:
		retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
	else:
		raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')		

	use_gpu = True

	if use_gpu:
		retinanet = retinanet.cuda()
	
	retinanet = torch.nn.DataParallel(retinanet).cuda()

	retinanet.training = True

	optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

	scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

	loss_hist = collections.deque(maxlen=500)

	retinanet.train()
	retinanet.module.freeze_bn()

	print('Num training images: {}'.format(len(dataset_train)))

	# Change
	total_loss_data = []
	class_loss_data = []
	reg_loss_data = []
	# Change

	for epoch_num in range(epochs):

		retinanet.train()
		retinanet.module.freeze_bn()


		epoch_loss = []

		# Change
		epoch_reg_loss = []
		epoch_class_loss = []
		# Change


		for iter_num, data in enumerate(dataloader_train):
			try:
				optimizer.zero_grad()

				classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])

				classification_loss = classification_loss.mean()
				regression_loss = regression_loss.mean()

				loss = classification_loss + regression_loss
				
				if bool(loss == 0):
					continue

				loss.backward()

				torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

				optimizer.step()

				loss_hist.append(float(loss))

				epoch_loss.append(float(loss))

				# Change
				epoch_reg_loss.append(float(regression_loss))
				epoch_class_loss.append(float(classification_loss))
				# Change

				print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
				
				del classification_loss
				del regression_loss
			except Exception as e:
				print(e)
				continue


		if dataset == 'csv' and csv_val is not None:

			print('Evaluating dataset')

			mAP = csv_eval.evaluate(dataset_val, retinanet)

		# Change
		total_loss_data.append(np.mean(epoch_loss))
		class_loss_data.append(np.mean(epoch_class_loss))
		reg_loss_data.append(np.mean(epoch_reg_loss))
		print("Epoch loss", total_loss_data)
		print("Epoch loss - classification", class_loss_data)
		print("Epoch loss - Regression", reg_loss_data)
		# Change
		scheduler.step(np.mean(epoch_loss))	

		torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(dataset, epoch_num))

	retinanet.eval()

	torch.save(retinanet, 'model_final.pt')

	# Change
	import matplotlib.pyplot as plt
	plt.plot(total_loss_data, label='Total loss')
	plt.plot(class_loss_data, label='Classification loss')
	plt.plot(reg_loss_data, label='Regression loss')
	plt.ylabel("Loss")
	plt.xlabel("Epoch")
	plt.title("Epoch losses")
	plt.legend()
	plt.show()
Example #8
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument(
        '--train-file',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--classes-file',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--val-file',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--title', type=str, default='')
    parser.add_argument("--resume_model", type=str, default="")
    parser.add_argument("--resume_epoch", type=int, default=0)
    parser.add_argument("--reinit-classifier",
                        action="store_true",
                        default=False)
    parser.add_argument("--lr", type=float, default=.00001)
    parser.add_argument("--all-box-regression",
                        action="store_true",
                        default=False)
    parser.add_argument("--batch-size", type=int, default=16)

    parser = parser.parse_args(args)

    log_dir = "./runs/" + parser.title
    writer = SummaryWriter(log_dir)

    #pdb.set_trace()

    with open(log_dir + '/config.csv', 'w') as f:
        for item in vars(parser):
            print(item + ',' + str(getattr(parser, item)))
            f.write(item + ',' + str(getattr(parser, item)) + '\n')

    if not os.path.isdir(log_dir + "/checkpoints"):
        os.makedirs(log_dir + "/checkpoints")

    if not os.path.isdir(log_dir + '/map_files'):
        os.makedirs(log_dir + '/map_files')

    dataset_train = CSVDataset(train_file=parser.train_file,
                               class_list=parser.classes_file,
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))

    if parser.val_file is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=parser.val_file,
                                 class_list=parser.classes_file,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=True)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=parser.batch_size,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if parser.resume_model:
        x = torch.load(parser.resume_model)
        if parser.reinit_classifier:
            dummy = nn.Conv2d(256,
                              9 * dataset_train.num_classes(),
                              kernel_size=3,
                              padding=1)
            x['classificationModel.output.weight'] = dummy.weight.clone()
            x['classificationModel.output.bias'] = dummy.bias.clone()
            prior = 0.01
            x['classificationModel.output.weight'].data.fill_(0)
            x['classificationModel.output.bias'].data.fill_(-math.log(
                (1.0 - prior) / prior))
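            # Focal-loss style init: bias = -log((1 - prior) / prior), so the
            # classifier starts out predicting ~1% foreground for every anchor.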
        retinanet.load_state_dict(x)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()
    #torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    # x = torch.load('./csv_retinanet_20.pth')
    # retinanet.module.load_state_dict(x)

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.resume_epoch, parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        i = 0
        avg_class_loss = 0.0
        avg_reg_loss = 0.0

        for iter_num, data in enumerate(dataloader_train):
            i += 1

            try:
                optimizer.zero_grad()

                #pdb.set_trace()

                shape = data['img'].shape[2] * data['img'].shape[3]
                writer.add_scalar("train/image_shape", shape,
                                  epoch_num * (len(dataloader_train)) + i)

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot'].cuda().float()])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                avg_class_loss += float(classification_loss)  # detach to a float so the autograd graph is not retained
                avg_reg_loss += float(regression_loss)

                if i % 100 == 0:
                    writer.add_scalar("train/classification_loss",
                                      avg_class_loss / 100,
                                      epoch_num * (len(dataloader_train)) + i)
                    writer.add_scalar("train/regression_loss",
                                      avg_reg_loss / 100,
                                      epoch_num * (len(dataloader_train)) + i)
                    avg_class_loss = 0.0
                    avg_reg_loss = 0.0

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if epoch_num % 2 == 0:

            print('Evaluating dataset')

            retinanet.eval()
            mAP, AP_string = csv_eval.evaluate(dataset_val,
                                               retinanet.module,
                                               score_threshold=0.1)
            with open(
                    log_dir + '/map_files/retinanet_{}.txt'.format(epoch_num),
                    'w') as f:
                f.write(AP_string)
            total = 0.0
            total_weight = 0.0  # renamed from `all` to avoid shadowing the builtin
            total_unweighted = 0.0
            for c in mAP:
                total += mAP[c][0] * mAP[c][1]
                total_unweighted += mAP[c][0]
                total_weight += mAP[c][1]
            writer.add_scalar("val/mAP", total / total_weight, epoch_num)
            writer.add_scalar("val/mAP_unweighted",
                              total_unweighted / len(mAP), epoch_num)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module.state_dict(),
                   log_dir + '/checkpoints/retinanet_{}.pth'.format(epoch_num))

    retinanet.eval()

    torch.save(retinanet.module.state_dict(),
               log_dir + '/checkpoints/model_final.pth')
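Since these checkpoints hold a bare state_dict, resuming (as --resume_model does above) means rebuilding the network first. A sketch, with a hypothetical checkpoint path:

retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=False)
retinanet.load_state_dict(torch.load('./runs/my_title/checkpoints/retinanet_20.pth'))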
Example #9
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a CTracker network.')

    parser.add_argument('--dataset',
                        default='csv',
                        type=str,
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--model_dir',
                        default='./ctracker/',
                        type=str,
                        help='Path to save the model.')
    parser.add_argument(
        '--root_path',
        default='/Dataset/Tracking/MOT17/',
        type=str,
        help='Path of the directory containing both label and images')
    parser.add_argument(
        '--csv_train',
        default='train_annots.csv',
        type=str,
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        default='train_labels.csv',
                        type=str,
                        help='Path to file containing class list (see readme)')

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--print_freq',
                        help='Print frequency',
                        type=int,
                        default=100)
    parser.add_argument(
        '--save_every',
        help='Save a checkpoint of model at given interval of epochs',
        type=int,
        default=5)

    parser = parser.parse_args(args)
    print(parser)

    print(parser.model_dir)
    if not os.path.exists(parser.model_dir):
        os.makedirs(parser.model_dir)

    # Create the data loaders
    if parser.dataset == 'csv':
        if (parser.csv_train is None) or (parser.csv_train == ''):
            raise ValueError('Must provide --csv_train when training on CSV data.')

        if (parser.csv_classes is None) or (parser.csv_classes == ''):
            raise ValueError(
                'Must provide --csv_classes when training on CSV data.')

        dataset_train = CSVDataset(
            parser.root_path,
            train_file=os.path.join(parser.root_path, parser.csv_train),
            class_list=os.path.join(parser.root_path, parser.csv_classes),
            transform=transforms.Compose([
                RandomSampleCrop(),
                PhotometricDistort(),
                Augmenter(),
                Normalizer()
            ]))  # alternative: transforms.Compose([Normalizer(), Augmenter(), Resizer()])

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=32,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    optimizer = optim.Adam(retinanet.parameters(), lr=5e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    total_iter = 0
    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                total_iter = total_iter + 1
                optimizer.zero_grad()

                (classification_loss, regression_loss), reid_loss = retinanet([
                    data['img'].cuda().float(), data['annot'],
                    data['img_next'].cuda().float(), data['annot_next']
                ])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                reid_loss = reid_loss.mean()

                # loss = classification_loss + regression_loss + track_classification_losses
                loss = classification_loss + regression_loss + reid_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                # print frequency default=100 or e.g. --print_freq 500
                if total_iter % parser.print_freq == 0:
                    print(
                        'Epoch: {} | Iter: {} | Cls loss: {:1.5f} | Reid loss: {:1.5f} | Reg loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(classification_loss), float(reid_loss),
                                float(regression_loss), np.mean(loss_hist)))

            except Exception as e:
                print(e)
                continue

        scheduler.step(np.mean(epoch_loss))
        # Save a checkpoint of model at given interval of epochs e.g. --save_every 10
        if epoch_num % parser.save_every == 0:
            torch.save(
                retinanet,
                os.path.join(parser.model_dir,
                             "weights_epoch_" + str(epoch_num) + ".pt"))

    retinanet.eval()

    torch.save(retinanet, os.path.join(parser.model_dir, 'model_final.pt'))
    run_from_train(parser.model_dir, parser.root_path)
Example #10
def main():
    data_type = 'coco'
    data_root_dir = '/data/data_coco/'
    # model_depth = 50
    epoch_max = 100
    batch_size = 8

    if data_type == 'coco':
        dataset_train = CocoDataset(data_root_dir,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(data_root_dir,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not supported yet, only coco is implemented.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=True)
    loader_train = DataLoader(dataset_train,
                              num_workers=8,
                              collate_fn=collater,
                              batch_sampler=sampler)
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=batch_size,
                                          drop_last=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=8,
                            collate_fn=collater,
                            batch_sampler=sampler_val)

    retinanet = model.retinanet_50(dataset_train.num_classes(),
                                   pretrained=True)

    retinanet = retinanet.cuda()
    optimizer = torch.optim.Adam(retinanet.parameters(), lr=1e-4)
    # optimizer = torch.optim.SGD(retinanet.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True,
                                                           factor=0.5)

    model_pretrain_dir = './model/model_final.pt'
    if os.path.exists(model_pretrain_dir):
        print('pretrain model exist!')
        retinanet = torch.load(model_pretrain_dir)

    print('train images num: {}'.format(len(loader_train) * batch_size))
    for epoch_num in range(epoch_max):
        retinanet.train()
        epoch_loss = []
        for iter_num, data in enumerate(loader_train):
            optimizer.zero_grad()
            input_tensor = [data['img'].cuda().float(), data['annot']]
            classification_loss, regression_loss = retinanet(input_tensor)
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss

            epoch_loss.append(float(loss))

            if loss.item() == 0:
                continue

            loss.backward()
            optimizer.step()

            print(
                'Epoch:{}/{} | Iters:{}/{} | C loss:{:.4f} | R loss:{:.4f} | Current loss:{:.4f} | Current LR:{:.7f}'
                .format(epoch_num + 1, epoch_max, iter_num + 1,
                        len(loader_train), float(classification_loss),
                        float(regression_loss), np.mean(epoch_loss),
                        optimizer.param_groups[0]['lr']))
            del classification_loss
            del regression_loss

        # run validation once per epoch
        eval.eval_coco(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(
            retinanet,
            './model/{}_retinanet_{}.pt'.format(data_type, epoch_num + 1))
    retinanet.eval()
    torch.save(retinanet, './model/model_final.pt')
Example #11
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--efficientdet',
                        help='Use EfficientDet.',
                        action="store_true")
    parser.add_argument('--scaling-compound',
                        help='EfficientDet scaling compound phi.',
                        type=int,
                        default=0)
    parser.add_argument('--batch-size', help='Batchsize.', type=int, default=6)
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--print-model-complexity',
                        help='Print model complexity.',
                        action="store_true")

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=None)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    img_size = parser.scaling_compound * 128 + 512
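    # Compound scaling: phi=0 -> 512, phi=1 -> 640, phi=2 -> 768, ... (steps of 128 px)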

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose([
                                        Normalizer(),
                                        Augmenter(),
                                        Resizer(img_size=img_size)
                                    ]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose([
                                      Normalizer(),
                                      Resizer(img_size=img_size)
                                  ]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV data.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV data.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose([
                                       Normalizer(),
                                       Augmenter(),
                                       Resizer(img_size=img_size)
                                   ]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([
                                         Normalizer(),
                                         Resizer(img_size=img_size)
                                     ]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        model = retinanet.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        model = retinanet.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        model = retinanet.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        model = retinanet.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        model = retinanet.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.efficientdet:
        model = efficientdet.efficientdet(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            phi=parser.scaling_compound)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152, or specify --efficientdet'
        )

    use_gpu = True

    if use_gpu:
        model = model.cuda()

    model = torch.nn.DataParallel(model).cuda()

    if parser.print_model_complexity:
        flops, params = get_model_complexity_info(model,
                                                  (3, img_size, img_size),
                                                  as_strings=True,
                                                  print_per_layer_stat=True)
        print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))

    model.training = True

    optimizer = optim.SGD(model.parameters(), lr=4e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    model.train()
    model.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        model.train()
        model.module.freeze_bn()

        freeze_layer(model.module.efficientnet)

        epoch_loss = []
        pbar = tqdm(enumerate(dataloader_train), total=len(dataloader_train))
        for iter_num, data in pbar:
            optimizer.zero_grad()

            classification_loss, regression_loss = model(
                [data['img'].cuda().float(), data['annot']])

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss))

            epoch_loss.append(float(loss))

            mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0
            pbar.set_description(
                f'{mem:.3g}G | {float(classification_loss):1.5f} | {float(regression_loss):1.5f} | {np.mean(loss_hist):1.5f}'
            )
            #print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, model)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, model)

        scheduler.step(np.mean(epoch_loss))

        torch.save(model.module,
                   '{}_model_{}.pt'.format(parser.dataset, epoch_num))

    model.eval()

    torch.save(model, 'model_final.pt')
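
Several of these scripts call model.module.freeze_bn() without showing its body. A minimal sketch of what such a helper typically does (in the examples it is a method on the model, so the free-function form below is an assumption):

import torch.nn as nn

def freeze_bn(module):
    """Put every BatchNorm2d layer into eval mode so its running
    mean/variance are not updated while fine-tuning."""
    for layer in module.modules():
        if isinstance(layer, nn.BatchNorm2d):
            layer.eval()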
Exemple #12
0
        raise Exception('args.dataset not in ["coco", "csv"]')
    
    return args

if __name__ == "__main__":
    args = parse_args()
    with open(args.config_path, 'w') as f:
        json.dump(vars(args), f)
    
    # Create the data loaders
    if args.dataset == 'coco':
        dataset_train = CocoDataset(
            root_dir=args.coco_path,
            set_name='train2017', 
            transform=transforms.Compose([
                Normalizer(),
                Augmenter(),
                Resizer()
            ])
        )
        
        dataset_val = CocoDataset(
            root_dir=args.coco_path,
            set_name='val2017', 
            transform=transforms.Compose([
                Normalizer(),
                Resizer()
            ])
        )
        
    elif args.dataset == 'csv':
        dataset_train = CSVDataset(
Exemple #13
0
def train(args):
    train_csv = args.train_csv
    test_csv = args.test_csv
    labels_csv = args.labels_csv
    model_type = args.model_type
    epochs = int(args.epochs)
    batch_size = int(args.batch_size)

    dataset_train = CSVDataset(train_file=train_csv,
                               class_list=labels_csv,
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    dataset_val = CSVDataset(train_file=test_csv,
                             class_list=labels_csv,
                             transform=transforms.Compose(
                                 [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    retinanet = RetinaNet_efficientnet_b4(
        num_classes=dataset_train.num_classes(), model_type=model_type)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue
        mAP, MAP = evaluate(dataset_val, retinanet)
        scheduler.step(np.mean(epoch_loss))
        torch.save(
            retinanet.module,
            '{}_retinanet_{}_map{}.pt'.format("EfficientNet" + model_type,
                                              epoch_num, MAP))
        retinanet.eval()
        torch.save(retinanet, 'model_final.pt')
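
A note on a dependency shared by these scripts: the AspectRatioBasedSampler they import is never reproduced in a snippet. A hedged sketch of the usual implementation, assuming the dataset exposes an image_aspect_ratio(index) helper as the CSV/COCO datasets in these examples do; grouping images of similar aspect ratio into one batch keeps the padding done by the collater small:

import random
from torch.utils.data.sampler import Sampler

class AspectRatioBasedSampler(Sampler):
    """Yield batches of indices ordered by image aspect ratio, so the
    images inside one batch have similar shapes and need little padding."""

    def __init__(self, data_source, batch_size, drop_last):
        self.data_source = data_source
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.groups = self.group_images()

    def __iter__(self):
        random.shuffle(self.groups)
        for group in self.groups:
            yield group

    def __len__(self):
        if self.drop_last:
            return len(self.data_source) // self.batch_size
        return (len(self.data_source) + self.batch_size - 1) // self.batch_size

    def group_images(self):
        # Sort indices by aspect ratio, then chunk into fixed-size batches.
        order = sorted(range(len(self.data_source)),
                       key=lambda i: self.data_source.image_aspect_ratio(i))
        groups = [order[i:i + self.batch_size]
                  for i in range(0, len(order), self.batch_size)]
        if self.drop_last and groups and len(groups[-1]) < self.batch_size:
            groups = groups[:-1]
        return groups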
Exemple #14
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=50)

    parser.add_argument('--model_name', help='name of the model to save')
    parser.add_argument('--pretrained', help='pretrained model name')

    parser = parser.parse_args(args)

    # Create the data loaders
    dataset_train = CSVDataset(train_file=parser.csv_train,
                               class_list=parser.csv_classes,
                               transform=transforms.Compose(
                                   [Resizer(),
                                    Augmenter(),
                                    Normalizer()]))

    if parser.csv_val is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Resizer(), Normalizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=16,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    #dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_size=8, shuffle=True)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=2,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=16,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)
        #dataloader_val = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_size=8, shuffle=True)

    # Create the model_pose_level_attention
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes())
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes())
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes())
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes())
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes())
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if ckpt:  # `ckpt` is not defined in this snippet; presumably a module-level resume flag
        retinanet = torch.load('')
        print('load ckpt')
    else:
        retinanet_dict = retinanet.state_dict()
        pretrained_dict = torch.load('./weight/' + parser.pretrained)
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in retinanet_dict
        }
        retinanet_dict.update(pretrained_dict)
        retinanet.load_state_dict(retinanet_dict)
        print('load pretrained backbone')

    print(retinanet)
    retinanet = torch.nn.DataParallel(retinanet, device_ids=[0])
    retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    #optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    f_map = open('./mAP_txt/' + parser.model_name + '.txt', 'a')
    writer = SummaryWriter(log_dir='./summary')
    iters = 0
    for epoch_num in range(0, parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        #scheduler.step()

        for iter_num, data in enumerate(dataloader_train):

            iters += 1

            optimizer.zero_grad()

            classification_loss_f, regression_loss_f, classification_loss_v, regression_loss_v = retinanet(
                [
                    data['img'].cuda().float(), data['annot'], data['vbox'],
                    data['ignore']
                ])

            classification_loss_f = classification_loss_f.mean()
            regression_loss_f = regression_loss_f.mean()
            classification_loss_v = classification_loss_v.mean()
            regression_loss_v = regression_loss_v.mean()

            loss = classification_loss_f + regression_loss_f + classification_loss_v + regression_loss_v

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss))

            epoch_loss.append(float(loss))

            print(
                'Epoch: {} | Iteration: {} | Classification loss_f: {:1.5f} | Regression loss_f: {:1.5f} | Classification loss_v {:1.5f} | Regression loss_v {:1.5f} | Running loss: {:1.5f}'
                .format(epoch_num, iter_num, float(classification_loss_f),
                        float(regression_loss_f), float(classification_loss_v),
                        float(regression_loss_v), np.mean(loss_hist)))

            writer.add_scalar('classification_loss_f', classification_loss_f,
                              iters)
            writer.add_scalar('regression_loss_f', regression_loss_f, iters)
            writer.add_scalar('classification_loss_v', classification_loss_v,
                              iters)
            writer.add_scalar('regression_loss_v', regression_loss_v, iters)
            writer.add_scalar('loss', loss, iters)

        if parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)
            f_map.write('mAP:{}, epoch:{}'.format(mAP[0][0], epoch_num))
            f_map.write('\n')

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module,
                   './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))

    retinanet.eval()

    writer.export_scalars_to_json('./summary/' + parser.pretrained +
                                  'all_scalars.json')
    f_map.close()
    writer.close()
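
Every DataLoader in these scripts passes collate_fn=collater without defining it. A sketch of the usual detection collater, assuming each sample is a dict with 'img' (H, W, 3), 'annot' (num_boxes, 5) and 'scale' keys as used throughout: images are zero-padded to the largest size in the batch, and annotations are padded with rows of -1 so they stack into one tensor (the loss treats -1 rows as absent boxes):

import torch

def collater(batch):
    imgs = [item['img'] for item in batch]      # each: (H, W, 3) float tensor
    annots = [item['annot'] for item in batch]  # each: (num_boxes, 5) tensor
    scales = [item['scale'] for item in batch]

    max_h = max(img.shape[0] for img in imgs)
    max_w = max(img.shape[1] for img in imgs)

    padded = torch.zeros(len(imgs), max_h, max_w, 3)
    for i, img in enumerate(imgs):
        padded[i, :img.shape[0], :img.shape[1], :] = img

    max_boxes = max(a.shape[0] for a in annots)
    annot_pad = -1 * torch.ones(len(annots), max(max_boxes, 1), 5)
    for i, a in enumerate(annots):
        if a.shape[0] > 0:
            annot_pad[i, :a.shape[0], :] = a

    # NHWC -> NCHW for the convolutional backbone
    padded = padded.permute(0, 3, 1, 2)
    return {'img': padded, 'annot': annot_pad, 'scale': scales}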
Exemple #15
0
def main(config):
    # set seed for reproducibility
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    # create folder for model
    newpath = './models/' + config.model_date
    if config.save_model:
        os.makedirs(newpath)

    # Create the data loaders
    if config.csv_train is None:
        raise ValueError('Must provide --csv_train when training on CSV.')

    if config.csv_classes is None:
        raise ValueError('Must provide --csv_classes when training on CSV.')

    train_dataset = datasets.ImageFolder(os.path.join(config.data_dir,
                                                      'train'))
    dataset_train = GetDataset(train_file=config.csv_train,
                               class_list=config.csv_classes,
                               transform=transforms.Compose(
                                   [Augmenter(), Resizer()]),
                               dataset=train_dataset,
                               seed=0)
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=1,
                                  collate_fn=collater)

    if config.csv_val is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        valid_dataset = datasets.ImageFolder(
            os.path.join(config.data_dir, 'valid'))
        dataset_val = GetDataset(train_file=config.csv_val,
                                 class_list=config.csv_classes,
                                 transform=transforms.Compose([Resizer()]),
                                 dataset=valid_dataset,
                                 seed=0)

    # Create the model
    if config.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif config.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif config.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif config.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif config.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if config.use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    best_valid_map = 0
    counter = 0
    is_best = False  # ensure `is_best` is defined even when no validation set is provided
    batch_size = config.batch_size

    for epoch_num in range(config.epochs):
        print('\nEpoch: {}/{}'.format(epoch_num + 1, config.epochs))
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        train_batch_time = AverageMeter()
        train_losses = AverageMeter()
        tic = time.time()
        with tqdm(total=len(dataset_train)) as pbar:
            for iter_num, data in enumerate(dataloader_train):
                # try:
                optimizer.zero_grad()
                siamese_loss, classification_loss, regression_loss = retinanet(
                    [
                        data['img'].cuda().float(), data['annot'],
                        data['pair'].cuda().float()
                    ])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss + siamese_loss

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))

                toc = time.time()
                train_losses.update(float(loss), batch_size)
                train_batch_time.update(toc - tic)
                tic = time.time()

                pbar.set_description(("{:.1f}s - loss: {:.3f}".format(
                    train_batch_time.val,
                    train_losses.val,
                )))
                pbar.update(batch_size)

                del classification_loss
                del regression_loss
                del siamese_loss

                # except Exception as e:
                #     print('Training error: ', e)
                #     continue

        if config.csv_val is not None:
            print('Evaluating dataset')
            mAP, correct = eval_new.evaluate(dataset_val, retinanet)

            # is_best = mAP[0][0] > best_valid_map
            # best_valid_map = max(mAP[0][0], best_valid_map)
            is_best = correct > best_valid_map
            best_valid_map = max(correct, best_valid_map)
            if is_best:
                counter = 0
            else:
                counter += 1
                if counter > 3:
                    print("[!] No improvement in a while, stopping training.")
                    break

        scheduler.step(np.mean(epoch_loss))
        if is_best and config.save_model:
            torch.save(
                retinanet.state_dict(),
                './models/{}/best_retinanet.pt'.format(config.model_date))
        if config.save_model:
            torch.save(
                retinanet.state_dict(),
                './models/{}/{}_retinanet_{}.pt'.format(
                    config.model_date, config.depth, epoch_num))

        msg = "train loss: {:.3f} - val map: {:.3f} - val acc: {:.3f}%"
        print(
            msg.format(train_losses.avg, mAP[0][0],
                       (100. * correct) / len(dataset_val)))
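
The AverageMeter used above for the timing and loss displays is a standard helper whose definition the snippet omits; a minimal version:

class AverageMeter:
    """Track the most recent value and a running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count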
Exemple #16
0
def train(img_dir, classes_csv, model_fname=None, resnet_depth=50, epochs=1000, steps=100, train_split=0.8, out_dir='', out_prefix=''):

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Create the data loaders

    # Get all image fnames in folder
    img_list = []
    if not isinstance(img_dir, list):
        img_dir = [img_dir]
    for folder in img_dir:
        for file in os.listdir(folder):
            if file.endswith(".png"):
                img_list.append(os.path.join(folder, file))

    randomised_list = random.sample(img_list, len(img_list))
    num_train = int(train_split * len(img_list))
    train_imgs, val_imgs = randomised_list[:num_train], randomised_list[num_train:]

    dataset_train = CustomDataset(img_list=train_imgs, class_list=classes_csv, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    dataset_val = CustomDataset(img_list=val_imgs, class_list=classes_csv,transform=transforms.Compose([Normalizer(), Resizer()]))
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model

    if resnet_depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # retinanet = torch.load(model_fname)

    use_gpu = torch.cuda.is_available()  # `use_gpu` was undefined in the original snippet
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    if model_fname is not None:
        retinanet.load_state_dict(torch.load(model_fname))

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    retinanet.train()
    retinanet.module.freeze_bn()

    start_time = time.perf_counter()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                # print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue
        print('Epoch: {} | Running loss: {:1.5f} | Elapsed Time: {:.1f} min'.format(epoch_num, np.mean(loss_hist), (time.perf_counter() - start_time) / 60))
        mAP = csv_eval.evaluate(dataset_val, retinanet)
        scheduler.step(np.mean(epoch_loss))

        if epoch_num % steps == 0:
            torch.save(retinanet.module, '{}{}_model_{}.pt'.format(out_dir, out_prefix, epoch_num))
            torch.save(retinanet.state_dict(), '{}{}_state_{}.pt'.format(out_dir, out_prefix, epoch_num))

    torch.save(retinanet, out_dir + '{}model_final.pt'.format(out_prefix))
    torch.save(retinanet.state_dict(), out_dir + '{}state_final_.pt'.format(out_prefix))
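
A hedged usage example for the function above; every path below is a placeholder to adapt to your own dataset layout:

train(img_dir=['./data/images/'],        # hypothetical image folder(s)
      classes_csv='./data/classes.csv',  # hypothetical class list
      resnet_depth=50,
      epochs=200,
      steps=50,
      out_dir='./output/',
      out_prefix='retina_')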
Exemple #17
0
import torch
from torchvision import datasets, models, transforms
import new_model
from dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer


dataset_train = CocoDataset("./data", set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
retinanet = new_model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)

retinanet.load_state_dict(torch.load("./saved_models/model_final_0.pth", map_location='cuda:0'))

print(retinanet)
print(retinanet.anchors)
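
One caveat for snippets like this one: if the checkpoint was saved from a DataParallel-wrapped model, as most of the training scripts above do, its keys carry a 'module.' prefix and the plain load_state_dict call fails. A common, hedged workaround:

state = torch.load("./saved_models/model_final_0.pth", map_location='cuda:0')
state = {k.replace('module.', '', 1): v for k, v in state.items()}  # strip the DataParallel prefix
retinanet.load_state_dict(state, strict=False)  # strict=False tolerates leftover mismatches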
Exemple #18
0
def main(args=None):
    """
	todo s:
		################## ToDo ########################
		1. download more images using image_utils and isic-arhive. Also, use more online resources for data.
		2. Use Augmentations fromPytorchSSD using pascal voc data format.
		3. use pair augmentation, random erase
		4. download more images for each classes.
		5. preprocessing and feature extraction
		6. bigger 500 px image size. big image tends to make
		7. use ResNet-152 for better peromance.
		8. adversarial training, use crosssentropy, focal loss
		9. use similar optimizatio adam and learning rate schedule like wider face pedestrian dataset.
		10.BGR to RGB
		11. multi scale testing.
		12. soft nms
		13. save model and load from previous epoch
		14. https://github.com/uoguelph-mlrg/Cutout
	"""

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        default="wider_pedestrian",
                        help='Dataset type, must be one of csv, coco or wider_pedestrian.')
    parser.add_argument(
        '--coco_path',
        default=
        "/media/milton/ssd1/research/competitions/data_wider_pedestrian/",
        help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=152)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=200)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train_wider_pedestrian',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val_wider_pedestrian',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'wider_pedestrian':
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train_wider_pedestrian',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val_wider_pedestrian',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

        # dataset_test = CocoDataset(parser.coco_path, set_name='test_wider_pedestrian',
        # 						  transform=transforms.Compose([Normalizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv, coco or wider_pedestrian), exiting.')
    batch_size = 4
    num_classes = 1
    print("Total Train:{}".format(len(dataset_train)))
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=4,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    print("Total Validation:{}".format(len(dataset_val)))
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=batch_size,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=1,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)
    best_saved_model_name = "checkpoint/resnet{}_{}_best_model.pth".format(
        parser.depth, parser.dataset)
    best_mAP = 0
    start_epoch = 0

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=num_classes, pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=num_classes, pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=num_classes, pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=num_classes, pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=num_classes, pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    use_gpu = True

    optimizer = optim.Adam(retinanet.parameters(), lr=0.001)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    if use_gpu:
        retinanet_sk = copy.deepcopy(
            retinanet.cpu()
        )  # holds the raw model; later loaded with new weights for testing on separate GPUs
        retinanet = retinanet.cuda()
        retinanet = torch.nn.DataParallel(retinanet)

    try:
        print("Loading model and optimizer from checkpoint '{}'".format(
            best_saved_model_name))
        checkpoint = torch.load(best_saved_model_name)
        retinanet.load_state_dict(checkpoint['model'].state_dict())
        best_mAP = checkpoint['map']
        start_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer'].state_dict())

        # optimizer.load_state_dict(checkpoint['optimizer_state'])
        print("Loaded checkpoint '{}' (epoch {})".format(
            best_saved_model_name, checkpoint['epoch']))
        start_epoch = checkpoint['epoch']
        print(
            '==> Resumed successfully from checkpoint at epoch {} with mAP {:.7f}'
            .format(start_epoch, best_mAP))

    except Exception as e:
        print("\nException: {}".format(repr(e)))
        print('\n==> Resume failed...')
    retinanet.training = True

    total_loss = losses.loss

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    freeze_bn(retinanet)

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(start_epoch, parser.epochs):

        retinanet.train()
        freeze_bn(retinanet)

        epoch_loss = []
        # threshold=0.05
        for iter_num, data in enumerate(dataloader_train):
            iter_per_epoch = len(dataset_train) // batch_size

            step = epoch_num * iter_per_epoch + iter_num

            if iter_num == 0:
                print('Iterations per epoch: {}'.format(iter_per_epoch))

            try:
                optimizer.zero_grad()

                classification, regression, anchors = retinanet(
                    data['img'].cuda().float())

                classification_loss, regression_loss = total_loss(
                    classification, regression, anchors, data['annot'])

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                writer.add_scalar('Classification loss', classification_loss,
                                  step)
                writer.add_scalar('Regression loss', regression_loss, step)
                writer.add_scalar("Running Loss", np.mean(loss_hist), step)

                msg = 'Epoch:{}, Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, float(classification_loss),
                    float(regression_loss), np.mean(loss_hist))
                progress_bar(iter_num, iter_per_epoch, msg)
                # print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                # break
                # if iter_num==0:
                # 	break
                if iter_num % 500 == 0:
                    save_model(retinanet, optimizer, best_saved_model_name,
                               epoch_num + 1, epoch_num)
                if False:
                    retinanet.eval()
                    test_data = get_test_loader_for_upload(1)

                    # coco_eval.evaluate_wider_pedestrian_for_upload(epoch_num, test_data, retinanet, retinanet_sk)

                    new_map = coco_eval.evaluate_wider_pedestrian(
                        epoch_num, dataset_val, retinanet, retinanet_sk,
                        threshold)  # to validate

                    epoch_saved_model_name = "checkpoint/resnet{}_{}_epoch_{}.pth".format(
                        parser.depth, parser.dataset, epoch_num)
                    save_model(retinanet, optimizer, epoch_saved_model_name,
                               new_map, epoch_num)
                    # print("\nepoch:{}, validation average precision score:{}".format(epoch_num, new_map))
                    if new_map is None:
                        continue
                    writer.add_scalar('validation mAP', new_map, epoch_num)
                    scheduler.step(np.mean(epoch_loss))

                    if new_map > best_mAP:
                        print(
                            "Found new best model with mAP:{:.7f}, over {:.7f}"
                            .format(new_map, best_mAP))
                        save_model(retinanet, optimizer, best_saved_model_name,
                                   new_map, epoch_num)
                        best_mAP = new_map
                        coco_eval.evaluate_wider_pedestrian_for_upload(
                            parser.depth, epoch_num, test_data, retinanet,
                            retinanet_sk)
                    retinanet.train()

            except Exception as e:
                print(e)

        if parser.dataset == 'coco':

            print('\n==>Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet, threshold=0.2)
            save_model(retinanet, optimizer, best_saved_model_name, 0.5,
                       epoch_num)
            continue
        elif parser.dataset == 'wider_pedestrian':
            for threshold in range(16, 90, 10):
                threshold = threshold / 100
                test_data = get_test_loader_for_upload(1)

                # coco_eval.evaluate_wider_pedestrian_for_upload(epoch_num, test_data, retinanet, retinanet_sk)

                new_map = coco_eval.evaluate_wider_pedestrian(
                    epoch_num, dataset_val, retinanet, retinanet_sk,
                    threshold)  # to validate

                epoch_saved_model_name = "checkpoint/resnet{}_{}_epoch_{}.pth".format(
                    parser.depth, parser.dataset, epoch_num)
                save_model(retinanet, optimizer, epoch_saved_model_name,
                           new_map, epoch_num)
                # print("\nepoch:{}, validation average precision score:{}".format(epoch_num, new_map))
                if new_map is None:
                    continue
                writer.add_scalar('validation mAP', new_map, epoch_num)
                scheduler.step(np.mean(epoch_loss))

                # if new_map>best_mAP:
                print(
                    "Found new best model with mAP:{:.7f}, over {:.7f}".format(
                        new_map, best_mAP))
                save_model(retinanet, optimizer, best_saved_model_name,
                           new_map, epoch_num)
                best_mAP = new_map
                coco_eval.evaluate_wider_pedestrian_for_upload(
                    epoch_num, test_data, retinanet, retinanet_sk, threshold,
                    new_map)

        retinanet.train()
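
The save_model helper called throughout this example is not shown. Judging from how the checkpoint is read back in the resume logic above (checkpoint['model'].state_dict(), checkpoint['map'], checkpoint['epoch'], checkpoint['optimizer'].state_dict()), it plausibly looks like this sketch:

import torch

def save_model(model, optimizer, path, mAP, epoch):
    # Persist the full model and optimizer objects plus bookkeeping,
    # matching the keys read back when resuming from a checkpoint.
    torch.save({'model': model,
                'optimizer': optimizer,
                'map': mAP,
                'epoch': epoch}, path)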
Exemple #19
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument(
        '--wider_train',
        help='Path to file containing WIDER training annotations (see readme)')
    parser.add_argument(
        '--wider_val',
        help=
        'Path to file containing WIDER validation annotations (optional, see readme)'
    )
    parser.add_argument('--wider_train_prefix',
                        help='Prefix path to WIDER train images')
    parser.add_argument('--wider_val_prefix',
                        help='Prefix path to WIDER validation images')

    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=50)
    parser.add_argument('--batch_size',
                        help='Batch size (default 2)',
                        type=int,
                        default=2)

    parser.add_argument('--model_name', help='Name of the model to save')
    parser.add_argument('--parallel',
                        help='Run training with DataParallel',
                        dest='parallel',
                        default=False,
                        action='store_true')
    parser.add_argument('--pretrained',
                        help='Pretrained model name in weight directory')

    parser = parser.parse_args(args)
    create_dirs()

    # Create the data loaders
    if parser.wider_train is None:
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Resizer(),
                                        Augmenter(),
                                        Normalizer()]))
    else:
        dataset_train = WIDERDataset(train_file=parser.wider_train,
                                     img_prefix=parser.wider_train_prefix,
                                     transform=transforms.Compose([
                                         Resizer(),
                                         Augmenter(),
                                         Normalizer()
                                     ]))

    if parser.wider_val is None:
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            print('Loading CSV validation dataset')
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Resizer(), Normalizer()]))
    else:
        print('Loading WIDER validation dataset')
        dataset_val = WIDERDataset(train_file=parser.wider_val,
                                   img_prefix=parser.wider_val_prefix,
                                   transform=transforms.Compose(
                                       [Resizer(), Normalizer()]))

    print('Loading training dataset')
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    if parser.parallel:
        dataloader_train = DataLoader(dataset_train,
                                      num_workers=16,
                                      collate_fn=collater,
                                      batch_sampler=sampler)
    else:
        dataloader_train = DataLoader(dataset_train,
                                      collate_fn=collater,
                                      batch_sampler=sampler)

    # Create the model_pose_level_attention
    if parser.depth == 18:
        retinanet = resnet18(num_classes=dataset_train.num_classes())
    elif parser.depth == 34:
        retinanet = resnet34(num_classes=dataset_train.num_classes())
    elif parser.depth == 50:
        retinanet = resnet50(num_classes=dataset_train.num_classes())
    elif parser.depth == 101:
        retinanet = resnet101(num_classes=dataset_train.num_classes())
    elif parser.depth == 152:
        retinanet = resnet152(num_classes=dataset_train.num_classes())
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if ckpt:  # `ckpt` is not defined in this snippet; presumably a module-level resume flag
        retinanet = torch.load('')
        print('Loading checkpoint')
    else:
        print('Loading pretrained model')
        retinanet_dict = retinanet.state_dict()
        if parser.pretrained is None:
            pretrained_dict = model_zoo.load_url(model_urls['resnet' +
                                                            str(parser.depth)])
        else:
            pretrained_dict = torch.load('./weight/' + parser.pretrained)
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in retinanet_dict
        }
        retinanet_dict.update(pretrained_dict)
        retinanet.load_state_dict(retinanet_dict)
        print('load pretrained backbone')

    print(retinanet)
    retinanet = torch.nn.DataParallel(retinanet, device_ids=[0])
    if is_cuda:  # `is_cuda` is assumed to be a module-level flag in the original script
        retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if parser.parallel:
        retinanet.module.freeze_bn()
    else:
        retinanet.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    f_map = open('./mAP_txt/' + parser.model_name + '.txt', 'a')
    writer = SummaryWriter(log_dir='./summary')
    iters = 0
    for epoch_num in range(0, parser.epochs):

        retinanet.train()
        if parser.parallel:
            retinanet.module.freeze_bn()
        else:
            retinanet.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):

            iters += 1

            optimizer.zero_grad()

            img_data = data['img'].float()
            annot_data = data['annot']
            if is_cuda:
                img_data = img_data.cuda()
                annot_data = annot_data.cuda()

            classification_loss, regression_loss, mask_loss = retinanet(
                [img_data, annot_data])

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            mask_loss = mask_loss.mean()

            loss = classification_loss + regression_loss + mask_loss

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss))

            epoch_loss.append(float(loss))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | '
                'mask_loss {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss),
                    float(regression_loss), float(mask_loss),
                    np.mean(loss_hist)))

            writer.add_scalar('classification_loss',
                              float(classification_loss), iters)
            writer.add_scalar('regression_loss', float(regression_loss), iters)
            writer.add_scalar('loss', float(loss), iters)

            del classification_loss
            del regression_loss
            del loss

        if parser.wider_val is not None:
            print('Evaluating dataset')

            mAP = evaluate(dataset_val, retinanet, is_cuda=is_cuda)
            f_map.write('mAP:{}, epoch:{}'.format(mAP[0][0], epoch_num))
            f_map.write('\n')

        scheduler.step(np.mean(epoch_loss))

        if parser.parallel:
            torch.save(
                retinanet.module,
                './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))
        else:
            torch.save(
                retinanet,
                './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))

    retinanet.eval()

    writer.export_scalars_to_json('./summary/' + parser.pretrained +
                                  'all_scalars.json')
    f_map.close()
    writer.close()
Exemple #20
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        # dataset_train = CocoDataset(parser.coco_path, set_name='trainval35k', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val5k',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    #sampler = AspectRatioBasedSampler(dataset_train, batch_size=16, drop_last=False)
    #dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_val.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_val.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_val.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_val.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    retinanet.load_state_dict(
        torch.load("coco_resnet_50_map_0_335_state_dict.pt",
                   encoding='latin1'))
    if use_gpu:
        retinanet = retinanet.cuda()


    # retinanet = torch.nn.DataParallel(retinanet).cuda()
    # retinanet.training = True
    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # loss_hist = collections.deque(maxlen=500)

    retinanet.eval()
    # retinanet.module.freeze_bn()
    # print('Num training images: {}'.format(len(dataset_train)))

    if parser.dataset == 'coco':

        print('Evaluating dataset')

        coco_eval.evaluate_coco(dataset_val, retinanet)

    elif parser.dataset == 'csv' and parser.csv_val is not None:

        print('Evaluating dataset')

        mAP = csv_eval.evaluate(dataset_val, retinanet)
Exemple #21
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--optimizer',
                        help='[SGD | Adam]',
                        type=str,
                        default='SGD')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    # Create the data loaders
    print("\n[Phase 1]: Creating DataLoader for {} dataset".format(
        parser.dataset))
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2014',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2014',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=16,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    print('| Num training images: {}'.format(len(dataset_train)))
    if dataset_val is not None:
        print('| Num test images : {}'.format(len(dataset_val)))

    print("\n[Phase 2]: Preparing RetinaNet Detection Model...")
    use_gpu = torch.cuda.is_available()
    device = torch.device('cuda' if use_gpu else 'cpu')
    retinanet = retinanet.to(device)

    retinanet = torch.nn.DataParallel(retinanet,
                                      device_ids=range(
                                          torch.cuda.device_count()))
    print("| Using %d GPUs for Train/Validation!" % torch.cuda.device_count())
    retinanet.training = True

    if parser.optimizer == 'Adam':
        optimizer = optim.Adam(retinanet.parameters(),
                               lr=1e-5)  # not mentioned
        print("| Adam Optimizer with Learning Rate = {}".format(1e-5))
    elif parser.optimizer == 'SGD':
        optimizer = optim.SGD(retinanet.parameters(),
                              lr=1e-2,
                              momentum=0.9,
                              weight_decay=1e-4)
        print("| SGD Optimizer with Learning Rate = {}".format(1e-2))
    else:
        raise ValueError('Unsupported Optimizer, must be one of [SGD | Adam]')

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()  # Freeze the BN parameters to the ImageNet configuration

    # Check if there is a 'checkpoints' path
    if not osp.exists('./checkpoints/'):
        os.makedirs('./checkpoints/')

    print("\n[Phase 3]: Training Model on {} dataset...".format(
        parser.dataset))
    for epoch_num in range(parser.epochs):
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.001)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                sys.stdout.write('\r')
                sys.stdout.write(
                    '| Epoch: {} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num + 1, iter_num + 1, len(dataloader_train),
                            float(classification_loss), float(regression_loss),
                            np.mean(loss_hist)))
                sys.stdout.flush()

                del classification_loss
                del regression_loss

            except Exception as e:
                print(e)
                continue

        print("\n| Saving current best model at epoch {}...".format(epoch_num +
                                                                    1))
        torch.save(
            retinanet.state_dict(),
            './checkpoints/{}_retinanet_{}.pt'.format(parser.dataset,
                                                      epoch_num + 1))

        if parser.dataset == 'coco':
            #print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet, device)

        elif parser.dataset == 'csv' and parser.csv_val is not None:
            #print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet, device)

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()
    torch.save(retinanet.state_dict(), './checkpoints/model_final.pt')
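Before the next listing, a minimal sketch (not from any of the examples) of how a checkpoint saved via torch.save(retinanet.state_dict(), ...) above could be reloaded for inference. The import path retinanet.model, the checkpoint filename and the class count are assumptions for illustration only.

import torch
from retinanet import model  # assumed module layout; adjust to your project

# 80 classes is an assumption matching COCO; use dataset.num_classes() in practice.
retinanet = model.resnet50(num_classes=80, pretrained=False)
state_dict = torch.load('./checkpoints/coco_retinanet_1.pt', map_location='cpu')
# The weights were saved from a DataParallel wrapper, so every key carries a
# 'module.' prefix; strip it before loading into the bare model.
state_dict = {k.replace('module.', '', 1): v for k, v in state_dict.items()}
retinanet.load_state_dict(state_dict)
retinanet.eval()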
Exemple #22
0
from example import eval, model

if __name__ == '__main__':

    data_type = 'coco'
    data_root_dir = '/home/ubuntu/data'
    # model_depth = 50
    epoch_max = 100
    batch_size = 1

    dataset_train = CocoDataset(data_root_dir,
                                set_name='train2017',
                                transform=transforms.Compose(
                                    [Normalizer(),
                                     Augmenter(),
                                     Resizer()]))
    dataset_val = CocoDataset(data_root_dir,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=True)
    train_data_loader = DataLoader(dataset_train,
                                   num_workers=8,
                                   collate_fn=collater,
                                   batch_sampler=sampler)
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=batch_size,
Exemple #23
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--coco_path',
                        help='Path to COCO directory',
                        type=str,
                        default='./data/coco')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--checkpoint',
                        help='The path to the checkpoint.',
                        type=str,
                        default=None)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--batch_size',
                        help='Batch size',
                        type=int,
                        default=16)
    parser.add_argument('--gpu_ids',
                        help='Comma-separated ids of the GPUs to train on in parallel',
                        type=str,
                        default='1, 2')

    parser = parser.parse_args(args)

    # Create the data loaders
    dataset_train = CocoDataset(parser.coco_path,
                                set_name='train2017',
                                transform=transforms.Compose(
                                    [Normalizer(),
                                     Augmenter(),
                                     Resizer()]))
    dataset_val = CocoDataset(parser.coco_path,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=4,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=16,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=3,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()
    gpu_ids = parser.gpu_ids.split(',')
    device = torch.device("cuda:" + gpu_ids[0])
    torch.cuda.set_device(device)
    gpu_ids = list(map(int, gpu_ids))
    retinanet = torch.nn.DataParallel(retinanet, device_ids=gpu_ids).to(device)

    if parser.checkpoint:
        pretrained = torch.load(parser.checkpoint).state_dict()
        retinanet.module.load_state_dict(pretrained)

    # add tensorboard to record the training log
    writer = SummaryWriter('./log')
    # writer.add_graph(retinanet, input_to_model=[images, labels])

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['annot'].to(device)])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                writer.add_scalar('Loss/train', loss, iter_num)
                writer.add_scalar('Loss/reg_loss', regression_loss, iter_num)
                writer.add_scalar('Loss/cls_loss', classification_loss,
                                  iter_num)

                epoch_loss.append(float(loss))

                if (iter_num + 1) % 1000 == 0:
                    print('Save model')
                    torch.save(
                        retinanet.module,
                        'COCO_retinanet_epoch{}_iter{}.pt'.format(
                            epoch_num, iter_num))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print('Evaluating dataset')

        coco_eval.evaluate_coco(dataset_val, retinanet, writer)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module, 'COCO_retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
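A side note on the TensorBoard logging in the listing above: add_scalar is called with iter_num, which restarts at 0 every epoch, so each epoch overwrites the previous curves. A small self-contained sketch of the usual remedy, a monotonically increasing global step (loop bounds and the dummy loss are placeholders):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('./log')
global_step = 0
for epoch_num in range(2):          # placeholder epoch count
    for iter_num in range(5):       # placeholder iterations per epoch
        loss = 1.0 / (global_step + 1)  # dummy value standing in for the training loss
        writer.add_scalar('Loss/train', loss, global_step)
        global_step += 1
writer.close()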
Exemple #24
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        default="csv",
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        default="./data/train_only.csv",
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        default="./data/classes.csv",
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        default="./data/train_only.csv",
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--voc_train',
                        default="./data/voc_train",
                        help='Path to directory containing images and annotations')
    parser.add_argument('--voc_val',
                        default="./data/bov_train",
                        help='Path to directory containing images and annotations')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=101)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=40)

    parser = parser.parse_args(args)
    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))
    elif parser.dataset == 'voc':
        if parser.voc_train is None:
            raise ValueError(
                'Must provide --voc_train when training on PASCAL VOC.')
        dataset_train = XML_VOCDataset(
            img_path=os.path.join(parser.voc_train, 'JPEGImages/'),
            xml_path=os.path.join(parser.voc_train, 'Annotations/'),
            class_list=class_list,
            transform=transforms.Compose(
                [Normalizer(), Augmenter(),
                 ResizerMultiScale()]))

        if parser.voc_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = XML_VOCDataset(
                img_path=os.path.join(parser.voc_val, 'JPEGImages/'),
                xml_path=os.path.join(parser.voc_val, 'Annotations/'),
                class_list=class_list,
                transform=transforms.Compose([Normalizer(),
                                              Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv, coco or voc), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=2,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=2,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=15,
                                                     verbose=True,
                                                     mode="max")
    #scheduler = optim.lr_scheduler.StepLR(optimizer,8)
    loss_hist = collections.deque(maxlen=1024)

    retinanet.train()
    retinanet.module.freeze_bn()
    if not os.path.exists("./logs"):
        os.mkdir("./logs")
    log_file = open("./logs/log.txt", "w")
    print('Num training images: {}'.format(len(dataset_train)))
    best_map = 0
    print("Training models...")
    for epoch_num in range(parser.epochs):

        #scheduler.step(epoch_num)
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            #print('iter num is: ', iter_num)
            try:
                #print(csv_eval.evaluate(dataset_val[:20], retinanet)[0])
                #print(type(csv_eval.evaluate(dataset_val, retinanet)))
                #print('iter num is: ', iter_num % 10 == 0)
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss
                #print(loss)

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                if iter_num % 50 == 0:
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))
                    log_file.write(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f} \n'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)
        elif parser.dataset == 'voc' and parser.voc_val is not None:

            print('Evaluating dataset')

            mAP = voc_eval.evaluate(dataset_val, retinanet)

        is_best_map = False
        try:
            is_best_map = mAP[0][0] > best_map
            best_map = max(mAP[0][0], best_map)
        except Exception:
            pass
        if is_best_map:
            print("Got better mAP: ", best_map)

            torch.save(retinanet.module,
                       './logs/{}_scale15_{}.pt'.format(epoch_num, best_map))
            shutil.copyfile(
                './logs/{}_scale15_{}.pt'.format(epoch_num, best_map),
                "./best_models/model.pt")
        else:
            print("Current best mAP: ", best_map)
        scheduler.step(best_map)
    retinanet.eval()

    torch.save(retinanet, './logs/model_final.pt')
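The listing above steps its ReduceLROnPlateau scheduler on best_map with mode="max", so the learning rate only drops once validation mAP stops improving. A self-contained sketch of that pattern with a dummy model and made-up mAP values:

import torch
import torch.optim as optim

model_ = torch.nn.Linear(4, 2)  # dummy model for illustration
optimizer = optim.Adam(model_.parameters(), lr=1e-4)
# mode='max' treats the metric as one to maximize; after `patience` epochs
# without improvement the LR is multiplied by the default factor of 0.1.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=2)

for val_map in [0.40, 0.42, 0.41, 0.41, 0.41]:  # made-up validation mAP values
    scheduler.step(val_map)
    print(optimizer.param_groups[0]['lr'])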
Exemple #25
0
def main(args=None):
#def main(epoch):
	parser     = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

	parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
	parser.add_argument('--coco_path', help='Path to COCO directory')
	parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
	parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
	parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

	parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
	parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)

	#parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
	parser.add_argument('--start-epoch', default=0, type=int, help='manual epoch number (useful on restarts)')

	parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')

	parser = parser.parse_args(args)
	#args = parser.parse_args()        
	#parser = parser.parse_args(epoch)

	# Create the data loaders
	if parser.dataset == 'coco':

		if parser.coco_path is None:
			raise ValueError('Must provide --coco_path when training on COCO.')

		dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
		dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()]))

	elif parser.dataset == 'csv':

		if parser.csv_train is None:
			raise ValueError('Must provide --csv_train when training on CSV.')

		if parser.csv_classes is None:
			raise ValueError('Must provide --csv_classes when training on CSV.')


		dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

		if parser.csv_val is None:
			dataset_val = None
			print('No validation annotations provided.')
		else:
			dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

	else:
		raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

	sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
	dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

	if dataset_val is not None:
		sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
		dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

	# Create the model
	if parser.depth == 18:
		retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 34:
		retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 50:
		retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 101:
		retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 152:
		retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
	else:
		raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')		

	use_gpu = True

	if use_gpu:
		retinanet = retinanet.cuda()

	#retinanet().load_state_dict(torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/resnet50-19c8e357.pth'))
       
	#if True:
           #print('==> Resuming from checkpoint..')
           #checkpoint = torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/coco_retinanet_2.pt')
           #retinanet().load_state_dict(checkpoint)
           #best_loss = checkpoint['loss']
           #start_epoch = checkpoint['epoch']
        
	
	retinanet = torch.nn.DataParallel(retinanet).cuda()

	retinanet.training = True

	#optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
	optimizer = optim.SGD(retinanet.parameters(), lr=1e-5)

	scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

	loss_hist = collections.deque(maxlen=500)

	retinanet.train()
	#retinanet.freeze_bn()               #for train from a middle state
	retinanet.module.freeze_bn()       #for train from the very beginning

	print('Num training images: {}'.format(len(dataset_train)))

	for epoch_num in range(parser.start_epoch, parser.epochs):

		if parser.resume:
			if os.path.isfile(parser.resume):
				print("=> loading checkpoint '{}'".format(parser.resume))
				checkpoint = torch.load(parser.resume)
				print(parser.start_epoch)
				#parser.start_epoch = checkpoint['epoch']
				#retinanet.load_state_dict(checkpoint['state_dict'])
				retinanet = checkpoint
				#retinanet.load_state_dict(checkpoint)
				print(retinanet)
				#optimizer.load_state_dict(checkpoint)
				print("=> loaded checkpoint '{}' (epoch {})".format(parser.resume, checkpoint))
			else:
				print("=> no checkpoint found at '{}'".format(parser.resume))

		retinanet.train()
		retinanet.freeze_bn()
		#retinanet.module.freeze_bn()

		if parser.dataset == 'coco':

			print('Evaluating dataset')

			coco_eval.evaluate_coco(dataset_val, retinanet)

		elif parser.dataset == 'csv' and parser.csv_val is not None:

			print('Evaluating dataset')

			mAP = csv_eval.evaluate(dataset_val, retinanet)
		
		epoch_loss = []
		
		for iter_num, data in enumerate(dataloader_train):
			try:
				optimizer.zero_grad()

				classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda()])

				classification_loss = classification_loss.mean()
				regression_loss = regression_loss.mean()

				loss = classification_loss + regression_loss
				
				if bool(loss == 0):
					continue

				loss.backward()

				torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

				optimizer.step()

				loss_hist.append(float(loss))

				epoch_loss.append(float(loss))

				print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
				
				del classification_loss
				del regression_loss
			except Exception as e:
				print(e)
				continue

		if parser.dataset == 'coco':

			print('Evaluating dataset')

			coco_eval.evaluate_coco(dataset_val, retinanet)

		elif parser.dataset == 'csv' and parser.csv_val is not None:

			print('Evaluating dataset')

			mAP = csv_eval.evaluate(dataset_val, retinanet)

		
		scheduler.step(np.mean(epoch_loss))	

		#torch.save(retinanet.module, '{}_retinanet_101_{}.pt'.format(parser.dataset, epoch_num))
		torch.save(retinanet, '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num))
		name = '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num)
		parser.resume = '/users/wenchi/ghwwc/pytorch-retinanet-master_new/' + name

	retinanet.eval()

	torch.save(retinanet, 'model_final_dilation_experiment1.pt')
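The listing above resumes by torch.load-ing a whole pickled model object, which breaks whenever the class definition changes. A hedged sketch of the more robust dict-based checkpoint convention; save_checkpoint and load_checkpoint are illustrative names, not functions from the example:

import torch

def save_checkpoint(path, retinanet, optimizer, epoch_num):
    # Persist state dicts only; retinanet is assumed to be wrapped in DataParallel.
    torch.save({
        'epoch': epoch_num,
        'state_dict': retinanet.module.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, path)

def load_checkpoint(path, retinanet, optimizer):
    checkpoint = torch.load(path, map_location='cpu')
    retinanet.module.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint['epoch'] + 1  # first epoch to run after resuming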
Exemple #26
0
def main(args=None):

    parser     = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',default="csv", help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path',default="/home/mayank-s/PycharmProjects/Datasets/coco",help='Path to COCO directory')
    parser.add_argument('--csv_train',default="berkely_ready_to_train_for_retinanet_pytorch.csv", help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',default="berkely_class.csv", help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=200)
    # parser.add_argument('--resume', default=0, help='resume from checkpoint')
    parser = parser.parse_args(args)
    # print(args.resume)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path, set_name='train2014', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2014', transform=transforms.Compose([Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')


        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=0, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    # if use_gpu:
    if torch.cuda.is_available():
        retinanet = retinanet.cuda()

        retinanet = torch.nn.DataParallel(retinanet).cuda()

        retinanet.training = True
    ###################################################################################3
    # # args.resume=0
    # Resume_model = False
    # start_epoch=0
    # if Resume_model:
    #     print('==> Resuming from checkpoint..')
    #     checkpoint = torch.load('./checkpoint/saved_with_epochs/retina_fpn_1')
    #     retinanet.load_state_dict(checkpoint['net'])
    #     best_loss = checkpoint['loss']
    #     start_epoch = checkpoint['epoch']
    #     print('Resuming from epoch:{ep}  loss:{lp}'.format(ep=start_epoch, lp=best_loss))
    #####################################################################################
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    retinanet = torch.load("./checkpoint/retina_fpn_1")

    # epoch_num=start_epoch
    for epoch_num in range(parser.epochs):

        # retinanet.train()retina_fpn_1
        # retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        # print("Saving model...")
        # name = "./checkpoint/retina_fpn_" + str(epoch_num)
        # torch.save(retinanet, name)
        # ###################################################################333
        print('Saving..')
        state = {
            'net': retinanet.module.state_dict(),
            'loss': loss_hist,
            'epoch': epoch_num,
        }
        if not os.path.isdir('checkpoint/saved_with_epochs'):
            os.mkdir('checkpoint/saved_with_epochs')
        # checkpoint_path="./checkpoint/Ckpt_"+
        name = "./checkpoint/saved_with_epochs/retina_fpn_" + str(epoch_num)
        torch.save(state, name)
        # torch.save(state, './checkpoint/retinanet.pth')
        #####################################################################

        '''if parser.dataset == 'coco':
Exemple #27
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default="csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)',
                        default="binary_class.csv")
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=18)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=500)
    parser.add_argument('--epochs_only_det',
                        help='Number of epochs to train detection part',
                        type=int,
                        default=1)
    parser.add_argument('--max_epochs_no_improvement',
                        help='Max epochs without improvement',
                        type=int,
                        default=100)
    parser.add_argument('--pretrained_model',
                        help='Path of .pt file with pretrained model',
                        default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out',
                        help='Path of .pt file with trained model to save',
                        default='trained')

    parser.add_argument('--score_threshold',
                        help='Score above which boxes are kept',
                        type=float,
                        default=0.5)
    parser.add_argument('--nms_threshold',
                        help='IoU threshold for non-maximum suppression',
                        type=float,
                        default=0.2)
    parser.add_argument('--max_boxes',
                        help='Max boxes to be fed to recognition',
                        default=95)
    parser.add_argument('--seg_level',
                        help='[line, word], to choose anchor aspect ratio',
                        default='word')
    parser.add_argument(
        '--early_stop_crit',
        help='Early stop criterion, detection (map) or transcription (cer)',
        default='cer')
    parser.add_argument('--max_iters_epoch',
                        help='Max steps per epoch (for debugging)',
                        default=1000000)
    parser.add_argument('--train_htr',
                        help='Train recognition or not',
                        default='True')
    parser.add_argument('--train_det',
                        help='Train detection or not',
                        default='True')
    parser.add_argument(
        '--binary_classifier',
        help=
        'Whether to use the classification branch as binary instead of multiclass.',
        default='False')
    parser.add_argument(
        '--htr_gt_box',
        help='Train recognition branch with box gt (for debugging)',
        default='False')
    parser.add_argument(
        '--ner_branch',
        help='Train named entity recognition with separate branch',
        default='False')

    parser = parser.parse_args(args)

    if parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train')

        dataset_name = parser.csv_train.split("/")[-2]

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (this script only supports csv), exiting.')

    # Files for training log

    experiment_id = str(time.time()).split('.')[0]
    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')
    valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt', 'w')
    for arg in vars(parser):
        if getattr(parser, arg) is not None:
            valid_cer_f.write(
                str(arg) + ' ' + str(getattr(parser, arg)) + '\n')

    current_commit = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    valid_cer_f.write(str(current_commit))

    valid_cer_f.write(
        "epoch_num   cer     best cer     mAP    best mAP     time\n")

    valid_cer_f.close()

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=0,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)


    # Create the model

    train_htr = parser.train_htr == 'True'
    htr_gt_box = parser.htr_gt_box == 'True'
    ner_branch = parser.ner_branch == 'True'
    binary_classifier = parser.binary_classifier == 'True'
    torch.backends.cudnn.benchmark = False

    alphabet = dataset_train.alphabet
    if os.path.exists(parser.pretrained_model):
        retinanet = torch.load(parser.pretrained_model)
        retinanet.classificationModel = ClassificationModel(
            num_features_in=256,
            num_anchors=retinanet.anchors.num_anchors,
            num_classes=dataset_train.num_classes())
        if ner_branch:
            retinanet.nerModel = NERModel(
                feature_size=256,
                pool_h=retinanet.pool_h,
                n_classes=dataset_train.num_classes(),
                pool_w=retinanet.pool_w)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(
                                           parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet,
                                       train_htr=train_htr,
                                       htr_gt_box=htr_gt_box,
                                       ner_branch=ner_branch,
                                       binary_classifier=binary_classifier)

        elif parser.depth == 34:

            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(
                                           parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet,
                                       train_htr=train_htr,
                                       htr_gt_box=htr_gt_box)

        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    train_htr = parser.train_htr == 'True'
    train_det = parser.train_det == 'True'
    retinanet.htr_gt_box = parser.htr_gt_box == 'True'

    retinanet.train_htr = train_htr
    retinanet.epochs_only_det = parser.epochs_only_det

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=50,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()
    retinanet.train()
    retinanet.module.freeze_bn()

    best_cer = 1000
    best_map = 0
    epochs_no_improvement = 0
    verbose_each = 20
    optimize_each = 1
    objective = 100
    best_objective = 10000

    print(('Num training images: {}'.format(len(dataset_train))))

    for epoch_num in range(parser.epochs):
        cers = []

        retinanet.training = True

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            if iter_num > int(parser.max_iters_epoch): break
            try:
                if iter_num % optimize_each == 0:
                    optimizer.zero_grad()
                (classification_loss, regression_loss, ctc_loss,
                 ner_loss) = retinanet([
                     data['img'].cuda().float(), data['annot'], ctc, epoch_num
                 ])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                if train_det:

                    if train_htr:
                        loss = ctc_loss + classification_loss + regression_loss + ner_loss

                    else:
                        loss = classification_loss + regression_loss + ner_loss

                elif train_htr:
                    loss = ctc_loss

                else:
                    continue
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                if iter_num % verbose_each == 0:
                    print(
                        'Epoch: {} | Step: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | CTC loss: {:1.5f} | NER loss: {:1.5f} | Running loss: {:1.5f} | Total loss: {:1.5f}\r'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), float(ctc_loss),
                                float(ner_loss), np.mean(loss_hist),
                                float(loss)))

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                torch.cuda.empty_cache()

            except Exception as e:
                print(e)
                continue
        if parser.dataset == 'csv' and parser.csv_val is not None and train_det:

            print('Evaluating dataset')

            mAP, text_mAP, current_cer = csv_eval.evaluate(
                dataset_val, retinanet, score_threshold=parser.score_threshold)
            #text_mAP,_ = csv_eval_binary_map.evaluate(dataset_val, retinanet,score_threshold=parser.score_threshold)
            objective = current_cer * (1 - mAP)

        retinanet.eval()
        retinanet.training = False
        retinanet.score_threshold = float(parser.score_threshold)
        '''for idx,data in enumerate(dataloader_val):
            if idx>int(parser.max_iters_epoch): break
            print("Eval CER on validation set:",idx,"/",len(dataset_val),"\r")
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]

            #generate_pagexml(image_name,data,retinanet,parser.score_threshold,parser.nms_threshold,dataset_val)
            text_gt =".".join(dataset_val.image_names[idx].split('.')[:-1])+'.txt'
            f =open(text_gt,'r')
            text_gt_lines=f.readlines()[0]
            transcript_pred = get_transcript(image_name,data,retinanet,float(parser.score_threshold),float(parser.nms_threshold),dataset_val,alphabet)
            cers.append(float(editdistance.eval(transcript_pred,text_gt_lines))/len(text_gt_lines))'''

        t = str(time.time()).split('.')[0]

        valid_cer_f.close()
        #print("GT",text_gt_lines)
        #print("PREDS SAMPLE:",transcript_pred)

        if parser.early_stop_crit == 'cer':

            if float(objective) < float(
                    best_objective):  #float(current_cer)<float(best_cer):
                best_cer = current_cer
                best_objective = objective

                epochs_no_improvement = 0
                torch.save(
                    retinanet.module, 'trained_models/' + parser.model_out +
                    '{}_retinanet.pt'.format(parser.dataset))

            else:
                epochs_no_improvement += 1
            if mAP > best_map:
                best_map = mAP
        elif parser.early_stop_crit == 'map':
            if mAP > best_map:
                best_map = mAP
                epochs_no_improvement = 0
                torch.save(
                    retinanet.module, 'trained_models/' + parser.model_out +
                    '{}_retinanet.pt'.format(parser.dataset))

            else:
                epochs_no_improvement += 1
            if float(current_cer) < float(best_cer):
                best_cer = current_cer
        if train_det:
            print(epoch_num, "mAP: ", mAP, " best mAP", best_map)
        if train_htr:
            print("VALID CER:", current_cer, "best CER", best_cer)
        print("Epochs no improvement:", epochs_no_improvement)
        valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt',
                           'a')
        valid_cer_f.write(
            str(epoch_num) + " " + str(current_cer) + " " + str(best_cer) +
            ' ' + str(mAP) + ' ' + str(best_map) + ' ' + str(text_mAP) + '\n')
        if epochs_no_improvement > 3:
            for param_group in optimizer.param_groups:
                if param_group['lr'] > 10e-5:
                    param_group['lr'] *= 0.1

        if epochs_no_improvement >= parser.max_epochs_no_improvement:
            print("TRAINING FINISHED AT EPOCH", epoch_num, ".")
            sys.exit()

        scheduler.step(np.mean(epoch_loss))
        torch.cuda.empty_cache()

    retinanet.eval()
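The listing above combines a ReduceLROnPlateau scheduler with a manual decay driven by epochs_no_improvement (note that 10e-5 equals 1e-4). The manual part, factored into a small function for readability; the name and defaults are illustrative, not part of the example:

def maybe_decay_lr(optimizer, epochs_no_improvement, patience=3,
                   min_lr=1e-4, factor=0.1):
    # Mirrors the inline logic above: once the metric has not improved for
    # more than `patience` epochs, scale each param group's LR by `factor`,
    # skipping groups already at or below `min_lr`.
    if epochs_no_improvement > patience:
        for param_group in optimizer.param_groups:
            if param_group['lr'] > min_lr:
                param_group['lr'] *= factor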
Exemple #28
0
def get_dataloaders(dataset,
                    batch,
                    dataroot,
                    resize=608,
                    split=0.15,
                    split_idx=0,
                    multinode=False,
                    target_lb=-1):
    multilabel = False
    detection = False
    if 'coco' in dataset:
        transform_train = transforms.Compose(
            [Normalizer(), Augmenter(),
             Resizer(min_side=resize)])
        transform_test = transforms.Compose(
            [Normalizer(), Resizer(min_side=resize)])

        multilabel = True
        detection = True
    elif 'cifar' in dataset or 'svhn' in dataset:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
        ])
    elif 'imagenet' in dataset:
        input_size = 224
        sized_size = 256

        if 'efficientnet' in C.get()['model']:
            input_size = EfficientNet.get_image_size(C.get()['model'])
            sized_size = input_size + 32  # TODO
            # sized_size = int(round(input_size / 224. * 256))
            # sized_size = input_size
            logger.info('size changed to %d/%d.' % (input_size, sized_size))

        transform_train = transforms.Compose([
            EfficientNetRandomCrop(input_size),
            transforms.Resize((input_size, input_size),
                              interpolation=Image.BICUBIC),
            # transforms.RandomResizedCrop(input_size, scale=(0.1, 1.0), interpolation=Image.BICUBIC),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(
                brightness=0.4,
                contrast=0.4,
                saturation=0.4,
            ),
            transforms.ToTensor(),
            Lighting(0.1, _IMAGENET_PCA['eigval'], _IMAGENET_PCA['eigvec']),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        transform_test = transforms.Compose([
            EfficientNetCenterCrop(input_size),
            transforms.Resize((input_size, input_size),
                              interpolation=Image.BICUBIC),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    else:
        raise ValueError('dataset=%s' % dataset)

    total_aug = augs = None
    if isinstance(C.get().aug, list):
        logger.debug('augmentation provided.')
        transform_train.transforms.insert(
            0, Augmentation(C.get().aug, detection=detection))
    else:
        logger.debug('augmentation: %s' % C.get().aug)
        if C.get().aug == 'fa_reduced_cifar10':
            transform_train.transforms.insert(
                0, Augmentation(fa_reduced_cifar10()))

        elif C.get().aug == 'fa_reduced_imagenet':
            transform_train.transforms.insert(
                0, Augmentation(fa_resnet50_rimagenet()))

        elif C.get().aug == 'fa_reduced_svhn':
            transform_train.transforms.insert(0,
                                              Augmentation(fa_reduced_svhn()))

        elif C.get().aug == 'arsaug':
            transform_train.transforms.insert(0, Augmentation(arsaug_policy()))
        elif C.get().aug == 'autoaug_cifar10':
            transform_train.transforms.insert(
                0, Augmentation(autoaug_paper_cifar10()))
        elif C.get().aug == 'autoaug_extend':
            transform_train.transforms.insert(0,
                                              Augmentation(autoaug_policy()))
        elif C.get().aug in ['default']:
            pass
        else:
            raise ValueError('unknown augmentation policy: %s' % C.get().aug)

    if C.get()['cutout'] > 0:
        transform_train.transforms.append(CutoutDefault(C.get()['cutout']))

    if dataset == 'coco':
        total_trainset = CocoDataset(dataroot,
                                     set_name='train2017',
                                     transform=transform_train)
        testset = CocoDataset(dataroot,
                              set_name='val2017',
                              transform=transform_test)

    elif dataset == 'cifar10':
        total_trainset = torchvision.datasets.CIFAR10(
            root=dataroot,
            train=True,
            download=True,
            transform=transform_train)
        testset = torchvision.datasets.CIFAR10(root=dataroot,
                                               train=False,
                                               download=True,
                                               transform=transform_test)
    elif dataset == 'reduced_cifar10':
        total_trainset = torchvision.datasets.CIFAR10(
            root=dataroot,
            train=True,
            download=True,
            transform=transform_train)
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=46000,
                                     random_state=0)  # 4000 trainset
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.targets)
        train_idx, valid_idx = next(sss)
        targets = [total_trainset.targets[idx] for idx in train_idx]
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        testset = torchvision.datasets.CIFAR10(root=dataroot,
                                               train=False,
                                               download=True,
                                               transform=transform_test)
    elif dataset == 'cifar100':
        total_trainset = torchvision.datasets.CIFAR100(
            root=dataroot,
            train=True,
            download=True,
            transform=transform_train)
        testset = torchvision.datasets.CIFAR100(root=dataroot,
                                                train=False,
                                                download=True,
                                                transform=transform_test)
    elif dataset == 'svhn':
        trainset = torchvision.datasets.SVHN(root=dataroot,
                                             split='train',
                                             download=True,
                                             transform=transform_train)
        extraset = torchvision.datasets.SVHN(root=dataroot,
                                             split='extra',
                                             download=True,
                                             transform=transform_train)
        total_trainset = ConcatDataset([trainset, extraset])  # 73,257 'train' + 531,131 'extra' images
        testset = torchvision.datasets.SVHN(root=dataroot,
                                            split='test',
                                            download=True,
                                            transform=transform_test)
    elif dataset == 'reduced_svhn':
        total_trainset = torchvision.datasets.SVHN(root=dataroot,
                                                   split='train',
                                                   download=True,
                                                   transform=transform_train)
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=73257 - 1000,
                                     random_state=0)  # 1000 trainset
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.targets)
        train_idx, valid_idx = next(sss)
        targets = [total_trainset.targets[idx] for idx in train_idx]
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        testset = torchvision.datasets.SVHN(root=dataroot,
                                            split='test',
                                            download=True,
                                            transform=transform_test)
    elif dataset == 'imagenet':
        total_trainset = ImageNet(root=os.path.join(dataroot,
                                                    'imagenet-pytorch'),
                                  transform=transform_train,
                                  download=True)
        testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'),
                           split='val',
                           transform=transform_test)

        # compatibility
        total_trainset.targets = [lb for _, lb in total_trainset.samples]
    elif dataset == 'reduced_imagenet':
        # randomly chosen indices
        #         idx120 = sorted(random.sample(list(range(1000)), k=120))
        idx120 = [
            16, 23, 52, 57, 76, 93, 95, 96, 99, 121, 122, 128, 148, 172, 181,
            189, 202, 210, 232, 238, 257, 258, 259, 277, 283, 289, 295, 304,
            307, 318, 322, 331, 337, 338, 345, 350, 361, 375, 376, 381, 388,
            399, 401, 408, 424, 431, 432, 440, 447, 462, 464, 472, 483, 497,
            506, 512, 530, 541, 553, 554, 557, 564, 570, 584, 612, 614, 619,
            626, 631, 632, 650, 657, 658, 660, 674, 675, 680, 682, 691, 695,
            699, 711, 734, 736, 741, 754, 757, 764, 769, 770, 780, 781, 787,
            797, 799, 811, 822, 829, 830, 835, 837, 842, 843, 845, 873, 883,
            897, 900, 902, 905, 913, 920, 925, 937, 938, 940, 941, 944, 949,
            959
        ]
        total_trainset = ImageNet(root=os.path.join(dataroot,
                                                    'imagenet-pytorch'),
                                  transform=transform_train)
        testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'),
                           split='val',
                           transform=transform_test)

        # compatibility
        total_trainset.targets = [lb for _, lb in total_trainset.samples]

        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=len(total_trainset) - 50000,
                                     random_state=0)  # 50000 trainset
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.targets)
        train_idx, valid_idx = next(sss)

        # filter out
        train_idx = list(
            filter(lambda x: total_trainset.labels[x] in idx120, train_idx))
        valid_idx = list(
            filter(lambda x: total_trainset.labels[x] in idx120, valid_idx))
        test_idx = list(
            filter(lambda x: testset.samples[x][1] in idx120,
                   range(len(testset))))

        targets = [
            idx120.index(total_trainset.targets[idx]) for idx in train_idx
        ]
        for idx in range(len(total_trainset.samples)):
            if total_trainset.samples[idx][1] not in idx120:
                continue
            total_trainset.samples[idx] = (total_trainset.samples[idx][0],
                                           idx120.index(
                                               total_trainset.samples[idx][1]))
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        for idx in range(len(testset.samples)):
            if testset.samples[idx][1] not in idx120:
                continue
            testset.samples[idx] = (testset.samples[idx][0],
                                    idx120.index(testset.samples[idx][1]))
        testset = Subset(testset, test_idx)
        print('reduced_imagenet train=', len(total_trainset))
    else:
        raise ValueError('invalid dataset name=%s' % dataset)

    if total_aug is not None and augs is not None:
        total_trainset.set_preaug(augs, total_aug)
        print('set_preaug-')

    train_sampler = None
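    # split > 0.0: carve a validation subset out of the training data
    # (plain ShuffleSplit for multilabel/detection, stratified otherwise).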
    if split > 0.0:
        if multilabel:
            # Unclear how much stratification matters here (especially for
            # val); worth testing and adding for multilabel in the future.
            sss = ShuffleSplit(n_splits=5, test_size=split, random_state=0)
            sss = sss.split(list(range(len(total_trainset))))
            for _ in range(split_idx + 1):
                train_idx, valid_idx = next(sss)
        else:
            sss = StratifiedShuffleSplit(n_splits=5,
                                         test_size=split,
                                         random_state=0)
            sss = sss.split(list(range(len(total_trainset))),
                            total_trainset.targets)
            for _ in range(split_idx + 1):
                train_idx, valid_idx = next(sss)

        if target_lb >= 0:
            train_idx = [
                i for i in train_idx if total_trainset.targets[i] == target_lb
            ]
            valid_idx = [
                i for i in valid_idx if total_trainset.targets[i] == target_lb
            ]

        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetSampler(valid_idx)

        if multinode:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                Subset(total_trainset, train_idx),
                num_replicas=dist.get_world_size(),
                rank=dist.get_rank())
    else:
        valid_sampler = SubsetSampler([])

        if multinode:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                total_trainset,
                num_replicas=dist.get_world_size(),
                rank=dist.get_rank())
            logger.info(
                f'dataset with DistributedSampler {dist.get_rank()}/{dist.get_world_size()}'
            )

    trainloader = DataLoader(total_trainset,
                             batch_size=batch,
                             shuffle=train_sampler is None,
                             num_workers=8,
                             pin_memory=True,
                             sampler=train_sampler,
                             drop_last=True,
                             collate_fn=collater)
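    # validloader draws from the same total_trainset via valid_sampler
    # (an empty SubsetSampler when split == 0).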
    validloader = DataLoader(total_trainset,
                             batch_size=batch,
                             shuffle=False,
                             num_workers=4,
                             pin_memory=True,
                             sampler=valid_sampler,
                             drop_last=False,
                             collate_fn=collater)

    testloader = DataLoader(testset,
                            batch_size=batch,
                            shuffle=False,
                            num_workers=8,
                            pin_memory=True,
                            drop_last=False,
                            collate_fn=collater)

    return train_sampler, trainloader, validloader, testloader
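
A hypothetical invocation of the loader above; 'cifar10', the batch size, and the dataroot path are placeholders, and C.get() must already hold a valid config (e.g. aug='default', cutout=0):

# Hypothetical call: build CIFAR-10 loaders with a 15% validation split.
train_sampler, trainloader, validloader, testloader = get_dataloaders(
    'cifar10', batch=128, dataroot='/data', split=0.15, split_idx=0)
print(len(trainloader.dataset), len(testloader.dataset))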
Example #29
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training with CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training with CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)  # running window for the printed 'Running loss'

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            '{}_retinanet_dilation_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final_dilation.pt')
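
Because main(args=None) hands args straight to argparse, the script can be driven from code as well as the shell; a minimal sketch with placeholder annotation paths:

# Placeholder CSV paths; a '--dataset csv' run needs both files.
main(['--dataset', 'csv',
      '--csv_train', 'train_annots.csv',
      '--csv_classes', 'classes.csv',
      '--depth', '50', '--epochs', '100'])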
Example #30
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=20)
    parser.add_argument('--resume',
                        '-r',
                        action='store_true',
                        help='resume from checkpoint')
    parser.add_argument('--batch_size',
                        type=int,
                        default=4,
                        help='set the batch size')
    parser.add_argument('--learning_rate',
                        '-lr',
                        type=float,
                        default=1e-3,
                        help='set the learning rate')
    parser = parser.parse_args(args)

    batch_size = parser.batch_size
    learning_rate = parser.learning_rate
    start_epoch = 0
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    best_acc = 0  # best test accuracy

    # mean & std for ROI extraction
    #mean = (0.1146, 0.1147, 0.1148)
    #std = (0.1089, 0.1090, 0.1090)

    # mean & std for QRCode extraction
    mean = (0.2405, 0.2416, 0.2427)
    std = (0.2194, 0.2208, 0.2223)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training with CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training with CSV.')

        dataset_train = CSVDataset(
            train_file=parser.csv_train,
            class_list=parser.csv_classes,
            transform=transforms.Compose([
                Normalizer(mean=mean, std=std),
                Augmenter(),
                #YFlipAugmenter(),
                #CropAugmenter(),
                #Rot180Augmenter(),
                Resizer()
            ]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([
                                         Normalizer(mean=mean, std=std),
                                         Resizer()
                                     ]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=4,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=4,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.to(device)

    retinanet = torch.nn.DataParallel(retinanet)
    if parser.resume:
        # Load checkpoint
        print("==> Resuming from checkpoint")
        checkpoint = torch.load('./checkpoint/ckpt.pth')
        retinanet.load_state_dict(checkpoint['net'])
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
        print('resume training from epoch', start_epoch, 'with accuracy',
              best_acc)

    retinanet.training = True

    #optimizer = optim.Adam(retinanet.parameters(), lr=1e-3)
    optimizer = optim.SGD(retinanet.parameters(),
                          lr=learning_rate,
                          momentum=0.9)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(start_epoch, start_epoch + parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue
        writer.add_scalar('train_loss', np.mean(epoch_loss), epoch_num)  # writer: assumed a module-level TensorBoard SummaryWriter

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val,
                                    retinanet,
                                    iou_threshold=0.7,
                                    max_detections=5,
                                    score_threshold=0.2,
                                    epoch=epoch_num)
            print('mapROI:', mAP[0])
            AP, num_annotations = mAP[0]
            acc = 100. * AP
            if acc > best_acc:
                print('Saving... acc:', acc)
                state = {
                    'net': retinanet.state_dict(),
                    'acc': acc,
                    'epoch': epoch_num,
                }
                if not os.path.isdir('checkpoint'):
                    os.mkdir('checkpoint')
                torch.save(state, './checkpoint/ckpt.pth')
                torch.save(retinanet, './checkpoint/best.pth')
                best_acc = acc
            writer.add_scalar('test_acc', acc, epoch_num)

        scheduler.step(np.mean(epoch_loss))

        #torch.save(retinanet.module, osp.join('checkpoints','{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
    writer.close()
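
The best-model file written above stores the whole pickled network, so reloading it for inference is short; a minimal sketch (pickle requires the defining model code to be importable):

import torch

# Reload the full model saved as './checkpoint/best.pth' during training.
retinanet = torch.load('./checkpoint/best.pth')
retinanet.eval()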