def main():
    args = parser.parse_args()

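    # Load the YAML config and expose it with attribute access via EasyDict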
    with open(args.config) as f:
        config = yaml.safe_load(f)
    config = EasyDict(config)

    config.save_path = os.path.join(os.path.dirname(args.config), 'snapshots')
    if not os.path.exists(config.save_path):
        os.mkdir(config.save_path)
    config.log_path = os.path.join(os.path.dirname(args.config), 'logs')
    if not os.path.exists(config.log_path):
        os.mkdir(config.log_path)
    config.event_path = os.path.join(os.path.dirname(args.config), 'events')
    if not os.path.exists(config.event_path):
        os.mkdir(config.event_path)

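    # num_iter_flag: internal batches per nominal batch (batch_size / internal_batchsize);
    # the iteration budget and LR-decay points below are scaled by it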
    batch_size = config.batch_size
    config.num_iter_flag = batch_size // config.internal_batchsize
    with open(config.label_path, 'r') as j:
        config.label_map = json.load(j)

    config.n_classes = len(
        config.label_map)  # number of different types of objects

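    # Training schedule and optimizer hyper-parameters from the config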
    iterations = config.optimizer['max_iter'] * config.num_iter_flag
    workers = config.workers
    val_freq = config.val_freq
    lr = config.optimizer['base_lr']
    decay_lr_at = [
        it * config.num_iter_flag for it in config.optimizer['decay_iter']
    ]

    decay_lr_to = config.optimizer['decay_lr']
    momentum = config.optimizer['momentum']
    weight_decay = config.optimizer['weight_decay']
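    # Device selection: with two GPUs visible, config.device chooses cuda:1 vs cuda:0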
    if torch.cuda.device_count() < 2:
        config.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
    else:
        if config.device == 1:  # only for 2 GPUs max
            config.device = torch.device(
                "cuda:1" if torch.cuda.is_available() else "cpu")
        else:
            config.device = torch.device(
                "cuda:0" if torch.cuda.is_available() else "cpu")

    train_data_folder = config.train_data_root
    val_data_folder = config.val_data_root
    input_size = (int(config.model['input_size']),
                  int(config.model['input_size']))

    # Learning parameters
    if args.recover:
        assert args.load_path is not None
        checkpoint = torch.load(args.load_path)
        start_epoch = checkpoint['epoch'] + 1
        print('Resume training from checkpoint %s epoch %d.\n' %
              (args.load_path, start_epoch))
        model = checkpoint['model']
        _, criterion = model_entry(config)
        optimizer = checkpoint['optimizer']
    else:
        start_epoch = 0
        model, criterion = model_entry(config)
        # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo
        biases = list()
        not_biases = list()
        for param_name, param in model.named_parameters():
            if param.requires_grad:
                if param_name.endswith('.bias'):
                    biases.append(param)
                else:
                    not_biases.append(param)
        if config.optimizer['type'].upper() == 'SGD':
            optimizer = torch.optim.SGD(params=[{
                'params': biases,
                'lr': 2 * lr
            }, {
                'params': not_biases
            }],
                                        lr=lr,
                                        momentum=momentum,
                                        weight_decay=weight_decay)
        elif config.optimizer['type'].upper() == 'ADAM':
            # Adam takes no momentum argument (it uses betas internally)
            optimizer = torch.optim.Adam(params=[{
                'params': biases,
                'lr': 2 * lr
            }, {
                'params': not_biases
            }],
                                         lr=lr,
                                         weight_decay=weight_decay)
        else:
            raise NotImplementedError

    # Custom dataloaders
    if config.data_name.upper() == 'COCO':
        train_dataset = COCO17Dataset(train_data_folder,
                                      split='train',
                                      input_size=input_size,
                                      config=config)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.internal_batchsize,
            shuffle=True,
            collate_fn=train_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False)
        test_dataset = COCO17Dataset(val_data_folder,
                                     split='val',
                                     input_size=input_size,
                                     config=config)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config.internal_batchsize,
            shuffle=False,
            collate_fn=test_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False)
    elif config.data_name.upper() == 'VOC':
        train_dataset = PascalVOCDataset(train_data_folder,
                                         split='train',
                                         input_size=input_size,
                                         config=config)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.internal_batchsize,
            shuffle=True,
            collate_fn=train_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False)
        test_dataset = PascalVOCDataset(val_data_folder,
                                        split='val',
                                        input_size=input_size,
                                        config=config)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config.internal_batchsize,
            shuffle=False,
            collate_fn=test_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False)
    else:
        raise NotImplementedError

    if args.evaluate:
        assert args.load_path is not None
        checkpoint = torch.load(args.load_path)
        model = checkpoint['model']
        saved_epoch = checkpoint['epoch']
        model = model.to(config.device)
        optimizer = checkpoint['optimizer']
        print('Evaluate model from checkpoint %s at epoch %d.\n' %
              (args.load_path, saved_epoch))

        now = datetime.now()
        date_time = now.strftime("%m-%d-%Y_%H-%M-%S")
        config.logger = create_logger(
            'global_logger',
            os.path.join(
                config.log_path,
                'eval_result_{}_{}.txt'.format(config.model['arch'],
                                               date_time)))
        print('Length of Testing Dataset:', len(test_dataset))
        print('evaluate checkpoint: ', args.load_path, ' at epoch: ',
              saved_epoch)
        evaluate(test_loader, model, optimizer, config=config)
        # evaluation-only run: skip the training loop below
        return

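    # cudnn benchmark autotuning suits the fixed input size used for training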
    cudnn.benchmark = True
    model = model.to(config.device)
    criterion = criterion(priors_cxcy=model.priors_cxcy,
                          config=config).to(config.device)

    # create logger to track training results
    now = datetime.now()
    date_time = now.strftime("%m-%d-%Y_%H-%M-%S")
    config.tb_logger = SummaryWriter(config.event_path)
    config.logger = create_logger(
        'global_logger',
        os.path.join(config.log_path,
                     'log_{}_{}.txt'.format(config.model['arch'], date_time)))
    config.logger.info('args: {}'.format(pprint.pformat(args)))
    config.logger.info('config: {}'.format(pprint.pformat(config)))

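    # Convert the iteration budget and LR-decay points into epoch counts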
    epochs = iterations // (len(train_dataset) // config.internal_batchsize)

    decay_lr_at = [
        it // (len(train_dataset) // config.internal_batchsize)
        for it in decay_lr_at
    ]  # calculate epoch to decay
    print('total train epochs: ', epochs, ' training starts ......')
    str_print = 'Dataset size: {}'.format(len(train_dataset))
    config.logger.info(str_print)

    # Epochs
    best_mAP = -1.

    for epoch in range(start_epoch, epochs):
        # Decay learning rate at particular epochs
        if epoch in decay_lr_at:
            adjust_learning_rate(optimizer, decay_lr_to)

        config.tb_logger.add_scalar('learning_rate',
                                    optimizer.param_groups[0]['lr'], epoch)

        # evaluate(test_loader, model, optimizer, config=config)

        train(train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch,
              config=config)

        # Save checkpoint
        if (epoch > 0 and epoch % val_freq == 0) or epoch == 3:
            _, current_mAP = evaluate(test_loader,
                                      model,
                                      optimizer,
                                      config=config)
            config.tb_logger.add_scalar('mAP', current_mAP, epoch)
            if current_mAP > best_mAP:
                save_checkpoint(
                    epoch,
                    model,
                    optimizer,
                    name='{}/{}_{}_checkpoint_epoch-{}.pth.tar'.format(
                        config.save_path, config.model['arch'].lower(),
                        config.data_name.lower(), epoch))
                best_mAP = current_mAP

    # Save the last checkpoint if it is better
    _, current_mAP = evaluate(test_loader, model, optimizer, config=config)
    config.tb_logger.add_scalar('mAP', current_mAP, epoch)
    if current_mAP > best_mAP:
        save_checkpoint(epoch,
                        model,
                        optimizer,
                        name='{}/{}_{}_checkpoint_epoch-{}.pth.tar'.format(
                            config.save_path, config.model['arch'].lower(),
                            config.data_name.lower(), epoch))
def main():
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.safe_load(f)
    config = EasyDict(config)

    config.save_path = os.path.join(os.path.dirname(args.config), 'snapshots')
    if not os.path.exists(config.save_path):
        os.mkdir(config.save_path)
    config.log_path = os.path.join(os.path.dirname(args.config), 'logs')
    if not os.path.exists(config.log_path):
        os.mkdir(config.log_path)
    config.event_path = os.path.join(os.path.dirname(args.config), 'events')
    if not os.path.exists(config.event_path):
        os.mkdir(config.event_path)

    with open(config.label_path, 'r') as j:
        config.label_map = json.load(j)

    config.n_classes = len(
        config.label_map)  # number of different types of objects

    iterations = config.optimizer['max_iter']
    workers = config.workers
    val_freq = config.val_freq
    lr = config.optimizer['base_lr']

    momentum = config.optimizer['momentum']
    weight_decay = config.optimizer['weight_decay']
    if torch.cuda.device_count() < 2:
        config.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
    else:
        if config.device == 1:  # only for 2 GPUs max
            config.device = torch.device(
                "cuda:1" if torch.cuda.is_available() else "cpu")
        else:
            config.device = torch.device(
                "cuda:0" if torch.cuda.is_available() else "cpu")

    train_data_folder = config.train_data_root
    val_data_folder = config.val_data_root

    now = datetime.now()
    date_time = now.strftime("%m-%d-%Y_%H-%M-%S")
    config.logger = create_logger(
        'global_logger',
        os.path.join(config.log_path,
                     'log_{}_{}.txt'.format(config.model['arch'], date_time)))

    # Learning parameters
    if args.recover:
        assert args.load_path is not None
        checkpoint = torch.load(args.load_path)
        start_epoch = checkpoint['epoch'] + 1
        print('Resume training from checkpoint %s epoch %d.\n' %
              (args.load_path, start_epoch))
        model = checkpoint['model']
        _, criterion = model_entry(config)
        optimizer = checkpoint['optimizer']
        del checkpoint
        torch.cuda.empty_cache()
    else:
        start_epoch = 0
        model, criterion = model_entry(config)
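        # Optional warm start: reuse backbone / FPN / auxiliary layers from an earlier checkpoint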
        if args.finetune:
            checkpoint = torch.load(args.load_path, map_location=config.device)
            init_model = checkpoint['model']
            reuse_layers = {}
            for param_tensor in init_model.state_dict().keys():
                if param_tensor.startswith('aux_convs') or param_tensor.startswith('base.') \
                        or param_tensor.startswith('fpn.') or param_tensor.startswith('conv1') \
                        or param_tensor.startswith('bn1') or param_tensor.startswith('layer'):
                    reuse_layers[param_tensor] = init_model.state_dict(
                    )[param_tensor]
                    print("Reusing:", param_tensor, "\t",
                          init_model.state_dict()[param_tensor].size())
            model.load_state_dict(reuse_layers, strict=False)
            str_info = 'Finetuning model-{} from {}'.format(
                config.model['arch'].upper(), args.load_path)
            config.logger.info(str_info)
            del checkpoint, init_model
            torch.cuda.empty_cache()
        # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo
        biases = list()
        not_biases = list()
        for param_name, param in model.named_parameters():
            if param.requires_grad:
                if param_name.endswith('.bias'):
                    biases.append(param)
                else:
                    not_biases.append(param)
        if config.optimizer['type'].upper() == 'SGD':
            optimizer = torch.optim.SGD(params=[{
                'params': biases,
                'lr': 2 * lr
            }, {
                'params': not_biases
            }],
                                        lr=lr,
                                        momentum=momentum,
                                        weight_decay=weight_decay)
        elif config.optimizer['type'].upper() == 'ADAM':
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        else:
            raise NotImplementedError

    # Custom dataloaders
    if config.data_name.upper() == 'COCO':
        train_dataset = COCO17Dataset(train_data_folder,
                                      split='train',
                                      config=config)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.batch_size,
            shuffle=True,
            collate_fn=train_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False,
            drop_last=True)
        test_dataset = COCO17Dataset(val_data_folder,
                                     split='val',
                                     config=config)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config.batch_size,
            shuffle=False,
            collate_fn=test_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False)
    elif config.data_name.upper() == 'VOC':
        train_dataset = PascalVOCDataset(train_data_folder,
                                         split='train',
                                         config=config)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.batch_size,
            shuffle=True,
            collate_fn=train_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False,
            drop_last=True)
        test_dataset = PascalVOCDataset(val_data_folder,
                                        split='val',
                                        config=config)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config.batch_size,
            shuffle=False,
            collate_fn=test_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False)
    elif config.data_name.upper() == 'DETRAC':
        train_dataset = DetracDataset(train_data_folder,
                                      split='train',
                                      config=config)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.batch_size,
            shuffle=True,
            collate_fn=train_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False,
            drop_last=True)
        test_dataset = DetracDataset(val_data_folder,
                                     split='val',
                                     config=config)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config.batch_size,
            shuffle=False,
            collate_fn=test_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False)
    else:
        print('The dataset is not available for training.')
        raise NotImplementedError

    if args.evaluate:
        assert args.load_path is not None
        checkpoint = torch.load(args.load_path)
        model = checkpoint['model']
        saved_epoch = checkpoint['epoch']
        model = model.to(config.device)
        optimizer = checkpoint['optimizer']
        print('Evaluate model from checkpoint %s at epoch %d.\n' %
              (args.load_path, saved_epoch))

        now = datetime.now()
        date_time = now.strftime("%m-%d-%Y_%H-%M-%S")
        config.logger = create_logger(
            'global_logger',
            os.path.join(
                config.log_path,
                'eval_result_{}_{}.txt'.format(config.model['arch'],
                                               date_time)))
        print('Length of Testing Dataset:', len(test_dataset))
        print('evaluate checkpoint: ', args.load_path, ' at epoch: ',
              saved_epoch)
        evaluate(test_loader, model, optimizer, config=config)
        # evaluation-only run: skip the training loop below
        return

    cudnn.benchmark = True
    model = model.to(config.device)
    criterion = criterion(priors_cxcy=model.priors_cxcy,
                          config=config).to(config.device)

    # create logger to track training results
    now = datetime.now()
    date_time = now.strftime("%m-%d-%Y_%H-%M-%S")
    config.tb_logger = SummaryWriter(config.event_path)
    config.logger.info('args: {}'.format(pprint.pformat(args)))
    config.logger.info('config: {}'.format(pprint.pformat(config)))

    epochs = iterations // (len(train_dataset) // config.batch_size)

    str_print = 'Total training epochs: {}, dataset size: {}, training starts ......'.format(
        epochs, len(train_dataset))
    config.logger.info(str_print)

    # Epochs
    best_mAP = -1.
    # config.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs, config.optimizer['min_lr'])
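    # MultiStepLR scales the LR by 'lr_decay' at each epoch listed in 'lr_step'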
    config.scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        config.optimizer['lr_step'],
        gamma=config.optimizer['lr_decay'])

    for epoch in range(start_epoch, epochs):
        config.tb_logger.add_scalar('learning_rate',
                                    optimizer.param_groups[0]['lr'], epoch)

        # evaluate(test_loader, model, optimizer, config=config)

        train(train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch,
              config=config)

        config.scheduler.step()

        # Save checkpoint
        if (epoch > 0 and epoch % val_freq == 0) or epoch == 1:
            _, current_mAP = evaluate(test_loader,
                                      model,
                                      optimizer,
                                      config=config)
            config.tb_logger.add_scalar('mAP', current_mAP, epoch)
            if current_mAP > best_mAP:
                save_checkpoint(
                    epoch,
                    model,
                    optimizer,
                    name='{}/{}_{}_checkpoint_epoch-{}.pth.tar'.format(
                        config.save_path, config.model['arch'].lower(),
                        config.data_name.lower(), epoch))
                best_mAP = current_mAP

    # Save the last checkpoint if it is better
    _, current_mAP = evaluate(test_loader, model, optimizer, config=config)
    config.tb_logger.add_scalar('mAP', current_mAP, epoch)
    if current_mAP > best_mAP:
        save_checkpoint(epoch,
                        model,
                        optimizer,
                        name='{}/{}_{}_checkpoint_epoch-{}.pth.tar'.format(
                            config.save_path, config.model['arch'].lower(),
                            config.data_name.lower(), epoch))
Example 3
def main():
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.safe_load(f)
    config = EasyDict(config)
    config.model_type = args.model_type

    config.save_path = os.path.join(os.path.dirname(args.config), 'snapshots')
    if not os.path.exists(config.save_path):
        os.mkdir(config.save_path)
    config.log_path = os.path.join(os.path.dirname(args.config), 'logs')
    if not os.path.exists(config.log_path):
        os.mkdir(config.log_path)
    config.event_path = os.path.join(os.path.dirname(args.config), 'events')
    if not os.path.exists(config.event_path):
        os.mkdir(config.event_path)

    batch_size = config.batch_size
    config.num_iter_flag = batch_size // config.internal_batchsize
    with open(config.label_path, 'r') as j:
        config.label_map = json.load(j)

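    # Reverse lookup of label_map, keyed by the stringified class index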
    config.rev_coco_label_map = {
        str(v): k
        for k, v in config.label_map.items()
    }

    config.n_classes = len(
        config.label_map)  # number of different types of objects

    workers = config.workers

    if torch.cuda.device_count() < 2:
        config.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
    else:
        if config.device == 1:  # only for 2 GPUs max
            config.device = torch.device(
                "cuda:1" if torch.cuda.is_available() else "cpu")
        else:
            config.device = torch.device(
                "cuda:0" if torch.cuda.is_available() else "cpu")

    val_data_folder = config.val_data_root

    # Custom dataloaders
    if config.data_name.upper() == 'COCO':
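        # pycocotools COCO API handle, used later for COCO-style evaluation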
        config.coco = COCO(config.annotation_file)
        test_dataset = COCO17Dataset(val_data_folder,
                                     split='val',
                                     config=config)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config.batch_size,
            shuffle=False,
            collate_fn=test_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False)
    elif config.data_name.upper() == 'VOC':
        test_dataset = PascalVOCDataset(val_data_folder,
                                        split='val',
                                        config=config)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config.batch_size,
            shuffle=False,
            collate_fn=test_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False)
    elif config.data_name.upper() == 'DETRAC':
        test_dataset = DetracDataset(val_data_folder,
                                     split='val',
                                     config=config)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config.batch_size,
            shuffle=False,
            collate_fn=test_dataset.collate_fn,
            num_workers=workers,
            pin_memory=False)
    else:
        raise NotImplementedError

    assert args.load_path is not None
    checkpoint = torch.load(args.load_path)
    model = checkpoint['model']
    saved_epoch = checkpoint['epoch']
    model = model.to(config.device)
    optimizer = checkpoint['optimizer']
    print('Evaluate model from checkpoint %s at epoch %d.\n' %
          (args.load_path, saved_epoch))

    now = datetime.now()
    date_time = now.strftime("%m-%d-%Y_%H-%M-%S")
    config.logger = create_logger(
        'global_logger',
        os.path.join(
            config.log_path,
            'eval_result_{}_{}.txt'.format(config.model['arch'], date_time)))
    print('Length of Testing Dataset:', len(test_dataset))
    print('evaluate checkpoint: ', args.load_path, ' at epoch: ', saved_epoch)
    evaluate(test_loader, model, optimizer, config=config)
Example 4
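# Minimal preamble sketch (assumed, not part of the original snippet): the fragment
# below relies on imports and module-level settings defined elsewhere in the script.
import torch
import torch.utils.data

from datasets import PascalVOCDataset  # hypothetical import path for the dataset class

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data_folder = './data'   # assumed location of the test split data files
keep_difficult = True    # assumed: keep objects flagged as 'difficult' during evaluation
batch_size = 32          # assumed evaluation batch size
workers = 4              # assumed number of DataLoader workers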
# device = 'cpu'
print(device, 'available for usage.')
# checkpoint = './checkpoint_ssd300.pth.tar'
checkpoint = './checkpoints/my_checkpoint_deform300_b32.pth.tar'

# Load model checkpoint that is to be evaluated
checkpoint = torch.load(checkpoint)
model = checkpoint['model']
model = model.to(device)

# Switch to eval mode
model.eval()

# Load test data
test_dataset = PascalVOCDataset(data_folder,
                                split='test',
                                keep_difficult=keep_difficult)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          collate_fn=test_dataset.collate_fn,
                                          num_workers=workers,
                                          pin_memory=False)


def evaluate(test_loader, model):
    """
    Evaluate.

    :param test_loader: DataLoader for test data
    :param model: model