def main():
    if config.output_dir is None:
        config.output_dir = 'output'
    if config.restart_training:
        shutil.rmtree(config.output_dir, ignore_errors=True)
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    logger = setup_logger(os.path.join(config.output_dir, 'train_log'))
    logger.info(config.print())

    torch.manual_seed(config.seed)  # set the random seed for the CPU
    if config.gpu_id is not None and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logger.info('train with gpu {} and pytorch {}'.format(
            config.gpu_id, torch.__version__))
        device = torch.device("cuda:0")
        torch.cuda.manual_seed(config.seed)  # set the random seed for the current GPU
        torch.cuda.manual_seed_all(config.seed)  # set the random seed for all GPUs
    else:
        logger.info('train with cpu and pytorch {}'.format(torch.__version__))
        device = torch.device("cpu")

    train_data = TibetanDataset(config.json_path,
                                data_shape=config.data_shape,
                                n=config.n,
                                m=config.m,
                                transform=transforms.ToTensor(),
                                base_path=config.base_path)
    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=config.train_batch_size,
                                   shuffle=True,
                                   num_workers=int(config.workers))

    writer = SummaryWriter(config.output_dir)
    model = PSENet(backbone=config.backbone,
                   pretrained=config.pretrained,
                   result_num=config.n,
                   scale=config.scale)
    if not config.pretrained and not config.restart_training:
        model.apply(weights_init)

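    # use DataParallel across all visible GPUs when more than one is available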
    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    # dummy_input = torch.autograd.Variable(torch.Tensor(1, 3, 600, 800).to(device))
    # writer.add_graph(models=models, input_to_model=dummy_input)
    criterion = PSELoss(Lambda=config.Lambda,
                        ratio=config.OHEM_ratio,
                        reduction='mean')
    # optimizer = torch.optim.SGD(models.parameters(), lr=config.lr, momentum=0.99)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
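    # optionally resume model weights, optimizer state and the epoch counter from a checkpoint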
    if config.checkpoint != '' and not config.restart_training:
        start_epoch = load_checkpoint(config.checkpoint, model, logger, device,
                                      optimizer)
        start_epoch += 1
        # MultiStepLR needs 'initial_lr' in each param group when last_epoch != -1;
        # the last completed epoch is start_epoch - 1
        for group in optimizer.param_groups:
            group.setdefault('initial_lr', config.lr)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            config.lr_decay_step,
            gamma=config.lr_gamma,
            last_epoch=start_epoch - 1)
    else:
        start_epoch = config.start_epoch
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         config.lr_decay_step,
                                                         gamma=config.lr_gamma)

    all_step = len(train_loader)
    logger.info('train dataset has {} samples, {} batches in dataloader'.format(
        len(train_data), all_step))
    epoch = 0
    best_model = {'recall': 0, 'precision': 0, 'f1': 0, 'models': ''}
    try:
        for epoch in range(start_epoch, config.epochs):
            start = time.time()
            train_loss, lr = train_epoch(model, optimizer, scheduler,
                                         train_loader, device, criterion,
                                         epoch, all_step, writer, logger)
            logger.info(
                '[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
                    epoch, config.epochs, train_loss,
                    time.time() - start, lr))
            # net_save_path = '{}/PSENet_{}_loss{:.6f}.pth'.format(
            #     config.output_dir, epoch, train_loss)
            # save_checkpoint(net_save_path, model, optimizer, epoch, logger)
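            # evaluate every 4 epochs while the loss is between 0.3 and 0.4,
            # and every epoch once it drops below 0.3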
            if (0.3 < train_loss < 0.4 and epoch % 4 == 0) or train_loss < 0.3:
                recall, precision, f1 = merge_eval(model=model,
                                                   save_path=os.path.join(
                                                       config.output_dir,
                                                       'output'),
                                                   test_path=config.testroot,
                                                   device=device,
                                                   base_path=config.base_path,
                                                   use_sub=config.use_sub)
                logger.info(
                    'test: recall: {:.6f}, precision: {:.6f}, f1: {:.6f}'.
                    format(recall, precision, f1))

                net_save_path = '{}/PSENet_{}_loss{:.6f}_r{:.6f}_p{:.6f}_f1{:.6f}.pth'.format(
                    config.output_dir, epoch, train_loss, recall, precision,
                    f1)
                save_checkpoint(net_save_path, model, optimizer, epoch, logger)
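                # new best F1: replace the previous Best_* checkpoint and
                # clean up intermediate PSENet_* checkpoints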
                if f1 > best_model['f1']:
                    best_path = glob.glob(config.output_dir + '/Best_*.pth')
                    for b_path in best_path:
                        if os.path.exists(b_path):
                            os.remove(b_path)

                    best_model['recall'] = recall
                    best_model['precision'] = precision
                    best_model['f1'] = f1
                    best_model['models'] = net_save_path

                    best_save_path = '{}/Best_{}_r{:.6f}_p{:.6f}_f1{:.6f}.pth'.format(
                        config.output_dir, epoch, recall, precision, f1)
                    if os.path.exists(net_save_path):
                        shutil.copyfile(net_save_path, best_save_path)
                    else:
                        save_checkpoint(best_save_path, model, optimizer,
                                        epoch, logger)

                    pse_path = glob.glob(config.output_dir + '/PSENet_*.pth')
                    for p_path in pse_path:
                        if os.path.exists(p_path):
                            os.remove(p_path)

                writer.add_scalar(tag='Test/recall',
                                  scalar_value=recall,
                                  global_step=epoch)
                writer.add_scalar(tag='Test/precision',
                                  scalar_value=precision,
                                  global_step=epoch)
                writer.add_scalar(tag='Test/f1',
                                  scalar_value=f1,
                                  global_step=epoch)
        writer.close()
    except KeyboardInterrupt:
        save_checkpoint('{}/final.pth'.format(config.output_dir), model,
                        optimizer, epoch, logger)
    finally:
        if best_model['models']:
            logger.info(best_model)
Example #2
def main():

    config.workspace = os.path.join(config.workspace_dir, config.exp_name)
    if config.restart_training:
        shutil.rmtree(config.workspace, ignore_errors=True)
    if not os.path.exists(config.workspace):
        os.makedirs(config.workspace)

    shutil.rmtree(os.path.join(config.workspace, 'train_log'),
                  ignore_errors=True)
    logger = setup_logger(os.path.join(config.workspace, 'train_log'))
    logger.info(config.print())

    torch.manual_seed(config.seed)  # set the random seed for the CPU
    if config.gpu_id is not None and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logger.info('train with gpu {} and pytorch {}'.format(
            config.gpu_id, torch.__version__))
        device = torch.device("cuda:0")
        torch.cuda.manual_seed(config.seed)  # set the random seed for the current GPU
        torch.cuda.manual_seed_all(config.seed)  # set the random seed for all GPUs
    else:
        logger.info('train with cpu and pytorch {}'.format(torch.__version__))
        device = torch.device("cpu")

    train_data = ICDAR17(config.trainroot,
                         data_shape=config.data_shape,
                         n=config.kernel_num,
                         m=config.min_scale)
    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=config.train_batch_size,
                                   shuffle=True,
                                   num_workers=int(config.workers))

    # writer = SummaryWriter(config.output_dir)
    model = PSENet(backbone=config.backbone,
                   pretrained=config.pretrained,
                   result_num=config.kernel_num,
                   scale=config.scale)
    if not config.pretrained and not config.restart_training:
        model.apply(weights_init)

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    criterion = PSELoss(Lambda=config.Lambda,
                        ratio=config.OHEM_ratio,
                        reduction='mean')
    # optimizer = torch.optim.SGD(models.parameters(), lr=config.lr, momentum=0.99)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
    if config.checkpoint != '' and not config.restart_training:
        start_epoch = load_checkpoint(config.checkpoint, model, logger, device,
                                      optimizer)
        start_epoch += 1
        # MultiStepLR needs 'initial_lr' in each param group when last_epoch != -1;
        # the last completed epoch is start_epoch - 1
        for group in optimizer.param_groups:
            group.setdefault('initial_lr', config.lr)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            config.lr_decay_step,
            gamma=config.lr_gamma,
            last_epoch=start_epoch - 1)
    else:
        start_epoch = config.start_epoch
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         config.lr_decay_step,
                                                         gamma=config.lr_gamma)

    all_step = len(train_loader)
    logger.info('train dataset has {} samples, {} batches in dataloader'.format(
        len(train_data), all_step))
    epoch = 0
    f1 = 0
    try:
        for epoch in range(start_epoch, config.epochs):
            start = time.time()
            train_loss, lr = train_epoch(model, optimizer, scheduler,
                                         train_loader, device, criterion,
                                         epoch, all_step, logger)
            logger.info(
                '[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
                    epoch, config.epochs, train_loss,
                    time.time() - start, lr))

            save_path = '{}/epoch_model.pth'.format(config.workspace)
            save_checkpoint(save_path, model, optimizer, epoch, logger)

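            # evaluate every 10 epochs once training has reached epoch 50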
            if epoch >= 50 and epoch % 10 == 0:
                f_score_new = eval(model,
                                   os.path.join(config.workspace, 'output'),
                                   config.testroot, device)
                logger.info('  ---------------------------------------')
                logger.info('     test: f_score : {:.6f}'.format(f_score_new))
                logger.info('  ---------------------------------------')

                if f_score_new > f1:
                    f1 = f_score_new
                    best_save_path = '{}/Best_model_{:.6f}.pth'.format(
                        config.workspace, f1)

                    save_checkpoint(best_save_path, model, optimizer, epoch,
                                    logger)

                # writer.add_scalar(tag='Test/recall', scalar_value=recall, global_step=epoch)
                # writer.add_scalar(tag='Test/precision', scalar_value=precision, global_step=epoch)
                # writer.add_scalar(tag='Test/f1', scalar_value=f1, global_step=epoch)
        # writer.close()
    except KeyboardInterrupt:
        save_checkpoint('{}/final.pth'.format(config.workspace), model,
                        optimizer, epoch, logger)
Example #3
def main():
    if config.output_dir is None:
        config.output_dir = 'output'
    if config.restart_training:
        shutil.rmtree(config.output_dir, ignore_errors=True)
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    logger = setup_logger(os.path.join(config.output_dir, 'train_log'))
    logger.info(config.print())

    torch.manual_seed(config.seed)  # set the random seed for the CPU
    if config.gpu_id is not None and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logger.info('train with gpu {} and pytorch {}'.format(
            config.gpu_id, torch.__version__))
        device = torch.device("cuda:0")
        torch.cuda.manual_seed(config.seed)  # set the random seed for the current GPU
        torch.cuda.manual_seed_all(config.seed)  # set the random seed for all GPUs
    else:
        logger.info('train with cpu and pytorch {}'.format(torch.__version__))
        device = torch.device("cpu")

    train_data = MyDataset(args.train_dir,
                           data_shape=config.data_shape,
                           n=config.n,
                           m=config.m,
                           transform=transforms.ToTensor())
    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=int(config.workers))

    writer = SummaryWriter(config.output_dir)
    model = PSENet(backbone=config.backbone,
                   pretrained=config.pretrained,
                   result_num=config.n,
                   scale=config.scale)
    if not config.pretrained and not config.restart_training:
        model.apply(weights_init)

    if args.resume_model:
        resume_model(model, args.resume_model)

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    # dummy_input = torch.autograd.Variable(torch.Tensor(1, 3, 600, 800).to(device))
    # writer.add_graph(models=models, input_to_model=dummy_input)
    criterion = PSELoss(Lambda=config.Lambda,
                        ratio=config.OHEM_ratio,
                        reduction='mean')
    # optimizer = torch.optim.SGD(models.parameters(), lr=config.lr, momentum=0.99)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    if config.checkpoint != '' and not config.restart_training:
        start_epoch = load_checkpoint(config.checkpoint, model, logger, device,
                                      optimizer)
        start_epoch += 1
        # MultiStepLR needs 'initial_lr' in each param group when last_epoch != -1;
        # the last completed epoch is start_epoch - 1
        for group in optimizer.param_groups:
            group.setdefault('initial_lr', args.lr)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            config.lr_decay_step,
            gamma=config.lr_gamma,
            last_epoch=start_epoch - 1)
    else:
        start_epoch = config.start_epoch
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         config.lr_decay_step,
                                                         gamma=config.lr_gamma)

    all_step = len(train_loader)
    logger.info('train dataset has {} samples, {} batches in dataloader'.format(
        len(train_data), all_step))
    epoch = 0
    best_model = {'recall': 0, 'precision': 0, 'f1': 0, 'models': ''}
    try:
        for epoch in range(start_epoch, args.epochs):
            start = time.time()
            train_loss, lr = train_epoch(model, optimizer, scheduler,
                                         train_loader, device, criterion,
                                         epoch, all_step, writer, logger)
            logger.info(
                '[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
                    epoch, config.epochs, train_loss,
                    time.time() - start, lr))

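            # save the model weights every args.save_per_epoch epochs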
            if epoch % args.save_per_epoch == 0:
                save_model(model, epoch)
        writer.close()

    except KeyboardInterrupt:
        save_checkpoint('{}/final.pth'.format(config.output_dir), model,
                        optimizer, epoch, logger)
    finally:
        if best_model['models']:
            logger.info(best_model)
def main():

    config.workspace = os.path.join(config.workspace_dir, config.exp_name)
    # if config.restart_training:
    #     shutil.rmtree(config.workspace, ignore_errors=True)
    if not os.path.exists(config.workspace):
        os.makedirs(config.workspace)

    logger = setup_logger(os.path.join(config.workspace, 'train_log'))
    logger.info(config.print())

    torch.manual_seed(config.seed)  # set the random seed for the CPU
    if config.gpu_id is not None and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logger.info('train with gpu {} and pytorch {}'.format(
            config.gpu_id, torch.__version__))
        device = torch.device("cuda:0")
        torch.cuda.manual_seed(config.seed)  # set the random seed for the current GPU
        torch.cuda.manual_seed_all(config.seed)  # set the random seed for all GPUs
    else:
        logger.info('train with cpu and pytorch {}'.format(torch.__version__))
        device = torch.device("cpu")

    train_data = ICDAR15(config.trainroot,
                         config.is_pseudo,
                         data_shape=config.data_shape,
                         n=config.kernel_num,
                         m=config.m)
    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=config.train_batch_size,
                                   shuffle=True,
                                   num_workers=int(config.workers))

    # writer = SummaryWriter(config.output_dir)
    model = PSENet(backbone=config.backbone,
                   pretrained=config.pretrained,
                   result_num=config.kernel_num,
                   scale=config.scale)
    if not config.pretrained and not config.restart_training:
        model.apply(weights_init)

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.to(device)

    criterion = PSELoss(Lambda=config.Lambda,
                        ratio=config.OHEM_ratio,
                        reduction='mean')

    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
    if config.checkpoint != '' and not config.restart_training:
        start_epoch = load_checkpoint(config.checkpoint, model, logger, device,
                                      optimizer)
        start_epoch += 1
        # MultiStepLR needs 'initial_lr' in each param group when last_epoch != -1;
        # the last completed epoch is start_epoch - 1
        for group in optimizer.param_groups:
            group.setdefault('initial_lr', config.lr)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            config.lr_decay_step,
            gamma=config.lr_gamma,
            last_epoch=start_epoch - 1)
        logger.info('resume from {}, epoch={}'.format(config.checkpoint,
                                                      start_epoch))
    else:
        start_epoch = config.start_epoch
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         config.lr_decay_step,
                                                         gamma=config.lr_gamma)

    all_step = len(train_loader)
    logger.info('train dataset has {} samples, {} batches in dataloader'.format(
        len(train_data), all_step))
    epoch = 0
    f1 = 0

    for epoch in range(start_epoch, config.epochs):
        start = time.time()
        train_loss, lr = train_epoch(model, optimizer, scheduler, train_loader,
                                     device, criterion, epoch, all_step,
                                     logger)
        logger.info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
            epoch, config.epochs, train_loss,
            time.time() - start, lr))

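        # save an epoch-numbered checkpoint and refresh the rolling latest.pth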
        if epoch % config.save_interval == 0:
            save_path = '{}/epoch_{}.pth'.format(config.workspace, epoch)
            latest_path = '{}/latest.pth'.format(config.workspace)
            save_checkpoint(save_path, model, optimizer, epoch, logger)
            save_checkpoint(latest_path, model, optimizer, epoch, logger)
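
# Hypothetical entry point (not part of the original snippets): each example
# assumes a project-level `config` object (and, for Example #3, parsed `args`)
# is available before main() is called.
if __name__ == '__main__':
    main()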