Example #1
0
def main():
    """Train a text-recognition model end to end.

    Reads the run configuration via ``parse_args()``, builds the network,
    optimizer, scheduler, loss and data loaders from it, optionally resumes
    from a checkpoint, and hands everything to ``train()``.
    """
    # ===> load config-file parameters
    cfg = parse_args()
    os.makedirs(cfg.train_options['checkpoint_save_dir'], exist_ok=True)
    logger = get_logger('torchocr', log_file=os.path.join(cfg.train_options['checkpoint_save_dir'], 'train.log'))

    # ===> log the training configuration
    train_options = cfg.train_options
    logger.info(cfg)
    # ===> choose the device; fall back to CPU when CUDA is unavailable or not requested
    to_use_device = torch.device(
        train_options['device'] if torch.cuda.is_available() and ('cuda' in train_options['device']) else 'cpu')
    set_random_seed(cfg['SEED'], 'cuda' in train_options['device'], deterministic=True)

    # ===> build network
    net = build_model(cfg['model'])

    # ===> initialize weights and deploy the model to the target device
    net.apply(weight_init)
    # if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)
    net = net.to(to_use_device)
    net.train()

    # ===> get fine tune layers
    # NOTE(review): the returned value is never used — the optimizer below is
    # built from net.parameters(). The call is kept in case
    # get_fine_tune_params() freezes layers as a side effect; confirm and
    # drop it if it does not.
    params_to_train = get_fine_tune_params(net, train_options['fine_tune_stage'])
    # ===> solver and lr scheduler
    optimizer = build_optimizer(net.parameters(), cfg['optimizer'])
    scheduler = build_scheduler(optimizer, cfg['lr_scheduler'])

    # ===> whether to resume from checkpoint
    resume_from = train_options['resume_from']
    if resume_from:
        net, _resumed_optimizer, global_state = load_checkpoint(net, resume_from, to_use_device, optimizer,
                                                                third_name=train_options['third_party_name'])
        if _resumed_optimizer:
            optimizer = _resumed_optimizer
        logger.info(f'net resume from {resume_from}')
    else:
        global_state = {}
        # plain string (the original was an f-string without placeholders)
        logger.info('net resume from scratch.')

    # ===> loss function
    loss_func = build_loss(cfg['loss'])
    loss_func = loss_func.to(to_use_device)

    # replace the alphabet file path with the file's contents (one joined string)
    with open(cfg.dataset.alphabet, 'r', encoding='utf-8') as file:
        cfg.dataset.alphabet = ''.join([s.strip('\n') for s in file.readlines()])

    # ===> data loaders; train and eval share the same alphabet
    cfg.dataset.train.dataset.alphabet = cfg.dataset.alphabet
    train_loader = build_dataloader(cfg.dataset.train)
    cfg.dataset.eval.dataset.alphabet = cfg.dataset.alphabet
    eval_loader = build_dataloader(cfg.dataset.eval)

    # ===> train
    train(net, optimizer, scheduler, loss_func, train_loader, eval_loader, to_use_device, cfg, global_state, logger)
Example #2
0
def main():
    """Train a distillation-style model: build everything from the config and run.

    Expects the built model (after ``nn.DataParallel`` wrapping) to expose a
    ``model_dict`` with a 'Teacher' entry, which is kept in eval mode while
    the rest of the network trains.
    """
    # ===> load config-file parameters
    cfg = parse_args()
    os.makedirs(cfg.train_options['checkpoint_save_dir'], exist_ok=True)
    # ===> log the training configuration
    logger = get_logger('torchocr', log_file=os.path.join(cfg.train_options['checkpoint_save_dir'], 'train.log'))
    logger.info(cfg)

    # ===> choose the device; fall back to CPU when CUDA is unavailable or not requested
    train_options = cfg.train_options
    to_use_device = torch.device(
        train_options['device'] if torch.cuda.is_available() and ('cuda' in train_options['device']) else 'cpu')
    set_random_seed(cfg['SEED'], 'cuda' in train_options['device'], deterministic=True)

    # ===> build network
    net = build_model(cfg['model'])

    # ===> deploy the model to the target device
    net = nn.DataParallel(net)
    net = net.to(to_use_device)


    # ===> create the evaluation metric
    metric = build_metric(cfg['metric'])

    # ===> get fine tune layers
    # params_to_train = get_fine_tune_params(net, train_options['fine_tune_stage'])
    # ===> solver and lr scheduler
    optimizer = build_optimizer(net.parameters(), cfg['optimizer'])
    net.train()
    # keep the 'Teacher' branch in eval mode (presumably the frozen
    # distillation teacher — confirm against the model definition)
    net.module.model_dict['Teacher'].eval()
    # ===> whether to resume from checkpoint
    resume_from = train_options['resume_from']
    if resume_from:
        net, _resumed_optimizer, global_state = load_checkpoint(net, resume_from, to_use_device, optimizer)
        if _resumed_optimizer:
            optimizer = _resumed_optimizer
        logger.info(f'net resume from {resume_from}')
    else:
        global_state = {}
        # plain string (the original was an f-string without placeholders)
        logger.info('net resume from scratch.')

    # ===> loss function
    loss_func = build_loss(cfg['loss'])
    loss_func = loss_func.to(to_use_device)

    # ===> data loaders
    train_loader = build_dataloader(cfg.dataset.train)
    eval_loader = build_dataloader(cfg.dataset.eval)

    # ===> post_process
    post_process = build_post_process(cfg['post_process'])
    # ===> train
    train(net, optimizer, loss_func, train_loader, eval_loader, to_use_device, cfg, global_state, logger, post_process, metric)
Example #3
0
def main():
    """Train a detection/recognition model end to end.

    Builds network, optimizer, loss, data loaders and post-processor from the
    parsed config, optionally resumes (tracking ``current_epoch``), and calls
    ``train()``.
    """
    # ===> load config-file parameters
    cfg = parse_args()
    os.makedirs(cfg.train_options['checkpoint_save_dir'], exist_ok=True)
    logger = get_logger('torchocr', log_file=os.path.join(cfg.train_options['checkpoint_save_dir'], 'train.log'))

    # ===> log the training configuration
    train_options = cfg.train_options
    logger.info(cfg)
    # ===> choose the device; fall back to CPU when CUDA is unavailable or not requested
    to_use_device = torch.device(
        train_options['device'] if torch.cuda.is_available() and ('cuda' in train_options['device']) else 'cpu')
    set_random_seed(cfg['SEED'], 'cuda' in train_options['device'], deterministic=True)

    # ===> build network
    net = build_model(cfg['model'])

    # ===> initialize weights and deploy the model to the target device
    # net.apply(weight_init) # comment this out when using a pretrained model
    # if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)
    net = net.to(to_use_device)
    net.train()

    # ===> get fine tune layers
    # NOTE(review): the returned value is never used — the optimizer below is
    # built from net.parameters(). The call is kept in case
    # get_fine_tune_params() freezes layers as a side effect; confirm and
    # drop it if it does not.
    params_to_train = get_fine_tune_params(net, train_options['fine_tune_stage'])
    # ===> solver and lr scheduler
    optimizer = build_optimizer(net.parameters(), cfg['optimizer'])

    # ===> whether to resume from checkpoint
    resume_from = train_options['resume_from']
    if resume_from:
        net, current_epoch, _resumed_optimizer = load_checkpoint(net, resume_from, to_use_device, optimizer,
                                                                 third_name=train_options['third_party_name'])
        if _resumed_optimizer:
            optimizer = _resumed_optimizer
        logger.info(f'net resume from {resume_from}')
    else:
        current_epoch = 0
        # plain string (the original was an f-string without placeholders)
        logger.info('net resume from scratch.')

    # ===> loss function
    loss_func = build_loss(cfg['loss'])
    loss_func = loss_func.to(to_use_device)

    # ===> data loaders
    train_loader = build_dataloader(cfg.dataset.train)
    eval_loader = build_dataloader(cfg.dataset.eval)

    # post_process
    post_process = build_post_process(cfg['post_process'])
    # ===> train
    train(net, optimizer, loss_func, train_loader, eval_loader, to_use_device, cfg, current_epoch, logger, post_process)
Example #4
0
def main():
    """Train a detection model from a JSON config file.

    Parses ``--config`` from the command line, loads the JSON into a
    ``SimpleNamespace`` tree, builds all training components from it, and
    calls ``train()``.
    """
    # ===> load config-file parameters
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('--config',
                        type=str,
                        default='config/det.json',
                        help='train config file path')
    cfg = parser.parse_args()
    # object_hook turns every JSON object into a SimpleNamespace for
    # attribute-style access (cfg.train_options.device, ...)
    with open(cfg.config) as fin:
        cfg = json.load(fin, object_hook=lambda d: SimpleNamespace(**d))

    # cfg = parse_args()
    os.makedirs(cfg.train_options.checkpoint_save_dir, exist_ok=True)
    logger = get_logger('torchocr',
                        log_file=os.path.join(
                            cfg.train_options.checkpoint_save_dir,
                            'train.log'))

    # ===> log the training configuration
    train_options = cfg.train_options
    logger.info(cfg)
    # ===> choose the device; fall back to CPU when CUDA is unavailable or not requested
    to_use_device = torch.device(
        train_options.device if torch.cuda.is_available() and (
            'cuda' in train_options.device) else 'cpu')
    # set_random_seed(cfg.SEED, 'cuda' in train_options.device, deterministic=True)

    # ===> build network
    net = build_model(cfg.model)

    # ===> initialize weights and deploy the model to the target device
    # net.apply(weight_init) # comment this out when using a pretrained model
    # if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)
    net = net.to(to_use_device)
    net.train()

    # ===> get fine tune layers
    # NOTE(review): the returned value is never used — the optimizer below is
    # built from net.parameters(). The call is kept in case
    # get_fine_tune_params() freezes layers as a side effect; confirm and
    # drop it if it does not.
    params_to_train = get_fine_tune_params(net, train_options.fine_tune_stage)
    # ===> solver and lr scheduler
    optimizer = build_optimizer(net.parameters(), cfg.optimizer)

    # ===> whether to resume from checkpoint
    resume_from = train_options.resume_from
    if resume_from:
        net, _resumed_optimizer, global_state = load_checkpoint(
            net,
            resume_from,
            to_use_device,
            optimizer,
            third_name=train_options.third_party_name)
        if _resumed_optimizer:
            optimizer = _resumed_optimizer
        logger.info(f'net resume from {resume_from}')
    else:
        global_state = {}
        # plain string (the original was an f-string without placeholders)
        logger.info('net resume from scratch.')

    # ===> loss function
    loss_func = build_loss(cfg.loss)
    loss_func = loss_func.to(to_use_device)

    # ===> data loaders
    train_loader = build_dataloader(cfg.dataset.train)
    eval_loader = build_dataloader(cfg.dataset.eval)

    # post_process
    post_process = build_post_process(cfg.post_process)
    # ===> train
    train(net, optimizer, loss_func, train_loader, eval_loader, to_use_device,
          cfg, global_state, logger, post_process)
Example #5
0
# Fragment: resume from a checkpoint, then build loss/data/post-process and
# start training. Relies on names defined earlier in the full script
# (net, resume_from, to_use_device, optimizer, cfg, train_options, logger).

# NOTE(review): model_dict and pretrained_dict are never used below — they
# are leftovers from the commented-out state-dict dump, and this torch.load
# duplicates the load that load_checkpoint() performs. Safe to delete once
# the debug block below is no longer needed.
model_dict = net.state_dict()
ckpt = torch.load(resume_from, map_location='cpu')
pretrained_dict = ckpt['state_dict']

# Debug helper (disabled): dump the model/pretrained state-dict keys to a
# text file to diagnose key mismatches when loading third-party weights.
# txt_file = os.path.join('test_results/', 'pretrainedmodel_state.txt')
# txt_f = open(txt_file, 'w')

# txt_f.write('############## Model Dict ################' + '\n')
# for j in model_dict:
#     txt_f.write(str(j) +'\n' )
# # txt_f.write('############## Pretrained Dict ################' + '\n')
# # for k in pretrained_dict:
# #     txt_f.write(str(k) +'\n')

# txt_f.close()

# Load weights only (_optimizers=None), so the optimizer state is not resumed.
net, _, global_state = load_checkpoint(net, resume_from, to_use_device, _optimizers=None, third_name=train_options['third_party_name'])

# ===> loss function
loss_func = build_loss(cfg['loss'])
loss_func = loss_func.to(to_use_device)

# ===> data loader
train_loader = build_dataloader(cfg.dataset.train)
eval_loader = build_dataloader(cfg.dataset.eval)

# post_process
post_process = build_post_process(cfg['post_process'])
# ===> train
train(net, optimizer, loss_func, train_loader, eval_loader, to_use_device, cfg, global_state, logger, post_process)
Example #6
0
def main():
    """Train a recognition/classification model from a JSON config file.

    Parses ``-c/--config``, loads the JSON into a ``SimpleNamespace`` tree,
    builds network, optimizer, scheduler, loss and data loaders, and calls
    ``train()``. Device placement is intentionally disabled in this variant
    (the literal ``0`` is passed where the device normally goes).
    """
    # ===> load config-file parameters
    # cfg = parse_args()
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('-c', '--config', type=str, default='config/recrbt3.json', help='train config file path')
    args = parser.parse_args()
    # object_hook turns every JSON object into a SimpleNamespace for
    # attribute-style access (cfg.train_options.device, ...)
    with open(args.config) as fin:
        cfg = json.load(fin, object_hook=lambda d: SimpleNamespace(**d))

    os.makedirs(cfg.train_options.checkpoint_save_dir, exist_ok=True)
    logger = get_logger('torchocr', log_file=os.path.join(cfg.train_options.checkpoint_save_dir, 'train.log'))

    # ===> log the training configuration
    train_options = cfg.train_options
    logger.info(cfg)
    # ===> device selection disabled in this variant
    # to_use_device = torch.device(
    #     train_options.device if torch.cuda.is_available() and ('cuda' in train_options.device) else 'cpu')
    # set_random_seed(cfg.SEED, 'cuda' in train_options.device, deterministic=True)

    # ===> build network
    net = build_model(cfg.model)

    # ===> model initialization / device deployment (disabled in this variant)
    # net.apply(weight_init)
    # # if torch.cuda.device_count() > 1:
    # net = nn.DataParallel(net)
    # net = net.to(to_use_device)
    net.train()

    # ===> get fine tune layers
    # params_to_train = get_fine_tune_params(net, train_options.fine_tune_stage)
    # ===> solver and lr scheduler
    optimizer = build_optimizer(net.parameters(), cfg.optimizer)
    scheduler = build_scheduler(optimizer, cfg.lr_scheduler)

    # ===> whether to resume from checkpoint
    resume_from = train_options.resume_from
    if resume_from:
        # NOTE(review): 0 is passed where other variants pass the device;
        # confirm load_checkpoint() accepts it in this code path.
        net, _resumed_optimizer, global_state = load_checkpoint(net, resume_from, 0, optimizer,
                                                                third_name=train_options.third_party_name)
        if _resumed_optimizer:
            optimizer = _resumed_optimizer
        logger.info(f'net resume from {resume_from}')
    else:
        global_state = {}
        # plain string (the original was an f-string without placeholders)
        logger.info('net resume from scratch.')

    # ===> loss function
    loss_func = build_loss(cfg.loss)
    #loss_func = loss_func.to(to_use_device)

    # Read the alphabet file once; a "dogclass.txt" file holds one label per
    # line (kept as a list), any other alphabet file is a character set
    # (joined into a single string).
    with open(cfg.dataset.alphabet, 'r', encoding='utf-8') as file:
        labels = [s.strip('\n') for s in file.readlines()]
    if "dogclass.txt" in cfg.dataset.alphabet:
        cfg.dataset.alphabet = labels
    else:
        cfg.dataset.alphabet = ''.join(labels)

    # ===> data loaders; train and eval share the same alphabet
    cfg.dataset.train.dataset.alphabet = cfg.dataset.alphabet
    train_loader = build_dataloader(cfg.dataset.train)
    cfg.dataset.eval.dataset.alphabet = cfg.dataset.alphabet
    eval_loader = build_dataloader(cfg.dataset.eval)

    # ===> train (0 passed in place of a device, matching the disabled
    # device-selection above)
    train(net, optimizer, scheduler, loss_func, train_loader, eval_loader, 0, cfg, global_state, logger)