Example no. 1
def train():
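    """Train a PPO agent on ENV_NAME: set up logging, observation scaling and
    gym's Monitor recorder, then run batches of episodes until NUM_EPISODES is
    reached or a graceful-exit signal is received."""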
    g_exit = GracefulExit()
    timestamp = datetime.datetime.utcnow().strftime(TIMESTAMP_FORMAT)
    logger = Logger(ENV_NAME, timestamp)
    env = gym.make(ENV_NAME)
    dim_obs = env.observation_space.shape[0] + 1
    dim_act = env.action_space.shape[0]
    scaler = VecScaler(dim_obs)
    rec_dir = os.path.join(REC_DIR, ENV_NAME, timestamp)
    env = gym.wrappers.Monitor(env, rec_dir, force=True)
    agent = PPO(dim_obs, dim_act, GAMMA, LAMBDA, CLIP_RANGE, LR_POLICY,
                LR_VALUE_F, logger)
    run_batch(env, agent.policy, 5, scaler)
    episode = 0
    while episode < NUM_EPISODES:
        batch_size = min(MAX_BATCH, NUM_EPISODES - episode)
        trajectories, steps, mean_return = run_batch(env, agent.policy, batch_size, scaler)
        episode += batch_size
        logger.log({'_time': datetime.datetime.utcnow().strftime(TIMESTAMP_FORMAT),
                    '_episode': episode,
                    'steps': steps,
                    '_mean_return': mean_return})
        agent.update(trajectories)
        logger.write()
        if g_exit.exit:
            break
    agent.close()
    logger.close()
Example no. 2
def main():
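    """Parse command-line options, build the experiment configuration and the
    checkpoint/logger objects, then run the Trainer."""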
    now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    parser = argparse.ArgumentParser(description='Description')
    parser.add_argument('--tag', default=None, type=str, help='Experiment tag')
    parser.add_argument('--preset',
                        choices=['gan', 'syn', 'mod', 'gan+syn'],
                        default='gan',
                        type=str,
                        required=True,
                        help='Training configuration preset')
    parser.add_argument('--clear_cache',
                        action='store_true',
                        help='Remove all cache files')
    parser.add_argument('--set',
                        default=None,
                        type=str,
                        nargs=argparse.REMAINDER,
                        help='Optional settings')
    args = parser.parse_args()

    cfg = get_config(args, now, os.getcwd())
    checkpoint = Checkpoint(cfg)

    logger = Logger(cfg, checkpoint.tensorboard_path)
    logger.write('Now: ' + now)

    Trainer(cfg, checkpoint, logger).train()

    logger.close()
Example no. 3
def main(args):
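    """Seed numpy, TensorFlow and the gym environments, build the actor and
    critic networks plus Ornstein-Uhlenbeck exploration noise, optionally wrap
    a second environment with gym's Monitor for recording, and train."""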
    now = datetime.utcnow().strftime("%b_%d_%H_%M_%S")
    monitor_dir = os.path.join('videos', args['env'],
                               "no-of-update_" + str(args["no_of_updates"]),
                               "random_seed" + str(args["random_seed"]))
    logger = Logger(logname=args['env'], args=args, now=now)
    with tf.Session() as sess:
        env = gym.make(args['env'])
        monitor_env = gym.make(args['env'])
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))
        monitor_env.seed(int(args['random_seed']))

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high

        print("****** state dimension", state_dim)
        print("****** actions dimension", action_dim)
        print("****** actions high bound", action_bound)

        # Ensure action bound is symmetric
        assert (np.array_equal(env.action_space.high, -env.action_space.low))

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        if args['use_gym_monitor']:
            monitor_env = wrappers.Monitor(monitor_env,
                                           monitor_dir,
                                           force=True)

        train(sess, env, args, actor, critic, actor_noise, logger, monitor_env)
        logger.close()
        if args['use_gym_monitor']:
            # closing the Monitor wrapper finalizes the recorded videos
            monitor_env.close()
Example no. 4
def main():
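    """Set up stream/file logging, create a Robot with its scheduler, then run
    a manual loop that queues user commands until the user enters "X"."""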

    config = (Logger.Log_Config.STREAM_LOG | Logger.Log_Config.FILE_LOG)

    # Logging
    logger = Logger()

    # Robot
    robot_id = "000"
    robot_logger = logger.init("ROBOT", robot_id, config)

    my_robot = Robot(robot_id, robot_logger)
    sched = Robot_Scheduler(my_robot)
    my_robot.register_scheduler(sched)

    # ----- Manual User Loop -------
    user_id = "000"
    user_logger = logger.init("USER", user_id)
    user_option = ""

    robot_logger.info('User setup complete!')

    while user_option != "X":
        user_option = user_input(user_logger)

        my_robot.queue_command(user_option)

        time.sleep(2)

    # Clean up
    print("Exiting program...")
    logger.close()

    # log_file, log_uart, serial_port, serial_file and uart are assumed to be
    # module-level handles configured elsewhere in this script.
    if log_file:
        log_uart.close()
    if serial_port or serial_file:
        uart.close()

    sched.close()
    my_robot.close()
Example no. 5
def main(args):
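    """Train a kernel-based text detection network on CTW1500: pick a ResNet
    backbone, optionally load pretrained or resume weights, and log learning
    rate, loss, accuracy and IoU every epoch while saving checkpoints."""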
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ctw1500_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
    if args.pretrain:
        if 'synth' in args.pretrain:
            args.checkpoint += "_pretrain_synth"
        else:
            args.checkpoint += "_pretrain_ic17"

    print(('checkpoint path: %s' % args.checkpoint))
    print(('init lr: %.8f' % args.lr))
    print(('schedule: ', args.schedule))
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = 7
    min_scale = 0.4
    start_epoch = 0

    data_loader = CTW1500Loader(is_transform=True,
                                img_size=args.img_size,
                                kernel_num=kernel_num,
                                min_scale=min_scale)
    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=3,
                                               drop_last=True,
                                               pin_memory=True)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)

    model = torch.nn.DataParallel(model).cuda()

    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=0.99,
                                    weight_decay=5e-4)

    title = 'CTW1500'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.pretrain)
        model.load_state_dict(checkpoint['state_dict'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print(('\nEpoch: [%d | %d] LR: %f' %
               (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr'])))

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lr': args.lr,
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

        logger.append([
            optimizer.param_groups[0]['lr'], train_loss, train_te_acc,
            train_te_iou
        ])
    logger.close()
Example no. 6
def main(args):
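    """Train a CTW1500 text detection model with TensorFlow/Keras: choose a
    backbone, optionally restore weights, then train with SGD and save the
    weights and per-epoch metrics."""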
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ctw1500_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
    if args.pretrain:
        if 'synth' in args.pretrain:
            args.checkpoint += "_pretrain_synth"
        else:
            args.checkpoint += "_pretrain_ic17"

    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    print('schedule: ', args.schedule)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = 7
    min_scale = 0.4
    start_epoch = 0

    data_loader = CTW1500Loader(is_transform=True,
                                img_size=args.img_size,
                                kernel_num=kernel_num,
                                min_scale=min_scale)
    #train_loader = ctw_train_loader(data_loader, batch_size=args.batch_size)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)

    # resnet18 and resnet34 have no pretrained weights implemented here
    elif args.arch == "resnet18":
        model = models.resnet18(pretrained=False, num_classes=kernel_num)
    elif args.arch == "resnet34":
        model = models.resnet34(pretrained=False, num_classes=kernel_num)

    elif args.arch == "mobilenetv2":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)
    elif args.arch == "mobilenetv3large":
        model = models.mobilenetv3_large(pretrained=False,
                                         num_classes=kernel_num)

    elif args.arch == "mobilenetv3small":
        model = models.mobilenetv3_small(pretrained=False,
                                         num_classes=kernel_num)

    optimizer = tf.keras.optimizers.SGD(learning_rate=args.lr,
                                        momentum=0.99,
                                        decay=5e-4)

    title = 'CTW1500'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no checkpoint directory found!'

        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')

        model.load_weights(args.resume)

        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        optimizer = get_new_optimizer(args, optimizer, epoch)
        print(
            '\nEpoch: [%d | %d] LR: %f' %
            (epoch + 1, args.n_epoch, optimizer.get_config()['learning_rate']))

        train_loader = ctw_train_loader(data_loader,
                                        batch_size=args.batch_size)

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(train_loader, model, dice_loss,\
                                                                                   optimizer, epoch)

        model.save_weights('%s%s' % (args.checkpoint, '/model_tf/weights'))

        logger.append([
            optimizer.get_config()['learning_rate'], train_loss, train_te_acc,
            train_te_iou
        ])
    logger.close()
Example no. 7
def main(args):
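    """Train a LISTA network built around the loaded sensing matrix A: Adam
    optimization with a cosine-annealing schedule, NMSE validation every 5
    epochs, and saving of the best-performing checkpoint."""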
    start_epoch = 0
    if args.checkpoint == '':
        args.checkpoint = "finetune_lista_checkpoint/n%d_s%d_p%d_snr%d/%s_bs_%d_ep_%d/measurements%d"\
        %(args.sample_nums, args.antenna_x*args.antenna_y, args.fault_prob*100, args.SNR, args.arch, args.batch_size, args.n_epoch, args.measurements)
    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    #print('schedule: ', args.schedule)

    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)
    data_loader = ListaDataLoader(args)
    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=0,
                                               drop_last=True,
                                               pin_memory=True)
    # sensing matrix
    A = ArrayResposeLoad(measurements=args.measurements,
                         antenna_x=args.antenna_x,
                         antenna_y=args.antenna_y)

    if args.arch == "LISTA":
        model = models.LISTA(A=A,
                             T=args.T,
                             lam=args.lam,
                             untied=args.untied,
                             coord=args.coord)

    model = torch.nn.DataParallel(model).cuda()
    #for p,v in model.named_parameters():
    #    pdb.set_trace()

    #if hasattr(model.module, 'optimizer'):
    #    optimizer = model.module.optimizer
    #else:
    #optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    #optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)

    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.pretrain)
        model.load_state_dict(checkpoint['state_dict'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'Learning Rate', 'nmse'])
    elif args.resume:
        print("Resuming from checkpoint")
        assert os.path.isfile(
            args.resume), 'Error: no resume checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        #logger.set_names(['Epoch', 'Learning Rate', 'Train Loss'])
        logger.set_names(['Epoch', 'Learning Rate', 'nmse'])

    bestResult = np.inf
    lr_scheduler = CosineAnnealingLR(optimizer, T_max=70, eta_min=5e-6)
    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr']))

        nmse = train(train_loader, model, optimizer, epoch)
        lr_scheduler.step()
        #save_checkpoint({
        #    'epoch':epoch+1,
        #    'state_dict': model.state_dict(),
        #    'lr': args.lr,
        #    'optimizer': optimizer.state_dict(),
        #    }, epoch+1, checkpoint=args.checkpoint)

        if args.need_validate and (epoch + 1) % 5 == 0:
            print('Validating the model')
            avgNmse = validate(model, args)
            print('The normalized mse in val set is:{nmse:.6f}'.format(
                nmse=avgNmse))

            if avgNmse < bestResult:
                print('Save the best model!')
                bestResult = avgNmse
                save_best_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'lr': args.lr,
                        'optimizer': optimizer.state_dict(),
                    },
                    checkpoint=args.checkpoint)

        logger.append([epoch + 1, optimizer.param_groups[0]['lr'], nmse])
    logger.close()
Example no. 8
def main(args):
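    """Train a PSENet text segmentation model on ICDAR2017-RCTW data, with
    dataset paths chosen by hostname and optional multi-GPU DataParallel;
    checkpoints are saved every epoch, and the final model and log file are
    renamed with a timestamp."""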
    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    print('schedule: ', args.schedule)
    print("useMultiGPUS?:" + args.multiGPU + ' running on device', end=' ')
    print(device)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = args.kernelnum
    min_scale = args.min_scale
    start_epoch = 0

    hostname = socket.gethostname()
    img_dir, label_dir = '', ''
    if hostname == 'DESKTOP-JBG1JGC':
        img_dir_root = 'C:/Users/xiangpu/Downloads/icdar2017rctw_train_v1.2/train/'
        img_dir = [img_dir_root + 'part' + str(i + 1) + '/' for i in range(3)]
        label_dir = 'labels.txt'
    elif hostname == 'zxp':
        img_dir_root = '/root/myDataSet/SceneText/'
        img_dir = [img_dir_root + 'part' + str(i + 1) + '/' for i in range(3)]
        label_dir = 'labels.txt'
    dataLoader = IC15Loader(img_dir,
                            label_dir,
                            False,
                            args.img_size,
                            kernel_num=kernel_num,
                            min_scale=min_scale)
    train_loader = DataLoader(dataLoader,
                              args.batch_size,
                              shuffle=True,
                              num_workers=3,
                              drop_last=True,
                              pin_memory=True)

    if args.arch == "resnet18":
        model = pseNets.resnet18(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet34":
        model = pseNets.resnet34(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet50":
        model = pseNets.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = pseNets.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = pseNets.resnet152(pretrained=True, num_classes=kernel_num)

    if args.multiGPU == 'true' and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=0.99,
                                weight_decay=5e-4)

    title = 'ocrSegmentation'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.pretrain, map_location='cpu')
        model.load_state_dict(checkpoint['state_dict'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    model = model.to(device)
    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr']))

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)
        logger.append([
            optimizer.param_groups[0]['lr'], train_loss, train_te_acc,
            train_te_iou
        ])
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lr': args.lr,
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint,
            filename="pseNet")
    os.remove('training_model')
    t = time.strftime('%Y_%m_%d_%H_%M', time.localtime())
    os.rename('trained_models/pseNet', 'trained_models/pseNet_' + t)
    logger.close()
    os.rename(os.path.join(args.checkpoint, 'log.txt'),
              os.path.join(args.checkpoint, 'log_' + t + '.txt'))
Example no. 9
def main(args):
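    """Semi-supervised IMDB sentiment training: build a GloVe-initialized
    vocabulary, labeled/unlabeled/valid/test loaders and a TextCNN with an EMA
    copy, then train with SemiLoss while logging to TensorBoard and saving the
    best checkpoint by validation accuracy."""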
    best_acc = 0

    # Use CUDA
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
    use_cuda = torch.cuda.is_available()

    # Random seed
    random.seed(time.time())
    if args.manual_seed is None:
        args.manual_seed = random.randint(1, 10000)

    if os.path.exists(args.out):
        shutil.rmtree(args.out)
    mkdir_p(args.out)

    args.n_gpus = len(args.gpus.split(','))
    state = {k: v for k, v in args._get_kwargs()}
    with open(os.path.join(args.out, 'args.json'), 'w', encoding='utf8') as f:
        json.dump(state, f)
        print('==> saved arguments')
    print(json.dumps(state, indent=4))
    set_seed(args)

    # Data
    print(f'==> Preparing IMDB')
    train_labeled_set, train_unlabeled_set, valid_set, test_set,\
    text_field, label_field = get_imdb('./data/aclImdb/')
    text_field.build_vocab(train_unlabeled_set, max_size=args.vocab_size,
                           vectors=GloVe(name='6B', dim=300, cache='./data/'))
    label_field.build_vocab(train_unlabeled_set)
    text_vocab, label_vocab = text_field.vocab, label_field.vocab
    print(f"Unique tokens in TEXT vocabulary: {len(text_vocab)}")
    print(f"Unique tokens in LABEL vocabulary: {len(label_vocab)}")
    embedding_matrix = text_vocab.vectors
    train_labeled_set = MyIMDB(train_labeled_set, text_vocab, label_vocab)
    train_unlabeled_set = MyIMDB(train_unlabeled_set, text_vocab, label_vocab, unlabeled=True)
    valid_set = MyIMDB(valid_set, text_vocab, label_vocab)
    test_set = MyIMDB(test_set, text_vocab, label_vocab)

    train_labeled_loader = DataLoader(train_labeled_set, batch_size=args.batch_size, shuffle=True, num_workers=0,
                                      drop_last=True)
    train_unlabeled_loader = DataLoader(train_unlabeled_set, batch_size=args.batch_size, shuffle=True, num_workers=0,
                                        drop_last=True)
    valid_loader = DataLoader(valid_set, batch_size=args.batch_size, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False, num_workers=0)

    # Model
    print("==> creating TextCNN")

    def create_model(config, model=MixTextCNN, use_cuda=False, ema=False):
        model = model(config)
        if use_cuda: model = model.cuda()

        if ema:
            for param in model.parameters():
                param.detach_()

        return model

    config = Config(text_field, label_field, embedding=embedding_matrix)
    model = create_model(config, use_cuda=use_cuda)
    ema_model = create_model(config, use_cuda=use_cuda, ema=True)

    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    train_criterion = SemiLoss()
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=2)

    ema_optimizer = WeightEMA(model, ema_model, args.lr, alpha=args.ema_decay)
    start_epoch = 0

    # Resume
    title = 'noisy-imdb'
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.out = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        ema_model.load_state_dict(checkpoint['ema_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.out, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.out, 'log.txt'), title=title)
        logger.set_names(
            ['Train Loss', 'Train Loss X', 'Train Loss U', 'Valid Loss', 'Valid Acc.', 'Test Loss', 'Test Acc.'])

    writer = SummaryWriter(args.out)
    step = 0
    test_accs = []
    # Train and val
    for epoch in range(start_epoch, args.epochs):
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))

        train_loss, train_loss_x, train_loss_u = train(train_labeled_loader, train_unlabeled_loader, text_vocab, model,
                                                       optimizer, ema_optimizer, train_criterion, epoch, use_cuda)
        _, train_acc = validate(train_labeled_loader, ema_model, criterion, use_cuda, mode='Train Stats')
        val_loss, val_acc = validate(valid_loader, ema_model, criterion, use_cuda, mode='Valid Stats')
        test_loss, test_acc = validate(test_loader, ema_model, criterion, use_cuda, mode='Test Stats ')

        lr_scheduler.step(test_acc)

        step = args.val_iteration * (epoch + 1)

        writer.add_scalar('losses/train_loss', train_loss, step)
        writer.add_scalar('losses/valid_loss', val_loss, step)
        writer.add_scalar('losses/test_loss', test_loss, step)

        writer.add_scalar('accuracy/train_acc', train_acc, step)
        writer.add_scalar('accuracy/val_acc', val_acc, step)
        writer.add_scalar('accuracy/test_acc', test_acc, step)

        # append logger file
        logger.append([train_loss, train_loss_x, train_loss_u, val_loss, val_acc, test_loss, test_acc])

        # save model
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'ema_state_dict': ema_model.state_dict(),
            'acc': val_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.out)
        test_accs.append(test_acc)
    logger.close()
    writer.close()

    print('Best val acc:')
    print(best_acc)

    print('Mean test acc:')
    print(np.mean(test_accs[-20:]))
Example no. 10
def main():
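    """Train a PSENet detector on ReCTS with hard-coded hyperparameters (the
    argparse block is kept commented out), resuming from a hard-coded
    checkpoint directory and saving a checkpoint every epoch."""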
    # parser = argparse.ArgumentParser(description='Hyperparams')
    # parser.add_argument('--arch', nargs='?', type=str, default='resnet50')
    # parser.add_argument('--img_size', nargs='?', type=int, default=640,
    #                     help='Height of the input image')
    # parser.add_argument('--n_epoch', nargs='?', type=int, default=600,
    #                     help='# of the epochs')
    # parser.add_argument('--schedule', type=int, nargs='+', default=[200, 400],
    #                     help='Decrease learning rate at these epochs.')
    # parser.add_argument('--batch_size', nargs='?', type=int, default=1,
    #                     help='Batch Size')
    # parser.add_argument('--lr', nargs='?', type=float, default=1e-3,
    #                     help='Learning Rate')
    # parser.add_argument('--resume', nargs='?', type=str, default=None,
    #                     help='Path to previous saved model to restart from')
    # parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
    #                     help='path to save checkpoint (default: checkpoint)')
    # args = parser.parse_args()

    # lr = args.lr
    # schedule = args.schedule
    # batch_size = args.batch_size
    # n_epoch = args.n_epoch
    # image_size = args.img_size
    # resume = args.resume
    # checkpoint_path = args.checkpoint
    # arch = args.arch

    lr = 1e-3
    schedule = [200, 400]
    batch_size = 16
    # batch_size = 1
    n_epoch = 100
    image_size = 640
    checkpoint_path = ''
    # arch = 'resnet50'
    arch = 'mobilenetV2'
    resume = "checkpoints/ReCTS_%s_bs_%d_ep_%d" % (arch, batch_size, 5)
    # resume = None

    if checkpoint_path == '':
        checkpoint_path = "checkpoints/ReCTS_%s_bs_%d_ep_%d" % (
            arch, batch_size, n_epoch)

    print('checkpoint path: %s' % checkpoint_path)
    print('init lr: %.8f' % lr)
    print('schedule: ', schedule)
    sys.stdout.flush()

    if not os.path.isdir(checkpoint_path):
        os.makedirs(checkpoint_path)

    kernel_num = 7
    min_scale = 0.4
    start_epoch = 0

    data_loader = ReCTSDataLoader(
        need_transform=True,
        img_size=image_size,
        kernel_num=kernel_num,
        min_scale=min_scale,
        train_data_dir='../ocr_data/ReCTS/img/',
        train_gt_dir='../ocr_data/ReCTS/gt/'
        # train_data_dir='/kaggle/input/rects-ocr/img/',
        # train_gt_dir='/kaggle/input/rects-ocr/gt/'
    )

    ctw_root_dir = 'data/'

    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=3,
                                               drop_last=True,
                                               pin_memory=True)

    if arch == "resnet50":
        model = models.resnet50(pretrained=False, num_classes=kernel_num)
    elif arch == "resnet101":
        model = models.resnet101(pretrained=False, num_classes=kernel_num)
    elif arch == "resnet152":
        model = models.resnet152(pretrained=False, num_classes=kernel_num)
    elif arch == "mobilenetV2":
        model = PSENet(backbone="mobilenetv2",
                       pretrained=False,
                       result_num=kernel_num,
                       scale=1)

    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model).cuda()
        device = 'cuda'
    else:
        model = torch.nn.DataParallel(model)
        device = 'cpu'

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr,
                                momentum=0.99,
                                weight_decay=5e-4)

    title = 'ReCTS'
    if resume:
        print('Resuming from checkpoint.')
        checkpoint_file_path = os.path.join(resume, "checkpoint.pth.tar")
        assert os.path.isfile(
            checkpoint_file_path
        ), 'Error: no checkpoint directory: %s found!' % checkpoint_file_path

        checkpoint = torch.load(checkpoint_file_path,
                                map_location=torch.device(device))
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        shutil.copy(os.path.join(resume, 'log.txt'),
                    os.path.join(checkpoint_path, 'log.txt'))
        logger = Logger(os.path.join(checkpoint_path, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(checkpoint_path, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, n_epoch):
        lr = adjust_learning_rate(schedule, lr, optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, n_epoch, optimizer.param_groups[0]['lr']))

        stat(model, (3, image_size, image_size))

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch, lr,
            checkpoint_path)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lr': lr,
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=checkpoint_path)

        logger.append([
            optimizer.param_groups[0]['lr'], train_loss, train_te_acc,
            train_te_iou
        ])
    logger.close()
Example no. 11
def main(args):
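    """Train a text detection model with 18 output kernels on ICDAR2015,
    logging per-batch losses to TensorBoard and saving a checkpoint every 5
    epochs."""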
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ic15_%s_bs_%d_ep_%d"%(args.arch, args.batch_size, args.n_epoch)
    if args.pretrain:
        if 'synth' in args.pretrain:
            args.checkpoint += "_pretrain_synth"
        else:
            args.checkpoint += "_pretrain_s1280"

    print(('checkpoint path: %s'%args.checkpoint))
    print(('init lr: %.8f'%args.lr))
    print(('schedule: ', args.schedule))
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    writer=SummaryWriter(args.checkpoint)

    kernel_num=18
    start_epoch = 0
    #####
    #
    #
    #
    #####
    data_loader = IC15Loader(is_transform=True, img_size=args.img_size)
    train_loader = torch.utils.data.DataLoader(
        data_loader,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=3,
        drop_last=False,
        pin_memory=True)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)
    elif args.arch == "vgg16":
        model = models.vgg16(pretrained=False,num_classes=kernel_num)
    
    model = torch.nn.DataParallel(model).cuda()
    model.train()

    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        # NOTE: the momentum here has a very large effect on training;
        # with momentum=0.99 the cross-entropy loss fails to converge.
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)

    title = 'icdar2015'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(args.pretrain), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.pretrain)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss','Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        # optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss','Train Acc.', 'Train IOU.'])
    images_loss = {}
    # data_plot = images_loss.values()
    # import matplotlib.pyplot as plt
    # plt.plot(data_plot)
    # plt.ylabel('Loss plot')
    # plt.show()
    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print(('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr'])))
        
        train_loss, train_te_acc, train_te_iou = train(train_loader,images_loss, model, dice_loss, optimizer, epoch,writer)

        if epoch % 5 == 0 and epoch != 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'lr': args.lr,
                    'optimizer': optimizer.state_dict(),
                },
                checkpoint=args.checkpoint,
                filename='checkpoint_%d.pth' % epoch)

        logger.append([optimizer.param_groups[0]['lr'], train_loss, train_te_acc, train_te_iou])
    logger.close()
    writer.flush()
    writer.close()
Example no. 12
def main(args):
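    """Train a text detection model on a configurable OcrDataLoader dataset,
    with optional multi-GPU DataParallel (per-GPU chunk sizes), optional
    validation runs that keep the best checkpoint, and a latest checkpoint
    saved every epoch."""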
    # torch.backends.cudnn.benchmark = True
    title = args.title
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/%s_%s_bs_%d_ep_%d" % (
            title, args.arch, args.batch_size, args.n_epoch)
    if args.pretrain:
        if 'synth' in args.pretrain:
            args.checkpoint += "_pretrain_synth"
        else:
            args.checkpoint += "_pretrain_ic17"

    print(('checkpoint path: %s' % args.checkpoint))
    print(('init lr: %.8f' % args.lr))
    print(('schedule: ', args.schedule))
    args.vals = args.vals.split(';') if args.vals else []
    print('vals:', args.vals)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = 7
    min_scale = 0.4
    start_epoch = 0

    #data_loader = CTW1500Loader(is_transform=True, img_size=args.img_size, kernel_num=kernel_num, min_scale=min_scale)
    #data_loader = IC15Loader(is_transform=True, img_size=args.img_size, kernel_num=kernel_num, min_scale=min_scale)
    data_loader = OcrDataLoader(args,
                                is_transform=True,
                                img_size=args.img_size,
                                kernel_num=kernel_num,
                                min_scale=min_scale)
    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               drop_last=True,
                                               pin_memory=True)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)

    if len(args.gpus) > 1:
        model = DataParallel(model,
                             device_ids=args.gpus,
                             chunk_sizes=args.chunk_sizes).cuda()
        optimizer = model.module.optimizer
    else:
        model = model.cuda()
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=0.99,
                                    weight_decay=5e-4)

    # if hasattr(model.module, 'optimizer'):
    #     optimizer = model.module.optimizer
    # else:
    #     optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4)

    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.pretrain)
        model.load_state_dict(checkpoint['state_dict'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    best_target = {'epoch': 0, 'val': 0}
    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print(('\nEpoch: [%d | %d] LR: %f' %
               (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr'])))

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)
        # validate
        if args.vals:
            target = run_tests(args, model, epoch)
            # save best model
            if target > best_target['val']:
                best_target['val'] = target
                best_target['epoch'] = epoch + 1
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'lr': args.lr,
                        'optimizer': optimizer.state_dict(),
                    },
                    checkpoint=args.checkpoint,
                    filename='best.pth.tar')
            print('best_target: epoch: %d,  val:%.4f' %
                  (best_target['epoch'], best_target['val']))
        # save latest model
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lr': args.lr,
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

        logger.append([
            optimizer.param_groups[0]['lr'], train_loss, train_te_acc,
            train_te_iou
        ])
    logger.close()
Example no. 13
def main(args):
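    """Fine-tune a text detection model on ICDAR2015 (with an optional,
    commented-out block for freezing all but the output layers), logging
    metrics to both TensorBoard and the text logger."""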
    if args.checkpoint == '':
        # args.checkpoint = "checkpointsfuns/funs19_%s_bs_%d_ep_%d"%(args.arch, args.batch_size, args.n_epoch)
        args.checkpoint = "checkpoints/model_funs_pretrain_ic15_frozen_dense_layers"
    if args.pretrain:
        if 'synth' in args.pretrain:
            args.checkpoint += "_pretrain_synth"
        else:
            args.checkpoint += "_pretrain_ic17"

    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    print('schedule: ', args.schedule)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = 7
    min_scale = 0.4
    start_epoch = 0

    data_loader = IC15Loader(is_transform=True,
                             img_size=args.img_size,
                             kernel_num=kernel_num,
                             min_scale=min_scale)
    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=3,
                                               drop_last=True,
                                               pin_memory=True)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)
    elif args.arch == "pvanet":
        model = models.pvanet(inputsize=args.img_size, num_classes=kernel_num)

    model = torch.nn.DataParallel(model).cuda()

    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=0.99,
                                    weight_decay=5e-4)

    title = 'icdar2015'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.pretrain)
        model.load_state_dict(checkpoint['state_dict'])
        # fine tune output layers
        # grad = [
        #     'module.conv2.weight'
        #     'module.conv2.bias',
        #     'module.bn2.weight',
        #     'module.bn2.bias',
        #     'module.conv3.weight',
        #     'module.conv3.bias'
        # ]
        # for name,value in model.named_parameters():
        #     if name in grad:
        #         value.requires_grad = True
        #     else:
        #         value.requires_grad = False
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    writer = SummaryWriter(args.summary_path)

    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr']))

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lr': args.lr,
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

        writer.add_scalar('Loss', train_loss, epoch)
        writer.add_scalar('train_te_acc', train_te_acc, epoch)
        writer.add_scalar('train_te_iou', train_te_iou, epoch)
        writer.flush()

        logger.append([
            optimizer.param_groups[0]['lr'], train_loss, train_te_acc,
            train_te_iou
        ])
    logger.close()
    writer.close()
Example no. 14
def train_psenet(config_file):
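    """Self-contained PSENet training entry point driven by a yacs config
    file: defines OHEM sampling, dice loss, text/kernel scoring and the
    training loop inline, then trains on ICDAR2015 data."""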
    import sys
    sys.path.append('./detection_model/PSENet')
    # sys.path.append('/home/cjy/PSENet-master')

    import torch
    import argparse
    import numpy as np
    import torch.nn as nn
    import torch.nn.functional as F
    import shutil

    from torch.autograd import Variable
    from torch.utils import data
    import os

    from dataset import IC15Loader
    from metrics import runningScore
    import models
    from util import Logger, AverageMeter
    import time
    from tensorboardX import SummaryWriter
    import util
    from yacs.config import CfgNode as CN

    writer = SummaryWriter()

    def read_config_file(config_file):
        # load the training configuration from a YAML file via yacs
        with open(config_file) as f:
            opt = CN.load_cfg(f)
        return opt

    args = read_config_file(config_file)

    def ohem_single(score, gt_text, training_mask):
        pos_num = (int)(np.sum(gt_text > 0.5)) - (int)(
            np.sum((gt_text > 0.5) & (training_mask <= 0.5)))

        if pos_num == 0:
            # selected_mask = gt_text.copy() * 0 # may be not good
            selected_mask = training_mask
            selected_mask = selected_mask.reshape(
                1, selected_mask.shape[0],
                selected_mask.shape[1]).astype('float32')
            return selected_mask

        neg_num = (int)(np.sum(gt_text <= 0.5))
        neg_num = (int)(min(pos_num * 3, neg_num))

        if neg_num == 0:
            selected_mask = training_mask
            selected_mask = selected_mask.reshape(
                1, selected_mask.shape[0],
                selected_mask.shape[1]).astype('float32')
            return selected_mask

        neg_score = score[gt_text <= 0.5]
        neg_score_sorted = np.sort(-neg_score)
        threshold = -neg_score_sorted[neg_num - 1]

        selected_mask = ((score >= threshold) |
                         (gt_text > 0.5)) & (training_mask > 0.5)
        selected_mask = selected_mask.reshape(
            1, selected_mask.shape[0],
            selected_mask.shape[1]).astype('float32')
        return selected_mask

    def ohem_batch(scores, gt_texts, training_masks):
        scores = scores.data.cpu().numpy()
        gt_texts = gt_texts.data.cpu().numpy()
        training_masks = training_masks.data.cpu().numpy()

        selected_masks = []
        for i in range(scores.shape[0]):
            selected_masks.append(
                ohem_single(scores[i, :, :], gt_texts[i, :, :],
                            training_masks[i, :, :]))

        selected_masks = np.concatenate(selected_masks, 0)
        selected_masks = torch.from_numpy(selected_masks).float()

        return selected_masks

    def dice_loss(input, target, mask):
        input = torch.sigmoid(input)

        input = input.contiguous().view(input.size()[0], -1)
        target = target.contiguous().view(target.size()[0], -1)
        mask = mask.contiguous().view(mask.size()[0], -1)

        input = input * mask
        target = target * mask

        a = torch.sum(input * target, 1)
        b = torch.sum(input * input, 1) + 0.001
        c = torch.sum(target * target, 1) + 0.001
        d = (2 * a) / (b + c)
        dice_loss = torch.mean(d)
        return 1 - dice_loss

    def cal_text_score(texts, gt_texts, training_masks, running_metric_text):
        training_masks = training_masks.data.cpu().numpy()
        pred_text = torch.sigmoid(texts).data.cpu().numpy() * training_masks
        pred_text[pred_text <= 0.5] = 0
        pred_text[pred_text > 0.5] = 1
        pred_text = pred_text.astype(np.int32)
        gt_text = gt_texts.data.cpu().numpy() * training_masks
        gt_text = gt_text.astype(np.int32)
        running_metric_text.update(gt_text, pred_text)
        score_text, _ = running_metric_text.get_scores()
        return score_text

    def cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks,
                         running_metric_kernel):
        mask = (gt_texts * training_masks).data.cpu().numpy()
        kernel = kernels[:, -1, :, :]
        gt_kernel = gt_kernels[:, -1, :, :]
        pred_kernel = torch.sigmoid(kernel).data.cpu().numpy()
        pred_kernel[pred_kernel <= 0.5] = 0
        pred_kernel[pred_kernel > 0.5] = 1
        pred_kernel = (pred_kernel * mask).astype(np.int32)
        gt_kernel = gt_kernel.data.cpu().numpy()
        gt_kernel = (gt_kernel * mask).astype(np.int32)
        running_metric_kernel.update(gt_kernel, pred_kernel)
        score_kernel, _ = running_metric_kernel.get_scores()
        return score_kernel

    def train(train_loader, model, criterion, optimizer, epoch):
        model.train()

        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        running_metric_text = runningScore(2)
        running_metric_kernel = runningScore(2)

        end = time.time()
        for batch_idx, (imgs, gt_texts, gt_kernels,
                        training_masks) in enumerate(train_loader):
            data_time.update(time.time() - end)

            imgs = Variable(imgs.cuda())
            gt_texts = Variable(gt_texts.cuda())
            gt_kernels = Variable(gt_kernels.cuda())
            training_masks = Variable(training_masks.cuda())

            outputs = model(imgs)
            texts = outputs[:, 0, :, :]
            kernels = outputs[:, 1:, :, :]

            selected_masks = ohem_batch(texts, gt_texts, training_masks)
            selected_masks = Variable(selected_masks.cuda())

            loss_text = criterion(texts, gt_texts, selected_masks)

            loss_kernels = []
            mask0 = torch.sigmoid(texts).data.cpu().numpy()
            mask1 = training_masks.data.cpu().numpy()
            selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
            selected_masks = torch.from_numpy(selected_masks).float()
            selected_masks = Variable(selected_masks.cuda())
            for i in range(6):
                kernel_i = kernels[:, i, :, :]
                gt_kernel_i = gt_kernels[:, i, :, :]
                loss_kernel_i = criterion(kernel_i, gt_kernel_i,
                                          selected_masks)
                loss_kernels.append(loss_kernel_i)
            loss_kernel = sum(loss_kernels) / len(loss_kernels)

            loss = 0.7 * loss_text + 0.3 * loss_kernel
            losses.update(loss.item(), imgs.size(0))

            if batch_idx % 100 == 0:
                writer.add_scalar('loss_text', loss_text,
                                  batch_idx + epoch * len(train_loader))
                writer.add_scalar('loss_kernel', loss_kernel,
                                  batch_idx + epoch * len(train_loader))
                writer.add_scalar('total_loss', loss,
                                  batch_idx + epoch * len(train_loader))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            score_text = cal_text_score(texts, gt_texts, training_masks,
                                        running_metric_text)
            score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts,
                                            training_masks,
                                            running_metric_kernel)

            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % 20 == 0:
                output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                    batch=batch_idx + 1,
                    size=len(train_loader),
                    bt=batch_time.avg,
                    total=batch_time.avg * batch_idx / 60.0,
                    eta=batch_time.avg * (len(train_loader) - batch_idx) /
                    60.0,
                    loss=losses.avg,
                    acc=score_text['Mean Acc'],
                    iou_t=score_text['Mean IoU'],
                    iou_k=score_kernel['Mean IoU'])
                print(output_log)
                sys.stdout.flush()

        return (losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
                score_text['Mean IoU'], score_kernel['Mean IoU'])

    def adjust_learning_rate(args, optimizer, epoch):
        global state
        if epoch in args.schedule:
            args.lr = args.lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

    def save_checkpoint(state,
                        checkpoint='checkpoint',
                        filename='_checkpoint.pth.tar',
                        epoch=0):

        filepath = os.path.join(checkpoint, 'epoch_' + str(epoch) + filename)
        torch.save(state, filepath)

    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ic15_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
    if args.pretrain:
        if 'synth' in args.pretrain:
            args.checkpoint += "_pretrain_synth"
        else:
            args.checkpoint += "_pretrain_LSVT"

    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    print('schedule: ', args.schedule)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = 7
    min_scale = 0.4
    start_epoch = 0

    data_loader = IC15Loader(is_transform=True,
                             img_size=args.img_size,
                             kernel_num=kernel_num,
                             min_scale=min_scale)
    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=3,
                                               drop_last=True,
                                               pin_memory=True)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)
    else:
        raise ValueError('unsupported architecture: %s' % args.arch)

    model = torch.nn.DataParallel(model).cuda()

    # Use the model's own optimizer if it provides one, otherwise fall back to SGD.
    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=0.99,
                                    weight_decay=5e-4)

    title = 'icdar2015'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no pretrained checkpoint file found!'
        print(args.pretrain)
        checkpoint = torch.load(args.pretrain)
        state = model.state_dict()
        for key in state.keys():
            if key in checkpoint.keys():
                state[key] = checkpoint[key]
        model.load_state_dict(state)
        # model.load_state_dict(checkpoint['state_dict'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(
            args.resume), 'Error: no resume checkpoint file found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr']))

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)
        if (epoch + 1) % 5 == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'lr': args.lr,
                    'optimizer': optimizer.state_dict(),
                },
                checkpoint=args.checkpoint,
                epoch=epoch)

        logger.append([
            optimizer.param_groups[0]['lr'], train_loss, train_te_acc,
            train_te_iou
        ])
    logger.close()
Esempio n. 15
0
def main(args):
    start_epoch = 0
    start_layer = 1
    if args.checkpoint == '':
        args.checkpoint = "lista_checkpoint/n%d_s%d_p%d_snr%d/%s_bs_%d_ep_%d/measurements%d"\
        %(args.sample_nums, args.antenna_x*args.antenna_y, args.fault_prob*100, args.SNR, args.arch, args.batch_size, args.n_epoch, args.measurements)
    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    #print('schedule: ', args.schedule)

    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)
    data_loader = ListaDataLoader(args)
    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=0,
                                               drop_last=True,
                                               pin_memory=True)
    # sensing matrix
    A = ArrayResposeLoad(measurements=args.measurements,
                         antenna_x=args.antenna_x,
                         antenna_y=args.antenna_y)
    if args.arch == "LISTA":
        model = models.LISTA(A=A,
                             T=args.T,
                             lam=args.lam,
                             untied=args.untied,
                             coord=args.coord)
    else:
        raise ValueError('unsupported architecture: %s' % args.arch)

    model = torch.nn.DataParallel(model).cuda()
    #for p,v in model.named_parameters():
    #    pdb.set_trace()

    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no pretrained checkpoint file found!'
        checkpoint = torch.load(args.pretrain)
        d = collections.OrderedDict()
        keys = list(checkpoint['state_dict'].keys())
        for pname, para in model.named_parameters():
            if pname in keys and checkpoint['state_dict'][
                    pname].shape == para.shape:
                d[pname] = checkpoint['state_dict'][pname]
        model.load_state_dict(d)
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(['Layer', 'Epoch', 'Learning Rate', 'nmse'])
    elif args.resume:
        print("Resuming from checkpoint")
        assert os.path.isfile(
            args.resume), 'Error: no resume checkpoint file found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch'] + 1
        start_layer = checkpoint['layer']
        model.load_state_dict(checkpoint['state_dict'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(['Layer', 'Epoch', 'Learning Rate', 'nmse'])

    bestResult = np.inf
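    # Greedy layer-wise training: each LISTA layer is optimized in turn with its
    # own optimizer and learning-rate schedule before moving on to the next.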
    for layer in range(start_layer, args.T + 1):
        print('Start training layer:{}'.format(layer))
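        # Freeze everything except the current layer's parameters; in the tied
        # configuration the shared W and B matrices also stay trainable.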
        if args.untied:
            for name, para in model.named_parameters():
                if name.endswith('_{}'.format(layer)):
                    para.requires_grad = True
                else:
                    para.requires_grad = False
        else:
            for name, para in model.named_parameters():
                if name.endswith('W') or name.endswith('B'):
                    para.requires_grad = True
                    continue
                if name.endswith('theta_{}'.format(layer)):
                    para.requires_grad = True
                else:
                    para.requires_grad = False
        #for name, para in model.named_parameters():
        #    pdb.set_trace()
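        # A fresh Adam optimizer and cosine-annealing schedule are created for
        # every layer, so only the currently unfrozen parameters are updated.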
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                            model.parameters()),
                                     lr=args.lr)
        lr_scheduler = CosineAnnealingLR(optimizer,
                                         T_max=args.n_epoch,
                                         eta_min=5e-6)

        if args.resume:
            checkpoint = torch.load(args.resume)
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])

        for epoch in range(start_epoch, args.n_epoch):
            # Only a resumed layer starts from a later epoch; every following layer restarts at 0.
            start_epoch = 0
            #adjust_learning_rate(args, optimizer, epoch)
            print('\nEpoch: [%d | %d] LR: %f' %
                  (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr']))

            nmse = train(train_loader, model, optimizer, epoch, layer)

            lr_scheduler.step()

            save_checkpoint(
                {
                    'layer': layer,
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'lr': args.lr,
                    'lr_scheduler': lr_scheduler.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                layer,
                epoch + 1,
                checkpoint=args.checkpoint)

            if args.need_validate and (epoch + 1) % 5 == 0:
                print('Validating the model')
                avgNmse = validate(model, args, layer)
                print('The normalized mse in val set is:{nmse:.6f}'.format(
                    nmse=avgNmse))

                if avgNmse < bestResult:
                    print('Save the best model!')
                    bestResult = avgNmse
                    save_best_checkpoint(
                        {
                            'layer': layer,
                            'epoch': epoch,
                            'state_dict': model.state_dict(),
                            'lr': args.lr,
                            'lr_scheduler': lr_scheduler.state_dict(),
                            'optimizer': optimizer.state_dict(),
                        },
                        checkpoint=args.checkpoint)

            logger.append(
                [layer, epoch + 1, optimizer.param_groups[0]['lr'], nmse])
        # Clear the resume flag so subsequent layers train from scratch instead of
        # reloading the saved optimizer / scheduler state.
        args.resume = None

    # lastly finetune the model
    finetune(args, model, train_loader, start_epoch, logger, bestResult)
    logger.close()