def train(opt):
    """Train the multi-task (CTC + attention) recognition model.

    Builds the training batch source and validation loader, constructs the
    model and both loss criteria, then runs an infinite training loop that
    periodically validates, logs, and checkpoints.  Exits the process via
    ``sys.exit()`` once ``opt.num_iter`` iterations are reached.

    Args:
        opt: argparse.Namespace-like options object.  Fields read include
            imgH, imgW, PAD, valid_data, batch_size, workers, character,
            subword, batch_max_length, rgb, continue_model, adam, lr, beta1,
            rho, eps, outPath, experiment_name, ctc_weight, grad_clip,
            valInterval, num_iter.  ``opt.num_class`` / ``opt.ctc_num_class``
            are written as a side effect before model construction.
    """
    train_dataset = Batch_Dataset(opt)
    AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW,
                                      keep_ratio_with_pad=opt.PAD)
    valid_dataset = LmdbDataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=opt.batch_size, shuffle=True,
        num_workers=int(opt.workers), collate_fn=AlignCollate_valid,
        pin_memory=True)
    print('-' * 80)

    # --- model configuration -------------------------------------------------
    ctc_converter = CTCLabelConverter(opt.character, opt.subword)
    attn_converter = AttnLabelConverter(opt.character, opt.subword,
                                        opt.batch_max_length)
    # Model sizing depends on the converters' alphabets, so set these first.
    opt.num_class = len(attn_converter.character)
    opt.ctc_num_class = len(ctc_converter.character)
    print("ctc num class {}".format(len(ctc_converter.character)))
    print("attention num class {}".format(len(attn_converter.character)))
    if opt.rgb:
        opt.input_channel = 3
    model = MyModel(opt)

    # Weight initialization.  localization_fc2 (STN) is assumed to be
    # initialized inside the module itself, so it is skipped here.
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            # BUGFIX: was ``.format(name)`` which raises KeyError for the
            # named placeholder {name}; must pass it as a keyword.
            print('Skip {name} as it is already initialized'.format(name=name))
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception:
            # kaiming_normal_ fails on < 2-D tensors (e.g. BatchNorm weight);
            # fall back to ones for those weights.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    model = torch.nn.DataParallel(model).to(device)
    model.train()
    if opt.continue_model != '':
        print('loading pretrained model from {}'.format(opt.continue_model))
        model.load_state_dict(torch.load(opt.continue_model))

    # --- loss setup ----------------------------------------------------------
    ctc_criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    # ignore_index=0 masks the [GO]/pad token in the attention targets.
    attn_criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)
    loss_avg = Averager()

    # Collect only trainable parameters for the optimizer.
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))

    if opt.adam:
        optimizer = optim.Adam(filtered_parameters, lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr,
                                   rho=opt.rho, eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    # --- dump final options to the experiment directory ----------------------
    with open(osj(opt.outPath, '{}/opt.txt'.format(opt.experiment_name)),
              'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += '{}: {}\n'.format(str(k), str(v))
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    # --- training loop -------------------------------------------------------
    start_iter = 0
    if opt.continue_model != '':
        print('continue to train, start_iter: {}'.format(start_iter))

    start_time = time.time()
    best_accuracy = -1
    i = start_iter

    while True:
        # Re-enable gradients (validation below turns them off).
        for p in model.parameters():
            p.requires_grad = True
        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)
        ctc_text, ctc_length = ctc_converter.encode(labels)
        attn_text, attn_length = attn_converter.encode(labels)
        batch_size = image.size(0)

        # Single forward pass produces both heads' predictions.
        ctc_preds, attn_preds = model(image, attn_text)

        # CTC loss: CTCLoss expects (T, N, C) log-probs plus per-sample lengths.
        ctc_preds = ctc_preds.log_softmax(2)
        preds_size = torch.IntTensor([ctc_preds.size(1)] * batch_size)
        ctc_preds = ctc_preds.permute(1, 0, 2)
        ctc_cost = ctc_criterion(ctc_preds, ctc_text, preds_size, ctc_length)

        # Attention loss: targets drop the leading [GO] symbol.
        target = attn_text[:, 1:]
        attn_cost = attn_criterion(attn_preds.view(-1, attn_preds.shape[-1]),
                                   target.contiguous().view(-1))

        # Weighted multi-task objective.
        cost = opt.ctc_weight * ctc_cost + (1.0 - opt.ctc_weight) * attn_cost
        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(
            model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()
        loss_avg.add(cost)

        # --- validation part -------------------------------------------------
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            logging.info('[{}/{}] Loss: {:0.5f} elapsed_time: {:0.5f}'.format(
                i, opt.num_iter, loss_avg.val(), elapsed_time))
            with open(
                    osj(opt.outPath,
                        '{}/log_train.txt'.format(opt.experiment_name)),
                    'a') as log:
                log.write(
                    '[{}/{}] Loss: {:0.5f} elapsed_time: {:0.5f}\n'.format(
                        i, opt.num_iter, loss_avg.val(), elapsed_time))
                loss_avg.reset()

                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, ctc_accuracy, current_norm_ED, preds, labels, infer_time, length_of_data \
                        = mtl_validation(model, ctc_criterion, attn_criterion,
                                         valid_loader, ctc_converter,
                                         attn_converter, opt)
                model.train()

                # Show a few sample predictions (pruned at the [s] end token).
                for pred, gt in zip(preds[:5], labels[:5]):
                    pred = pred[:pred.find('[s]')]
                    gt = gt[:gt.find('[s]')]
                    print('{:20s}, gt: {:20s}, {}'.format(
                        pred, gt, str(pred == gt)))
                    log.write('{:20s}, gt: {:20s}, {}\n'.format(
                        pred, gt, str(pred == gt)))

                valid_log = '[{}/{}] valid loss: {:0.5f}'.format(
                    i, opt.num_iter, valid_loss)
                valid_log += ' accuracy: {:0.3f}'.format(current_accuracy)
                log.write(valid_log + '\n')

                # Save best-accuracy checkpoint.
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(
                        model.state_dict(),
                        osj(opt.outPath, '{}/best_accuracy.pth'.format(
                            opt.experiment_name)))
                best_model_log = 'best_accuracy: {:0.3f}'.format(best_accuracy)
                logging.info(best_model_log)
                log.write(best_model_log + '\n')

        # Periodic snapshot every 50k iterations.
        if (i + 1) % 50000 == 0:
            torch.save(
                model.state_dict(),
                osj(opt.outPath,
                    '{}/iter_{}.pth'.format(opt.experiment_name, i + 1)))

        if i == opt.num_iter:
            logging.info('end the training')
            sys.exit()
        i += 1
def mtl_validation(model, ctc_criterion, attn_criterion, evaluation_loader,
                   ctc_converter, attn_converter, opt):
    """Validate/evaluate the multi-task (CTC + attention) model.

    Renamed from ``validation``: the old name was shadowed by the later
    single-task ``validation`` definition (making this function unreachable),
    while ``train`` calls ``mtl_validation`` with exactly this signature.

    Args:
        model: DataParallel-wrapped model returning (ctc_preds, attn_preds).
        ctc_criterion: torch.nn.CTCLoss instance.
        attn_criterion: CrossEntropyLoss over attention-head logits.
        evaluation_loader: DataLoader yielding (image_tensors, labels).
        ctc_converter / attn_converter: label <-> index converters.
        opt: options; reads batch_max_length and ctc_weight.

    Returns:
        (avg_loss, attn_accuracy, ctc_accuracy, norm_ED, attn_preds_str,
         attn_labels, infer_time, length_of_data) — accuracies in percent;
        the prediction/label lists are from the LAST batch only.
    """
    # Freeze all parameters for the duration of evaluation.
    for p in model.parameters():
        p.requires_grad = False

    n_correct = 0          # attention-head exact matches
    ctc_correct = 0        # CTC-head exact matches
    norm_ED = 0            # accumulated normalized edit distance
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()

    for i, (image_tensors, labels) in enumerate(evaluation_loader):
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        image = image_tensors.to(device)
        # Max-length prediction tensors for the attention decoder.
        length_for_pred = torch.IntTensor(
            [opt.batch_max_length] * batch_size).to(device)
        text_for_pred = torch.LongTensor(
            batch_size, opt.batch_max_length + 1).fill_(0).to(device)

        ctc_text_for_loss, ctc_length_for_loss = ctc_converter.encode(labels)
        attn_text_for_loss, attn_length_for_loss = attn_converter.encode(labels)

        start_time = time.time()
        ctc_preds, attn_preds = model(image, text_for_pred)
        forward_time = time.time() - start_time

        # --- CTC head: loss + greedy decode ---------------------------------
        ctc_preds = ctc_preds.log_softmax(2)
        preds_size = torch.IntTensor([ctc_preds.size(1)] * batch_size)
        ctc_preds = ctc_preds.permute(1, 0, 2)  # to use CTCloss format (T, N, C)
        ctc_cost = ctc_criterion(ctc_preds, ctc_text_for_loss, preds_size,
                                 ctc_length_for_loss)
        _, preds_index = ctc_preds.max(2)
        preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
        ctc_preds_str = ctc_converter.decode(preds_index.data, preds_size.data)

        # --- attention head: loss + greedy decode ---------------------------
        attn_preds = attn_preds[:, :attn_text_for_loss.shape[1] - 1, :]
        target = attn_text_for_loss[:, 1:]  # without [GO] Symbol
        attn_cost = attn_criterion(
            attn_preds.contiguous().view(-1, attn_preds.shape[-1]),
            target.contiguous().view(-1))
        _, attn_preds_index = attn_preds.max(2)
        attn_preds_str = attn_converter.decode(attn_preds_index, length_for_pred)
        attn_labels = attn_converter.decode(attn_text_for_loss[:, 1:],
                                            attn_length_for_loss)

        cost = opt.ctc_weight * ctc_cost + (1.0 - opt.ctc_weight) * attn_cost
        infer_time += forward_time
        valid_loss_avg.add(cost)

        # --- accuracy / edit distance ---------------------------------------
        for pred, gt, attn_pred, attn_gt in zip(ctc_preds_str, labels,
                                                attn_preds_str, attn_labels):
            # Prune after the "end of sentence" token ([s]).
            attn_pred = attn_pred[:attn_pred.find('[s]')]
            attn_gt = attn_gt[:attn_gt.find('[s]')]
            if pred == gt:
                ctc_correct += 1
            if attn_pred == attn_gt:
                n_correct += 1
            # BUGFIX: guard against ZeroDivisionError on an empty ground truth.
            norm_ED += edit_distance(attn_pred, attn_gt) / max(len(attn_gt), 1)

    accuracy = n_correct / float(length_of_data) * 100
    ctc_accuracy = ctc_correct / float(length_of_data) * 100

    return (valid_loss_avg.val(), accuracy, ctc_accuracy, norm_ED,
            attn_preds_str, attn_labels, infer_time, length_of_data)
def validation(model, criterion, evaluation_loader, converter, opt):
    """Validate/evaluate a single-head model (CTC or attention decoder).

    Selects the decode/loss path based on ``opt.Prediction`` ('CTC' vs
    attention).  Gradients are disabled for the whole evaluation.

    Args:
        model: recognition model; attention path is called with
            ``is_train=False``.
        criterion: CTCLoss or CrossEntropyLoss matching ``opt.Prediction``.
        evaluation_loader: DataLoader yielding (image_tensors, labels).
        converter: label <-> index converter matching the prediction head.
        opt: options; reads batch_max_length and Prediction.

    Returns:
        (avg_loss, accuracy, norm_ED, preds_str, labels, infer_time,
         length_of_data) — accuracy in percent; preds_str/labels are from
        the LAST batch only.
    """
    # Freeze all parameters for the duration of evaluation.
    for p in model.parameters():
        p.requires_grad = False

    n_correct = 0
    norm_ED = 0
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()

    for i, (image_tensors, labels) in enumerate(evaluation_loader):
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        with torch.no_grad():
            # CONSISTENCY FIX: use the module-level ``device`` like the rest
            # of this file instead of hard-coded .cuda()/torch.cuda.*Tensor,
            # so CPU-only evaluation also works.
            image = image_tensors.to(device)
            # For max length prediction.
            length_for_pred = torch.IntTensor(
                [opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0).to(device)
            text_for_loss, length_for_loss = converter.encode(labels)

            start_time = time.time()
            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)
                forward_time = time.time() - start_time

                # Calculate evaluation loss for CTC decoder.
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                preds = preds.permute(1, 0, 2)  # to use CTCloss format
                cost = criterion(preds, text_for_loss, preds_size,
                                 length_for_loss)

                # Select max probability (greedy decoding), then decode
                # indices to characters.
                _, preds_index = preds.max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)
            else:
                preds = model(image, text_for_pred, is_train=False)
                forward_time = time.time() - start_time

                preds = preds[:, :text_for_loss.shape[1] - 1, :]
                target = text_for_loss[:, 1:]  # without [GO] Symbol
                cost = criterion(preds.contiguous().view(-1, preds.shape[-1]),
                                 target.contiguous().view(-1))

                # Greedy decode, then convert indices to characters.
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)
                labels = converter.decode(text_for_loss[:, 1:], length_for_loss)

        infer_time += forward_time
        valid_loss_avg.add(cost)

        # Calculate accuracy and normalized edit distance.
        for pred, gt in zip(preds_str, labels):
            if 'Attn' in opt.Prediction:
                # Prune after the "end of sentence" token ([s]).
                pred = pred[:pred.find('[s]')]
                gt = gt[:gt.find('[s]')]
            if pred == gt:
                n_correct += 1
            # BUGFIX: guard against ZeroDivisionError on an empty ground truth.
            norm_ED += edit_distance(pred, gt) / max(len(gt), 1)

    accuracy = n_correct / float(length_of_data) * 100

    return (valid_loss_avg.val(), accuracy, norm_ED, preds_str, labels,
            infer_time, length_of_data)