Example #1
    def CTCLoss(opt):
        '''
        CTC (Connectionist Temporal Classification) loss function
        '''
        try:
            from torch.nn import CTCLoss
            return CTCLoss()

        except ImportError:
            from warpctc_pytorch import CTCLoss
            return CTCLoss()
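Note that the two implementations returned above are not drop-in interchangeable: torch.nn.CTCLoss expects log-probabilities of shape (T, N, C) plus per-sample input and target lengths, while the warp-ctc binding typically takes raw (pre-softmax) activations. Below is a minimal sketch of calling the built-in criterion; the shapes T, N, C, S and all tensor names are illustrative assumptions, not taken from the example above.

    import torch
    from torch.nn import CTCLoss

    criterion = CTCLoss(blank=0)  # blank index 0, reduction='mean' by default

    T, N, C, S = 50, 4, 20, 10    # time steps, batch size, classes (incl. blank), target length
    log_probs = torch.randn(T, N, C).log_softmax(2).detach().requires_grad_()  # (T, N, C)
    targets = torch.randint(1, C, (N, S), dtype=torch.long)   # label ids avoid the blank index 0
    input_lengths = torch.full((N,), T, dtype=torch.long)
    target_lengths = torch.full((N,), S, dtype=torch.long)

    loss = criterion(log_probs, targets, input_lengths, target_lengths)
    loss.backward()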
Example #2
    def __init__(self, opt, dataset_name='iam', reset_log=False):
        self.opt = opt
        self.mode = self.opt.mode
        self.dataset_name = dataset_name
        self.stn_nc = self.opt.stn_nc
        self.cnn_nc = self.opt.cnn_nc
        self.nheads = self.opt.nheads
        self.criterion = CTCLoss(blank=0, reduction='sum', zero_infinity=True)
        self.label_transform = self.init_label_transform()
        self.test_transforms = self.init_test_transforms()
        self.train_transforms = self.init_train_transforms()
        self.val1_iter = self.opt.val1_iter # Number of train data batches that will be validated
        self.val2_iter = self.opt.val2_iter # Number of validation data batches that will be validated
        self.stn_attn = None
        self.val_metric = 'cer'
        self.use_loc_bn = False
        self.CNN = 'ResCRNN'
        self.loc_block = 'LocNet'
        self.identity_matrix = torch.tensor([1, 0, 0, 0, 1, 0],
                                       dtype=torch.float).cuda()
        if self.mode == 'train':
            if len(self.opt.trainRoot) == 0:
                self.train_root = "/ssd_scratch/cvit/santhoshini/{}-train-lmdb".format(self.dataset_name)
            else:
                self.train_root = self.opt.trainRoot
        if len(self.opt.valRoot) == 0:
            self.test_root = "/ssd_scratch/cvit/santhoshini/{}-test-lmdb".format(self.dataset_name)
        else:
            self.test_root = self.opt.valRoot

        if not os.path.exists(self.opt.node_dir):
            os.makedirs(self.opt.node_dir)
        elif reset_log:
            shutil.rmtree(self.opt.node_dir)
            os.makedirs(self.opt.node_dir)

        random.seed(self.opt.manualSeed)
        np.random.seed(self.opt.manualSeed)
        torch.manual_seed(self.opt.manualSeed)

        # cudnn.benchmark = True
        cudnn.deterministic = True
        cudnn.benchmark = False
        cudnn.enabled = True
        # print('CudNN enabled', cudnn.enabled)

        if torch.cuda.is_available() and not self.opt.cuda:
            print("WARNING: You have a CUDA device, so you should probably run with --cuda")
        else:
            self.opt.gpu_id = list(map(int, self.opt.gpu_id.split(',')))
            torch.cuda.set_device(self.opt.gpu_id[0])
Example #3
def main():
    eval_batch_size = config["eval_batch_size"]
    cpu_workers = config["cpu_workers"]
    reload_checkpoint = config["reload_checkpoint"]

    img_height = config["img_height"]
    img_width = config["img_width"]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"device: {device}")

    test_dataset = Synth90kDataset(
        root_dir=config["data_dir"],
        mode="test",
        img_height=img_height,
        img_width=img_width,
    )

    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=eval_batch_size,
        shuffle=False,
        num_workers=cpu_workers,
        collate_fn=synth90k_collate_fn,
    )

    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(
        1,
        img_height,
        img_width,
        num_class,
        map_to_seq_hidden=config["map_to_seq_hidden"],
        rnn_hidden=config["rnn_hidden"],
        leaky_relu=config["leaky_relu"],
    )
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
    crnn.to(device)

    criterion = CTCLoss(reduction="sum")
    criterion.to(device)

    evaluation = evaluate(
        crnn,
        test_loader,
        criterion,
        decode_method=config["decode_method"],
        beam_size=config["beam_size"],
    )
    print("test_evaluation: loss={loss}, acc={acc}".format(**evaluation))
Example #4
def main():
    device = torch.device('cuda')
    model = CRNN(args.nc, args.nclass, args.nh)
    if args.pretrained:
        model.load_state_dict(torch.load(args.pretrained))
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    criterion = CTCLoss(zero_infinity=True).cuda()
    if not os.path.exists(lmdb_train_path):
        create_lmdb(args.root, args.trainroot, args.valroot)
    if not os.path.exists(args.expr_dir):
        os.makedirs(args.expr_dir)
    if torch.cuda.is_available():
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True
    train_loader, val_loader = get_data_loader(args)
    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        criterion,
                                        device=device)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine: Engine):
        if engine.state.iteration % args.log_interval == 0:
            print("Epoch {} [{}/{}] :Loss {}".format(
                engine.state.epoch,
                engine.state.iteration % (len(train_loader)),
                len(train_loader), engine.state.output))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_test_acc(engine):
        valid(model, val_loader)
        if engine.state.epoch % args.save_interval == 0:
            torch.save(model, f'{args.expr_dir}/crnn_{engine.state.epoch}.pth')

    trainer.run(train_loader, max_epochs=args.epochs)
Example #5
    def evaluate_batch(self, batch_data, metric_names):
        x = batch_data["imgs"].to(self.device)
        y = batch_data["labels"].to(self.device)
        y_len = batch_data["labels_len"]
        str_y = batch_data["raw_labels"]
        loss = 0

        loss_ctc = CTCLoss(blank=self.dataset.tokens["blank"],
                           reduction="mean")
        with autocast(enabled=self.params["training_params"]["use_amp"]):
            x = self.models["encoder"](x)
            global_pred = self.models["decoder"](x)

            ind_x = list()
            b, c, h, w = global_pred.size()
            for i in range(b):
                x_h, x_w = batch_data["imgs_reduced_shape"][i][:2]
                pred = global_pred[i, :, :x_h, :x_w]
                pred = pred.reshape(1, c, x_h * x_w)
                loss += loss_ctc(pred.permute(2, 0, 1), y[i].unsqueeze(0), [
                    x_h * x_w,
                ], [
                    y_len[i],
                ])
                ind_x.append(torch.argmax(pred, dim=1).cpu().numpy()[0])

        metrics = self.compute_metrics(ind_x,
                                       str_y,
                                       loss=loss.item(),
                                       metric_names=metric_names)
        if "pred" in metric_names:
            metrics["pred"].extend(
                [batch_data["unchanged_labels"], batch_data["names"]])
        return metrics
Example #6
def main():
    text_image = TextImage('abc', 32, 96, 5, 2)
    data_set = Generator(text_image)
    data_loader = DataLoader(data_set, batch_size=32, shuffle=True)

    model = CrnnSmall(len(text_image.alpha) + 1, num_base_filters=8)
    criterion = CTCLoss()

    optimizer = optim.Adadelta(model.parameters(), weight_decay=1e-4)

    num_epochs = 10
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        model.train()
        for image, target, input_len, target_len in tqdm(data_loader):
            # print(target, target_len, input_len)
            outputs = model(image.to(torch.float32))  # [B,N,C]
            m_outputs = outputs
            outputs = torch.log_softmax(outputs, dim=2).to(torch.float64)
            outputs = outputs.permute([1, 0, 2])  # [N,B,C]
            loss = criterion(outputs[:], target, input_len, target_len)
            # gradient update
            model.zero_grad()
            loss.backward()
            optimizer.step()
            # accumulate loss for the current epoch
            epoch_loss += loss.item() * image.size(0)
            if np.isnan(loss.item()):
                print(target, m_outputs)

        epoch_loss = epoch_loss / len(data_loader.dataset)
        # print log and save weights
        print('Epoch: {}/{} loss: {:03f}'.format(epoch + 1, num_epochs,
                                                 epoch_loss))
Example #7
    def evaluate_batch(self, params):
        with torch.no_grad():
            x, y, seq_len, seq_reduced_len, labels_len, _, _ = params
            x = torch.from_numpy(x).float().permute(0, 3, 1, 2).to(self.device)
            y = torch.from_numpy(y).long().to(self.device)

            for model_name in self.models.keys():
                self.models[model_name].eval()
            loss_ctc = CTCLoss(blank=len(self.all_labels))

            global_pred = self.models["end_to_end_model"](x)

            loss = loss_ctc(global_pred.permute(2, 0, 1), y,
                            seq_reduced_len.tolist(), labels_len.tolist())
            loss_val = loss.item()

        truth = [self.ctc_ind_to_str(i) for i in y]

        pred = [self.ctc_decode(pred) for pred in global_pred.permute(0, 2, 1)]

        edit = self.batch_edit(truth, pred)

        diff_len = self.batch_len(truth, pred)
        losses = {"loss_ctc": loss_val}
        metrics = {"edit": edit, "diff_len": diff_len}
        return losses, metrics
Example #8
def run(args):
    test_dataset = TrainWordsDataset(data_set_dir=args.dataset_dir,
                                     transform=ToFloatTensor())

    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=args.test_batch_size,
                             shuffle=False,
                             num_workers=4)

    model = CRNN(image_height=args.image_height,
                 num_of_channels=args.num_of_channels,
                 num_of_classes=args.num_of_classes,
                 num_of_lstm_hidden_units=args.num_of_lstm_hidden_units)

    model.load_state_dict(torch.load(args.snapshot))
    print(model)

    trainer = Trainer()
    criterion = CTCLoss(zero_infinity=True, reduction='mean')

    test_image = torch.FloatTensor(args.test_batch_size, 3, args.image_height,
                                   512)
    test_image = Variable(test_image)

    trainer.test(model=model,
                 test_loader=test_loader,
                 criterion=criterion,
                 test_image=test_image)
Example #9
    def __init__(self,
                 vocab: Vocabulary,
                 loss_ratio: float = 1.0,
                 remove_sos: bool = True,
                 remove_eos: bool = False,
                 target_namespace: str = "tokens",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(CTCLayer, self).__init__(vocab, regularizer)
        self.loss_ratio = loss_ratio
        self._remove_sos = remove_sos
        self._remove_eos = remove_eos
        self._target_namespace = target_namespace
        self._num_classes = self.vocab.get_vocab_size(target_namespace)
        self._pad_index = self.vocab.get_token_index(DEFAULT_PADDING_TOKEN,
                                                     self._target_namespace)
        self._loss = CTCLoss(blank=self._pad_index)
        self._start_index = self.vocab.get_token_index(START_SYMBOL,
                                                       self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL,
                                                     self._target_namespace)
        exclude_indices = {self._pad_index, self._end_index, self._start_index}
        self._wer: Metric = WER(exclude_indices=exclude_indices)
        self._bleu: Metric = BLEU(exclude_indices=exclude_indices)
        self._dal: Metric = Average()

        initializer(self)
Example #10
def main(config):
    train_loader, eval_loader = get_dataloader(config['data_loader']['type'], config['data_loader']['args'])
    if os.path.isfile(config['data_loader']['args']['alphabet']):
        config['data_loader']['args']['alphabet'] = str(np.load(config['data_loader']['args']['alphabet']))

    prediction_type = config['arch']['args']['prediction']['type']
    # set up the label converter
    if prediction_type == 'CTC':
        converter = CTCLabelConverter(config['data_loader']['args']['alphabet'])
    else:
        converter = AttnLabelConverter(config['data_loader']['args']['alphabet'])
    num_class = len(converter.character)

    # set up the loss
    if prediction_type == 'CTC':
        criterion = CTCLoss(zero_infinity=True).cuda()
    else:
        criterion = CrossEntropyLoss(ignore_index=0).cuda()  # ignore [GO] token = ignore index 0

    model = get_model(num_class, config)

    config['name'] = config['name'] + '_' + model.name
    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      val_loader=eval_loader,
                      converter=converter,
                      weights_init=weights_init)
    trainer.train()
Example #11
def train(net, optimizer, trainSet, valSet, use_gpu):
    ctc_loss = CTCLoss(blank=0, reduction='mean', zero_infinity=True)
    net.train()
    epoch = 0
    print('Loading Dataset...')

    epoch_size = math.ceil(len(trainSet) / args.batch_size)
    max_iter = args.max_epoch * epoch_size

    start_iter = 0
    t_loss = 0.0
    print("Begin training...")
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            epoch += 1

            epochnum.append(epoch)

            batch_iterator = iter(DataLoader(trainSet, args.batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=custom_collate_fn))
            if epoch % 1 == 0 and epoch > 0:
                if args.num_gpu > 1:
                    torch.save(net.module.state_dict(), os.path.join(args.weights_save_folder, 'epoch_' + str(epoch) + '.pth'))
                else:
                    torch.save(net.state_dict(), os.path.join(args.weights_save_folder, 'epoch_' + str(epoch) + '.pth'))

            val(net, valSet, ctc_loss)


        load_t0 = time.time()
        images, labels, target_lengths, input_lengths = next(batch_iterator)
        if use_gpu:
            images = images.cuda()
            labels = labels.cuda()
            target_lengths = target_lengths.cuda()
            input_lengths = input_lengths.cuda()
        out = net(images)
        optimizer.zero_grad()
        loss = ctc_loss(log_probs=out, targets=labels, target_lengths=target_lengths,
                        input_lengths=input_lengths)

        loss.backward()
        optimizer.step()

        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loss: {:.4f}|| Batchtime: {:.4f} s || ETA: {}'.format
              (epoch, args.max_epoch, (iteration % epoch_size) + 1, epoch_size, iteration + 1, max_iter, loss,
               batch_time, str(datetime.timedelta(seconds=eta))))
        t_loss = t_loss + loss.item()
        if ((iteration % epoch_size) + 1 == epoch_size):
            epochloss.append(t_loss/epoch_size)
            t_loss = 0.0
    
    if args.num_gpu > 1:
        torch.save(net.module.state_dict(), os.path.join(args.weights_save_folder, 'Final-crnn.pth'))
    else:
        torch.save(net.state_dict(), os.path.join(args.weights_save_folder, 'Final-crnn.pth'))
    print('Finished Training')
Example #12
    def __init__(self, labels: List,
                 model_cfg: Union[UniDirectionalConfig, BiDirectionalConfig,
                                  ConvolutionConfig], precision: int,
                 optim_cfg: Union[AdamConfig,
                                  SGDConfig], spect_cfg: SpectConfig):
        super().__init__()
        self.save_hyperparameters()
        self.model_cfg = model_cfg
        self.precision = precision
        self.optim_cfg = optim_cfg
        self.spect_cfg = spect_cfg
        self.convolutional = OmegaConf.get_type(model_cfg) is ConvolutionConfig
        self.bidirectional = OmegaConf.get_type(model_cfg) is BiDirectionalConfig

        self.labels = labels

        self.conv = MaskConv(
            nn.Sequential(
                nn.Conv2d(1,
                          32,
                          kernel_size=(41, 11),
                          stride=(2, 2),
                          padding=(20, 5)), nn.BatchNorm2d(32),
                nn.Hardtanh(0, 20, inplace=True),
                nn.Conv2d(32,
                          32,
                          kernel_size=(21, 11),
                          stride=(2, 1),
                          padding=(10, 5)), nn.BatchNorm2d(32),
                nn.Hardtanh(0, 20, inplace=True)))
        # Based on the convolutions above and the spectrogram size, using the conv output formula (W - F + 2P) / S + 1
        rnn_input_size = int(
            math.floor((self.spect_cfg.sample_rate *
                        self.spect_cfg.window_size) / 2) + 1)
        rnn_input_size = int(math.floor(rnn_input_size + 2 * 20 - 41) / 2 + 1)
        rnn_input_size = int(math.floor(rnn_input_size + 2 * 10 - 21) / 2 + 1)
        rnn_input_size *= 32

        if self.convolutional is False:
            self.rnns, self.lookahead, self.fc = self._rnn_construct(
                rnn_input_size)

        else:
            self.deep_conv, self.fc = self._conv_construct(rnn_input_size)

        self.inference_softmax = InferenceBatchSoftmax()
        self.criterion = CTCLoss(blank=self.labels.index('_'),
                                 reduction='sum',
                                 zero_infinity=True)
        self.evaluation_decoder = GreedyDecoder(
            self.labels)  # Decoder used for validation
        self.wer = WordErrorRate(decoder=self.evaluation_decoder,
                                 target_decoder=self.evaluation_decoder)
        self.cer = CharErrorRate(decoder=self.evaluation_decoder,
                                 target_decoder=self.evaluation_decoder)
Example #13
    def __init__(self, params: configargparse.Namespace):
        """
        Calculates Loss, Accuracy, Perplexity Statistics
        :param configargparse.Namespace params: The training options
        """
        super(StatsCalculator, self).__init__()
        self.ignore_label = params.text_pad
        self.char_list = params.char_list
        self.criterion = CrossEntropyLoss(ignore_index=self.ignore_label, reduction="mean")
        self.ctc = CTCLoss(zero_infinity=True)
Example #14
    def __init__(self, freq_dim, output_dim, config):
        super().__init__(freq_dim, config)

        # include the blank token
        self.blank = output_dim
        fc_inp_dim = self.encoder_dim
        if config['encoder']['rnn']['bidirectional']:
            fc_inp_dim *= 2
        self.fc = nn.Linear(fc_inp_dim, output_dim + 1)
        self.loss_func = CTCLoss(blank=self.blank)
Example #15
def main():
    eval_batch_size = config['eval_batch_size']
    cpu_workers = config['cpu_workers']
    reload_checkpoint = config['reload_checkpoint']

    img_height = config['img_height']
    img_width = config['img_width']

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'device: {device}')

    test_dataset = Synth90kDataset(root_dir=config['data_dir'],
                                   mode='test',
                                   img_height=img_height,
                                   img_width=img_width)

    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=eval_batch_size,
                             shuffle=False,
                             num_workers=cpu_workers,
                             collate_fn=synth90k_collate_fn)

    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(1,
                img_height,
                img_width,
                num_class,
                map_to_seq_hidden=config['map_to_seq_hidden'],
                rnn_hidden=config['rnn_hidden'],
                leaky_relu=config['leaky_relu'])
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
    crnn.to(device)

    criterion = CTCLoss(reduction='sum')
    criterion.to(device)

    evaluation = evaluate(crnn,
                          test_loader,
                          criterion,
                          decode_method=config['decode_method'],
                          beam_size=config['beam_size'])
    print('test_evaluation: loss={loss}, acc={acc}'.format(**evaluation))
Example #16
    def __init__(self, hparams, decoder=None, sample_rate=16000):
        super(DeepSpeech, self).__init__()

        self.hparams = hparams
        self.decoder = decoder
        self.criterion = CTCLoss(reduction='sum', zero_infinity=True)
        self.wer = WordErrorRate(decoder=self.decoder,
                                 target_decoder=self.decoder)
        self.cer = CharErrorRate(decoder=self.decoder,
                                 target_decoder=self.decoder)

        self.hidden_size = hparams.hidden_size
        self.hidden_layers = hparams.hidden_layers

        self.conv = MaskConv(
            nn.Sequential(
                nn.Conv2d(1,
                          32,
                          kernel_size=(41, 11),
                          stride=(2, 2),
                          padding=(20, 5)), nn.BatchNorm2d(32),
                nn.Hardtanh(0, 20, inplace=True),
                nn.Conv2d(32,
                          32,
                          kernel_size=(21, 11),
                          stride=(2, 1),
                          padding=(10, 5)), nn.BatchNorm2d(32),
                nn.Hardtanh(0, 20, inplace=True)))
        # Based on the convolutions above and the spectrogram size, using the conv output formula (W - F + 2P) / S + 1
        rnn_input_size = int(
            math.floor((sample_rate * hparams.window_size) / 2) + 1)
        rnn_input_size = int(math.floor(rnn_input_size + 2 * 20 - 41) / 2 + 1)
        rnn_input_size = int(math.floor(rnn_input_size + 2 * 10 - 21) / 2 + 1)
        rnn_input_size *= 32

        rnns = []
        rnn = BatchRNN(input_size=rnn_input_size,
                       hidden_size=self.hidden_size,
                       bidirectional=True,
                       batch_norm=False)
        rnns.append(('0', rnn))
        for x in range(self.hidden_layers - 1):
            rnn = BatchRNN(input_size=self.hidden_size,
                           hidden_size=self.hidden_size,
                           bidirectional=True)
            rnns.append(('%d' % (x + 1), rnn))
        self.rnns = nn.Sequential(OrderedDict(rnns))

        fully_connected = nn.Sequential(
            nn.BatchNorm1d(self.hidden_size),
            nn.Linear(self.hidden_size, hparams.num_classes, bias=False))
        self.fc = nn.Sequential(SequenceWise(fully_connected), )
        self.inference_softmax = InferenceBatchSoftmax()
Example #17
    def __init__(self, paths: Paths) -> None:
        self.paths = paths
        self.writer = SummaryWriter(log_dir=paths.checkpoint_dir /
                                    'tensorboard')
        self.ctc_loss = CTCLoss()

        # Used for generating plots
        longest_id = get_longest_mel_id(dataset_path=self.paths.data_dir /
                                        'dataset.pkl')
        self.longest_mel = np.load(str(paths.mel_dir / f'{longest_id}.npy'),
                                   allow_pickle=False)
        self.longest_tokens = np.load(str(paths.token_dir /
                                          f'{longest_id}.npy'),
                                      allow_pickle=False)
Example #18
def train(num_epochs, model, device, train_loader, val_loader, images, texts,
          lengths, converter, optimizer, lr_scheduler, prediction_dir,
          print_iter):
    criterion = CTCLoss()
    criterion.to(device)
    images = images.to(device)
    model.to(device)
    for epoch in range(num_epochs):
        print(epoch)
        count = 0
        model.train()
        for i, datas in enumerate(train_loader):
            datas, targets = datas
            batch_size = datas.size(0)
            count += batch_size
            dataloader.loadData(images, datas)
            t, l = converter.encode(targets)
            dataloader.loadData(texts, t)
            dataloader.loadData(lengths, l)
            preds = model(images)
            preds_size = Variable(torch.IntTensor([preds.size(0)] *
                                                  batch_size))
            cost = criterion(preds, texts, preds_size, lengths) / batch_size
            model.zero_grad()
            cost.backward()
            optimizer.step()
            if count % print_iter < train_loader.batch_size:
                print('epoch {} [{}/{}] loss: {}'.format(
                    epoch, count, len(train_loader.dataset), cost))

        validation(model, device, val_loader, images, texts, lengths,
                   converter, prediction_dir)

        save_model('{}'.format(epoch), model, optimizer, lr_scheduler)

        lr_scheduler.step()
Example #19
    def __init__(self, args):
        self.batch_size = 1
        self.lr_crnn = args.lr_crnn
        self.lr_prep = args.lr_prep
        self.max_epochs = args.epoch
        self.inner_limit = args.inner_limit
        self.crnn_model_path = args.crnn_model
        self.sec_loss_scalar = args.scalar
        self.ocr_name = args.ocr
        self.std = args.std
        self.is_random_std = args.random_std
        torch.manual_seed(42)

        self.train_set = properties.pos_text_dataset_train
        self.validation_set = properties.pos_text_dataset_dev
        self.input_size = properties.input_size

        self.ocr = get_ocr_helper(self.ocr_name)

        self.char_to_index, self.index_to_char, self.vocab_size = get_char_maps(
            properties.char_set)
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        if self.crnn_model_path == '':
            self.crnn_model = CRNN(self.vocab_size, False).to(self.device)
        else:
            self.crnn_model = torch.load(
                properties.crnn_model_path).to(self.device)
        self.crnn_model.register_backward_hook(self.crnn_model.backward_hook)
        self.prep_model = UNet().to(self.device)

        self.dataset = PatchDataset(
            properties.patch_dataset_train, pad=True, include_name=True)
        self.validation_set = PatchDataset(
            properties.patch_dataset_dev, pad=True)
        self.loader_train = torch.utils.data.DataLoader(
            self.dataset, batch_size=self.batch_size, shuffle=True, drop_last=True, collate_fn=PatchDataset.collate)

        self.train_set_size = len(self.dataset)
        self.val_set_size = len(self.validation_set)

        self.primary_loss_fn = CTCLoss().to(self.device)
        self.secondary_loss_fn = MSELoss().to(self.device)
        self.optimizer_crnn = optim.Adam(
            self.crnn_model.parameters(), lr=self.lr_crnn, weight_decay=0)
        self.optimizer_prep = optim.Adam(
            self.prep_model.parameters(), lr=self.lr_prep, weight_decay=0)
Example #20
def main(config):
    train_loader, eval_loader = get_dataloader(config['data_loader']['type'],
                                               config['data_loader']['args'])

    converter = strLabelConverter(config['data_loader']['args']['alphabet'])
    criterion = CTCLoss(zero_infinity=True)

    model = get_model(config)

    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      val_loader=eval_loader,
                      converter=converter)
    trainer.train()
Example #21
    def __init__(self,
                 model: PhonemeDetector,
                 corpus: CorpusClass,
                 val_corpus: CorpusClass = None,
                 pretraining=False,
                 kl_ratio=0.10,
                 output_directory: str = "models",
                 batch_size: int = 20,
                 lr: float = 3e-5,
                 accumulate_steps: int = 1,
                 total_steps: int = 30000,
                 thaw_after: int = 10000,
                 output_model_every: int = 1000,
                 checkpoint=None,
                 device: str = 'cpu'):

        self.device = device
        self.total_steps = total_steps
        self.pretraining = pretraining
        self.output_model_every = output_model_every
        self.output_directory = output_directory
        self.accumulate_steps = accumulate_steps

        self.batch_size = batch_size
        self.lr = lr
        self.kl_ratio = kl_ratio
        self.accumulate_steps = accumulate_steps
        self.thaw_after = thaw_after

        os.makedirs(output_directory, exist_ok=True)
        self.model = model
        self.model.train()
        self.freeze()
        self.forced_aligner = ForcedAligner(self.model, n_beams=10)

        self.corpus = corpus
        self.val_corpus = val_corpus

        self.loss_fn = CTCLoss()
        self.optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        self.memory_max_length = 300000
        self.epoch = 0

        if checkpoint is not None:
            self.epoch = checkpoint["epoch"]
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
Example #22
    def evaluate_batch(self, batch_data, metric_names):
        x = batch_data["imgs"].to(self.device)
        y = batch_data["labels"].to(self.device)
        x_reduced_len = [s[1] for s in batch_data["imgs_reduced_shape"]]
        y_len = batch_data["labels_len"]

        loss_ctc = CTCLoss(blank=self.dataset.tokens["blank"], reduction="sum")

        with autocast(enabled=self.params["training_params"]["use_amp"]):
            x = self.models["encoder"](x)
            global_pred = self.models["decoder"](x)

        loss = loss_ctc(global_pred.permute(2, 0, 1), y, x_reduced_len, y_len)
        pred = torch.argmax(global_pred, dim=1).cpu().numpy()
        metrics = self.compute_metrics(pred, y.cpu().numpy(), x_reduced_len, y_len, loss=loss.item(), metric_names=metric_names)
        if "pred" in metric_names:
            metrics["pred"].extend([batch_data["unchanged_labels"], batch_data["names"]])
        return metrics
Example #23
def main(config):
    import torch
    from torch.nn import CTCLoss

    from models import get_model
    from data_loader import get_dataloader
    from trainer import Trainer
    from utils import CTCLabelConverter, AttnLabelConverter, load

    if os.path.isfile(config['dataset']['alphabet']):
        config['dataset']['alphabet'] = ''.join(load(config['dataset']['alphabet']))

    prediction_type = config['arch']['args']['prediction']['type']

    # set up the loss
    if prediction_type == 'CTC':
        criterion = CTCLoss(blank=0, zero_infinity=True)
        converter = CTCLabelConverter(config['dataset']['alphabet'])
    elif prediction_type == 'Attn':
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0)
        converter = AttnLabelConverter(config['dataset']['alphabet'])
    else:
        raise NotImplementedError
    img_channel = 3 if config['dataset']['train']['dataset']['args']['img_mode'] != 'GRAY' else 1
    model = get_model(img_channel, len(converter.character), config['arch']['args'])

    img_h, img_w = 32, 100
    for process in config['dataset']['train']['dataset']['args']['pre_processes']:
        if process['type'] == "Resize":
            img_h = process['args']['img_h']
            img_w = process['args']['img_w']
            break
    sample_input = torch.zeros((2, img_channel, img_h, img_w))
    num_label = model.get_batch_max_length(sample_input)
    train_loader = get_dataloader(config['dataset']['train'], num_label)
    assert train_loader is not None
    if 'validate' in config['dataset'] and config['dataset']['validate']['dataset']['args']['data_path'][0] is not None:
        validate_loader = get_dataloader(config['dataset']['validate'], num_label)
    else:
        validate_loader = None

    trainer = Trainer(config=config, model=model, criterion=criterion, train_loader=train_loader, validate_loader=validate_loader, sample_input=sample_input,
                      converter=converter)
    trainer.train()
Example #24
    def __init__(self, args):
        self.ocr_name = args.ocr
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.epochs = args.epoch
        self.std = args.std
        self.ocr = args.ocr
        self.p_samples = args.p
        self.sec_loss_scalar = args.scalar

        self.train_set = properties.vgg_text_dataset_train
        self.validation_set = properties.vgg_text_dataset_dev
        self.input_size = properties.input_size

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.prep_model = UNet().to(self.device)
        self.ocr = get_ocr_helper(self.ocr)

        self.char_to_index, self.index_to_char, self.vocab_size = get_char_maps(
            properties.char_set)

        self.loss_fn = CTCLoss(reduction='none').to(self.device)

        transform = transforms.Compose([
            PadWhite(self.input_size),
            transforms.ToTensor(),
        ])
        self.dataset = ImgDataset(
            self.train_set, transform=transform, include_name=True)
        self.validation_set = ImgDataset(
            self.validation_set, transform=transform, include_name=True)
        self.loader_train = torch.utils.data.DataLoader(
            self.dataset, batch_size=self.batch_size, shuffle=True, drop_last=True)
        self.loader_validation = torch.utils.data.DataLoader(
            self.validation_set, batch_size=self.batch_size, drop_last=True)

        self.val_set_size = len(self.validation_set)
        self.train_set_size = len(self.dataset)

        self.optimizer = optim.Adam(
            self.prep_model.parameters(), lr=self.lr, weight_decay=0)
        self.secondary_loss_fn = MSELoss().to(self.device)
Example #25
    def train_batch(self, batch_data, metric_names):
        x = batch_data["imgs"].to(self.device)
        y = batch_data["labels"].to(self.device)
        x_reduced_len = [s[1] for s in batch_data["imgs_reduced_shape"]]
        y_len = batch_data["labels_len"]

        loss_ctc = CTCLoss(blank=self.dataset.tokens["blank"], reduction="sum")
        self.optimizer.zero_grad()

        with autocast(enabled=self.params["training_params"]["use_amp"]):
            x = self.models["encoder"](x)
            global_pred = self.models["decoder"](x)

        loss = loss_ctc(global_pred.permute(2, 0, 1), y, x_reduced_len, y_len)
        self.backward_loss(loss)
        self.step_optimizer()
        pred = torch.argmax(global_pred, dim=1).cpu().numpy()

        metrics = self.compute_metrics(pred, y.cpu().numpy(), x_reduced_len, y_len, loss=loss.item(), metric_names=metric_names)
        return metrics
Example #26
    def __init__(self, *args, **kwargs):
        super(ModelIinit, self).__init__(*args, **kwargs)

        if self.model_params["model_type"] == "crnn_big_size":

            self.model = crnn_big_size.CRNN(
                nc=self.model_params["num_input_channels"],
                nclass=self.nclass,
                nh=self.model_params["hid_layer_size"])

        self.converter = crnn_utils.strLabelConverter(
            self.model_params["alphabet"])
        self.criterion = CTCLoss(zero_infinity=True).to(
            self.general_params["device"])
        self.model.apply(self.weights_init)
        '''load pretrained weights'''
        path_to_pretrained_model = self.model_params[
            self.model_params["model_type"]]["path_pretrained"]

        if path_to_pretrained_model and os.path.isfile(
                path_to_pretrained_model):
            print('loading pretrained model')
            self.model.load_state_dict(torch.load(path_to_pretrained_model))

        self.model.to(self.general_params["device"])
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=range(
                                               self.general_params["num_gpu"]))
        '''optimizer initialise'''
        if self.model_params["optimizer"] == "Adam":
            self.optimizer = optim.Adam(self.model.parameters(),
                                        lr=self.model_params["adam"]["lr"],
                                        betas=(self.model_params["adam"]["lr"],
                                               0.999))

        elif self.model_params["optimizer"] == "adadelta":
            self.optimizer = optim.Adadelta(self.model.parameters(),
                                            lr=self.model_params["adam"]["lr"])
        else:
            self.optimizer = optim.RMSprop(self.model.parameters(),
                                           lr=self.model_params["adam"]["lr"])
Example #27
    def predict_batch(self, params, metrics_name):
        with torch.no_grad():
            x, y, seq_len, seq_reduced_len, labels_len, _, img_name = params
            x = torch.from_numpy(x).float().permute(0, 3, 1, 2).to(self.device)
            y = torch.from_numpy(y).long().to(self.device)

            for model_name in self.models.keys():
                self.models[model_name].eval()

            global_pred = self.models["end_to_end_model"](x)

            truth = [self.ctc_ind_to_str(i) for i in y]

            pred = [
                self.ctc_decode(pred) for pred in global_pred.permute(0, 2, 1)
            ]

        metrics = {}
        for key in metrics_name:
            if key == "cer":
                metrics[key] = edit_cer_from_list(truth, pred)
            if key == "wer":
                metrics[key] = edit_wer_from_list(truth, pred)
                metrics["nb_words"] = sum([len(t.split(" ")) for t in truth])
            elif key == "pred":
                metrics[key] = pred
            elif key == "ground_truth":
                metrics[key] = truth
            elif key == "diff_len":
                metrics[key] = self.batch_len(truth, pred)
            elif key == "proba":
                metrics[key] = self.batch_probas_to_str(
                    global_pred.cpu().detach().numpy(), img_name)
            elif key == "loss_ctc":
                ctc_loss = CTCLoss(blank=len(self.all_labels))
                metrics[key] = ctc_loss(global_pred.permute(2, 0, 1), y,
                                        seq_reduced_len.tolist(),
                                        labels_len.tolist()).item()

        return metrics
Example #28
def ctc_loss(outputs, targets, mask):
    USE_CUDA = torch.cuda.is_available()
    device = torch.device("cuda:0" if USE_CUDA else "cpu")
    target_lengths = torch.sum(mask, dim=0).to(device)
    # We need to change targets, PAD_token = 0 = blank
    # EOS token -> PAD_token
    targets[targets == EOS_token] = PAD_token
    outputs = outputs.log_softmax(2)
    input_lengths = outputs.size()[0] * torch.ones(outputs.size()[1],
                                                   dtype=torch.int)
    loss_fn = CTCLoss(blank=PAD_token, zero_infinity=True)
    targets = targets.transpose(1, 0)
    # target_lengths include the EOS token, so subtract one
    target_lengths = target_lengths - 1
    targets = targets[:, :-1]
    # print(input_lengths, target_lengths)
    torch.backends.cudnn.enabled = False
    # TODO: loss becomes NaN when target_length > input_length; we can increase the input size or use zero_infinity
    loss = loss_fn(outputs, targets, input_lengths, target_lengths)
    torch.backends.cudnn.enabled = True

    return loss, loss.item()
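For context, here is a hypothetical call to this helper on a CPU-only run, using placeholder values PAD_token = 0 and EOS_token = 2 to stand in for the module-level constants the function relies on (all shapes and ids below are illustrative assumptions):

    import torch

    PAD_token, EOS_token = 0, 2                  # assumed special-token ids
    T, N, C, S = 30, 4, 50, 12                   # time steps, batch size, classes, target length
    outputs = torch.randn(T, N, C)               # raw decoder outputs, (T, N, C)
    targets = torch.randint(3, C, (S, N))        # (S, N); values avoid the special ids
    targets[-1, :] = EOS_token                   # every sequence ends with EOS
    mask = torch.ones(S, N, dtype=torch.long)    # all target positions are valid here

    loss, loss_value = ctc_loss(outputs, targets, mask)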
Example #29
    def train_batch(self, batch_data, metric_names):
        x = batch_data["imgs"].to(self.device)
        y = batch_data["labels"].to(self.device)
        y_len = batch_data["labels_len"]
        str_y = batch_data["raw_labels"]
        loss = 0

        loss_ctc = CTCLoss(blank=self.dataset.tokens["blank"],
                           reduction="mean")
        self.optimizer.zero_grad()

        with autocast(enabled=self.params["training_params"]["use_amp"]):
            global_pred = self.models["decoder"](self.models["encoder"](x))

            ind_x = list()
            b, c, h, w = global_pred.size()

            for i in range(b):
                x_h, x_w = batch_data["imgs_reduced_shape"][i][:2]
                pred = global_pred[i, :, :x_h, :x_w]
                pred = pred.reshape(1, c, x_h * x_w)
                torch.backends.cudnn.enabled = False
                loss += loss_ctc(pred.permute(2, 0, 1), y[i].unsqueeze(0), [
                    x_h * x_w,
                ], [
                    y_len[i],
                ])
                torch.backends.cudnn.enabled = True
                ind_x.append(torch.argmax(pred, dim=1).cpu().numpy()[0])

        del global_pred
        self.backward_loss(loss)
        self.step_optimizer()
        metrics = self.compute_metrics(ind_x,
                                       str_y,
                                       loss=loss.item(),
                                       metric_names=metric_names)
        return metrics
Example #30
    def __init__(self, opt, log_root='./'):
        super(AdversarialModel, self).__init__(opt, log_root)

        device = self.device
        self.lexicon = get_lexicon(self.opt.training.lexicon,
                                   get_true_alphabet(opt.dataset),
                                   max_length=self.opt.training.max_word_len)
        self.max_valid_image_width = self.opt.char_width * self.opt.training.max_word_len
        self.noise_dim = self.opt.GenModel.style_dim - self.opt.EncModel.style_dim

        generator = Generator(**opt.GenModel).to(device)
        style_encoder = StyleEncoder(**opt.EncModel).to(device)
        writer_identifier = WriterIdentifier(**opt.WidModel).to(device)
        discriminator = Discriminator(**opt.DiscModel).to(device)
        recognizer = Recognizer(**opt.OcrModel).to(device)
        self.models = Munch(G=generator,
                            D=discriminator,
                            R=recognizer,
                            E=style_encoder,
                            W=writer_identifier)

        self.ctc_loss = CTCLoss(zero_infinity=True, reduction='mean')
        self.classify_loss = CrossEntropyLoss()