Beispiel #1
0
def main(opt):
    """Restore the model named in ``opt`` from a checkpoint and evaluate it on the test split."""
    dataset = VideoDataset(opt, "test")
    # Vocabulary size and caption length come from the dataset itself.
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len

    arch = opt["model"]
    if arch == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          rnn_dropout_p=opt["rnn_dropout_p"]).cuda()
    elif arch == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder).cuda()
    elif arch == "CTCmodel":
        model = CTCmodel(opt['vocab_size'] + 1, opt['dim_hidden'])

    # Restore saved weights and move model plus criterion onto the GPU.
    model.load_state_dict(torch.load(opt["saved_model"]))
    model.cuda()
    crit = CTCLoss().cuda()

    test(model, crit, dataset, dataset.get_vocab(), opt)
    def __init__(self, model, loader, val_loader, test_loader, max_epochs=1, run_id='exp'):
        """Training harness: wires up the model, data loaders, CTC loss,
        optimizer, LR scheduler and beam decoder.
        """
        # Move the model (and later the criterion) to GPU when one is available.
        use_cuda = torch.cuda.is_available()
        self.model = model.cuda() if use_cuda else model
        self.loader = loader
        self.val_loader = val_loader
        self.test_loader = test_loader

        # Per-split bookkeeping for losses and generated outputs.
        self.train_losses = []
        self.val_losses = []
        self.predictions = []
        self.predictions_test = []
        self.generated_logits = []
        self.generated = []
        self.generated_logits_test = []
        self.generated_test = []

        self.epochs = 0
        self.max_epochs = max_epochs
        self.run_id = run_id

        self.optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
        self.criterion = CTCLoss()
        self.criterion = self.criterion.cuda() if use_cuda else self.criterion
        # Shrink the LR when validation stops improving.
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, factor=0.1, patience=2)
        self.LD = Levenshtein(phoneme_list.PHONEME_MAP)
        self.best_rate = 1e10  # best (lowest) distance seen so far
        self.decoder = CTCBeamDecoder(labels=[' '] + phoneme_list.PHONEME_MAP, blank_id=0, beam_width=150)
Beispiel #3
0
def main(args):
    """Train a digits recognizer (LSTM features with --rnn, otherwise DenseNet) under CTC loss."""
    # Both branches share the same normalization/resize; only the final tensor
    # conversion differs — the RNN path flattens the RGB image per column.
    final_op = ToTensorRGBFlatten() if args.rnn else ToTensor()
    transform = transforms.Compose([
        Normalize([0.3956, 0.5763, 0.5616], [0.1535, 0.1278, 0.1299]),
        Resize((204, 32)),
        final_op
    ])

    train_set = digitsDataset(args.train_root_path, transform=transform)
    val_set = digitsDataset(args.val_root_path, transform=transform)
    loader_kwargs = dict(batch_size=args.batch_size, num_workers=4, pin_memory=True)
    train_loader = DataLoader(train_set, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_set, shuffle=False, **loader_kwargs)

    # Bundle the trainer hyper-parameters.
    params = EasyDict()
    for key in ('max_epoch', 'print_freq', 'validate_interval',
                'save_interval', 'expr_path', 'rnn'):
        setattr(params, key, getattr(args, key))
    device = torch.device("cuda")

    # Model, criterion and optimizer.
    ntoken = len(args.alphabet) + 1  # +1 for the CTC blank symbol
    if args.rnn:
        input_dim = 96
        model = LSTMFeatures(input_dim, args.batch_size, ntoken)
    else:
        model = DenseNetFeature(num_classes=ntoken)
    model = model.to(device)
    criterion = CTCLoss().to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    converter = LabelConverter(args.alphabet)

    # Hand everything to the solver and start training.
    solver = SolverWrapper(params)
    solver.train(train_loader, val_loader, model, criterion, optimizer, device,
                 converter)
Beispiel #4
0
    def __init__(self, train_path, test_path, model_file, model, img_h=32, img_w=110, batch_size=64, lr=1e-3,
                 use_unicode=True, best_loss=0.2, use_gpu=True, workers=1):
        """CRNN training wrapper: checkpoint restore, data pipelines, CTC loss and optimizer."""
        self.model = model
        self.model_file = model_file
        self.use_unicode = use_unicode
        self.img_h = img_h
        self.img_w = img_w
        self.batch_size = batch_size
        self.lr = lr
        self.best_loss = best_loss
        self.best_acc = 0.95  # accuracy threshold tracked across runs
        self.use_gpu = use_gpu
        self.workers = workers

        self.converter = utils.strLabelConverter(alphabet)
        self.criterion = CTCLoss()

        if self.use_gpu:
            print("[use gpu] ...")
            self.model = self.model.cuda()
            self.criterion = self.criterion.cuda()
        if torch.cuda.is_available() and not self.use_gpu:
            print("[WARNING] You have a CUDA device, so you should probably run with --cuda")

        # Restore model weights from disk when a checkpoint file exists.
        if os.path.exists(self.model_file):
            self.load(self.model_file)
        else:
            print('[Load model] error !!!')

        self.transform = T.Compose([
            T.Resize((self.img_h, self.img_w)),
            T.ToTensor(),
        ])

        # Train and test loaders share identical dataset plumbing; only the
        # root directory, train flag and shuffling differ.
        def _make_loader(root, is_train, shuffle):
            label_file = os.path.join(root, 'labels_normal.txt')
            ds = my_dataset.MyDataset(root=root, label_file=label_file, transform=self.transform,
                                      is_train=is_train, img_h=self.img_h, img_w=self.img_w)
            return torch.utils.data.DataLoader(dataset=ds, batch_size=self.batch_size,
                                               shuffle=shuffle, num_workers=int(self.workers))

        self.train_loader = _make_loader(train_path, True, True)
        self.test_loader = _make_loader(test_path, False, False)

        # Adam with light weight decay (alternatives such as Adadelta/RMSprop
        # were dropped from the original code).
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=1e-5)
Beispiel #5
0
def main(model_path, confs):
    """Evaluate a saved MultiTask model on every configured test manifest.

    Chooses the criterion matching the model's active heads (CrossEntropy for
    accent classification, CTC for transcription, or both), then prints either
    a readable report or a LaTeX table row depending on ``PRINT_LATEX_TABLE``.
    """
    model, __ = MultiTask.load_model(model_path)
    if confs['cuda']:
        model = model.cuda()

    # Pick the criterion matching the model's output heads.
    if not model._meta['use_transcripts_out']:  # only accent classification
        criterion = nn.CrossEntropyLoss()
    elif not model._meta['use_accents_out']:  # only text recognition
        criterion = CTCLoss()
    else:  # both tasks
        criterion = (CTCLoss(), nn.CrossEntropyLoss())

    # Results, keyed by manifest file name.
    results = {}
    for manifest, lm in confs['testing_manifests']:
        eprint(f'\n### Testing {manifest.split("/")[-1]} for model {Path(model_path).stem.split("_")[0]}')

        # A beam-search decoder (optionally LM-rescored) is only needed when
        # the model actually emits transcripts.
        if model._meta['use_transcripts_out']:
            decoder = BeamCTCDecoder(confs['labels'],
                                     lm_path=lm,
                                     alpha=confs['decoder_alpha'],
                                     beta=confs['decoder_beta'],
                                     cutoff_top_n=confs['decoder_cutoff_top_n'],
                                     # BUG FIX: cutoff_prob was fed the *top_n*
                                     # config value (copy-paste). Use the
                                     # dedicated key when present, keeping the
                                     # old value as a backward-compatible
                                     # fallback.
                                     cutoff_prob=confs.get('decoder_cutoff_prob',
                                                           confs['decoder_cutoff_top_n']),
                                     beam_width=confs['decoder_beam_width'],
                                     num_processes=confs['num_workers'])

            target_decoder = GreedyDecoder(confs['labels'])
        else:
            decoder, target_decoder = None, None

        # Test
        results[manifest.split('/')[-1]] = result_for_manifest(model, criterion, manifest, decoder, target_decoder, confs['batch_size'], confs['num_workers'])

    if not PRINT_LATEX_TABLE:
        print(f'Model: {model_path.split("/")[-1]}')
        for name, res in results.items():
            print(f'\nResults for {name}:')
            print('; '.join([f'{k}: {v:.3f}' for k, v in res.items()]))
    else:
        # Header row: manifest names without their '.csv' suffix, then one
        # value row per metric prefixed with the model identifier.
        print(' & '.join(['model'] + list([k[:-4] for k in results.keys()])))
        val_dict = {}
        for k in list(results.values())[0].keys():
            val_dict[k] = []
        for res in results.values():
            [val_dict[k].append(f'{v:.1f}') for k, v in res.items()]
        for val in val_dict.values():
            print(' & '.join([Path(model_path).stem.split('_')[0]] + val) + r' \\')
Beispiel #6
0
    def __init__(self):
        """Build the CRNN trainer: alphabet, network, loaders, CTC loss,
        optimizer, converter and (optional) pretrained-checkpoint restore.
        """
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
        if args.chars_file == '':
            self.alphabet = alphabetChinese
        else:
            self.alphabet = utils.load_chars(args.chars_file)
        nclass = len(self.alphabet) + 1  # +1 for the CTC blank
        nc = 1  # single-channel (grayscale) input
        self.net = CRNN(args.imgH, nc, args.nh, nclass)
        self.train_dataloader, self.val_dataloader = self.dataloader(
            self.alphabet)
        self.criterion = CTCLoss()
        self.optimizer = self.get_optimizer()
        self.converter = utils.strLabelConverter(self.alphabet,
                                                 ignore_case=False)
        self.best_acc = 0.00001

        model_name = '%s' % (args.dataset_name)
        if not os.path.exists(args.save_prefix):
            os.mkdir(args.save_prefix)
        args.save_prefix += model_name

        if args.pretrained != '':
            print('loading pretrained model from %s' % args.pretrained)
            checkpoint = torch.load(args.pretrained)

            # Full training checkpoints carry epoch/accuracy alongside the
            # weights; unwrap them before loading the state dict.
            if 'model_state_dict' in checkpoint.keys():
                # self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
                args.start_epoch = checkpoint['epoch']
                self.best_acc = checkpoint['best_acc']
                checkpoint = checkpoint['model_state_dict']

            # Strip the 'module.' prefix that DataParallel adds to key names.
            from collections import OrderedDict
            model_dict = OrderedDict()
            for k, v in checkpoint.items():
                if 'module' in k:
                    model_dict[k[7:]] = v
                else:
                    model_dict[k] = v
            self.net.load_state_dict(model_dict)

        if not args.cuda and torch.cuda.is_available():
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )

        elif args.cuda and torch.cuda.is_available():
            print('available gpus is ', torch.cuda.device_count())
            # BUG FIX: torch.nn.DataParallel has no ``output_dim`` keyword — the
            # original raised TypeError. The intended argument is ``dim`` (the
            # axis to scatter/gather on); CRNN outputs (T, batch, nclass), so
            # the batch axis is dim=1.
            self.net = torch.nn.DataParallel(self.net, dim=1).cuda()
            self.criterion = self.criterion.cuda()
Beispiel #7
0
def initiate(hyp_params, train_loader, valid_loader, test_loader):
    """Assemble model, criteria, optimizers and scheduler, then launch training."""
    model = getattr(models, hyp_params['model'] + 'Model')(hyp_params)

    if hyp_params['use_cuda']:
        model = model.cuda()

    make_optimizer = getattr(optim, hyp_params['optim'])
    optimizer = make_optimizer(model.parameters(), lr=hyp_params['lr'])
    criterion = getattr(nn, hyp_params['criterion'])()

    # CTC alignment modules are only needed for unaligned, non-MULT setups.
    needs_ctc = not (hyp_params['aligned'] or hyp_params['model'] == 'MULT')
    if needs_ctc:
        from warpctc_pytorch import CTCLoss
        ctc_criterion = CTCLoss()
        ctc_a2l_module, ctc_v2l_module = get_CTC_module(hyp_params)
        if hyp_params['use_cuda']:
            ctc_a2l_module, ctc_v2l_module = ctc_a2l_module.cuda(), ctc_v2l_module.cuda()
        ctc_a2l_optimizer = make_optimizer(ctc_a2l_module.parameters(), lr=hyp_params['lr'])
        ctc_v2l_optimizer = make_optimizer(ctc_v2l_module.parameters(), lr=hyp_params['lr'])
    else:
        ctc_criterion = None
        ctc_a2l_module, ctc_v2l_module = None, None
        ctc_a2l_optimizer, ctc_v2l_optimizer = None, None

    scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=hyp_params['when'], factor=0.1, verbose=True)
    settings = {
        'model': model,
        'optimizer': optimizer,
        'criterion': criterion,
        'ctc_a2l_module': ctc_a2l_module,
        'ctc_v2l_module': ctc_v2l_module,
        'ctc_a2l_optimizer': ctc_a2l_optimizer,
        'ctc_v2l_optimizer': ctc_v2l_optimizer,
        'ctc_criterion': ctc_criterion,
        'scheduler': scheduler,
    }
    return train_model(settings, hyp_params, train_loader, valid_loader, test_loader)
Beispiel #8
0
def test_seg_ctc(use_mine=True, use_log=False):
    """Optimize random CTC inputs for 50 steps, using either our entropy-CTC
    cost (``use_mine``) or the warpctc reference implementation.
    """
    size = 43        # number of input time steps
    voca_size = 37   # label vocabulary (labels are 1..voca_size; 0 is blank)
    n = 20           # batch size

    np.random.seed(1234)
    pred_len_np = np.ones([n])*size
    pred_np = np.random.random([size, n, voca_size+1])
    pred_np = np.log(pred_np)

    # Random target lengths in [2, 10) and a flat vector of random labels.
    token_len_np = np.random.randint(low=2, high=10, size=n)
    token_np = np.random.randint(voca_size, size=token_len_np.sum())+1

    pred = Variable(floatX(pred_np), requires_grad=True)
    token = Variable(T.IntTensor(token_np))
    sizes = Variable(T.IntTensor(pred_len_np))
    target_sizes = Variable(T.IntTensor(token_len_np))

    # BUG FIX: the optimizer (and the warpctc criterion) used to be rebuilt on
    # every loop iteration, which discarded Adam's moment estimates each step.
    # Construct them once, before the loop.
    optimizer = T.optim.Adam([pred], lr=3e-1)
    if not use_mine:
        from warpctc_pytorch import CTCLoss
        criterion = CTCLoss().cuda()

    for i in range(50):
        if use_mine:
            H, cost = ctc_ent_cost(pred, token, sizes, target_sizes, use_log=use_log)
        else:
            cost = criterion(pred, token, sizes, target_sizes)

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
Beispiel #9
0
def test_ctc_loss():
    """Cross-check chainer's built-in CTC loss against warpctc_pytorch on
    identical random inputs; skipped when torch/warpctc are unavailable.
    """
    pytest.importorskip("torch")
    pytest.importorskip("warpctc_pytorch")
    import torch
    from warpctc_pytorch import CTCLoss

    from e2e_asr_attctc_th import pad_list

    n_out = 7
    n_batch = 3
    input_length = numpy.array([11, 17, 15], dtype=numpy.int32)
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)
    # One (time, n_out) float activation matrix and one integer target
    # sequence per batch element, with per-sample lengths above.
    np_pred = [numpy.random.rand(il, n_out).astype(
        numpy.float32) for il in input_length]
    np_target = [numpy.random.randint(
        0, n_out, size=ol, dtype=numpy.int32) for ol in label_length]

    # NOTE: np_pred[i] seems to be transposed and used axis=-1 in e2e_asr_attctc.py
    # Chainer path: pad to a common length, split per time step, blank id 0.
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_loss = F.connectionist_temporal_classification(
        ch_pred, ch_target, 0, input_length, label_length).data

    # warpctc path: (T, batch, n_out) activations, a flat concatenated target
    # vector, and explicit input/label length tensors.
    th_pred = pad_list([torch.autograd.Variable(torch.from_numpy(x))
                        for x in np_pred]).transpose(0, 1)
    th_target = torch.autograd.Variable(
        torch.from_numpy(numpy.concatenate(np_target)))
    th_ilen = torch.autograd.Variable(torch.from_numpy(input_length))
    th_olen = torch.autograd.Variable(torch.from_numpy(label_length))
    # NOTE: warpctc_pytorch.CTCLoss does not normalize itself by batch-size while chainer's default setting does
    th_loss = (CTCLoss()(th_pred, th_target, th_ilen,
                         th_olen) / n_batch).data.numpy()[0]
    numpy.testing.assert_allclose(th_loss, ch_loss, 0.05)
Beispiel #10
0
def train(
    model,
    epochs=110,
    batch_size=128,
    train_index_path=TRAIN_PATH,
    dev_index_path=DEV_PATH,
    labels_path=LABEL_PATH,
    learning_rate=0.6,
    momentum=0.8,
    max_grad_norm=0.2,
    weight_decay=0,
):
    """Train ``model`` with CTC loss on the MASR dataset.

    Uses SGD with Nesterov momentum and gradient clipping, evaluates CER on
    the dev set every epoch, and saves a full-model checkpoint every 5 epochs
    to ``pretrained/`` (which must already exist).
    """
    train_dataset = data.MASRDataset(train_index_path, labels_path)
    # Number of batches per epoch (ceiling division).
    batchs = (len(train_dataset) + batch_size - 1) // batch_size
    dev_dataset = data.MASRDataset(dev_index_path, labels_path)
    train_dataloader = data.MASRDataLoader(train_dataset,
                                           batch_size=batch_size,
                                           num_workers=0)
    train_dataloader_shuffle = data.MASRDataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   num_workers=0,
                                                   shuffle=True)
    dev_dataloader = data.MASRDataLoader(dev_dataset,
                                         batch_size=batch_size,
                                         num_workers=0)
    parameters = model.parameters()
    optimizer = torch.optim.SGD(
        parameters,
        lr=learning_rate,
        momentum=momentum,
        nesterov=True,
        weight_decay=weight_decay,
    )
    ctcloss = CTCLoss(size_average=True)
    # lr_sched = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.985)

    gstep = 0
    for epoch in range(epochs):
        epoch_loss = 0
        # The first epoch runs in dataset order; later epochs use shuffling.
        if epoch > 0:
            train_dataloader = train_dataloader_shuffle
        # lr_sched.step()
        for i, (x, y, x_lens, y_lens) in enumerate(train_dataloader):
            x = x.cuda()
            out, out_lens = model(x, x_lens)
            # Rearrange output axes to (2, 0, 1) of the model's layout so CTC
            # sees time first. NOTE(review): assumes the model emits
            # (batch, classes, time) — confirm against the model definition.
            out = out.transpose(0, 1).transpose(0, 2)
            loss = ctcloss(out, y, out_lens, y_lens)
            optimizer.zero_grad()
            loss.backward()
            # Clip gradients to stabilize CTC training.
            nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            epoch_loss += loss.item()
            gstep += 1
            print("[{}/{}][{}/{}]\tLoss = {}".format(epoch + 1, epochs, i,
                                                     int(batchs), loss.item()))
        epoch_loss = epoch_loss / batchs
        # ``eval`` is the project's evaluation helper (not the builtin);
        # presumably returns a scalar CER here — TODO confirm its signature.
        cer = eval(model, dev_dataloader)
        print("Epoch {}: Loss= {}, CER = {}".format(epoch, epoch_loss, cer))
        if (epoch + 1) % 5 == 0:
            torch.save(model, "pretrained/model_{}.pth".format(epoch))
Beispiel #11
0
 def __init__(self, odim, eprojs, dropout_rate):
     """Set up the CTC output layer and its warp-ctc loss function."""
     super(CTC, self).__init__()
     # Deferred import: warpctc_pytorch is an optional dependency.
     from warpctc_pytorch import CTCLoss
     self.dropout_rate = dropout_rate
     self.loss = None  # holds the most recently computed loss
     # Project encoder features (eprojs) to the output vocabulary (odim).
     self.ctc_lo = torch.nn.Linear(eprojs, odim)
     self.loss_fn = CTCLoss()
Beispiel #12
0
    def __init__(self, classes, class_agnostic):
        """Faster R-CNN with an OCR branch: RPN, RoI pooling/align/crop layers,
        and a CTC-trained bidirectional-LSTM head for reading digit strings.

        :param classes: detection class names (count defines n_classes)
        :param class_agnostic: whether bbox regression is shared across classes
        """
        super(_fasterRCNN_OCR, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss accumulators, filled during forward/backward
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        # NOTE(review): self.dout_base_model is expected to be provided by a
        # subclass (backbone-specific) — confirm.
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        # self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)
        self.RCNN_ocr_roi_pooling = roi_pooling(2)  # ocr roi_pooling

        # Double the sampling grid when crop+resize is followed by max-pool.
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        # RNN head for OCR: 256 hidden units; classes are digits + '.' plus
        # one extra slot for the CTC blank.
        nh = 256
        nclass = len('0123456789.') + 1
        self.rnn = nn.Sequential(BidirectionalLSTM(1024, nh, nh),
                                 BidirectionalLSTM(nh, nh, nclass))
        self.ctc_critition = CTCLoss().cuda()
Beispiel #13
0
    def __init__(self, oracle, alphabet, image_shape, target, file_weights):
        """Adversarial-attack state: a trainable perturbation ``delta``, a CTC
        criterion to score transcriptions, and Adam optimizing delta only.
        """
        self.learning_rate = 0.001
        self.num_iterations = 5000
        self.batch_size = bs = 1
        self.phrase_length = len(target)
        self.o_imW, self.o_imH = image_shape
        # Model input size comes from module-level imgW/imgH constants.
        self.i_imW, self.i_imH = imgW, imgH
        self.oracle = oracle
        self.weights = file_weights

        # Random additive noise at the original image size; this is the single
        # tensor the attack optimizes.
        noise = torch.rand((1, self.o_imH, self.o_imW))
        if torch.cuda.is_available():
            noise = noise.cuda()
        self.delta = Variable(noise, requires_grad=True)

        # Optimize delta alone, scoring with CTC loss.
        ctcloss = CTCLoss()
        self.optimizer = optim.Adam([self.delta],
                                    lr=self.learning_rate,
                                    betas=(0.9, 0.999))

        self.loss = ctcloss
        self.ctcloss = ctcloss
        self.target = target
        self.converter = utils.strLabelConverter(alphabet, attention=False)
Beispiel #14
0
def eval(model, dataloader):
    """Evaluate ``model`` on ``dataloader``; return (average CER, average CTC loss).

    Switches the model to eval mode for the duration and restores train mode
    before returning.
    """
    model.eval()
    decoder = GreedyDecoder(dataloader.dataset.labels_str)
    ctcloss = CTCLoss(size_average=True)
    cer = 0
    epoch_loss = 0
    print("decoding")
    with torch.no_grad():
        for i, (x, y, x_lens, y_lens) in tqdm(enumerate(dataloader)):
            x = x.to(device)
            outs, out_lens = model(x, x_lens)
            # Rearrange model output so the time axis is first, as CTC expects.
            loss = ctcloss(
                outs.transpose(0, 1).transpose(0, 2), y, out_lens, y_lens)
            epoch_loss += loss.item()
            outs = F.softmax(outs, 1)
            outs = outs.transpose(1, 2)
            # Split the flat label vector back into per-sample targets.
            ys = []
            offset = 0
            for y_len in y_lens:
                ys.append(y[offset:offset + y_len])
                offset += y_len
            out_strings, out_offsets = decoder.decode(outs, out_lens)
            y_strings = decoder.convert_to_strings(ys)
            for pred, truth in zip(out_strings, y_strings):
                trans, ref = pred[0], truth[0]
                # BUG FIX: an empty reference transcript used to raise
                # ZeroDivisionError (the original comment hinted at it);
                # skip such samples instead of crashing.
                if len(ref) > 0:
                    cer += decoder.cer(trans, ref) / float(len(ref))
        cer /= len(dataloader.dataset)
        epoch_loss /= i + 1
    model.train()
    return cer, epoch_loss
Beispiel #15
0
def test_seg_ctc(use_mine=True):
    """Optimize a small fixed prediction tensor under CTC cost for 40 steps,
    using either our implementation (``use_mine``) or warpctc as reference.
    """
    # (T, 1, voca_size+1) log-probabilities, tiled to batch size 2.
    pred_np = np.array([[0.5, 0.4, 0.1], [0.3, 0.1, 0.6], [0.7, 0.2, 0.1],
                        [0.3, 0.5, 0.2]])[:, None]
    pred_np = np.log(np.tile(pred_np, (1, 2, 1)))
    #    pred_np = np.random.random((4,2,3))
    # (U) target token sequence
    token_np = np.array([2, 2, 1, 2])

    pred = Variable(floatX(pred_np), requires_grad=True)
    token = Variable(T.IntTensor(token_np))
    sizes = Variable(T.IntTensor(np.array([4, 4])))
    target_sizes = Variable(T.IntTensor(np.array([2, 2])))

    # BUG FIX: the optimizer (and warpctc criterion) were rebuilt inside the
    # loop, resetting the Nesterov momentum buffer on every step. Build once.
    optimizer = T.optim.SGD([pred], lr=3e-2, momentum=0.9, nesterov=True)
    if not use_mine:
        from warpctc_pytorch import CTCLoss
        criterion = CTCLoss().cuda()

    for i in range(40):
        if use_mine:
            cost = ctc_cost(pred, token, sizes, target_sizes)
        else:
            cost = criterion(pred, token, sizes, target_sizes)
        # BUG FIX: Python 2 ``print`` statements were a SyntaxError under
        # Python 3; use the print() function.
        print(cost.data.item())

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
Beispiel #16
0
    def forward(self, model, x, labels_flatten, img_seq_lens, label_lens,
                batch_size):
        """Virtual-adversarial-training style loss for a CTC model.

        Estimates an adversarial direction by ``self.ip`` power-iteration
        steps, then returns the local distributional smoothness (LDS): the
        per-sample CTC loss at ``x`` perturbed by ``eps`` in that direction.

        NOTE(review): assumes ``self.ip >= 1`` — with ip == 0, ``loss_function``
        and ``d.grad`` would be unset before use. TODO confirm.
        """
        with _disable_tracking_bn_stats(model):
            # calc adversarial direction
            # prepare random unit tensor
            d = torch.rand(x.shape).to(
                torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
            d = _l2_normalize(d)
            for _ in range(self.ip):
                d.requires_grad_()
                loss_function = CTCLoss()
                # CTC loss on the input nudged by a tiny (xi-scaled) step along d.
                preds = model.forward(x + self.xi * d, img_seq_lens)
                adv_loss_ctc = loss_function(
                    preds, labels_flatten,
                    Variable(torch.IntTensor(np.array(img_seq_lens))),
                    label_lens) / batch_size

                adv_loss_ctc.backward()
                # Gradient w.r.t. d approximates the dominant adversarial direction.
                d = d.grad
                model.zero_grad()

            # calc LDS
            # Full-strength perturbation: sign of the direction scaled by eps.
            r_adv = torch.sign(d) * self.eps

            pred_hat = model.forward(x + r_adv, img_seq_lens)
            lds = loss_function(
                pred_hat, labels_flatten,
                Variable(torch.IntTensor(np.array(img_seq_lens))),
                label_lens) / batch_size

        return lds
 def __init__(self):
     """Wire up model, CTC loss, greedy decoder, ASGD optimizer and LR scheduler."""
     self.model = get_model().cuda()
     self.ctc_loss = CTCLoss(size_average=True)
     self.decoder = Decoder()
     # ASGD was chosen over Adam; rates/decay come from the configs module.
     self.optimizer = optim.ASGD(self.model.parameters(),
                                 lr=configs.lr,
                                 weight_decay=configs.l2_weight_decay)
     # Shrink the LR when the monitored metric stops decreasing.
     self.lr_scheduler = lr_scheduler.ReduceLROnPlateau(
         self.optimizer,
         'min',
         patience=configs.lr_scheduler_patience,
         factor=configs.lr_scheduler_factor,
         verbose=True)
     self.epoch_idx = 0
     # presumably the best (lowest) average edit distance so far — TODO confirm
     self.min_avg_dist = 1000.
Beispiel #18
0
def main(opts):
  """Train the E2E-CRNN recognizer with CTC loss on the ICDAR2015 E2E dataset."""
  # alphabet = '0123456789.'
  nclass = len(alphabet) + 1  # +1 for the CTC blank
  model_name = 'E2E-CRNN'
  net = OwnModel(attention=True, nclass=nclass)
  print("Using {0}".format(model_name))

  if opts.cuda:
    net.cuda()
  learning_rate = opts.base_lr
  # NOTE(review): the original built two optimizers back to back; only the
  # second was ever used, so the redundant first one has been dropped.
  optimizer = optim.Adam(net.parameters(), lr=opts.base_lr, betas=(0.5, 0.999))
  step_start = 0

  # Resume from a saved checkpoint when one exists.
  if os.path.exists(opts.model):
    print('loading model from %s' % args.model)
    step_start, learning_rate = net_utils.load_net(args.model, net, optimizer)

  # ICDAR2015 end-to-end dataset.
  e2edata = E2Edataset(train_list=opts.train_list)
  e2edataloader = torch.utils.data.DataLoader(e2edata, batch_size=opts.batch_size, shuffle=True, collate_fn=E2Ecollate, num_workers=4)

  net.train()

  converter = strLabelConverter(alphabet)
  ctc_loss = CTCLoss()

  for step in range(step_start, opts.max_iters):

    for index, date in enumerate(e2edataloader):
      im_data, gtso, lbso = date
      im_data = im_data.cuda()

      # BUG FIX: the try/except bodies were mis-indented in the original
      # (IndentationError); a failing batch is logged and skipped.
      try:
        loss = process_crnn(im_data, gtso, lbso, net, ctc_loss, converter, training=True)

        net.zero_grad()
        loss.backward()
        optimizer.step()
      except Exception:
        import sys, traceback
        traceback.print_exc(file=sys.stdout)
        continue

      if index % disp_interval == 0:
        # BUG FIX: this ``if`` had no body in the original (SyntaxError);
        # report training progress here.
        print('step: %d, index: %d, loss: %s' % (step, index, loss.item()))
Beispiel #19
0
 def __init__(self):
     """GPU warp-ctc criterion plus bookkeeping fields reset to their initial state."""
     super(CTC, self).__init__()
     self.criterion = CTCLoss().cuda()
     # Cached values filled in during later forward passes.
     self.softmax = None
     self.label = None
     # All integer counters start at zero.
     self.len = self.times = self.count = 0
Beispiel #20
0
    def __init__(self, alphabets=None, nh=256):
        """OCR head: two stacked bidirectional LSTMs feeding a GPU warp-ctc criterion.

        :param alphabets: character set; output size is len(alphabets) + 1 (CTC blank)
        :param nh: hidden size of the recurrent layers (default 256)
        """
        super(OcrLoss, self).__init__()

        num_classes = len(alphabets) + 1
        self.rnn = nn.Sequential(
            BidirectionalLSTM(1024, nh, nh),
            BidirectionalLSTM(nh, nh, num_classes),
        )
        self.ctc_critition = CTCLoss().cuda()
Beispiel #21
0
    def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
        """
        Compute the gradient of the loss function w.r.t. `x`.

        :param x: Samples of shape (nb_samples, seq_length). Note that, it is allowable that sequences in the batch
                  could have different lengths. A possible example of `x` could be:
                  `x = np.array([np.array([0.1, 0.2, 0.1, 0.4]), np.array([0.3, 0.1])])`.
        :param y: Target values of shape (nb_samples). Each sample in `y` is a string and it may possess different
                  lengths. A possible example of `y` could be: `y = np.array(['SIXTY ONE', 'HELLO'])`.
        :return: Loss gradients of the same shape as `x`.
        """
        from warpctc_pytorch import CTCLoss

        # Work on a copy so the caller's array is never mutated.
        x_ = x.copy()

        # Put the model in the training mode
        self._model.train()

        # Apply preprocessing
        x_preprocessed, y_preprocessed = self._apply_preprocessing(x_, y, fit=False)

        # Transform data into the model input space
        # NOTE(review): compute_gradient=True presumably makes the returned
        # inputs track gradients so .grad is populated below — confirm.
        inputs, targets, input_rates, target_sizes, batch_idx = self.transform_model_input(
            x=x_preprocessed, y=y_preprocessed, compute_gradient=True
        )

        # Compute real input sizes
        input_sizes = input_rates.mul_(inputs.size()[-1]).int()

        # Call to DeepSpeech model for prediction
        outputs, output_sizes = self._model(inputs.to(self._device), input_sizes.to(self._device))
        # CTC expects the time axis first: (T, batch, classes).
        outputs = outputs.transpose(0, 1)
        float_outputs = outputs.float()

        # Loss function
        criterion = CTCLoss()
        # warpctc sums over the batch; normalize to a per-sample loss.
        loss = criterion(float_outputs, targets, output_sizes, target_sizes).to(self._device)
        loss = loss / inputs.size(0)

        # Compute gradients (through AMP's scaled loss when mixed precision is on)
        if self._use_amp:
            from apex import amp

            with amp.scale_loss(loss, self._optimizer) as scaled_loss:
                scaled_loss.backward()

        else:
            loss.backward()

        # Get results: one gradient array per (variable-length) input sample.
        results = []
        for i in range(len(x_preprocessed)):
            results.append(x_preprocessed[i].grad.cpu().numpy().copy())

        results = np.array(results)
        # Map gradients back through the preprocessing chain.
        results = self._apply_preprocessing_gradient(x_, results)

        return results
Beispiel #22
0
def main(opts):
  """Evaluate the CRNN digit recognizer over the whole dataset, printing a
  running correct/total count after each sample.
  """
  alphabet = '0123456789.'
  nclass = len(alphabet) + 1  # +1 for the CTC blank
  model_name = 'crnn'
  net = CRNN(nclass)
  print("Using {0}".format(model_name))

  if opts.cuda:
    net.cuda()
  learning_rate = opts.base_lr
  optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)

  if os.path.exists(opts.model):
    print('loading model from %s' % args.model)
    step_start, learning_rate = net_utils.load_net(args.model, net, optimizer)

  # Dataset and loader.
  converter = strLabelConverter(alphabet)
  dataset = ImgDataset(
      root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/image',
      csv_root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/train_list.txt',
      transform=None,
      target_transform=converter.encode
  )
  ocrdataloader = torch.utils.data.DataLoader(
      dataset, batch_size=1, shuffle=False, collate_fn=own_collate
  )

  num_count = 0
  net = net.eval()

  converter = strLabelConverter(alphabet)
  ctc_loss = CTCLoss()

  # BUG FIX: the original try/except blocks were mis-indented
  # (IndentationError) and bootstrapped the iterator by catching a NameError
  # with a bare except. Initialize the iterator up front and restart it only
  # when exhausted.
  data_iter = iter(ocrdataloader)
  for step in range(len(dataset)):

    try:
      data = next(data_iter)
    except StopIteration:
      data_iter = iter(ocrdataloader)
      data = next(data_iter)

    im_data, gt_boxes, text = data
    im_data = im_data.cuda()

    try:
      res = process_crnn(im_data, gt_boxes, text, net, ctc_loss, converter, training=False)

      pred, target = res
      if pred == target[0]:
        num_count += 1
    except Exception:
      import sys, traceback
      traceback.print_exc(file=sys.stdout)

    print('correct/total:%d/%d' % (num_count, len(dataset)))
Beispiel #23
0
def test_seg_ctc(use_mine=True, use_log=False):
    """Sanity-test CTC cost by gradient-descending random log-probabilities.

    Generates a random batch of predictions and random token sequences, then
    runs 50 Adam steps minimising either the custom entropy-regularised CTC
    cost (``use_mine=True``) or warpctc's reference implementation.

    :param use_mine: if True, use ``ctc_ent_cost``; otherwise warpctc's CTCLoss
    :param use_log: forwarded to ``ctc_ent_cost``
    """
    size = 43        # number of time steps T
    voca_size = 37   # vocabulary size (blank excluded)
    n = 20           # batch size

    np.random.seed(1234)
    pred_len_np = np.ones([n]) * size
    # Log-probabilities of shape (T, batch, voca_size + 1); +1 for the blank.
    pred_np = np.log(np.random.random([size, n, voca_size + 1]))

    token_len_np = np.random.randint(low=2, high=10, size=n)
    # Token ids are 1-based; 0 is reserved for the CTC blank.
    token_np = np.random.randint(voca_size, size=token_len_np.sum()) + 1

    pred = Variable(floatX(pred_np), requires_grad=True)
    token = Variable(T.IntTensor(token_np))
    sizes = Variable(T.IntTensor(pred_len_np))
    target_sizes = Variable(T.IntTensor(token_len_np))

    # BUG FIX: the optimizer used to be re-created inside the loop, resetting
    # Adam's moment estimates on every iteration; create it once up front.
    optimizer = T.optim.Adam([pred], lr=3e-1)

    for i in range(50):
        if use_mine:
            H, cost = ctc_ent_cost(pred,
                                   token,
                                   sizes,
                                   target_sizes,
                                   use_log=use_log)
            glog.info('%d, cost: %s, entropy: %s' %
                      (i, cost.data.item(), H.data.item()))
        else:
            from warpctc_pytorch import CTCLoss
            criterion = CTCLoss().cuda()
            cost = criterion(pred, token, sizes, target_sizes)
            glog.info('%d, cost: %s' % (i, cost.data.item()))

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
Beispiel #24
0
 def train(self, mode=True):
     """
     Enter (or exit) training mode, lazily constructing the CTC loss the
     first time training mode is entered.
     :param mode: if True, set model up for training
     :return:
     """
     needs_loss_init = mode and self.loss_func is None
     if needs_loss_init:
         self.loss_func = CTCLoss()
     super().train(mode=mode)
Beispiel #25
0
def train_ctc(rnn_num_layers = 2, learning_rate = 1e-3):
    """Train the CTC speech model on CommonVoice spectrograms for 200 epochs,
    beam-search decoding one training sample per epoch as a progress check.

    :param rnn_num_layers: number of recurrent layers in CTCModel
    :param learning_rate: Adam learning rate
    """
    dataset = SpectrogramDataset('data/CommonVoice/valid_train.h5', model_ctc = True)
    norm_transform = Normalize(dataset)
    decoder = CTCDecoder(dataset.char_to_ix)
    dataset.set_transform(norm_transform)
    data_loader = DataLoader(dataset, collate_fn = dataset.merge_batches, batch_size = batch_size, shuffle = True)
    print("dataset len")
    print(len(dataset))
    print("\nDataset loading completed\n")

    model = CTCModel(input_dim, hidden_dim, output_dim, rnn_num_layers, batch_size)
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr = learning_rate)

    # The highest output index is reserved for the CTC blank symbol.
    ctc_loss = CTCLoss(blank = output_dim - 1)
    print("Begin training")
    for epoch in range(200):
        print("***************************")
        print("EPOCH NUM %d" % epoch)
        print("***************************")
        cost_epoch_sum = 0
        num_batches = 0
        for i_batch, sample_batched in enumerate(data_loader):
            optimizer.zero_grad()
            padded_X, seq_labels, X_lengths, Y_lengths = sample_batched
            if len(X_lengths) < batch_size:
                # Drop the final partial batch -- the model is built for a
                # fixed batch size.
                break
            padded_X = padded_X.cuda()
            log_probs = model(padded_X, X_lengths)

            # CTCLoss expects (time, batch, classes).
            log_probs = log_probs.transpose(0, 1)
            # Input lengths after the model's stride-reducing layers.
            # NOTE(review): this arithmetic mirrors CTCModel's downsampling --
            # confirm it if the architecture changes.
            cost = ctc_loss(log_probs.float(), seq_labels, (((X_lengths - 8) // 2) - 2) // 2, Y_lengths)
            cost.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 20)
            optimizer.step()
            print(cost)
            cost_epoch_sum += float(cost)
            num_batches += 1

        print("***************************")
        print("PREDICTION")
        model = model.eval()
        xseq, yseq = dataset[0]
        xseq = torch.FloatTensor([xseq])
        xseq = norm_transform(xseq.cuda())
        log_probs = model(xseq.cuda())
        decoded_seq, _ = decoder.beam_search_decoding(log_probs[0].data.cpu().numpy(), beam_size = 100)
        model = model.train()
        print("Ground truth: ", yseq)
        print("Prediction: ", decoded_seq)
        # BUG FIX: average over the number of batches actually processed
        # instead of the hard-coded constant 4076.
        print("Avg cost per epoch: ", cost_epoch_sum / max(num_batches, 1))
        print("***************************")
Beispiel #26
0
    def __init__(self, imgH, nc, nclass, nh, ngpu, n_rnn=2, leakyRelu=False):
        """Build the CRNN: a 7-layer CNN feature extractor followed by a
        two-layer bidirectional LSTM head, plus a CTC criterion.

        :param imgH: input image height; must be a multiple of 16
        :param nc: number of input channels
        :param nclass: number of output classes (alphabet size + CTC blank)
        :param nh: hidden size of the recurrent layers
        :param ngpu: GPU count passed through to the BidirectionalLSTM modules
        :param n_rnn: unused here -- kept for interface compatibility
        :param leakyRelu: if True, use LeakyReLU(0.2) after each conv instead of ReLU
        """
        super(CRNN, self).__init__()
        self.ngpu = ngpu
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'

        # Per-conv-layer hyper-parameters: kernel sizes, paddings, strides
        # and output channel counts for the 7 conv layers below.
        ks = [3, 3, 3, 3, 3, 3, 2]
        ps = [1, 1, 1, 1, 1, 1, 0]
        ss = [1, 1, 1, 1, 1, 1, 1]
        nm = [64, 128, 256, 256, 512, 512, 512]

        cnn = nn.Sequential()
        # CTC loss kept on the model itself; moved to GPU unconditionally
        # (NOTE(review): assumes CUDA is available -- confirm).
        self.criterion = CTCLoss()
        self.criterion = self.criterion.cuda()

        def convRelu(i, batchNormalization=False):
            # Append conv layer i (optionally with batch-norm) and its activation.
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        # Trailing comments give the feature-map shape (C x H x W) after each
        # stage for a 1 x 32 x 128 input. The asymmetric poolings halve height
        # but keep width resolution for the sequence dimension.
        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
        convRelu(6, True)  # 512x1x16

        self.cnn = cnn
        # Two stacked bidirectional LSTMs: features -> hidden -> class scores.
        self.rnn = nn.Sequential(BidirectionalLSTM(512, nh, nh, ngpu),
                                 BidirectionalLSTM(nh, nh, nclass, ngpu))
Beispiel #27
0
 def init_model(cls,
                ninput,
                nhidden,
                noutput,
                codec,
                normalize=kraken.lib.lstm.normalize_nfkc,
                cuda=torch.cuda.is_available()):
     """Alternate constructor: build a fresh TBIDILSTM-based recognition
     model with a CTC criterion.

     :param ninput: input feature dimension of the recurrent network
     :param nhidden: hidden-state size
     :param noutput: output (class) dimension
     :param codec: label codec stored on the instance
     :param normalize: text normalisation function (NFKC by default)
     :param cuda: move the model to GPU when True
     :return: the initialised instance
     """
     self = cls()
     self.mode = 'clstm'
     self.trial = 0
     self.codec = codec
     self.normalize = normalize
     self.rnn = TBIDILSTM(ninput, nhidden, noutput)
     self.setLearningRate()
     self.criterion = CTCLoss()
     self.cuda_available = cuda
     if self.cuda_available:
         self.cuda()
     return self
Beispiel #28
0
    def __init__(self, config):
        """Pre-encoder module: an encoder built from ``config``, a linear
        projection onto the output vocabulary, and a CTC criterion."""
        super(Pre_encoder, self).__init__()
        self.config = config
        self.encoder = build_encoder(config)
        # Project 800-d encoder features onto the 2664-way output space.
        self.project_layer = nn.Linear(800, 2664)
        self.crit = CTCLoss()
def main(args):
    """Train MyModel with CTC loss, evaluating and checkpointing every epoch.

    :param args: parsed command-line arguments (seed, cuda, model/optimizer
        hyper-parameters, epochs, weights_dir, ...)
    """
    print('Args:', args)

    # Set the random seed manually for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    model = MyModel(args.nhid, args.nlayers, args.dropout, args.dropouth,
                    args.dropouti)
    if args.cuda:
        model.cuda()

    # Parameter count: matrices contribute rows*cols, vectors their length.
    total_params = sum(x.size()[0] *
                       x.size()[1] if len(x.size()) > 1 else x.size()[0]
                       for x in model.parameters())
    print('Model total parameters:', total_params)

    train_loader, dev_loader = get_data_loaders(args)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.wdecay)
    criterion = CTCLoss()
    # Reduce LR when the validation phoneme loss plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=1,
                                                           threshold=0.01,
                                                           verbose=True)
    for epoch in range(1, args.epochs + 1):
        print(datetime.now())
        epoch_start_time = time.time()
        train(epoch, model, optimizer, criterion, train_loader, args)
        # Validate every epoch (the original wrapped this in `if True:`).
        val_loss_utter, val_loss_phoneme, val_cer = evaluate(model,
                                                             criterion,
                                                             dev_loader,
                                                             args,
                                                             calc_cer=True)
        scheduler.step(val_loss_phoneme)
        print('-' * 89)
        print(
            '| end of epoch {:3d} | time: {:5.2f}s | loss/utter {:5.2f} | loss/phoneme {:5.4f} | valid cer {:5.4f}'
            .format(epoch, (time.time() - epoch_start_time),
                    val_loss_utter, val_loss_phoneme, val_cer))
        print('-' * 89)

        # Checkpoint named after the epoch and validation phoneme loss.
        os.makedirs(args.weights_dir, exist_ok=True)
        weight_fname = "{}/{:03d}_{:.2f}.w".format(args.weights_dir, epoch,
                                                   val_loss_phoneme)
        print("saving as", weight_fname)
        torch.save(model.state_dict(), weight_fname)
Beispiel #30
0
def naren_loss(out, labels, input_lens, label_lens, blank_idx):
    """Calculates the loss function using sean naren's warpctc bindings.
    The `.permute(1,0,2).float().cpu()` section of the model output is meant
    to match the expected format for the loss function. the `.cpu()` call is necessary
    to calculate a non-zero loss value. 
    """
    from warpctc_pytorch import CTCLoss
    criterion = CTCLoss(blank=blank_idx, size_average=True, length_average=False)
    # Rearrange to (time, batch, classes), cast to float and move to CPU,
    # which is the layout warpctc expects.
    acts = out.permute(1, 0, 2).float().cpu()
    return criterion(acts, labels, input_lens, label_lens)
Beispiel #31
0
# Training loader with a custom collate that resizes images to height imgH,
# optionally preserving aspect ratio.
# NOTE(review): passing both shuffle=True and sampler= is rejected by modern
# PyTorch DataLoader (they are mutually exclusive) -- confirm the torch
# version this targets.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=opt.batchSize,
    shuffle=True, sampler=sampler,
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH, keep_ratio=opt.keep_ratio))
# Validation set: LMDB-backed, every image normalised to 100x32.
test_dataset = dataset.lmdbDataset(
    root=opt.valroot, transform=dataset.resizeNormalize((100, 32)))

ngpu = int(opt.ngpu)
nh = int(opt.nh)
alphabet = opt.alphabet
nclass = len(alphabet) + 1  # +1 for the CTC blank label
nc = 1  # single-channel (grayscale) input

# Converts between text labels and integer sequences for CTC.
converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    """Custom weight initialisation applied to CRNN modules: conv weights
    drawn from N(0, 0.02), batch-norm weights from N(1, 0.02) with zero bias;
    every other module type is left untouched."""
    layer_type = m.__class__.__name__
    if 'Conv' in layer_type:
        m.weight.data.normal_(0.0, 0.02)
    elif 'BatchNorm' in layer_type:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

crnn = crnn.CRNN(opt.imgH, nc, nclass, nh, ngpu)
crnn.apply(weights_init)
if opt.crnn != '':
    print('loading pretrained model from %s' % opt.crnn)