コード例 #1
0
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch and return its loss.

    The batch is pulled from the module-level ``train_iter``.  The
    module-level ``image``, ``text`` and ``length`` objects are handed to
    ``util.loadData`` together with the batch images and the encoded labels
    (presumably it copies the data into those buffers -- confirm against
    ``util``).

    Args:
        net: Unused; the forward and backward pass run on the module-level
            ``crnn``.  NOTE(review): callers presumably pass that same
            model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method,
    # which Python 3 iterators are not required to provide.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    util.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)

    util.loadData(text, t)
    util.loadData(length, l)

    preds = crnn(image)
    # One entry per sample, each equal to preds.size(0).
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #2
0
def trainBatch(net, optimizer):
    """Run one training step on the next batch using ``ctc_ent_cost``.

    The batch comes from the module-level ``train_iter``.  The quantity that
    is back-propagated is ``-opt.h_rate * H + (1 - opt.h_rate) * cost`` where
    ``H, cost`` are the two values returned by ``ctc_ent_cost``.  Gradients
    are clipped to ``opt.max_norm`` before the optimizer step.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        ``(H / batch_size, cost / batch_size)``.  When the loss is rejected
        (infinite, NaN, or summed cost <= -1e5) no update is performed and a
        pair of zero tensors with the sizes of ``H`` and ``cost`` is returned.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    H, cost = ctc_ent_cost(preds, text, preds_size, length)
    cost_sum = cost.data.sum()
    inf = float("inf")
    # .any() reduces the element-wise NaN masks to a single truth value; a
    # bare multi-element tensor cannot be used in a boolean context.
    loss_is_bad = (cost_sum == inf or cost_sum == -inf or cost_sum <= -1e5
                   or torch.isnan(cost).any() or torch.isnan(H).any())
    if loss_is_bad:
        print("Warning: received an inf loss, setting loss value to 0")
        return torch.zeros(H.size()), torch.zeros(cost.size())

    crnn.zero_grad()
    (-opt.h_rate * H + (1 - opt.h_rate) * cost).backward()

    # In-place variant; the un-suffixed clip_grad_norm is deprecated.
    torch.nn.utils.clip_grad_norm_(crnn.parameters(), opt.max_norm)
    optimizer.step()
    return H / batch_size, cost / batch_size
コード例 #3
0
def trainBatch(crnn, criterion, optimizer):
    """Run one training step of ``crnn`` on the next batch.

    The batch is pulled from the module-level ``train_iter``; the
    module-level ``image``, ``text`` and ``length`` objects are passed to
    ``utils.loadData`` along with the batch images and encoded labels.

    Args:
        crnn: Model called on ``image``; its gradients are zeroed before the
            backward pass.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion value divided by the batch size.
    """
    # Fetch one batch.  Built-in next() rather than the Python-2-style
    # ``.next()`` method.
    data = next(train_iter)
    # Split the batch into images and labels.
    cpu_images, cpu_texts = data

    batch_size = cpu_images.size(0)
    # Load the image data into the image tensor.
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    # Load the encoded labels into the text tensor.
    utils.loadData(text, t)
    # Load the label lengths into the length tensor.
    utils.loadData(length, l)

    # Forward pass.
    preds = crnn(image)

    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()

    return cost
コード例 #4
0
ファイル: train.py プロジェクト: smartcai/BankCardIdentifier
def trainBatch(net, criterion, optimizer, train_iter):
    """Run one training step on the next batch drawn from ``train_iter``.

    The module-level ``image``, ``text`` and ``length`` objects are passed
    to ``utils.loadData`` along with the batch images and encoded labels.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.
        train_iter: Iterator yielding ``(images, texts)`` batches.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data

    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()

    return cost
コード例 #5
0
def train(crnn, train_loader, criterion, iteration):
    """Train ``crnn`` on batches from ``train_loader``, stopping early.

    Relies on the module-level ``args``, ``params``, ``dataset``,
    ``converter``, ``optimizer`` and ``loss_avg``.  The loop breaks after the
    batch with index 100 has been processed.

    Args:
        crnn: Model to train; all of its parameters get
            ``requires_grad = True`` and it is switched to train mode.
        train_loader: Iterable yielding ``(image, index)`` batches; ``index``
            is passed to ``utils.get_batch_label`` to look up the labels.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        iteration: Value shown as the current iteration in the progress line.
    """
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()
    # Loop-invariant: move the criterion once instead of on every batch.
    if args.cuda:
        criterion = criterion.cuda()
    for i_batch, (image, index) in enumerate(train_loader):
        if args.cuda:
            image = image.cuda()
        label = utils.get_batch_label(dataset, index)
        preds = crnn(image)
        batch_size = image.size(0)
        text, length = converter.encode(label)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        # Example sizes recorded by the original author for
        # preds / text / preds_size / length:
        # [41, 16, 6736] / [160] / [16] / [16]
        cost = criterion(preds, text, preds_size, length) / batch_size

        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)
        if i_batch == 100:
            break
        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (iteration, params.niter, i_batch, len(train_loader),
                   loss_avg.val()))
            loss_avg.reset()
コード例 #6
0
ファイル: train.py プロジェクト: jimleungjing/checkpoints
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch, re-assembling multi-GPU output.

    The batch comes from the module-level ``train_iter``.  The model output
    is split into ``len(gpu_ids)`` chunks along dim 0 and the chunks are
    concatenated along dim 1 before the loss is computed.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    # Drop the first two and the last character of every label (presumably a
    # "b'...'" wrapper left by str(bytes) -- confirm against the dataset).
    # The loop variable is deliberately not called ``text`` so it cannot
    # shadow the module-level ``text`` buffer used below.
    decode_texts = [raw_label[2:-1] for raw_label in cpu_texts]
    t, l = converter.encode(decode_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    # Original author's note: with DataParallel gathering on dim 0, the
    # per-GPU outputs [seq_len, batch/num_gpu, nOut] come back stacked as
    # [seq_len * num_gpu, batch/num_gpu, nOut] (e.g. 4 GPUs, batch 16:
    # [604, 4, 37]).  Splitting on dim 0 and joining on dim 1 yields
    # [seq_len, batch, nOut] again.
    preds = crnn(image)
    preds_chunks = preds.chunk(len(gpu_ids), dim=0)
    preds = torch.cat(preds_chunks, dim=1)

    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #7
0
ファイル: new_train.py プロジェクト: tillmannschatz/CEIR
def trainBatch(net, optimizer):
    """Run one training step on the next batch using ``seg_ctc_ent_cost``.

    The batch comes from the module-level ``train_iter``.  The quantity that
    is back-propagated is ``(1 - opt.h_rate) * cost - opt.h_rate * H`` where
    ``H, cost`` are the two values returned by ``seg_ctc_ent_cost``.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        ``(H / batch_size, cost / batch_size, h_cost / batch_size)``.  When
        the summed ``h_cost`` is infinite or greater than ``200 * batch_size``
        no update is performed and three zero tensors with the sizes of
        ``H``, ``cost`` and ``h_cost`` are returned instead.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    H, cost = seg_ctc_ent_cost(preds,
                               text,
                               preds_size,
                               length,
                               uni_rate=opt.uni_rate)
    h_cost = (1 - opt.h_rate) * cost - opt.h_rate * H
    cost_sum = h_cost.data.sum()
    inf = float("inf")
    # Skip the update for diverged or implausibly large losses.
    if cost_sum == inf or cost_sum == -inf or cost_sum > 200 * batch_size:
        print("Warning: received an inf loss, setting loss value to 0")
        return torch.zeros(H.size()), torch.zeros(cost.size()), torch.zeros(
            h_cost.size())

    crnn.zero_grad()
    h_cost.backward()
    # NOTE(review): clip_grad_norm is the deprecated spelling of
    # clip_grad_norm_; kept because the minimum torch version is not visible
    # here.
    torch.nn.utils.clip_grad_norm(crnn.parameters(), opt.max_norm)
    optimizer.step()
    return H / batch_size, cost / batch_size, h_cost / batch_size
コード例 #8
0
ファイル: crnn_main.py プロジェクト: Narcissuscyn/OCR
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch and return its loss.

    The batch is pulled from the module-level ``train_iter``; the
    module-level ``image``, ``text`` and ``length`` objects are passed to
    ``utils.loadData`` along with the batch images and encoded labels.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)

    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #9
0
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch with ``long`` loss inputs.

    The batch is pulled from the module-level ``train_iter``.  ``text``,
    ``preds_size`` and ``length`` are converted with ``.long()`` before being
    passed to the criterion, and the criterion output is summed.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``; its result must
            support ``.sum()``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The summed criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)

    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)
    # Build the per-sample length tensor once; only the CUDA path wraps it
    # in a Variable and moves it to the GPU.
    preds_size = torch.IntTensor([preds.size(0)] * batch_size)
    if CUDA:
        preds_size = Variable(preds_size).cuda()
    cost = criterion(preds, text.long(), preds_size.long(),
                     length.long()).sum() / float(batch_size)

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #10
0
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch and log metrics to ``writer``.

    Relies on many module-level names: ``train_iter``, ``focal_alpha``,
    ``probs``, ``image``, ``text``, ``length``, ``converter``, ``crnn``,
    ``display_flag``, ``writer``, ``global_step``, ``plot``, ``plot_preds``
    and ``opt``.  The module-level ``text`` is rebound to a
    ``(batch_size, -1)`` view moved to CUDA.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``; its result must
            support ``.sum()``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The summed criterion value (multiplied by ``probs`` first when
        ``opt.focal`` is set) divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    if focal_alpha:
        # Batches carry a third element that is loaded into ``probs``.
        cpu_images, cpu_texts, alpha = data
        alpha = torch.FloatTensor(list(alpha))
        utils.loadData(probs, alpha)
        assert not probs.requires_grad
    else:
        cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    assert batch_size > 0
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    global text
    utils.loadData(text, t)
    utils.loadData(length, l)
    text = text.view((batch_size, -1))
    text = text.cuda()

    preds = F.log_softmax(crnn(image), dim=-1)
    # Computed once and reused for both decoding and the loss.
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    _, preds_str = preds.max(2)
    preds_str = preds_str.transpose(1, 0).contiguous().view(-1)
    preds_str = converter.decode(preds_str.data, preds_size.data, raw=False)
    # Fraction of decoded strings that equal their label exactly.
    acc = (np.array(preds_str) == np.array(cpu_texts)).mean()

    if display_flag:
        writer.add_figure('Train predictions vs. actuals',
                          plot_preds(cpu_images, preds_str, cpu_texts),
                          global_step=global_step)
        writer.add_figure('Gradient',
                          plot.plot_grad_flow_v2(crnn.named_parameters()),
                          global_step=global_step)
    cost = criterion(preds, text, preds_size, length)
    # NOTE(review): the batch layout is chosen by ``focal_alpha`` but the
    # weighting by ``opt.focal`` -- confirm the two flags always agree.
    if opt.focal:
        cost = cost * probs
    cost = cost.sum() / batch_size
    writer.add_scalars('training', {
        'loss': cost.item(),
        'acc': acc
    }, global_step)
    writer.add_scalars('lr', plot.get_lr(optimizer), global_step)
    crnn.zero_grad()
    cost.backward()
    torch.nn.utils.clip_grad_value_(crnn.parameters(), 1)
    optimizer.step()
    return cost
コード例 #11
0
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch for a length-aware model.

    The batch is pulled from the module-level ``train_iter``.  Unlike the
    usual call, the model is invoked as ``crnn(image, length)`` and the
    criterion as ``criterion(preds, text)``; the loss is not divided by the
    batch size.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as ``criterion(preds, text)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion value.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image, length)
    cost = criterion(preds, text)
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #12
0
ファイル: train.py プロジェクト: youscan/crnn.pytorch
def trainBatch(crnn, criterion, optimizer):
    """Run one training step of ``crnn`` on the next batch.

    The batch is pulled from the module-level ``train_iter``; the
    module-level ``image``, ``text`` and ``length`` objects are passed to
    ``utils.loadData`` along with the batch images and encoded labels.

    Args:
        crnn: Model called on ``image``; its gradients are zeroed before the
            backward pass.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)
    preds = crnn(image)
    # One entry per sample, each equal to preds.size(0).
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #13
0
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch and return its loss.

    The batch is pulled from the module-level ``train_iter``; the
    module-level ``image``, ``text`` and ``length`` objects are passed to
    ``utils.loadData`` along with the batch images and encoded labels.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)
    # One entry per sample, each equal to preds.size(0).
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #14
0
ファイル: crnn_main.py プロジェクト: marvis/lstm_sliding
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch of trajectories.

    The batch is pulled from the module-level ``train_iter``.  The first
    batch element is converted with ``padding2tensor`` before being handed to
    ``utils.loadData`` together with the module-level ``traject`` object.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_trajects, cpu_texts = data
    cpu_trajects = padding2tensor(cpu_trajects)
    batch_size = cpu_trajects.size(0)
    utils.loadData(traject, cpu_trajects)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(traject)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #15
0
ファイル: train.py プロジェクト: vutienhung260798/crnn
def trainBatch(net, data, criterion, optimizer):
    """Train on one supplied batch and report loss plus a CER metric.

    Uses the module-level ``crnn``, ``image``, ``text``, ``length``,
    ``converter`` and ``utils``; ``net`` itself is not referenced.

    Args:
        net: Unused.
        data: ``(images, texts)`` pair for one batch.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        ``(loss, cer_loss, n)``: the batch-size-normalised loss as a Python
        number, the value returned by ``utils.cer_loss`` for the decoded
        predictions, and ``len(images)``.
    """
    cpu_images, cpu_texts = data
    n_samples = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    outputs = crnn(image)
    preds_size = Variable(torch.IntTensor([outputs.size(0)] * n_samples))
    loss = criterion(outputs, text, preds_size, length) / n_samples
    crnn.zero_grad()
    loss.backward()
    optimizer.step()
    loss_value = loss.detach().item()

    # Index of the maximum along dim 2, then swap the first two dims and
    # flatten for the decoder.
    best_indices = outputs.max(2)[1]
    flat_indices = best_indices.transpose(1, 0).contiguous().view(-1)
    sim_preds = converter.decode(flat_indices.data, preds_size.data,
                                 raw=False)
    cer_loss = utils.cer_loss(sim_preds, cpu_texts)
    return loss_value, cer_loss, len(cpu_images)
コード例 #16
0
def trainBatch(net, criterion, optimizer, train_iter):
    """Run one training step on the next batch drawn from ``train_iter``.

    The model output is cast to ``torch.float64`` and moved to the
    module-level ``device`` before the loss is computed.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.
        train_iter: Iterator yielding ``(images, texts)`` batches.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    # Values observed by the original author for a batch of 16:
    #   cpu_texts   tuple of bytes labels, e.g. (b'JHP5U1R- NE', b'F', ...)
    #   cpu_images  torch.Size([16, 1, 32, 180])
    utils.loadData(image, cpu_images)
    # t: flat int32 tensor of character indices for all labels
    # l: int32 tensor with one label length per sample
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)  # text: each character's map index
    utils.loadData(length, l)  # length: each label length
    preds = crnn(image)
    preds = preds.to(torch.float64)
    preds = preds.to(device)
    # Author's note: preds is (T, N, C), e.g. [46, 16, 37], so preds_size is
    # [46] * 16.
    preds_size = torch.IntTensor([preds.size(0)] * batch_size)
    # Arguments: prediction, target, prediction lengths, target lengths.
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #17
0
ファイル: train.py プロジェクト: gengyi/RMBRecognization
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch, optionally dumping tensors.

    The batch is pulled from the module-level ``train_iter``.  When the
    module-level ``log_for_explore`` flag is truthy, the raw predictions,
    their ``log_softmax(2)``, ``preds_size`` and ``length`` are printed.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)
    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    # A single guarded block replaces four ``print(...) if flag else None``
    # expression statements.
    if log_for_explore:
        print("on train, preds is: " + str(preds))
        print("on train, preds after logsoftmax is: " +
              str(preds.log_softmax(2)))
        print("on train, preds_size is: " + str(preds_size))
        print("on train, target_size is: " + str(length))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #18
0
def trainBatch(net, criterion, optimizer, train_iter):
    """Run one training step on the next batch, tracing values via ``myUtils``.

    Intermediate values are passed to ``myUtils.dbvart`` (presumably a debug
    printer -- confirm against ``myUtils``).

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.
        train_iter: Iterator yielding ``(images, texts)`` batches.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    myUtils.dbvart(cpu_texts)
    myUtils.dbvart(t, l)
    utils.loadData(text, t)
    utils.loadData(length, l)
    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    # Trailing comments are example values recorded by the original author.
    myUtils.dbvart(preds.size())  # [41 2 6736]
    myUtils.dbvart(text.size())  # [20]
    myUtils.dbvart(preds_size)  # [41, 41]
    myUtils.dbvart(length)  # [10, 10]
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
コード例 #19
0
def trainBatch(net, criterion, optimizer, flage=False):
    """Run one training step on the next batch, optionally with a new optimizer.

    The batch is pulled from the module-level ``train_iter``.  When the
    module-level ``ifUnicode`` flag is set, every label is decoded from
    UTF-8 and passed through ``clean_txt`` before encoding.

    Args:
        net: Unused; the module-level ``crnn`` is trained.
            NOTE(review): callers presumably pass that same model -- confirm.
        criterion: Loss callable invoked as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update, unless
            ``flage`` is true.
        flage: When true, the step is taken by a freshly built
            ``optim.Adadelta(crnn.parameters(), lr=0.0001)`` instead of
            ``optimizer``.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() rather than the Python-2-style ``.next()`` method.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    if ifUnicode:
        # Decode UTF-8 bytes to text.
        cpu_texts = [clean_txt(tx.decode('utf-8')) for tx in cpu_texts]

    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    if flage:
        # NOTE(review): a new optimizer is constructed on every such call,
        # so no optimizer state carries over between steps -- confirm this
        # is intended.
        lr = 0.0001
        optimizer = optim.Adadelta(crnn.parameters(), lr=lr)
    optimizer.step()
    return cost
コード例 #20
0
ファイル: train.py プロジェクト: tinggh/ocr_recognition_crnn
def train(crnn, train_loader, criterion, optimizer, valid_loader):
    """Train ``crnn`` for one pass over ``train_loader``.

    Relies on the module-level ``converter``, ``utils``, ``opt`` and
    ``epoch``.  Images are moved to CUDA unconditionally.

    Args:
        crnn: Model to train; all of its parameters get
            ``requires_grad = True`` and it is switched to train mode.
        train_loader: Iterable yielding three-element batches whose second
            and third elements are the images and the label texts.
        criterion: Loss callable invoked as
            ``criterion(preds, t, preds_size, l)``.
        optimizer: Optimizer whose ``step()`` applies each update.
        valid_loader: Unused.
    """
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()
    # loss averager
    loss_avg = utils.averager()
    # Iterating the loader directly replaces the manual iter()/.next() pair;
    # ``.next()`` is the Python-2-style spelling.
    for i, data in enumerate(train_loader):
        _, images, texts = data
        batch_size = images.size(0)
        t, l = converter.encode(texts)
        images = images.cuda()
        preds = crnn(images)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, t, preds_size, l) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)
        if (i + 1) % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
コード例 #21
0
ファイル: train.py プロジェクト: yiwangchunyu/CVCR
def main(arg):
    """Build the datasets, model and optimizer, then train and checkpoint.

    Args:
        arg: Options object.  The attributes read here are ``train_root``,
            ``test_root``, ``num_workers``, ``batch_size``, ``imgH``,
            ``imgW``, ``keep_ratio``, ``num_class``, ``opt``, ``n_epoch``,
            ``displayInterval``, ``testInterval``, ``saveInterval``,
            ``model_dir`` and ``type``.

    Side effects:
        Prints the options, the model and progress lines; calls ``test``
        every ``arg.testInterval`` batches; writes ``state_dict`` checkpoints
        under ``arg.model_dir`` every ``arg.saveInterval`` batches and once
        more after the last epoch.
    """
    print(arg)
    train_dataset = dataset.lmdbDataset(path=arg.train_root)
    test_dataset = dataset.lmdbDataset(path=arg.test_root)
    train_loader = DataLoader(train_dataset,
                              num_workers=arg.num_workers,
                              batch_size=arg.batch_size,
                              collate_fn=dataset.alignCollate(
                                  imgH=arg.imgH,
                                  imgW=arg.imgW,
                                  keep_ratio=arg.keep_ratio),
                              shuffle=True,
                              drop_last=True)

    criterion = CTCLoss()
    converter = utils.Converter(arg.num_class)
    # One output class more than arg.num_class (presumably the CTC blank --
    # confirm against CRNN / Converter).
    crnn = CRNN(imgH=arg.imgH, nc=3, nclass=arg.num_class + 1, nh=256)

    # custom weights initialization called on crnn
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    crnn.apply(weights_init)
    print(crnn)

    # Buffers handed to utils.loadData for every batch.
    image = torch.FloatTensor(arg.batch_size, 3, arg.imgH, arg.imgW)
    text = torch.IntTensor(arg.batch_size * 5)
    length = torch.IntTensor(arg.batch_size)

    image = Variable(image)
    text = Variable(text)
    length = Variable(length)

    # loss averager
    loss_avg = utils.averager()

    # setup optimizer
    if arg.opt == 'adam':
        optimizer = optim.Adam(crnn.parameters(), 0.01, betas=(0.5, 0.999))
    elif arg.opt == 'adadelta':
        optimizer = optim.Adadelta(crnn.parameters())
    else:
        optimizer = optim.RMSprop(crnn.parameters(), 0.01)

    for epoch in range(arg.n_epoch):
        # ``i`` is the 1-based count of batches processed in this epoch.
        # Iterating the loader directly replaces the manual while loop and
        # the Python-2-style ``train_iter.next()`` call.
        for i, data in enumerate(train_loader, 1):
            # Re-enabled on every batch (presumably because ``test`` may
            # leave the model in a different mode -- confirm).
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            cpu_images, cpu_texts = data
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            text_labels, label_lengths = converter.encode(cpu_texts)
            utils.loadData(text, text_labels)
            utils.loadData(length, label_lengths)

            preds = crnn(image)
            preds_size = Variable(torch.IntTensor([preds.size(0)] *
                                                  batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()

            loss_avg.add(cost)

            if i % arg.displayInterval == 0:
                print(
                    '[%d/%d][%d/%d] Loss: %f' %
                    (epoch, arg.n_epoch, i, len(train_loader), loss_avg.val()))
                loss_avg.reset()

            if i % arg.testInterval == 0:
                test(arg, crnn, test_dataset, criterion, image, text, length)

            # do checkpointing
            if i % arg.saveInterval == 0:
                name = '{0}/netCRNN_{1}_{2}_{3}_{4}.pth'.format(
                    arg.model_dir, arg.num_class, arg.type, epoch, i)
                torch.save(crnn.state_dict(), name)
                print('model saved at ', name)
    torch.save(
        crnn.state_dict(),
        '{0}/netCRNN_{1}_{2}.pth'.format(arg.model_dir, arg.num_class,
                                         arg.type))
コード例 #22
0
ファイル: train.py プロジェクト: Ls-Dai/crnn.pytorch
# optimizer = optim.Adadelta(crnn.parameters())
# optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)

min_val_loss = 0.2
# Total number of epochs; also shown in the progress message so the two
# cannot drift apart.
num_epochs = 500
for epoch in range(num_epochs):
    # Running sum of the per-batch losses for this epoch.
    loss_avg = 0
    crnn.train()
    for patch, (im, labels) in enumerate(train_loader):
        # Size everything from the batch actually received: a short final
        # batch has fewer samples than the configured batch_size, and the
        # length tensor must have one entry per sample.
        n_samples = im.size(0)
        preds = crnn(im.to(device=device))
        preds = preds.log_softmax(2)
        target, target_length = get_labels(alphabet.dict, labels)
        preds_size = torch.LongTensor([preds.size(0)] * n_samples).to(device=device)

        loss = criterion(preds, target.to(device=device), preds_size,
                         target_length.to(device=device)) / n_samples

        crnn.zero_grad()
        loss.backward()
        optimizer.step()
        loss_avg += loss.item()

        # Report every 50 batches and once more on the last batch.
        if (patch+1)%50 == 0 or patch == len(train_loader)-1:
            print('[Train][Epoch: {}/{}][Patch: {}/{}][Loss: {:.4f}]' .format(epoch+1, num_epochs, patch+1, len(train_loader),
                                                                       loss_avg/(patch+1)))
    # loss_avg = 0
    # crnn.eval()
    # for patch, (im, labels) in enumerate(val_loader):
    #     preds = crnn(Variable(im).to(device=device))
    #     preds = preds.log_softmax(2)
    #     target, target_length = get_labels(alphabet.dict, labels)
    #     preds_size = Variable(torch.LongTensor([preds.size(0)] * batch_size)).to(device=device)