Example #1
0
def main():
    """
    Entry point when using CRNN from the commandline.

    Builds a CRNN for training and/or testing according to the parsed
    command-line flags.
    """
    args = parse_arguments()

    if not args.train and not args.test:
        # Fixed message spacing ("training,and" -> "training, and").
        print("If we are not training, and not testing, what is the point?")

    crnn = None

    if args.train:
        crnn = CRNN(
            args.batch_size,
            args.model_path,
            # BUG FIX: was args.example_path, inconsistent with the test
            # branch below which reads args.examples_path.
            args.examples_path,
            args.max_image_width,
            args.train_test_ratio,
            args.restore
        )
        crnn.train(args.iteration_count)

    if args.test:
        if crnn is None:
            # No model was built for training: a train/test ratio of 0 leaves
            # every example available for testing.
            crnn = CRNN(
                args.batch_size,
                args.model_path,
                args.examples_path,
                args.max_image_width,
                0,
                args.restore
            )
        crnn.test()
Example #2
0
File: run.py  Project: smartcai/BankCardOCR
def main():
    """
    Entry point when using CRNN from the commandline
    """
    args = parse_arguments()

    # Complain when asked to do neither task.
    if not args.train and not args.test:
        print("If we are not training, and not testing, what is the point?")

    def build(ratio, phase):
        # Construct a CRNN wired to the parsed CLI options; `ratio` is the
        # train/test split and `phase` the 0/1 flag the original passed.
        return CRNN(args.iteration_count, args.batch_size, args.model_path,
                    args.examples_path, args.max_image_width, ratio,
                    args.restore, phase)

    crnn = None

    if args.train:
        crnn = build(args.train_test_ratio, 0)
        crnn.train(args.iteration_count)

    if args.test:
        # Reuse the trained model when both phases were requested.
        if crnn is None:
            crnn = build(0, 1)
        crnn.test()
Example #3
0
File: run.py  Project: zawlinnnaing/CRNN
def main():
    """
    Entry point when using CRNN from the commandline.

    Reads the character set either directly from --char_set_string or, when
    that value names an existing file, from the file's contents; then trains
    and/or tests a CRNN depending on the parsed flags.
    """

    args = parse_arguments()

    if not args.train and not args.test:
        print("If we are not training, and not testing, what is the point?")

    crnn = None

    if os.path.isfile(args.char_set_string):
        # The charset argument names a file: use its contents with every line
        # break removed, plus one trailing "\n" kept as the final symbol.
        # This reproduces the original readline loop exactly, which stripped
        # each newline and appended a single "\n" at EOF.
        with open(args.char_set_string, "r") as f:
            charset = f.read().replace("\n", "") + "\n"
    else:
        # Otherwise the argument itself is the charset.
        charset = args.char_set_string

    if args.train:
        crnn = CRNN(
            args.batch_size,
            args.model_path,
            args.examples_path,
            args.max_image_width,
            args.train_test_ratio,
            args.restore,
            charset,
            args.use_trdg,
            args.language,
            args.learning_rate
        )

        crnn.train(args.iteration_count)

    if args.test:
        if crnn is None:
            # No model was built for training: ratio 0 leaves every example
            # available for testing.
            crnn = CRNN(
                args.batch_size,
                args.model_path,
                args.examples_path,
                args.max_image_width,
                0,
                args.restore,
                charset,
                args.use_trdg,
                args.language,
                args.learning_rate
            )

        crnn.test()
Example #4
0
File: run.py  Project: vinayakkailas/CRNN-1
def main():
    """
    Entry point when using CRNN from the commandline
    """
    args = parse_arguments()

    # Neither phase requested: nothing useful to do, say so.
    if not args.train and not args.test:
        print("If we are not training, and not testing, what is the point?")

    crnn = None

    if args.train:
        crnn = CRNN(args.batch_size, args.model_path, args.examples_path,
                    args.max_image_width, args.train_test_ratio, args.restore)
        crnn.train(args.iteration_count)

    if args.test:
        # When training already built a model, test that same instance;
        # otherwise build one with a 0 split so all examples go to testing.
        if crnn is None:
            crnn = CRNN(args.batch_size, args.model_path, args.examples_path,
                        args.max_image_width, 0, args.restore)
        crnn.test()
Example #5
0
File: train.py  Project: lygztq/MagicOCR
def start_train():
    """Build a CRNN from the global MODEL_HYPER settings, train it, save it."""
    hyper = MODEL_HYPER
    model = CRNN(hyper.batch_size, hyper.epoches, hyper.data_path,
                 hyper.text_path, hyper.log_path, hyper.model_path)
    model.train()
    model.save()
Example #6
0
    # NOTE(review): scraped fragment — the function enclosing the lines below
    # is not visible in this chunk, and the training loop further down is cut
    # off right after the loss computation (no backward()/step() visible).
    # accuracy = n_correct / float((i + 1) * opt.val_batchsize)
    accuracy_train = n_correct_train / float(opt.num_val * opt.val_batchsize)

    # NOTE(review): "accuray_train" is a typo in the log label (runtime string,
    # left unchanged here).
    print('accuray_train: %f' % (accuracy_train))
    return accuracy_train


# Top-level training loop over epochs and batches.
step = 0
for epoch in range(opt.num_epoch):
    for i, (inputs, labels, lengths) in enumerate(dloader_train):
        # Re-enable gradients on every parameter (they may have been frozen
        # elsewhere, e.g. during validation).
        for p in net.parameters():
            # p.requires_grad = True
            p.requires_grad_(True)

        net.train()
        # inputs, labels, lengths = Variable(inputs.cuda()), Variable(labels), Variable(lengths)
        inputs = inputs.cuda()
        # Concatenate per-sample labels, trimmed to their true lengths, into
        # the flat 1-D target tensor CTC loss expects; zero-length samples
        # are skipped.
        label = []
        for j in range(labels.size(0)):
            if lengths.data[j, 0] == 0:
                continue
            label.append(labels[j, :lengths.data[j, 0]])
        label = torch.cat(label)
        preds = net(inputs)
        bs = inputs.size(0)
        # One identical prediction length (the full time dimension) per
        # batch element.
        # pred_size = torch.IntTensor([preds.size(0)] * bs)
        pred_size = torch.tensor([preds.size(0)] * bs, dtype=torch.int32)
        optimizer.zero_grad()
        # Normalize the CTC loss by the configured training batch size.
        loss = criterion(preds, label, pred_size,
                         lengths[:, 0]) / opt.train_batchsize
Example #7
0
File: run.py  Project: ucassjy/SimpleOCR
from crnn import CRNN

# Script-level configuration for a simple CRNN OCR training run.
batch_size = 10
model_path = 'MyModel'
examples_picture_path = 'restore/'
examples_label_path = 'target_label.txt'
dictionary_path = 'dictionary.txt'
max_image_width = 256
train_test_ratio = 0.9  # fraction of examples used for training vs. testing
restore = False         # whether to resume from a previously saved model
NUM_CLASSES = 52
iteration_count = 4000

# NOTE(review): the model is constructed at import time; moving this line
# under the __main__ guard would avoid side effects when the module is merely
# imported.
crnn = CRNN(batch_size, model_path, examples_picture_path, examples_label_path, dictionary_path, max_image_width, train_test_ratio, restore, NUM_CLASSES)

if __name__ == '__main__':
    crnn.train(iteration_count)
def main():
    """
    Train a CRNN using hyper-parameters loaded from conf/train.yml.

    Runs n_iter passes over the training set, validating every val_interval
    iterations and checkpointing the whole model into model_dir every
    save_interval iterations.
    """
    conf_file = "conf/train.yml"
    with open(conf_file, 'r') as f:
        # NOTE(review): yaml.load without an explicit Loader is deprecated and
        # unsafe on untrusted files; yaml.safe_load is the usual replacement.
        args = edict(yaml.load(f))

    # Unpack the configuration into locals.
    train_root = args.train_root
    test_root = args.test_root
    batch_size = args.batch_size
    max_len = args.max_len
    img_h = args.img_h
    img_w = args.img_w
    n_hidden = args.n_hidden
    n_iter = args.n_iter
    lr = args.lr
    cuda = args.cuda
    val_interval = args.val_interval
    save_interval = args.save_interval
    model_dir = args.model_dir
    debug_level = args.debug_level
    experiment = args.experiment
    n_channel = args.n_channel
    n_class = args.n_class
    beta = args.beta

    # Pre-allocated CPU buffers refreshed via loadData() each iteration.
    # NOTE(review): `image` uses img_h for both spatial dimensions (img_w is
    # unused here), and the forward pass below feeds train_img directly, so
    # these buffers look vestigial — confirm before relying on them.
    image = torch.FloatTensor(batch_size, n_channel, img_h, img_h)
    text = torch.IntTensor(batch_size * max_len)
    length = torch.IntTensor(batch_size)

    logging.getLogger().setLevel(debug_level)
    '''
        50 - critical
        40 - error
        30 - warining
        20 - info
        10 - debug
    '''
    crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
    crnn.apply(weights_init)

    criterion = CTCLoss().cuda()

    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    # optimizer = optim.Adam(crnn.parameters(), lr=lr,
    #                    betas=(beta, 0.999))

    trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
    valset = train_set(test_root, batch_size, img_h, img_w, n_class)

    cur_iter = 0
    for ITER in range(n_iter):
        for train_img, train_label, train_lengths, batch_label \
                in iter(trainset):
            # Unfreeze all parameters (validation may have frozen them).
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            # The dataset yields None when exhausted.
            if train_img is None:
                break
            cur_iter += 1
            loadData(image, train_img)
            loadData(text, train_label)
            loadData(length, train_lengths)
            preds = crnn(train_img.cuda())
            # preds = F.softmax(preds, dim=2)
            # print(preds.shape)
            # One prediction length (the full time dimension) per element.
            preds_size = Variable(torch.IntTensor([preds.size(0)] *
                                                  batch_size))
            # print(batch_label, text, length, len(text), len(length), length.sum(),
            #     preds.shape, preds_size.shape)
            cost = criterion(preds, text, preds_size, length)\
                    / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            # NOTE(review): indexing the 0-d result with [0] relies on old
            # numpy behaviour; .item() is the modern spelling — confirm the
            # numpy version before changing.
            print("training-iter {} cost {}".format(
                ITER,
                cost.cpu().detach().numpy()[0]))
            if cur_iter % val_interval == 0:
                val(crnn, valset, criterion, n_class)
            if cur_iter % save_interval == 0:
                model_file = os.path.join(model_dir,
                                          "crnn_iter{}.pth".format(ITER))
                print("saving in file {}".format(model_file))
                # Serializes the whole model object, not just its state_dict.
                with open(model_file, 'wb') as f:
                    torch.save(crnn, f)
Example #9
0
File: main.py  Project: ravindra098/CRNN
def train(root,
          start_epoch,
          epoch_num,
          letters,
          net=None,
          lr=0.1,
          fix_width=True):
    """
    Train CRNN model

    Args:
        root (str): Root directory of dataset
        start_epoch (int): Epoch number to start
        epoch_num (int): Epoch number to train
        letters (str): Letters contained in the data
        net (CRNN, optional): CRNN model to continue training; a new one is
            created when None (default: None)
        lr (float, optional): Learning rate passed to Adadelta (default: 0.1)
        fix_width (bool, optional): Scale images to fixed size (default: True)

    Returns:
        CRNN: Trained CRNN model
    """

    # load data
    trainloader = load_data(root, training=True, fix_width=fix_width)
    if not net:
        # create a new model if net is None; one extra class for the CTC blank
        net = CRNN(1, len(letters) + 1)
    criterion = torch.nn.CTCLoss()
    optimizer = optim.Adadelta(net.parameters(), lr=lr, weight_decay=1e-3)
    # Use the GPU when one is available.
    # BUG FIX: the original clobbered this with an unconditional
    # `use_cuda = False` (a debug leftover), which both disabled the GPU and
    # printed a misleading "Cuda isn't available" warning on GPU machines.
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    if use_cuda:
        net = net.to(device)
        criterion = criterion.to(device)
    else:
        print("*****   Warning: Cuda isn't available!  *****")

    # get encoder and decoder
    labeltransformer = LabelTransformer(letters)

    print('====   Training..   ====')
    # .train() enables training behaviour in Dropout and BatchNorm layers.
    net.train()
    for epoch in range(start_epoch, start_epoch + epoch_num):
        print('----    epoch: %d    ----' % (epoch, ))
        loss_sum = 0
        for i, (img, label) in enumerate(trainloader):
            # Encode string labels into CTC targets plus their lengths.
            label, label_length = labeltransformer.encode(label)
            img = img.to(device)
            optimizer.zero_grad()
            # put images in
            outputs = net(img)
            # One identical output length (time steps) per batch element.
            output_length = torch.IntTensor([outputs.size(0)] *
                                            outputs.size(1))
            # calc loss
            loss = criterion(outputs, label, output_length, label_length)
            # update
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
        print('loss = %f' % (loss_sum, ))
    print('Finished Training')
    return net
Example #10
0
def train():
    """
    Train the CRNN in PaddlePaddle dygraph mode using the module-level
    ``train_parameters`` dict; saves 'crnn_latest' every epoch and
    'crnn_best' whenever evaluation accuracy improves.
    """
    epoch_num = train_parameters["num_epochs"]
    batch_size = train_parameters["train_batch_size"]

    place = fluid.CUDAPlace(
        0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    logger.info('train with {}'.format(place))
    with fluid.dygraph.guard(place):
        # Load the data.
        # NOTE(review): the file handle opened here is never closed.
        file_list = open(train_parameters['train_list']).readlines()
        train_reader = get_loader(
            file_list=file_list,
            input_size=train_parameters['input_size'],
            max_char_per_line=train_parameters['max_char_per_line'],
            mean_color=train_parameters['mean_color'],
            batch_size=train_parameters['train_batch_size'],
            mode='train',
            label_dict=train_parameters['label_dict'],
            place=place)

        batch_num = len(train_reader())

        # One extra output class for the CTC blank label.
        crnn = CRNN(train_parameters["class_dim"] + 1, batch_size=batch_size)
        total_step = batch_num * epoch_num
        LR = train_parameters['learning_rate']
        lr = fluid.layers.polynomial_decay(LR, total_step, 1e-7, power=0.9)
        # lr = fluid.layers.piecewise_decay([total_step // 3, total_step * 2 // 3], [LR, LR * 0.1, LR * 0.01])
        optimizer = fluid.optimizer.Adam(learning_rate=lr,
                                         parameter_list=crnn.parameters())

        if train_parameters["continue_train"]:
            # Load the model from the previous run and continue training.
            params_dict, opt_dict = fluid.load_dygraph('{}/crnn_latest'.format(
                train_parameters['save_model_dir']))
            crnn.set_dict(params_dict)
            optimizer.set_dict(opt_dict)
            logger.info("load model from {}".format(
                train_parameters['save_model_dir']))

        current_best = -1
        start_epoch = 0
        for epoch in range(start_epoch, epoch_num):
            crnn.train()
            tic = time.time()
            for batch_id, (img, label, label_len) in enumerate(train_reader()):
                out = crnn(img)

                # warpctc expects time-major input, so transpose to (T, B, C).
                out_for_loss = fluid.layers.transpose(out, [1, 0, 2])
                # One identical input length (the time dimension) per sample.
                input_length = np.array([out.shape[1]] *
                                        out.shape[0]).astype("int64")
                input_length = fluid.dygraph.to_variable(input_length)
                input_length.stop_gradient = True
                # class_dim is used as the blank index (the extra class added
                # at construction above).
                loss = fluid.layers.warpctc(
                    input=out_for_loss,
                    label=label.astype(np.int32),
                    input_length=input_length,
                    label_length=label_len,
                    blank=train_parameters["class_dim"],
                    norm_by_times=True)
                avg_loss = fluid.layers.reduce_mean(loss)

                cur_acc_num, cur_all_num = acc_batch(out.numpy(),
                                                     label.numpy())
                # batch_id % 1 is always 0, so this logs every batch.
                if batch_id % 1 == 0:
                    logger.info(
                        "epoch [{}/{}], step [{}/{}], loss: {:.6f}, acc: {:.4f}, lr: {}, time: {:.4f}"
                        .format(epoch, epoch_num, batch_id, batch_num,
                                avg_loss.numpy()[0], cur_acc_num / cur_all_num,
                                optimizer.current_step_lr(),
                                time.time() - tic))
                    tic = time.time()
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                crnn.clear_gradients()

            # Always checkpoint the latest weights and optimizer state.
            fluid.save_dygraph(
                crnn.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            fluid.save_dygraph(
                optimizer.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            crnn.eval()
            ratio = eval_model(crnn, place=place)
            # Keep a separate 'best' checkpoint whenever accuracy improves.
            if ratio >= current_best:
                fluid.save_dygraph(
                    crnn.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                fluid.save_dygraph(
                    optimizer.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                current_best = ratio
                logger.info("save model to {}, current best acc:{:.2f}".format(
                    train_parameters['save_model_dir'], ratio))
    logger.info("train end")
Example #11
0
        # NOTE(review): scraped fragment — the function enclosing these lines
        # is not visible in this chunk.
        print("-----Test-----step:%d/%d----loss value:%f-----accuracy:%f" %
              (teststep, len(testdata), loss, accuracy))
        writer.add_scalar("Testing Loss", loss, step)
        writer.add_scalar("Testing accuracy", accuracy, epoch)


# Top-level training loop over epochs and batches.
step = 1
for epoch in range(0, opt.epoch):
    correct = 0
    number = 0
    for i, batch in enumerate(traindata):
        raw_text = batch["label"]
        # Encode the ground-truth strings into int tensors plus lengths
        # for the CTC loss.
        encode_text, length = process.encodetext(raw_text)
        encode_text = Variable(encode_text).to(device)
        image = Variable(batch["image"]).to(device)
        model.train()
        output = model(image)
        optimizer.zero_grad()
        # One identical output length (the time dimension) per batch element.
        output_size = Variable(
            torch.IntTensor([output.size(0)] * opt.batchsize)).to(device)
        loss = lossfunction(output, encode_text, output_size, length)
        loss.backward()
        optimizer.step()
        # Greedy decode: argmax over the class dimension, then reorder to
        # batch-major.
        _, output = output.max(2)
        output = output.transpose(1, 0)
        outputtext = []
        # NOTE(review): this inner loop shadows `i` from the batch loop.
        for i in range(0, output.size(0)):
            decode_text = process.decodetext(output[i])
            outputtext += [decode_text]
            number += 1
            # NOTE(review): the chunk is truncated here — the body of this
            # `if` is missing from the scraped source.
            if decode_text == raw_text[i]:
    def test_train(self):
        '''
        Smoke test: train a small CRNN end-to-end on a fixed local dataset
        with hard-coded hyper-parameters ("parameters of train").
        '''
        # test_root = "data/ocr_dataset_val"
        # train_root = "data/ocr_dataset"
        train_root = "data/ocr_dataset_train_400_10/"
        test_root = "data/ocr_dataset_train_50_10_val/"
        batch_size = 20
        max_len = 15
        img_h, img_w = 32, 150
        n_hidden = 512
        n_iter = 400
        lr = 0.00005
        cuda = True
        val_interval = 250
        save_interval = 1000
        model_dir = "models"
        debug_level = 20
        experiment = "experiment"
        n_channel = 3
        n_class = 11
        beta = 0.5

        # Pre-allocated CPU buffers refreshed via loadData() each iteration.
        # NOTE(review): `image` uses img_h for both spatial dims, and the
        # forward pass below feeds train_img directly — confirm these buffers
        # are actually needed.
        image = torch.FloatTensor(batch_size, n_channel, img_h, img_h)
        text = torch.IntTensor(batch_size * max_len)
        length = torch.IntTensor(batch_size)

        logging.getLogger().setLevel(debug_level)
        '''
            50 - critical
            40 - error
            30 - warining
            20 - info
            10 - debug
        '''
        crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
        crnn.apply(weights_init)

        criterion = CTCLoss().cuda()

        optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
        # optimizer = optim.Adam(crnn.parameters(), lr=lr,
        #                    betas=(beta, 0.999))

        trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
        valset = train_set(test_root, batch_size, img_h, img_w, n_class)

        cur_iter = 0
        for ITER in range(n_iter):
            for train_img, train_label, train_lengths, batch_label in iter(
                    trainset):
                # Unfreeze all parameters before the training step.
                for p in crnn.parameters():
                    p.requires_grad = True
                crnn.train()

                # The dataset yields None when exhausted.
                if train_img is None:
                    break
                cur_iter += 1
                loadData(image, train_img)
                loadData(text, train_label)
                loadData(length, train_lengths)
                preds = crnn(train_img.cuda())
                # preds = F.softmax(preds, dim=2)
                # print(preds.shape)
                # One prediction length (the full time dimension) per element.
                preds_size = Variable(
                    torch.IntTensor([preds.size(0)] * batch_size))
                # print(batch_label, text, length, len(text), len(length), length.sum(),
                #     preds.shape, preds_size.shape)
                cost = criterion(preds, text, preds_size, length) / batch_size
                crnn.zero_grad()
                cost.backward()
                optimizer.step()
                print("training-iter {} cost {}".format(
                    ITER,
                    cost.cpu().detach().numpy()[0]))
                if cur_iter % val_interval == 0:
                    val(crnn, valset, criterion, n_class)
                if cur_iter % save_interval == 0:
                    model_file = os.path.join(model_dir,
                                              "crnn_iter{}.pth".format(ITER))
                    print("saving in file {}".format(model_file))
                    # Serializes the whole model object, not just state_dict.
                    with open(model_file, 'wb') as f:
                        torch.save(crnn, f)