Code example #1
File: train.py Project: belosthomas/socr-line
def callback(model, loss, test_path):
    model.eval()
    # wipe the previous results directory (POSIX 'rm'; shutil.rmtree would be portable)
    subprocess.run(['rm', '-R', 'results'])

    evaluate(model, loss, test_path)
    result = run_transkribus()
    lines = result.split("\n")
    probs = [line.split(",") for line in lines]
    probs = [[prob.replace(" ", "") for prob in problist]
             for problist in probs]

    new_probs = []
    total = None

    # look for the row whose id is "TOTAL": it carries the global metrics
    for problist in probs:
        try:
            if problist[3].split(".")[0] == "TOTAL":
                total = problist
        except IndexError:
            pass

    print_normal("P : " + str(total[0]) + "; F : " + str(total[1]) +
                 "; F1 : " + str(total[2]))

    # keep only the rows whose first three cells parse as P, R and F1
    for problist in probs:
        try:
            new_probs.append([
                float(problist[0]),
                float(problist[1]),
                float(problist[2]), problist[3], problist[4]
            ])
        except (ValueError, IndexError):
            pass

    # sort ascending by F1 so the renamed result files are ordered by score
    new_probs.sort(key=lambda x: x[2])

    for prob in new_probs:
        file_id = prob[3].split(".")[0]
        if file_id != "TOTAL":
            for ext in [
                    ".jpg", ".probs.jpg", ".probs.gt.jpg", ".components.jpg",
                    ".txt", ".xml"
            ]:
                os.rename("results/" + file_id + ext,
                          'results/%.4f%s' % (prob[2], ext))
        else:
            print(prob)

    return total[2]
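
The parsing above assumes run_transkribus() returns comma-separated rows with precision, recall and F1 in the first three columns and a file id (or the literal TOTAL) in the fourth. A minimal sketch of that assumed format, with hypothetical values, just to make the column layout explicit:

# Hypothetical input for callback(); the exact columns of the real
# Transkribus evaluation output are an assumption inferred from the code.
example = "\n".join([
    "0.9471, 0.9123, 0.9294, 0.jpg, ok",    # per-page P, R, F1, id, info
    "0.9260, 0.8967, 0.9111, TOTAL, ok",    # global row picked up as 'total'
])
rows = [[cell.replace(" ", "") for cell in line.split(",")]
        for line in example.split("\n")]
assert rows[1][3].split(".")[0] == "TOTAL"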
Code example #2
    def __init__(self,
                 loss_type="mse",
                 hysteresis_minimum=0.5,
                 hysteresis_maximum=0.5,
                 thicknesses=2,
                 height_importance=1.0):
        """

        :param s: grid division, assuming we have only 1 bounding box per cell
        """
        super().__init__()

        self.add_activation = None
        self.loss_type = loss_type
        self.mse = torch.nn.MSELoss()

        if loss_type == "mse":
            print_normal("Using MSE Loss with Hysteresis=(" +
                         str(hysteresis_minimum) + "," +
                         str(hysteresis_maximum) + "), thicknesses=" +
                         str(thicknesses) + ", height_importance=" +
                         str(height_importance))
            self.loss = torch.nn.MSELoss()
        elif loss_type == "bce":
            print_normal("Using Binary Cross Entropy Loss Hysteresis=(" +
                         str(hysteresis_minimum) + "," +
                         str(hysteresis_maximum) + "), thicknesses=" +
                         str(thicknesses) + ", height_importance=" +
                         str(height_importance))
            self.loss = torch.nn.BCELoss()
            # self.mse = torch.nn.BCEWithLogitsLoss()
        elif loss_type == "norm":
            self.loss = None
        else:
            raise AssertionError("unknown loss_type: " + loss_type)

        self.hysteresis_minimum = hysteresis_minimum
        self.hysteresis_maximum = hysteresis_maximum
        self.thicknesses = thicknesses

        self.height_factor = 1.0
        self.height_importance = height_importance
        self.decoder = BaselineDecoder(self.height_factor,
                                       self.hysteresis_minimum,
                                       self.hysteresis_maximum)
        self.encoder = BaselineEncoder(self.height_factor, self.thicknesses)
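
For context, a hedged construction sketch: the class name BaselineLoss below is a placeholder, since the excerpt starts at __init__ and omits the class declaration.

# Hypothetical usage; "BaselineLoss" stands in for whatever class this
# __init__ belongs to, which the excerpt does not name.
loss = BaselineLoss(loss_type="bce",
                    hysteresis_minimum=0.5,
                    hysteresis_maximum=0.5,
                    thicknesses=2,
                    height_importance=0.001)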
Code example #3
File: resRnn.py Project: belosthomas/socr-text
    def __init__(self, labels):
        super().__init__()

        self.labels = labels
        self.output_numbers = max(labels.values()) + 1
        self.rnn_size = self.output_numbers

        print_normal("Creating resSru with " + str(self.output_numbers) +
                     " labels")

        self.convolutions = torch.nn.Sequential(
            OrderedDict([
                ('conv1',
                 torch.nn.Conv2d(3,
                                 64,
                                 kernel_size=7,
                                 stride=2,
                                 padding=3,
                                 bias=False)),
                ('bn1', torch.nn.BatchNorm2d(64)),
                ('activation', torch.nn.ReLU(inplace=True)),
                ('maxpool',
                 torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=(1, 1))),
                ('resnet',
                 ResNet(BasicBlock, [2, 2, 2, 2],
                        strides=[1, (2, 1), (2, 1), (2, 1)],
                        bn=True)),
            ]))
        self.convolutions_output_size = self.get_cnn_output_size()

        self.rnn = torch.nn.GRU(self.convolutions_output_size[1] *
                                self.convolutions_output_size[2],
                                self.rnn_size,
                                num_layers=1,
                                bidirectional=True)
        # self.rnn = IndRNN(self.convolutions_output_size[1] * self.convolutions_output_size[2], self.rnn_size, n_layer=3, bidirectional=True, batch_norm=True, batch_first=True, dropout=0.1, nonlinearity='relu')
        self.fc = torch.nn.Linear(2 * self.rnn_size, self.output_numbers)

        self.softmax = torch.nn.Softmax(dim=2)
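
Note the strides=[1, (2, 1), (2, 1), (2, 1)] in the ResNet stages: height is halved at each stage while width is preserved, so every feature-map column becomes one timestep for the bidirectional GRU. A small shape check of the stem, under the assumption of a 64-pixel-high input line (the project's get_cnn_output_size() and forward() are not shown in this excerpt):

import torch

# Hypothetical shape walk-through of the convolutional stem above.
x = torch.zeros(1, 3, 64, 256)  # (batch, channels, height, width)
conv1 = torch.nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
maxpool = torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=(1, 1))
print(maxpool(conv1(x)).shape)  # torch.Size([1, 64, 16, 64]): height and width /4
# The (2, 1) strides in the ResNet stages then halve the height three more
# times while keeping the width, which serves as the GRU's time axis.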
Code example #4
File: __init__.py Project: belosthomas/socr-text
def download_resources():
    if not os.path.isdir("resources/fonts"):
        url = "https://www.dropbox.com/s/3wcp26el8x5na4j/resources.zip?dl=1"

        print_normal("Dowloading resources...")
        wget.download(url)

        print_normal("Extracting resources...")
        zip_ref = zipfile.ZipFile("resources.zip", 'r')
        zip_ref.extractall(".")
        zip_ref.close()

        print_normal("Cleaing up...")
        os.remove("resources.zip")

        print_normal("Resources downloaded successfully.")
Code example #5
def test(model, lm, loss, test_database, limit=32):
    """
    Test the network

    :param limit: Limit of images of the test
    :return: The average cer
    """
    model.eval()

    is_cuda = next(model.parameters()).is_cuda

    loader = torch.utils.data.DataLoader(test_database,
                                         batch_size=1,
                                         shuffle=False,
                                         num_workers=1)

    test_len = len(test_database)
    if limit is not None:
        test_len = min(limit, test_len)

    wer_s, wer_i, wer_d, wer_n = 0, 0, 0, 0
    cer_s, cer_i, cer_d, cer_n = 0, 0, 0, 0

    sen_err = 0
    count = 0

    for i, data in enumerate(loader, 0):
        image, label = data
        label = label[1][0]

        if image.shape[2] < 8:
            continue

        if is_cuda:
            result = model(torch.autograd.Variable(image.float().cuda()))
        else:
            result = model(torch.autograd.Variable(image.float().cpu()))

        # text = loss.ytrue_to_lines(result.cpu().detach().numpy())
        text = wordBeamSearch(result[0].data.cpu().numpy(), 32, lm, False)

        # update CER statistics
        _, (s, i, d) = levenshtein(label, text)
        cer_s += s
        cer_i += i
        cer_d += d
        cer_n += len(label)
        # update WER statistics

        _, (s, i, d) = levenshtein(label.split(), text.split())
        wer_s += s
        wer_i += i
        wer_d += d
        wer_n += len(label.split())
        # update SER statistics
        if s + i + d > 0:
            sen_err += 1

        count = count + 1

        sys.stdout.write("Testing..." + str(count * 100 // test_len) + "%\r")

        if count == test_len:
            break

    cer = (100.0 * (cer_s + cer_i + cer_d)) / cer_n
    wer = (100.0 * (wer_s + wer_i + wer_d)) / wer_n
    ser = (100.0 * sen_err) / count

    print_normal("CER : %.3f; WER : %.3f; SER : %.3f \n" % (cer, wer, ser))
Code example #6
def main():
    """
    This is the main function, which is called first
    """

    # Parse the command line arguments
    parser = argparse.ArgumentParser(description="SOCR Text Recognizer")
    parser.add_argument('--bs', type=int, default=1, help="Batch size")
    parser.add_argument('--model',
                        type=str,
                        default="resRnn",
                        help="Model name")
    parser.add_argument('--name',
                        type=str,
                        default="resRnn",
                        help="Name for this training")
    parser.add_argument('--lr',
                        type=float,
                        default=0.0001,
                        help="Learning rate")
    parser.add_argument('--clipgradient',
                        type=float,
                        default=None,
                        help="Gradient clipping")
    parser.add_argument('--epochlimit',
                        type=int,
                        default=None,
                        help="Limit the training to a number of epoch")
    parser.add_argument('--overlr',
                        action='store_const',
                        const=True,
                        default=False,
                        help="Override the learning rate")
    parser.add_argument('--disablecuda',
                        action='store_const',
                        const=True,
                        default=False,
                        help="Disable cuda")
    parser.add_argument('--iamtrain', type=str, help="IAM Training Set")
    parser.add_argument('--iamtest',
                        type=str,
                        default=None,
                        help="IAM Testing Set")
    parser.add_argument('--generated',
                        action='store_const',
                        const=True,
                        default=False,
                        help="Enable generated data")
    args = parser.parse_args()

    assert args.iamtrain is not None

    # Initialize Language Model
    with open("characters.txt", "r") as content_file:
        characters = content_file.read() + " "
        lst = characters
        labels = {"": 0}
        for i in range(0, len(lst)):
            labels[lst[i]] = i + 1

    with open("word_characters.txt", "r") as content_file:
        word_characters = content_file.read()

    with open("dictionnary.txt", "r") as content_file:
        dictionnary = content_file.read()

    lm = LanguageModel(dictionnary, characters, word_characters)

    # Create the model and the loss
    model = resRnn(labels)
    loss = model.create_loss()

    if not args.disablecuda:
        model = model.cuda()
        loss = loss.cuda()
    else:
        print_warning("Using the CPU")
        model = model.cpu()
        loss = loss.cpu()

    image_height = model.get_input_image_height()

    print_normal("Using Adam with a Learning Rate of " + str(args.lr))
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    adaptative_optimizer = torch.optim.lr_scheduler.ExponentialLR(
        optimizer, 0.98)

    os.makedirs('checkpoints', exist_ok=True)

    if not args.disablecuda:
        print_normal("Using GPU Data Parallel")
        model = torch.nn.DataParallel(model)
    else:
        model = CPUParallel(model)

    checkpoint_name = "checkpoints/" + args.name + ".pth.tar"

    epoch = 0

    # Create or load the weights
    if os.path.exists(checkpoint_name):
        print_normal("Restoring the weights...")
        checkpoint = torch.load(checkpoint_name)
        epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        adaptative_optimizer.load_state_dict(
            checkpoint['adaptative_optimizer'])
    else:
        print_warning("Can't find '" + checkpoint_name + "'")

    # args.overlr is a store_const flag defaulting to False, so test its truth
    # value; "is not None" was always true and overwrote the lr unconditionally
    if args.overlr:
        print_normal("Overwriting the lr to " + str(args.lr))
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr

    # Initialize datasets
    train_databases = [
        IAMHandwritingLineDatabase(args.iamtrain,
                                   height=image_height,
                                   loss=loss)
    ]

    if args.generated:
        sys.path.append(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         "submodules/scribbler"))
        from scribbler.generator import LineGenerator
        train_databases.append(LineGenerator(height=image_height, loss=loss))

    train_database = torch.utils.data.ConcatDataset(train_databases)

    test_database = None
    if args.iamtest is not None:
        test_database = IAMHandwritingLineDatabase(args.iamtest,
                                                   height=image_height,
                                                   loss=loss)

    moving_average = MovingAverage(
        max(len(train_database) // args.bs, 1024))

    # Start training
    try:
        while True:
            if args.epochlimit is not None and epoch > args.epochlimit:
                print_normal("Epoch " + str(args.epochlimit) + "reached !")
                break

            model.train()

            loader = torch.utils.data.DataLoader(train_database,
                                                 batch_size=args.bs,
                                                 shuffle=True,
                                                 num_workers=4,
                                                 collate_fn=collate)
            for i, data in enumerate(loader, 0):

                inputs, labels = data

                optimizer.zero_grad()

                variable = torch.autograd.Variable(inputs).float()

                if not args.disablecuda:
                    variable = variable.cuda()
                else:
                    variable = variable.cpu()

                outputs = model(variable)
                loss_value = loss.forward(outputs, labels)
                loss_value.backward()

                loss_value_cpu = loss_value.data.cpu().numpy()

                if args.clipgradient is not None:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.clipgradient)

                optimizer.step()

                moving_average.addn(float(loss_value_cpu))

                if (i * args.bs) % 8 == 0:
                    sys.stdout.write(TerminalColors.BOLD + '[%d, %5d] ' %
                                     (epoch + 1,
                                      (i * args.bs) + 1) + TerminalColors.ENDC)
                    sys.stdout.write(
                        'lr: %.8f; loss: %.4f ; curr: %.4f ;\r' %
                        (optimizer.state_dict()['param_groups'][0]['lr'],
                         moving_average.moving_average(), loss_value_cpu))

            epoch = epoch + 1
            adaptative_optimizer.step()

            sys.stdout.write("\n")

            if args.iamtest is not None:
                test(model, lm, loss, test_database)

    except KeyboardInterrupt:
        pass

    print_normal("Done training ! Saving...")
    torch.save(
        {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'adaptative_optimizer': adaptative_optimizer.state_dict(),
        }, checkpoint_name)
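
Since the model is wrapped in torch.nn.DataParallel before training, the saved state_dict keys carry a "module." prefix (standard PyTorch behavior). A hedged sketch of reloading this checkpoint into an unwrapped model for inference; the strip step assumes that standard prefix:

import torch

# Hypothetical standalone reload of the checkpoint written above.
checkpoint = torch.load("checkpoints/resRnn.pth.tar", map_location="cpu")
# DataParallel prefixes every parameter key with "module."; strip it so the
# weights load into a plain (unwrapped) model.
state_dict = {k.replace("module.", "", 1): v
              for k, v in checkpoint['state_dict'].items()}
model = resRnn(labels)   # labels built exactly as in main() above
model.load_state_dict(state_dict)
model.eval()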
Code example #7
    def __init__(self,
                 loss_type="mse",
                 hysteresis_minimum=0.5,
                 hysteresis_maximum=0.5,
                 thicknesses=2,
                 height_importance=1.0,
                 bn_momentum=0.1):
        super(dhSegment, self).__init__()

        self.loss_type = loss_type
        self.hysteresis_minimum = hysteresis_minimum
        self.hysteresis_maximum = hysteresis_maximum
        self.thicknesses = thicknesses
        self.height_importance = height_importance
        self.bn_momentum = bn_momentum

        self.inplanes = 64

        self.conv1 = torch.nn.Conv2d(3,
                                     64,
                                     kernel_size=7,
                                     padding=3,
                                     stride=2,
                                     bias=False)
        self.bn1 = torch.nn.BatchNorm2d(64)
        self.act1 = torch.nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(Bottleneck, 64, 3, stride=1)
        self.layer2 = self._make_layer(Bottleneck, 128, 4, stride=2)
        self.layer3 = self._make_layer(Bottleneck, 256, 6, stride=2)
        self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=2)

        self.layer4_reduce = torch.nn.Conv2d(2048,
                                             512,
                                             kernel_size=1,
                                             bias=False)
        self.layer4_reduce_bn = torch.nn.BatchNorm2d(512)
        self.layer4_reduce_act = torch.nn.ReLU(inplace=True)

        self.layer3_reduce = torch.nn.Conv2d(1024,
                                             512,
                                             kernel_size=1,
                                             bias=False)
        self.layer3_reduce_bn = torch.nn.BatchNorm2d(512)
        self.layer3_reduce_act = torch.nn.ReLU(inplace=True)

        self.up1 = PSPUpsample(512 + 512, 512, bn=True)
        self.up2 = PSPUpsample(512 + 512, 256, bn=True)
        self.up3 = PSPUpsample(256 + 64, 128, bn=True)
        self.up4 = PSPUpsample(128 + 3, 64, bn=True)

        self.last_conv_prob = torch.nn.Conv2d(64,
                                              2,
                                              kernel_size=(1, 1),
                                              dilation=(1, 1),
                                              padding=0,
                                              bias=True)
        self.last_h_prob = torch.nn.ReLU(inplace=True)

        self.last_act_prob = torch.nn.Sigmoid()

        print_normal("Applying xavier initialization...")
        self.apply(self.weights_init)

        print_normal("Downloading pretrained model from pytorch model zoo...")
        pretrained_model = model_zoo.load_url(
            "https://download.pytorch.org/models/resnet50-19c8e357.pth")

        print_normal("Loading pretrained resnet...")
        self.load_my_state_dict(pretrained_model)

        print_normal("Adjusting Batch Normalization momentum to " +
                     str(self.bn_momentum))
        self.apply(self.adjust_bn_decay(self.bn_momentum))
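
The channel counts of the four PSPUpsample blocks pin down the decoder wiring even though forward() is not shown: each stage upsamples a deeper feature map and fuses it with a shallower skip connection, down to the raw 3-channel image. A sketch of that implied wiring; the exact PSPUpsample call convention is an assumption:

# Decoder wiring implied by the __init__ channel arithmetic above; the
# project's real forward() is not part of this excerpt.
#
#   up1: 512 (layer4, reduced) + 512 (layer3, reduced) -> 512
#   up2: 512 (up1)             + 512 (layer2)          -> 256
#   up3: 256 (up2)             +  64 (conv1 stem)      -> 128
#   up4: 128 (up3)             +   3 (input image)     ->  64
#
# last_conv_prob then maps the 64 channels to 2 outputs (presumably the
# baseline probability map and the height prediction), passed through the
# ReLU / Sigmoid heads defined above.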
Code example #8
File: train.py Project: belosthomas/socr-line
def evaluate(model, loss, path):
    """
    Evaluate the line localizator. Output all the results to the 'results' directory.

    :param path: The path of the images, with or without associated XMLs
    """
    print_normal("Evaluating " + path)

    if not os.path.exists("results"):
        os.makedirs("results")

    data_set = ICDARDocumentEvalSet(path, loss)

    loader = torch.utils.data.DataLoader(data_set,
                                         batch_size=1,
                                         shuffle=False,
                                         num_workers=1)

    count = 0

    for i, data in enumerate(loader, 0):
        resized, image, path, label = data

        percent = i * 100 // len(data_set)
        sys.stdout.write(str(percent) + "%... Processing \r")

        lines, positions, probsmap, components = extract(model,
                                                         loss,
                                                         image,
                                                         resized,
                                                         with_images=False)

        output_image_bloc(image,
                          positions).save("results/" + str(count) + ".jpg",
                                          "JPEG")

        save_connected_components(components,
                                  "results/" + str(count) + ".components.jpg")

        image_numpy_to_pillow_bw(
            probsmap[0].cpu().detach().numpy()).save("results/" + str(count) +
                                                     ".probs.jpg")
        del probsmap

        image_numpy_to_pillow_bw(
            label[0][0].cpu().detach().numpy()).save("results/" + str(count) +
                                                     ".probs.gt.jpg")

        xml_path = os.path.join(
            os.path.dirname(path[0]),
            os.path.splitext(os.path.basename(path[0]))[0] + ".xml")
        if not os.path.exists(xml_path):
            xml_path = os.path.join(
                os.path.dirname(path[0]), "page/" +
                os.path.splitext(os.path.basename(path[0]))[0] + ".xml")

        if os.path.exists(xml_path):
            shutil.copy2(xml_path, "results/" + str(count) + ".xml")
            with open("results/" + str(count) + ".txt", "w") as text_file:
                text_file.write(output_baseline(positions))
        else:
            print_warning("Can't find : '" + xml_path + "'")

        count = count + 1
Code example #9
File: train.py Project: belosthomas/socr-line
def main():
    parser = argparse.ArgumentParser(description="socr")
    parser.add_argument('--name', type=str, default="dhSegment")
    parser.add_argument('--lr',
                        type=float,
                        default=0.0001,
                        help="Learning rate")
    parser.add_argument('--overlr',
                        action='store_const',
                        const=True,
                        default=False,
                        help="Override the learning rate")
    parser.add_argument('--bs', type=int, default=16, help="The batch size")
    parser.add_argument('--losstype',
                        type=str,
                        default='bce',
                        help="The loss type. Ex : mse, bce, norm")
    parser.add_argument('--thicknesses',
                        type=int,
                        default=2,
                        help="Line thicknesses in the document")
    parser.add_argument('--hystmin',
                        type=float,
                        default=0.5,
                        help="Hysteresys thresholding minimum")
    parser.add_argument('--hystmax',
                        type=float,
                        default=0.5,
                        help="Hysteresys thresholding maximum")
    parser.add_argument('--expdecay',
                        type=float,
                        default=0.98,
                        help="Exponential decay")
    parser.add_argument(
        '--heightimportance',
        type=float,
        default=0.001,
        help="Height prediction importance during the training")
    parser.add_argument('--weightdecay',
                        type=float,
                        default=0.000001,
                        help="Weight decay")
    parser.add_argument('--epochlimit',
                        type=int,
                        default=None,
                        help="Limit the number of epoch")
    parser.add_argument('--bnmomentum',
                        type=float,
                        default=0.1,
                        help="BatchNorm Momentum")
    parser.add_argument('--disablecuda',
                        action='store_const',
                        const=True,
                        default=False,
                        help="Disable cuda")
    parser.add_argument('--icdartrain',
                        type=str,
                        help="Path to the ICDAR Training set")
    parser.add_argument('--icdartest',
                        type=str,
                        default=None,
                        help="Path to the ICDAR Testing set")
    parser.add_argument('--generated',
                        action='store_const',
                        const=True,
                        default=False,
                        help="Enable generated data")
    args = parser.parse_args()

    model = dhSegment(args.losstype, args.hystmin, args.hystmax,
                      args.thicknesses, args.heightimportance, args.bnmomentum)
    loss = model.create_loss()

    if not args.disablecuda:
        model = torch.nn.DataParallel(model.cuda())
        loss = loss.cuda()
    else:
        model = CPUParallel(model.cpu())
        loss = loss.cpu()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weightdecay)
    adaptative_optimizer = torch.optim.lr_scheduler.ExponentialLR(
        optimizer, args.expdecay)

    os.makedirs('checkpoints', exist_ok=True)
    checkpoint_name = "checkpoints/" + args.name + ".pth.tar"

    epoch = 0

    if os.path.exists(checkpoint_name):
        print_normal("Restoring the weights...")
        checkpoint = torch.load(checkpoint_name)
        epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        adaptative_optimizer.load_state_dict(
            checkpoint['adaptative_optimizer'])
    else:
        print_warning("Can't find '" + checkpoint_name + "'")

    # args.overlr is a store_const flag defaulting to False, so test its truth
    # value; "is not None" was always true and overwrote the lr unconditionally
    if args.overlr:
        print_normal("Overwriting the lr to " + str(args.lr))
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr

    train_databases = [ICDARDocumentSet(args.icdartrain, loss, True)]

    if args.generated:
        sys.path.append(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         "submodules/scribbler"))
        from scribbler.generator import DocumentGenerator
        train_databases.append(DocumentGenerator(loss))

    train_database = torch.utils.data.ConcatDataset(train_databases)

    test_database_path = None
    if args.icdartest is not None:
        test_database_path = args.icdartest

    moving_average = MovingAverage(
        max(len(train_database) // args.bs, 1024))

    try:
        while True:
            if args.epochlimit is not None and epoch > args.epochlimit:
                print_normal("Epoch " + str(args.epochlimit) + "reached !")
                break

            model.train()

            loader = torch.utils.data.DataLoader(train_database,
                                                 batch_size=args.bs,
                                                 shuffle=True,
                                                 num_workers=4,
                                                 collate_fn=collate)
            for i, data in enumerate(loader, 0):

                inputs, labels = data

                optimizer.zero_grad()

                variable = torch.autograd.Variable(inputs).float()
                labels = torch.autograd.Variable(labels).float()

                if not args.disablecuda:
                    variable = variable.cuda()
                    labels = labels.cuda()
                else:
                    variable = variable.cpu()
                    labels = labels.cpu()

                outputs = model(variable)
                loss_value = loss.forward(outputs, labels)
                loss_value.backward()

                loss_value_cpu = loss_value.data.cpu().numpy()

                optimizer.step()

                moving_average.addn(float(loss_value_cpu))

                if (i * args.bs) % 8 == 0:
                    sys.stdout.write(TerminalColors.BOLD + '[%d, %5d] ' %
                                     (epoch + 1,
                                      (i * args.bs) + 1) + TerminalColors.ENDC)
                    sys.stdout.write(
                        'lr: %.8f; loss: %.4f ; curr: %.4f ;\r' %
                        (optimizer.state_dict()['param_groups'][0]['lr'],
                         moving_average.moving_average(), loss_value_cpu))

            epoch = epoch + 1
            adaptative_optimizer.step()

            sys.stdout.write("\n")

            try:
                if args.icdartest is not None:
                    callback(model, loss, test_database_path)
            except Exception as e:
                print_error("Can't test : " + str(e))

    except KeyboardInterrupt:
        pass

    print_normal("Done training ! Saving...")
    torch.save(
        {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'adaptative_optimizer': adaptative_optimizer.state_dict(),
        }, checkpoint_name)