def callback(model, loss, test_path):
    """Run the evaluation tool on the test set and rank the result images by F1 score."""
    model.eval()

    # Start from a clean results directory (portable replacement for `rm -R`).
    shutil.rmtree("results", ignore_errors=True)
    evaluate(model, loss, test_path)

    # Parse the comma-separated rows returned by the evaluation tool.
    result = run_transkribus()
    rows = [[field.replace(" ", "") for field in line.split(",")]
            for line in result.split("\n")]

    # Find the "TOTAL" summary row.
    total = None
    for row in rows:
        try:
            if row[3].split(".")[0] == "TOTAL":
                total = row
        except IndexError:
            pass

    print_normal("P : " + str(total[0]) + "; F : " + str(total[1]) + "; F1 : " + str(total[2]))

    # Keep only the rows whose first three columns parse as floats.
    new_probs = []
    for row in rows:
        try:
            new_probs.append([float(row[0]), float(row[1]), float(row[2]), row[3], row[4]])
        except (ValueError, IndexError):
            pass

    # Rename the result files so that they sort by F1 score.
    new_probs.sort(key=lambda x: x[2])
    for row in new_probs:
        file_id = row[3].split(".")[0]
        if file_id != "TOTAL":
            for ext in [".jpg", ".probs.jpg", ".probs.gt.jpg", ".components.jpg", ".txt", ".xml"]:
                os.rename("results/" + file_id + ext, 'results/%.4f%s' % (row[2], ext))
        else:
            print(row)

    return total[2]
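# A hedged sketch of the row format callback() assumes run_transkribus() returns:
# three numeric columns (printed above as P, F and F1), the page file name in
# the fourth column, one trailing field, plus a "TOTAL" summary row. The values
# below are illustrative, not real tool output.
example_result = "\n".join([
    "0.9512, 0.9230, 0.9369, 0.jpg, 120",
    "0.9701, 0.9455, 0.9576, 1.jpg, 133",
    "0.9606, 0.9342, 0.9472, TOTAL, 253",
])
rows = [[field.replace(" ", "") for field in line.split(",")] for line in example_result.split("\n")]
assert rows[2][3].split(".")[0] == "TOTAL"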
def __init__(self, loss_type="mse", hysteresis_minimum=0.5, hysteresis_maximum=0.5, thicknesses=2, height_importance=1.0): """ :param s: grid division, assuming we have only 1 bounding box per cell """ super().__init__() self.add_activation = None self.loss_type = loss_type self.mse = torch.nn.MSELoss() if loss_type == "mse": print_normal("Using MSE Loss with Hysteresis=(" + str(hysteresis_minimum) + "," + str(hysteresis_maximum) + "), thicknesses=" + str(thicknesses) + ", height_importance=" + str(height_importance)) self.loss = torch.nn.MSELoss() elif loss_type == "bce": print_normal("Using Binary Cross Entropy Loss Hysteresis=(" + str(hysteresis_minimum) + "," + str(hysteresis_maximum) + "), thicknesses=" + str(thicknesses) + ", height_importance=" + str(height_importance)) self.loss = torch.nn.BCELoss() # self.mse = torch.nn.BCEWithLogitsLoss() elif loss_type == "norm": self.loss = None else: raise AssertionError self.hysteresis_minimum = hysteresis_minimum self.hysteresis_maximum = hysteresis_maximum self.thicknesses = thicknesses self.height_factor = 1.0 self.height_importance = height_importance self.decoder = BaselineDecoder(self.height_factor, self.hysteresis_minimum, self.hysteresis_maximum) self.encoder = BaselineEncoder(self.height_factor, self.thicknesses)
def __init__(self, labels):
    super().__init__()

    self.labels = labels
    self.output_numbers = max(labels.values()) + 1
    self.rnn_size = self.output_numbers

    print_normal("Creating resRnn with " + str(self.output_numbers) + " labels")

    # Convolutional feature extractor: a strided stem followed by a ResNet-18
    # style body whose later stages downsample the height only.
    self.convolutions = torch.nn.Sequential(OrderedDict([
        ('conv1', torch.nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)),
        ('bn1', torch.nn.BatchNorm2d(64)),
        ('activation', torch.nn.ReLU(inplace=True)),
        ('maxpool', torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=(1, 1))),
        ('resnet', ResNet(BasicBlock, [2, 2, 2, 2], strides=[1, (2, 1), (2, 1), (2, 1)], bn=True)),
    ]))
    self.convolutions_output_size = self.get_cnn_output_size()

    # Bidirectional recurrent layer over the horizontal axis, followed by a
    # per-timestep classifier.
    self.rnn = torch.nn.GRU(self.convolutions_output_size[1] * self.convolutions_output_size[2],
                            self.rnn_size, num_layers=1, bidirectional=True)
    # self.rnn = IndRNN(self.convolutions_output_size[1] * self.convolutions_output_size[2],
    #                   self.rnn_size, n_layer=3, bidirectional=True, batch_norm=True,
    #                   batch_first=True, dropout=0.1, nonlinearity='relu')
    self.fc = torch.nn.Linear(2 * self.rnn_size, self.output_numbers)
    self.softmax = torch.nn.Softmax(dim=2)
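# Sketch of the labels mapping expected by the constructor above: each
# character maps to a positive index and the empty string reserves index 0
# (presumably the blank label of the decoder). This mirrors the construction
# done in the recognizer's main() below.
characters = "abc"
labels = {"": 0}
for i, char in enumerate(characters):
    labels[char] = i + 1
model = resRnn(labels)  # output_numbers == len(characters) + 1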
def download_resources():
    """Download and extract the resources archive (fonts, etc.) if it is missing."""
    if not os.path.isdir("resources/fonts"):
        url = "https://www.dropbox.com/s/3wcp26el8x5na4j/resources.zip?dl=1"

        print_normal("Downloading resources...")
        wget.download(url)

        print_normal("Extracting resources...")
        with zipfile.ZipFile("resources.zip", 'r') as zip_ref:
            zip_ref.extractall(".")

        print_normal("Cleaning up...")
        os.remove("resources.zip")

        print_normal("Resources downloaded successfully.")
def test(model, lm, loss, test_database, limit=32):
    """
    Test the network.

    :param model: the model to evaluate (switched to eval mode)
    :param lm: the language model used by the word beam search decoder
    :param loss: the loss (its line decoder is an alternative to word beam search)
    :param test_database: the dataset to evaluate on
    :param limit: maximum number of images to test on
    :return: the average CER, in percent
    """
    model.eval()
    is_cuda = next(model.parameters()).is_cuda

    loader = torch.utils.data.DataLoader(test_database, batch_size=1, shuffle=False, num_workers=1)

    test_len = len(test_database)
    if limit is not None:
        test_len = min(limit, test_len)

    wer_s, wer_i, wer_d, wer_n = 0, 0, 0, 0
    cer_s, cer_i, cer_d, cer_n = 0, 0, 0, 0
    sen_err = 0
    count = 0

    for data in loader:
        image, label = data
        label = label[1][0]

        if image.shape[2] < 8:
            continue

        if is_cuda:
            result = model(torch.autograd.Variable(image.float().cuda()))
        else:
            result = model(torch.autograd.Variable(image.float().cpu()))

        # text = loss.ytrue_to_lines(result.cpu().detach().numpy())
        text = wordBeamSearch(result[0].data.cpu().numpy(), 32, lm, False)

        # update CER statistics
        _, (s, i, d) = levenshtein(label, text)
        cer_s += s
        cer_i += i
        cer_d += d
        cer_n += len(label)

        # update WER statistics
        _, (s, i, d) = levenshtein(label.split(), text.split())
        wer_s += s
        wer_i += i
        wer_d += d
        wer_n += len(label.split())

        # update SER statistics
        if s + i + d > 0:
            sen_err += 1

        count += 1
        sys.stdout.write("Testing..." + str(count * 100 // test_len) + "%\r")

        if count == test_len:
            break

    cer = (100.0 * (cer_s + cer_i + cer_d)) / cer_n
    wer = (100.0 * (wer_s + wer_i + wer_d)) / wer_n
    ser = (100.0 * sen_err) / count

    print_normal("CER : %.3f; WER : %.3f; SER : %.3f \n" % (cer, wer, ser))
    return cer
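# Hedged usage sketch for test(): assumes a model, LanguageModel and loss
# built as in the recognizer's main() below; the IAM path is a placeholder.
test_db = IAMHandwritingLineDatabase("/path/to/iam/test", height=image_height, loss=loss)
test(model, lm, loss, test_db, limit=16)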
def main(): """ This is the main function, which is called first """ # Parse the command line arguments parser = argparse.ArgumentParser(description="SOCR Text Recognizer") parser.add_argument('--bs', type=int, default=1, help="Batch size") parser.add_argument('--model', type=str, default="resRnn", help="Model name") parser.add_argument('--name', type=str, default="resRnn", help="Name for this training") parser.add_argument('--lr', type=float, default=0.0001, help="Learning rate") parser.add_argument('--clipgradient', type=float, default=None, help="Gradient clipping") parser.add_argument('--epochlimit', type=int, default=None, help="Limit the training to a number of epoch") parser.add_argument('--overlr', action='store_const', const=True, default=False, help="Override the learning rate") parser.add_argument('--disablecuda', action='store_const', const=True, default=False, help="Disable cuda") parser.add_argument('--iamtrain', type=str, help="IAM Training Set") parser.add_argument('--iamtest', type=str, default=None, help="IAM Testing Set") parser.add_argument('--generated', action='store_const', const=True, default=False, help="Enable generated data") args = parser.parse_args() assert args.iamtrain is not None # Initialize Language Model with open("characters.txt", "r") as content_file: characters = content_file.read() + " " lst = characters labels = {"": 0} for i in range(0, len(lst)): labels[lst[i]] = i + 1 with open("word_characters.txt", "r") as content_file: word_characters = content_file.read() with open("dictionnary.txt", "r") as content_file: dictionnary = content_file.read() lm = LanguageModel(dictionnary, characters, word_characters) # Create the model and the loss model = resRnn(labels) loss = model.create_loss() if not args.disablecuda: model = model.cuda() loss = loss.cuda() else: print_warning("Using the CPU") model = model.cpu() loss = loss.cpu() image_height = model.get_input_image_height() print_normal("Using Adam with a Learning Rate of " + str(args.lr)) optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) adaptative_optimizer = torch.optim.lr_scheduler.ExponentialLR( optimizer, 0.98) os.makedirs('checkpoints', exist_ok=True) if not args.disablecuda: print_normal("Using GPU Data Parallel") model = torch.nn.DataParallel(model) else: model = CPUParallel(model) checkpoint_name = "checkpoints/" + args.name + ".pth.tar" epoch = 0 # Create or load the weights if os.path.exists(checkpoint_name): print_normal("Restoring the weights...") checkpoint = torch.load(checkpoint_name) epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) adaptative_optimizer.load_state_dict( checkpoint['adaptative_optimizer']) else: print_warning("Can't find '" + checkpoint_name + "'") if args.overlr is not None: print_normal("Overwriting the lr to " + str(args.lr)) for param_group in optimizer.param_groups: param_group['lr'] = args.lr # Initialize datasets train_databases = [ IAMHandwritingLineDatabase(args.iamtrain, height=image_height, loss=loss) ] if args.generated: sys.path.append( os.path.join(os.path.dirname(os.path.abspath(__file__)), "submodules/scribbler")) from scribbler.generator import LineGenerator train_databases.append(LineGenerator(height=image_height, loss=loss)) train_database = torch.utils.data.ConcatDataset(train_databases) test_database = None if args.iamtest is not None: test_database = IAMHandwritingLineDatabase(args.iamtest, height=image_height, loss=loss) moving_average = MovingAverage( 
max(train_database.__len__() // args.bs, 1024)) # Start training try: while True: if args.epochlimit is not None and epoch > args.epochlimit: print_normal("Epoch " + str(args.epochlimit) + "reached !") break model.train() loader = torch.utils.data.DataLoader(train_database, batch_size=args.bs, shuffle=True, num_workers=4, collate_fn=collate) for i, data in enumerate(loader, 0): inputs, labels = data optimizer.zero_grad() variable = torch.autograd.Variable(inputs).float() if not args.disablecuda: variable = variable.cuda() else: variable = variable.cpu() outputs = model(variable) loss_value = loss.forward(outputs, labels) loss_value.backward() loss_value_cpu = loss_value.data.cpu().numpy() if args.clipgradient is not None: torch.nn.utils.clip_grad_norm_(model.parameters(), args.clipgradient) optimizer.step() loss_value_np = float(loss_value.data.cpu().numpy()) moving_average.addn(loss_value_np) if (i * args.bs) % 8 == 0: sys.stdout.write(TerminalColors.BOLD + '[%d, %5d] ' % (epoch + 1, (i * args.bs) + 1) + TerminalColors.ENDC) sys.stdout.write( 'lr: %.8f; loss: %.4f ; curr: %.4f ;\r' % (optimizer.state_dict()['param_groups'][0]['lr'], moving_average.moving_average(), loss_value_cpu)) epoch = epoch + 1 adaptative_optimizer.step() sys.stdout.write("\n") if args.iamtest is not None: test(model, lm, loss, test_database) except KeyboardInterrupt: pass print_normal("Done training ! Saving...") torch.save( { 'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'adaptative_optimizer': adaptative_optimizer.state_dict(), }, checkpoint_name)
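# Example invocation of this training script. The file name train_recognizer.py
# is hypothetical; the flags match the argparse definitions above.
#
#   python train_recognizer.py --iamtrain /data/iam/train --iamtest /data/iam/test \
#       --bs 8 --lr 0.0001 --name resRnn --generated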
def __init__(self, loss_type="mse", hysteresis_minimum=0.5, hysteresis_maximum=0.5, thicknesses=2, height_importance=1.0, bn_momentum=0.1): super(dhSegment, self).__init__() self.loss_type = loss_type self.hysteresis_minimum = hysteresis_minimum self.hysteresis_maximum = hysteresis_maximum self.thicknesses = thicknesses self.height_importance = height_importance self.bn_momentum = bn_momentum self.inplanes = 64 self.conv1 = torch.nn.Conv2d(3, 64, kernel_size=7, padding=3, stride=2, bias=False) self.bn1 = torch.nn.BatchNorm2d(64) self.act1 = torch.nn.ReLU(inplace=True) self.layer1 = self._make_layer(Bottleneck, 64, 3, stride=1) self.layer2 = self._make_layer(Bottleneck, 128, 4, stride=2) self.layer3 = self._make_layer(Bottleneck, 256, 6, stride=2) self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=2) self.layer4_reduce = torch.nn.Conv2d(2048, 512, kernel_size=1, bias=False) self.layer4_reduce_bn = torch.nn.BatchNorm2d(512) self.layer4_reduce_act = torch.nn.ReLU(inplace=True) self.layer3_reduce = torch.nn.Conv2d(1024, 512, kernel_size=1, bias=False) self.layer3_reduce_bn = torch.nn.BatchNorm2d(512) self.layer3_reduce_act = torch.nn.ReLU(inplace=True) self.up1 = PSPUpsample(512 + 512, 512, bn=True) self.up2 = PSPUpsample(512 + 512, 256, bn=True) self.up3 = PSPUpsample(256 + 64, 128, bn=True) self.up4 = PSPUpsample(128 + 3, 64, bn=True) self.last_conv_prob = torch.nn.Conv2d(64, 2, kernel_size=(1, 1), dilation=(1, 1), padding=0, bias=True) self.last_h_prob = torch.nn.ReLU(inplace=True) self.last_act_prob = torch.nn.Sigmoid() print_normal("Applying xavier initialization...") self.apply(self.weights_init) print_normal("Downloading pretrained model from pytorch model zoo...") pretrained_model = model_zoo.load_url( "https://download.pytorch.org/models/resnet50-19c8e357.pth") print_normal("Loading pretrained resnet...") self.load_my_state_dict(pretrained_model) print_normal("Adjusting Batch Normalization momentum to " + str(self.bn_momentum)) self.apply(self.adjust_bn_decay(self.bn_momentum))
def evaluate(model, loss, path):
    """
    Evaluate the line localizator. Output all the results to the 'results' directory.

    :param path: The path of the images, with or without associated XMLs
    """
    print_normal("Evaluating " + path)

    if not os.path.exists("results"):
        os.makedirs("results")

    data_set = ICDARDocumentEvalSet(path, loss)
    loader = torch.utils.data.DataLoader(data_set, batch_size=1, shuffle=False, num_workers=1)

    count = 0
    for i, data in enumerate(loader, 0):
        # Named image_path so it does not shadow the `path` argument.
        resized, image, image_path, label = data

        percent = i * 100 // len(data_set)
        sys.stdout.write(str(percent) + "%... Processing \r")

        lines, positions, probsmap, components = extract(model, loss, image, resized, with_images=False)

        output_image_bloc(image, positions).save("results/" + str(count) + ".jpg", "JPEG")
        save_connected_components(components, "results/" + str(count) + ".components.jpg")

        image_numpy_to_pillow_bw(probsmap[0].cpu().detach().numpy()).save("results/" + str(count) + ".probs.jpg")
        del probsmap

        image_numpy_to_pillow_bw(label[0][0].cpu().detach().numpy()).save("results/" + str(count) + ".probs.gt.jpg")

        # Look for the ground-truth XML next to the image, then in a page/
        # subdirectory.
        xml_path = os.path.join(os.path.dirname(image_path[0]),
                                os.path.splitext(os.path.basename(image_path[0]))[0] + ".xml")
        if not os.path.exists(xml_path):
            xml_path = os.path.join(os.path.dirname(image_path[0]),
                                    "page/" + os.path.splitext(os.path.basename(image_path[0]))[0] + ".xml")

        if os.path.exists(xml_path):
            shutil.copy2(xml_path, "results/" + str(count) + ".xml")
            with open("results/" + str(count) + ".txt", "w") as text_file:
                text_file.write(output_baseline(positions))
        else:
            print_warning("Can't find : '" + xml_path + "'")

        count += 1
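# Hedged usage sketch for evaluate(): it only needs the trained localizator,
# its loss and a directory of page images (the path below is a placeholder);
# results land in the results/ directory as shown above.
model.eval()
evaluate(model, loss, "/path/to/icdar/test")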
def main():
    # Parse the command line arguments
    parser = argparse.ArgumentParser(description="socr")
    parser.add_argument('--name', type=str, default="dhSegment")
    parser.add_argument('--lr', type=float, default=0.0001, help="Learning rate")
    parser.add_argument('--overlr', action='store_const', const=True, default=False, help="Override the learning rate")
    parser.add_argument('--bs', type=int, default=16, help="The batch size")
    parser.add_argument('--losstype', type=str, default='bce', help="The loss type. Ex : mse, bce, norm")
    parser.add_argument('--thicknesses', type=int, default=2, help="Line thicknesses in the document")
    parser.add_argument('--hystmin', type=float, default=0.5, help="Hysteresis thresholding minimum")
    parser.add_argument('--hystmax', type=float, default=0.5, help="Hysteresis thresholding maximum")
    parser.add_argument('--expdecay', type=float, default=0.98, help="Exponential decay")
    parser.add_argument('--heightimportance', type=float, default=0.001, help="Height prediction importance during the training")
    parser.add_argument('--weightdecay', type=float, default=0.000001, help="Weight decay")
    parser.add_argument('--epochlimit', type=int, default=None, help="Limit the number of epochs")
    parser.add_argument('--bnmomentum', type=float, default=0.1, help="BatchNorm Momentum")
    parser.add_argument('--disablecuda', action='store_const', const=True, default=False, help="Disable cuda")
    parser.add_argument('--icdartrain', type=str, help="Path to the ICDAR Training set")
    parser.add_argument('--icdartest', type=str, default=None, help="Path to the ICDAR Testing set")
    parser.add_argument('--generated', action='store_const', const=True, default=False, help="Enable generated data")
    args = parser.parse_args()

    assert args.icdartrain is not None

    # Create the model and the loss
    model = dhSegment(args.losstype, args.hystmin, args.hystmax, args.thicknesses,
                      args.heightimportance, args.bnmomentum)
    loss = model.create_loss()

    if not args.disablecuda:
        model = torch.nn.DataParallel(model.cuda())
        loss = loss.cuda()
    else:
        model = CPUParallel(model.cpu())
        loss = loss.cpu()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weightdecay)
    adaptative_optimizer = torch.optim.lr_scheduler.ExponentialLR(optimizer, args.expdecay)

    os.makedirs('checkpoints', exist_ok=True)
    checkpoint_name = "checkpoints/" + args.name + ".pth.tar"

    # Create or load the weights
    epoch = 0
    if os.path.exists(checkpoint_name):
        print_normal("Restoring the weights...")
        checkpoint = torch.load(checkpoint_name)
        epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        adaptative_optimizer.load_state_dict(checkpoint['adaptative_optimizer'])
    else:
        print_warning("Can't find '" + checkpoint_name + "'")

    # Note: overlr is a store_const flag defaulting to False, so it must be
    # tested for truthiness, not against None.
    if args.overlr:
        print_normal("Overwriting the lr to " + str(args.lr))
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr

    # Initialize datasets
    train_databases = [ICDARDocumentSet(args.icdartrain, loss, True)]

    if args.generated:
        sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "submodules/scribbler"))
        from scribbler.generator import DocumentGenerator
        train_databases.append(DocumentGenerator(loss))

    train_database = torch.utils.data.ConcatDataset(train_databases)

    test_database_path = None
    if args.icdartest is not None:
        test_database_path = args.icdartest

    moving_average = MovingAverage(max(len(train_database) // args.bs, 1024))

    # Start training
    try:
        while True:
            if args.epochlimit is not None and epoch > args.epochlimit:
                print_normal("Epoch " + str(args.epochlimit) + " reached!")
                break

            model.train()
            loader = torch.utils.data.DataLoader(train_database, batch_size=args.bs, shuffle=True,
                                                 num_workers=4, collate_fn=collate)
            for i, data in enumerate(loader, 0):
                inputs, labels = data
                optimizer.zero_grad()

                variable = torch.autograd.Variable(inputs).float()
                labels = torch.autograd.Variable(labels).float()

                if not args.disablecuda:
                    variable = variable.cuda()
                    labels = labels.cuda()
                else:
                    variable = variable.cpu()
                    labels = labels.cpu()

                outputs = model(variable)
                loss_value = loss.forward(outputs, labels)
                loss_value.backward()
                loss_value_cpu = float(loss_value.data.cpu().numpy())

                optimizer.step()

                moving_average.addn(loss_value_cpu)

                if (i * args.bs) % 8 == 0:
                    sys.stdout.write(TerminalColors.BOLD + '[%d, %5d] ' % (epoch + 1, (i * args.bs) + 1) + TerminalColors.ENDC)
                    sys.stdout.write('lr: %.8f; loss: %.4f ; curr: %.4f ;\r' % (
                        optimizer.state_dict()['param_groups'][0]['lr'],
                        moving_average.moving_average(), loss_value_cpu))

            epoch += 1
            adaptative_optimizer.step()
            sys.stdout.write("\n")

            try:
                if args.icdartest is not None:
                    callback(model, loss, test_database_path)
            except Exception as e:
                print_error("Can't test : " + str(e))

    except KeyboardInterrupt:
        pass

    print_normal("Done training! Saving...")
    torch.save({
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'adaptative_optimizer': adaptative_optimizer.state_dict(),
    }, checkpoint_name)
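# Example invocation of this training script. The file name
# train_line_localizator.py is hypothetical; the flags match the argparse
# definitions above.
#
#   python train_line_localizator.py --icdartrain /data/icdar/train \
#       --icdartest /data/icdar/test --bs 16 --losstype bce --generated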