Esempio n. 1
0
def train():
    """Run the full training loop for the CRNN captcha model.

    Trains for a fixed number of epochs, checkpoints the weights with the
    lowest validation loss to ``output/weight.pth``, and writes a loss
    figure plus a log line after every epoch.
    """
    print('start training ...........')

    # Hyper-parameters.
    batch_size = 16
    num_epochs = 50
    learning_rate = 0.1

    # The vocabulary covers lowercase letters plus digits.
    label_converter = LabelConverter(char_set=string.ascii_lowercase + string.digits)
    vocab_size = label_converter.get_vocab_size()

    device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
    model = CRNN(vocab_size=vocab_size).to(device)
    # model.load_state_dict(torch.load('output/weight.pth', map_location=device))

    train_loader, val_loader = get_loader('data/CAPTCHA Images/', batch_size=batch_size)

    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    # scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 10, 2)

    train_losses, val_losses = [], []
    best_val_loss = float('inf')
    for epoch in range(num_epochs):
        loss_tr = fit(epoch, model, optimizer, label_converter, device, train_loader, phase='training')
        loss_val = fit(epoch, model, optimizer, label_converter, device, val_loader, phase='validation')
        print('-----------------------------------------')

        # Checkpoint whenever validation loss ties or beats the best seen so far
        # (the first epoch always saves, since best starts at +inf).
        if loss_val <= best_val_loss:
            torch.save(model.state_dict(), 'output/weight.pth')
        best_val_loss = min(best_val_loss, loss_val)

        train_losses.append(loss_tr)
        val_losses.append(loss_val)

        write_figure('output', train_losses, val_losses)
        write_log('output', epoch, loss_tr, loss_val)

        # ReduceLROnPlateau is stepped with the metric it monitors.
        scheduler.step(loss_val)
Esempio n. 2
0
def predict(model_path, im_path, norm_height=32, norm_width=128, device='cpu'):
    '''
    predict a new image using a trained model
    :param model_path: path of the saved model
    :param im_path: path of an image
    :param norm_height: image normalization height
    :param norm_width: image normalization width
    :param device: 'cpu' or 'cuda'
    :raises AssertionError: if the image cannot be read from im_path
    '''

    # step 1: initialize a model and put it on device
    model = CRNN()
    model = model.to(device)

    # step 2: load state_dict from saved model
    # bug fix: map the checkpoint onto the *requested* device instead of
    # whichever CUDA device happens to be available, so the restored weights
    # always land where the model lives
    checkpoint = torch.load(model_path, map_location=torch.device(device))
    model.load_state_dict(checkpoint['state_dict'])
    print('[Info] Load model from {}'.format(model_path))

    # step 3: initialize the label converter
    label_converter = LabelConverter()

    # step 4: read image and normalization
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((norm_height, norm_width)),
        transforms.ToTensor()
    ])
    im = cv2.imread(im_path)
    if im is None:
        raise AssertionError(
            f'the image {im_path} may not exist, please check it.')
    x = transform(im)
    x = x.unsqueeze(0)  # add the batch dimension
    # bug fix: the input must live on the same device as the model,
    # otherwise the forward pass fails when device='cuda'
    x = x.to(device)

    # step 5: run model
    model.eval()
    with torch.no_grad():
        logits, _ = model(x)
        raw_pred = logits.argmax(2)  # greedy per-timestep decoding
        pred = label_converter.decode(raw_pred)[0]
    print('prediction: {}\n'.format(pred))

    # visualize probabilities output by CTC
    savepath = os.path.splitext(im_path)[0] + '_vis.jpg'
    visual_ctc_results(im, logits, savepath)
Esempio n. 3
0
class CaptchaDataset(Dataset):
    """Torch dataset of captcha images described by a metadata DataFrame.

    Each row of ``dataset_metadata_df`` is expected to hold the image path
    in column 0 and the raw text label in column 1 (inferred from the
    positional access below — confirm against `get_metadata_df`).
    """

    def __init__(self, dataset_metadata_df, vocab, is_external_img=False):
        """
        :param dataset_metadata_df: DataFrame with [image_path, raw_label] rows
        :param vocab: character set used by the label converter
        :param is_external_img: True when source images are 4-channel RGBA
                                and must be flattened to RGB
        """
        self.dataset_metadata_df = dataset_metadata_df
        self.vocab = vocab
        self.label_converter = LabelConverter(self.vocab)
        self.is_external_img = is_external_img
        # Perf fix: the transform pipeline was rebuilt on every __getitem__
        # call although it is invariant across samples; build it once here.
        # Normalization uses the standard ImageNet mean/std.
        self.preprocess = transforms.Compose([
            transforms.Resize(289),
            # transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        return len(self.dataset_metadata_df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_metadata = self.dataset_metadata_df.iloc[idx]
        img_path = img_metadata[0]
        raw_label = img_metadata[1]
        image = Image.open(img_path)
        if self.is_external_img:
            # External images are RGBA; composite them over a white background
            # using the alpha channel (index 3) to obtain a plain RGB image.
            background = Image.new("RGB", image.size, (255, 255, 255))
            background.paste(image,
                             mask=image.split()[3])  # 3 is the alpha channel
            image = background
        image = self.preprocess(image)
        label = self.label_converter.encode(raw_label)
        return (image, label)
Esempio n. 4
0
import glob
import os
import string

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision.transforms.functional as F
from PIL import Image
from tqdm import tqdm

from dataset import CaptchaImagesDataset
from model import CRNN
from utils import LabelConverter

if __name__ == '__main__':
    # Evaluate the trained CRNN on the test split of the CAPTCHA Images set.
    device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
    # Vocabulary must match the one used at training time (a-z + 0-9).
    label_converter = LabelConverter(char_set=string.ascii_lowercase +
                                     string.digits)
    vocab_size = label_converter.get_vocab_size()

    model = CRNN(vocab_size=vocab_size).to(device)
    # Restore the best weights checkpointed by the training script.
    model.load_state_dict(torch.load('output/weight.pth', map_location=device))
    model.eval()

    correct = 0.0
    image_list = glob.glob('data/CAPTCHA Images/test/*')
    for image in tqdm(image_list):
        # The ground-truth text is encoded in the file name (e.g. 'ab3de.png').
        # NOTE(review): splitting on '/' is POSIX-only; os.path.basename would
        # be portable — confirm target platforms.
        ground_truth = image.split('/')[-1].split('.')[0]
        image = Image.open(image).convert('RGB')
        image = F.to_tensor(image).unsqueeze(0).to(device)

        output = model(image)
        # Greedy decoding: per-timestep argmax over the vocabulary dimension.
        encoded_text = output.squeeze().argmax(1)
        # NOTE(review): the snippet appears truncated here — `correct` and
        # `ground_truth` are never consumed in the visible code.
Esempio n. 5
0
# Fraction of the shuffled data reserved for the test split.
testset_percentage = 0.01

# Placeholder branch: a fixed (non-random) test set is not implemented yet.
if not isRandomTestSet:
    for input_test_folder in input_test_folders:
        pass

# Collect [image_path, label_path] pairs from every training folder,
# skipping images that fail verification.
train_data = []
bar = Bar('Processing input folders', max=len(input_train_folders))
for input_train_folder in input_train_folders:
    bar.next()
    labels_paths = glob.glob('{}/labels_voc/*'.format(input_train_folder))
    if len(labels_paths) == 0:
        raise Exception('ARE YOU SURE THERE IS LABELS IN THE FOLDER {}/labels_voc/ ?'.format(input_train_folder))
    bar2 = Bar('Processing labels in {}'.format(input_train_folder), max=len(labels_paths))
    for label_path in labels_paths[:]:
        image_path = LabelConverter.get_image_path_from_label_path(label_path)
        if not verify_image(image_path):
            print("LOL IS BROKEN")
            bar2.next()
            continue
        train_data.append([image_path, label_path])
        bar2.next()
    bar2.finish()
bar.finish()


shuffle(train_data)
# Bug fix: train_set/test_set were only bound inside the isRandomTestSet
# branch, so the summary print below raised NameError otherwise. Default to
# "everything is training data, empty test set" when no random split is made.
train_set, test_set = train_data, []
if isRandomTestSet:
    test_set = train_data[:int(len(train_data)*testset_percentage)]
    train_set = train_data[int(len(train_data)*testset_percentage):]
print('The dataset has a total of {} images, splitted across {} training images and {} test images'.format(len(train_data), len(train_set), len(test_set)))
Esempio n. 6
0
if __name__ == "__main__":
    # 01 vocab
    # vocab = "01"
    # train_dataset_path = '/Users/tomtalpir/dev/tom/captcha_project/CaptchasRNN/generated_images_1590229754'

    # Digits vocab
    # vocab = string.digits
    # train_dataset_path = '/Users/tomtalpir/dev/tom/captcha_project/CaptchasRNN/generated_images_1591000952'

    vocab = string.ascii_lowercase
    train_dataset_path = '/Users/tomtalpir/dev/tom/captcha_project/CaptchasRNN/local_train_lowercase_ascii'

    # vocab = string.ascii_lowercase + string.digits
    # train_dataset_path = '/Users/tomtalpir/dev/tom/captcha_project/CaptchasRNN/local_train_lowercase_ascii'

    lc = LabelConverter(vocab)

    claptcha_test_dataset_path = '/Users/tomtalpir/dev/tom/captcha_project/CaptchasRNN/claptcha_test'
    claptcha_test_dataset_metadata_df = get_metadata_df(
        claptcha_test_dataset_path)
    claptcha_test_dataset_metadata_df = claptcha_test_dataset_metadata_df.head(
        2)
    claptcha_test_dataset = CaptchaDataset(claptcha_test_dataset_metadata_df,
                                           vocab)
    claptcha_test_dataset_loader = torch.utils.data.DataLoader(
        claptcha_test_dataset,
        batch_size=200,
        shuffle=True,
        collate_fn=custom_collate_func)

    train_dataset_metadata_df = get_metadata_df(train_dataset_path)
Esempio n. 7
0
import xml.etree.ElementTree as ET
import os
import utils.LabelConverter as LabelConverter
from PIL import Image
from progress.bar import Bar

# Sample of the expected detection-output line format (one record per line):
#   <image_path> <class_id> <confidence> <xmin> <ymin> <xmax> <ymax>
input_test_data = "/media/esteve/1615F2A532ED483C/Ubuntu/ML/fish_dataset/imagenet_dataset/imagenet_split_renamed/images/easy_classes_n01497118_277.jpg 1 0.603365 118 173 376 284 \n/media/esteve/1615F2A532ED483C/Ubuntu/ML/fish_dataset/imagenet_dataset/imagenet_split_renamed/images/easy_classes_n01497118_278.jpg 1 0.807335 89 -8 383 354 \n/media/esteve/1615F2A532ED483C/Ubuntu/ML/fish_dataset/imagenet_dataset/imagenet_split_renamed/images/easy_classes_n01497118_279.jpg 1 0.717453 122 25 438 355 \n/media/esteve/1615F2A532ED483C/Ubuntu/ML/fish_dataset/imagenet_dataset/imagenet_split_renamed/images/easy_classes_n01497118_427.jpg 1 0.971344 -8 6 504 270 \n/media/esteve/1615F2A532ED483C/Ubuntu/ML/fish_dataset/imagenet_dataset/imagenet_split_renamed/images/easy_classes_n01497118_280.jpg 1 0.956263 237 152 489 274 \n/media/esteve/1615F2A532ED483C/Ubuntu/ML/fish_dataset/imagenet_dataset/imagenet_split_renamed/images/easy_classes_n01497118_280.jpg 1 0.798489 49 79 418 199"

input_file = '/media/esteve/1615F2A532ED483C/Ubuntu/ML/fish_dataset/imagenet_dataset/imagenet_split_renamed/out.txt'

# NOTE(review): the file handle is never closed; `with open(...)` would be safer.
input_data = open(input_file, 'r').read().split('\n')
bar = Bar('Creating xml files...', max=len(input_data))
for line in input_data[:]:
    bar.next()
    data = line.split(' ')
    # Derive the VOC-style label path from the image path (data[0]).
    label_file_path = LabelConverter.get_label_path_from_image_path(data[0])
    try:
        # Reuse an existing annotation file when one is already present.
        tree = ET.parse(label_file_path)
        root = tree.getroot()
    except FileNotFoundError:
        # Otherwise create a fresh Pascal-VOC 'annotation' skeleton.
        root = ET.Element('annotation')
        tree = ET.ElementTree(root)

        folder = ET.SubElement(root, 'folder')
        folder.text = '/'.join(label_file_path.split('/')[:-1])

        filename = ET.SubElement(root, 'filename')
        filename.text = label_file_path.split('/')[-1]

        source = ET.SubElement(root, 'source')
        ET.SubElement(source, 'database').text = "Selflabeled"
        # NOTE(review): the loop body appears truncated here in this chunk.
Esempio n. 8
0
def train_val(
        train_im_dir='data/train',
        val_im_dir='data/train',  # data path configs
        norm_height=32,
        norm_width=128,  # image normalization configs
        n_epochs=20,
        batch_size=4,
        lr=1e-4,  # training configs
        model_save_epoch=5,
        model_save_dir='models',  # model saving configs
        load_pretrain=False,
        pretrain_path=None,  # pretrained model configs
        device='cpu'):
    '''
    The main training procedure
    ----------------------------
    :param train_im_dir: path to directory with training images and ground-truth file
    :param val_im_dir: path to directory with validation images and ground-truth file
    :param norm_height: image normalization height
    :param norm_width: image normalization width
    :param n_epochs: number of training epochs
    :param batch_size: training and validation batch size
    :param lr: learning rate
    :param model_save_epoch: save model after each {model_save_epoch} epochs
    :param model_save_dir: path to save the model
    :param load_pretrain: whether to load a pretrained model
    :param pretrain_path: path of the pretrained model
    :param device: 'cpu' or 'cuda'
    '''

    # step 1: initialize training and validation data loaders
    #         please see ListDataset and dataLoader (line 19 and line 92) in utils.py for details
    trainloader = dataLoader(train_im_dir,
                             norm_height,
                             norm_width,
                             batch_size,
                             training=True)
    valloader = dataLoader(val_im_dir,
                           norm_height,
                           norm_width,
                           batch_size,
                           training=False)

    # step 2: initialize the label converter
    #         please see LabelConverter (line 112) in utils.py for details
    label_converter = LabelConverter()

    # step 3: initialize the model
    model = CRNN()
    model = model.to(device)
    if load_pretrain:
        try:
            # bug fix: map the checkpoint onto the *requested* device (the one
            # the model was just moved to) rather than whichever CUDA device
            # happens to be available
            checkpoint = torch.load(
                pretrain_path, map_location=torch.device(device))
            model.load_state_dict(checkpoint['state_dict'])
            print(f'[Info] load pretrained model from {pretrain_path}')
        except Exception as e:
            # best-effort: fall back to training from scratch on any failure
            print(
                f'[Warning] load pretrain model failed, the reason is:\n    {e}'
            )
            print('[Warning] the model will be trained from scratch!')

    # step 4: define CTC loss function and optimizer
    # -- CTC loss function in PyTorch is nn.CTCLoss()
    #    note that the first input of nn.CTCLoss() is logarithmized probabilities
    #    please refer to the following document to look up its usage
    #    https://pytorch.org/docs/stable/generated/torch.nn.CTCLoss.html#torch.nn.CTCLoss
    criterion = nn.CTCLoss()
    optimizer = optim.Adam(model.parameters(), lr)

    # step 5: training & validation

    # two lists to save training loss and validation accuracy for each epoch
    losses, accuracies = [], []

    for epoch in range(n_epochs):
        # train
        print('\nEpoch [{}/{}] start ...'.format(epoch + 1, n_epochs))
        train_loss = train_one_epoch(model, trainloader, optimizer, criterion,
                                     label_converter, device)
        losses.append(train_loss)

        # validation
        accuracy = val_one_epoch(model, valloader, label_converter, device)
        accuracies.append(accuracy)

        # show information of the epoch
        print('train loss = {:.3f}, validation word accuracy = {:.1f}%'.format(
            train_loss, 100 * accuracy))

        # save model
        if (epoch + 1) % model_save_epoch == 0:
            # robustness fix: create the save directory if it does not exist,
            # otherwise torch.save raises after a full training interval
            os.makedirs(model_save_dir, exist_ok=True)
            model_save_path = os.path.join(
                model_save_dir, 'model_epoch{}.pth'.format(epoch + 1))
            torch.save({'state_dict': model.state_dict()}, model_save_path)
            print('[Info] model saved in {}'.format(model_save_path))

    # draw the loss and accuracy curve
    plot_loss_and_accuracies(losses, accuracies)
Esempio n. 9
0
 def __init__(self, dataset_metadata_df, vocab, is_external_img=False):
     """Store the dataset metadata and build a label converter for *vocab*.

     :param dataset_metadata_df: DataFrame describing the samples
     :param vocab: character set used by the label converter
     :param is_external_img: flag for externally-sourced (RGBA) images
     """
     self.dataset_metadata_df = dataset_metadata_df
     self.is_external_img = is_external_img
     self.vocab = vocab
     self.label_converter = LabelConverter(vocab)
Esempio n. 10
0
def _new_model(dataset):
    """Build an untrained Model sized to the dataset's character set (+1,
    presumably the CTC blank — confirm against Model's definition)."""
    return Model(1024, len(dataset.char_set) + 1)


def _load_trained_model(dataset, device):
    """Build a Model and restore its weights from 'model/model.pth' mapped
    onto *device*. Extracted because this logic was duplicated in two
    branches of main()."""
    model = _new_model(dataset)
    model.load_state_dict(
        torch.load('model/model.pth', map_location=device))
    return model


def main():
    """Entry point: train, validate, or recognize a single test image.

    --train      trains on 80% of the dataset and saves the weights
    --validate   evaluates the saved weights on the whole dataset
    (no flags)   runs recognition on Params.test_image
    """
    # define some command line arguments
    parser = argparse.ArgumentParser(
        description='Todo Bicig handwritten text recognition')
    parser.add_argument('--train', action='store_true', help='train the NN')
    parser.add_argument('--validate',
                        action='store_true',
                        help='validate the NN')

    args = parser.parse_args()

    dataset = TodoDataset(Params.dataset_path, Params.image_size)
    converter = LabelConverter(dataset.char_set)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    if args.train or args.validate:

        if args.train:
            # split on train and validation sets (80/20)
            train_size = int(0.8 * len(dataset))
            val_size = len(dataset) - train_size

            train_set, val_set = random_split(dataset, [train_size, val_size])
            train_dataloader = DataLoader(train_set,
                                          batch_size=32,
                                          shuffle=True,
                                          num_workers=10)
            val_dataloader = DataLoader(val_set,
                                        batch_size=32,
                                        shuffle=True,
                                        num_workers=10)

            # training model
            model = _new_model(dataset)
            loss = torch.nn.CTCLoss()
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=1.0e-4,
                                         amsgrad=True)
            # decay the learning rate by 10x every 30 epochs
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                        step_size=30,
                                                        gamma=0.1)

            train_model(model=model,
                        loss=loss,
                        optimizer=optimizer,
                        scheduler=scheduler,
                        num_epochs=70,
                        train_dataloader=train_dataloader,
                        val_dataloader=val_dataloader,
                        converter=converter)

            torch.save(model.state_dict(), 'model/model.pth')

        if args.validate:
            # use all dataset
            val_dataloader = DataLoader(dataset,
                                        batch_size=32,
                                        shuffle=True,
                                        num_workers=10)
            loss = torch.nn.CTCLoss()

            model = _load_trained_model(dataset, device)

            validate_model(model, loss, val_dataloader, converter)

    else:
        # no flags: recognize the configured single test image
        model = _load_trained_model(dataset, device)

        image = ImagePreprocess().resize_image(
            cv.imread(Params.test_image, cv.IMREAD_GRAYSCALE),
            Params.image_size)
        result = recognize(model, image, converter)

        print(result)