Ejemplo n.º 1
0
def train():
    """Train the CRNN on the CAPTCHA image set and keep the best weights.

    Runs a fixed number of epochs with SGD (Nesterov momentum) and a
    ``ReduceLROnPlateau`` scheduler driven by the validation loss. After each
    epoch the loss curves and a log entry are written under ``output``; the
    state dict is saved to ``output/weight.pth`` whenever the validation loss
    is no worse than every previous epoch.
    """
    print('start training ...........')
    batch_size, num_epochs, learning_rate = 16, 50, 0.1

    charset = string.ascii_lowercase + string.digits
    label_converter = LabelConverter(char_set=charset)
    vocab_size = label_converter.get_vocab_size()

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    model = CRNN(vocab_size=vocab_size).to(device)

    train_loader, val_loader = get_loader('data/CAPTCHA Images/', batch_size=batch_size)

    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

    train_losses, val_losses = [], []
    for epoch in range(num_epochs):
        shared_args = (epoch, model, optimizer, label_converter, device)
        train_epoch_loss = fit(*shared_args, train_loader, phase='training')
        val_epoch_loss = fit(*shared_args, val_loader, phase='validation')
        print('-----------------------------------------')

        # The history is empty only on the first epoch, which always saves.
        is_best = not val_losses or val_epoch_loss <= np.min(val_losses)
        if is_best:
            torch.save(model.state_dict(), 'output/weight.pth')

        train_losses.append(train_epoch_loss)
        val_losses.append(val_epoch_loss)

        write_figure('output', train_losses, val_losses)
        write_log('output', epoch, train_epoch_loss, val_epoch_loss)

        scheduler.step(val_epoch_loss)
Ejemplo n.º 2
0
def test_model():
    """Smoke test: run one random input through a CRNN and print the output shape."""
    net = CRNN(32, 1, 53, 256)
    dummy_input = torch.randn((1, 1, 32, 100))
    result = net.forward(dummy_input)
    print(result.shape)
    
Ejemplo n.º 3
0
def main():
    """Train a CRNN on the denoising datasets and save its weights.

    Settings are read from ``parse_args()``: ``train_dir``, ``val_dir``,
    ``batch_size``, ``epochs``, ``learning_rate`` and ``model_save_path``.
    The trained state dict is written to ``args.model_save_path``; the
    parent directory is created if it does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    args = parse_args()

    train_dataset = DenoisingDataset(args.train_dir)
    val_dataset = DenoisingDataset(args.val_dir)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False)

    model = CRNN().to(device)
    train_model(model,
                train_loader,
                val_loader,
                args.epochs,
                args.learning_rate,
                device,
                log=True)

    model_dir = os.path.dirname(args.model_save_path)
    if model_dir:
        # makedirs also creates missing parent directories, and exist_ok
        # removes the check-then-create race that os.mkdir had.
        os.makedirs(model_dir, exist_ok=True)
    torch.save(model.state_dict(),
               args.model_save_path,
               _use_new_zipfile_serialization=False)
Ejemplo n.º 4
0
def main():
    """Build a CRNN from the configured settings and train it on the chosen device."""
    cli_args = _get_args()
    cfg = read_config(cli_args.config)

    with tf.device(cli_args.device):
        network = CRNN(cfg)
        network.train()
Ejemplo n.º 5
0
def main():
    """Evaluate a pretrained CRNN on ``params.dataset`` and print the results."""
    cli_args = _get_args()
    cfg = read_config(cli_args.config)

    with tf.device(cli_args.device):
        network = CRNN(cfg, pretrained=True)
        # evaluate() is unpacked into four values: predictions, labels and two metrics.
        preds, labels, cer, macc = network.evaluate(cli_args.dataset)
        print(preds, labels, cer, macc)
Ejemplo n.º 6
0
def load_model(model_path):
    """Build the CRNN inference graph and restore it from a checkpoint.

    Args:
        model_path: Directory passed to ``tf.train.get_checkpoint_state``.

    Returns:
        ``(sess, decoded, inputs, seq_len)`` when a checkpoint was found and
        restored, otherwise ``None`` (after printing ``"no such file"``).
    """
    with tf.Graph().as_default():
        inputs = tf.placeholder(tf.float32, [1, 32, None, 3])
        crnn = CRNN(inputs)
        seq_len = tf.placeholder(tf.int32, [None], name='seq_len')
        logits = tf.reshape(crnn, [-1, 512])  # (batchsize x (width/32)) * 512
        W = tf.Variable(tf.truncated_normal([512, 7000], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0., shape=[7000]), name="b")
        logits = tf.matmul(logits, W) + b
        logits = tf.reshape(logits, [1, -1, 7000], name="reshape_log")
        logits = tf.transpose(logits, (1, 0, 2), name="final_log")
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits,
                                                          seq_len,
                                                          top_paths=1,
                                                          merge_repeated=False)
        saver = tf.train.Saver(tf.global_variables())

        # Look for the checkpoint before opening a session so that the
        # failure path does not leak an unclosed tf.Session.
        ckpt = tf.train.get_checkpoint_state(model_path)
        if not (ckpt and ckpt.model_checkpoint_path):
            print("no such file")
            return None

        config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=config)
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("load success")
        return sess, decoded, inputs, seq_len
def main(argv=None):
    """Train and validate the CRNN in an endless epoch loop.

    Creates the model and TensorBoard directories if missing, loads the
    pickled character list, builds the train/validation datasets and then
    loops forever: train one epoch, validate, save the model when the
    character error rate is below 15, and log to TensorBoard.

    ``argv`` is accepted but not used by this function.
    """
    for required_dir in (Constants.MODEL_DIR, Constants.TENSORBOARD_DIR):
        if not os.path.exists(required_dir):
            os.makedirs(required_dir)

    with open(Constants.CHARLIST_FILE, "rb") as fp:
        charList = pickle.load(fp)
    lenCharList = len(charList)

    with tf.device("CPU:0"):
        train_ds, train_image_count = create_datasets(Constants.TRAIN_TFRECORD)
        val_ds, val_image_count = create_datasets(Constants.VAL_TFRECORDS)

    # Only whole batches are counted; a trailing partial batch is dropped.
    train_batches = int(np.floor(train_image_count / Constants.BATCH_SIZE))
    val_batches = int(np.floor(val_image_count / Constants.BATCH_SIZE))

    model = CRNN(lenCharList)

    # NOTE(review): global_step_op is not advanced anywhere in this function,
    # so the decay only takes effect if something else increments it - confirm.
    global_step_op = tf.Variable(0)
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(
        starter_learning_rate,
        global_step_op,
        decay_steps=10000,
        decay_rate=0.1,
        staircase=False)
    optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)

    summary_writer = tf.contrib.summary.create_file_writer(
        Constants.TENSORBOARD_DIR, flush_millis=10000)
    epoch = 1
    with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
        while True:
            print("Epoch "+str(epoch))
            loss = train_on_batch(model, train_ds, train_batches, charList, optimizer)
            validation = validate_on_batch(model, val_ds, val_batches, charList, epoch)
            images, recognized, charErrorRate, wordAccuracy = validation
            if charErrorRate < 15:
                save_model(model, epoch)
            write_to_tensorboard(epoch, images, recognized, loss, charErrorRate, wordAccuracy)
            epoch += 1
Ejemplo n.º 8
0
def main():
    """Parse the CLI and run DOA training over a learning-rate / batch-size grid.

    When ``--rate`` or ``--batchsize`` is omitted, a built-in list of values is
    swept for that dimension. Each combination gets its own results directory
    under ``--savedir`` and one ``doa_train`` run.
    """
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    parser = argparse.ArgumentParser(
        prog='train',
        description="""Script to train the DOA estimation system""")
    add = parser.add_argument
    add("--input", "-i", default="data", help="Directory where data and labels are", type=str)
    add("--savedir", "-s", default=".", help="Directory to write results", type=str)
    add("--rate", "-r", type=float, default=None, help="Choose a learning rate, default to sweep")
    add("--batchsize", "-b", type=int, default=None, help="Choose a batchsize, default to sweep")
    add("--epochs", "-e", type=int, default=10, help="Number of epochs")
    add("--dropout", "-dp", type=float, default=0., help="Specify dropout rate")
    add("--model", "-m", type=str, choices=["CNN", "CRNN"], required=True, help="Choose network model")
    add("--outputformulation", "-of", type=str, choices=["Reg", "Class"], required=True, help="Choose output formulation")
    add("--lstmout", "-lo", type=str, choices=[LSTM_FULL, LSTM_FIRST, LSTM_LAST], required=False, default=LSTM_FULL, help="Choose what to use from LSTM ouput")
    args = parser.parse_args()

    # A single dropout rate is used for all three Dropouts fields.
    dropouts = Dropouts(args.dropout, args.dropout, args.dropout)
    rates = [args.rate] if args.rate else [1e-5, 1e-7, 1e-3, 1e-9, 1e-1]
    batches = [args.batchsize] if args.batchsize else [128, 32, 64]

    for learning_rate in rates:
        for batch_size in batches:
            # Directory that receives this experiment's files.
            name_parts = [
                "results",
                '_{}'.format(args.model),
                '_{}'.format(args.outputformulation),
                '_lr{}'.format(learning_rate),
                '_bs{}'.format(batch_size),
                '_drop{}'.format(args.dropout),
            ]
            results_dir = os.path.join(args.savedir, "".join(name_parts))
            print('writing results to {}'.format(results_dir))

            doa_classes = None
            if args.outputformulation == "Reg":
                loss = nn.MSELoss(reduction='sum')
                output_dimension = 3
            elif args.outputformulation == "Class":
                loss = nn.CrossEntropyLoss(reduction="sum")
                doa_classes = DoaClasses()
                output_dimension = len(doa_classes.classes)

            # `device` is not defined in this function; it comes from module scope.
            if args.model == "CNN":
                model_choice = ConvNet(device, dropouts, output_dimension, doa_classes).to(device)
            elif args.model == "CRNN":
                model_choice = CRNN(device, dropouts, output_dimension, doa_classes, args.lstmout).to(device)

            config = Config(
                data_folder=args.input,
                learning_rate=learning_rate,
                batch_size=batch_size,
                num_epochs=args.epochs,
                test_to_all_ratio=0.1,
                results_dir=results_dir,
                model=model_choice,
                loss_criterion=loss,
                doa_classes=doa_classes,
                lstm_output=args.lstmout,
                shuffle=True,
            )
            doa_train(config)
    def __init__(self, args):
        """Set up the dataset, the CRNN model and the chord-symbol lookup table.

        Reads ``dataset``, ``batch_size``, ``lr`` and ``resume`` from ``args``.
        When ``args.resume`` is not ``None`` the model weights are loaded from it.
        """
        self.args = args

        self.dataset = DataSet(self.args.dataset, self.args.batch_size)
        data = self.dataset
        self.model = CRNN(args.lr, data.img_width, data.img_height, data.total_labels)

        # Optionally continue from a previously saved checkpoint.
        checkpoint = self.args.resume
        if checkpoint is not None:
            print(f"Resuming from checkpoint: {checkpoint}")
            self.model.load_weights(checkpoint)

        # Single-character chord symbol -> spelled-out name.
        # NOTE(review): the value for "o" ends with a trailing space, unlike the
        # value for "-"; confirm whether that is intentional.
        self.inverse_chord_map = dict([
            ("Δ", "major"),
            ("M", "major"),
            ("m", "minor"),
            ("+", "augmented"),
            ("-", "diminished"),
            ("o", "diminished "),
            ("ø", "half diminished"),
            ("#", "sharp"),
            ("b", "bimol"),
        ])
Ejemplo n.º 10
0
def inference_model(network, lstm_out, out_format, model_path):
  """Build a DOA model, load its weights and put it in eval mode.

  Args:
    network: ``"CNN"`` or ``"CRNN"``.
    lstm_out: Passed to ``CRNN`` as its last argument; unused for ``"CNN"``.
    out_format: ``"cartesian"`` (3 outputs) or ``"class"`` (one output per
      entry in ``DoaClasses().classes``).
    model_path: Path of the saved state dict.

  Returns:
    ``(model, doa_classes)``.

  Raises:
    ValueError: If ``out_format`` or ``network`` is not a supported value
      (previously this surfaced as an ``UnboundLocalError``).
  """
  doa_classes = DoaClasses()
  if out_format == "cartesian":
    out_dim = 3
  elif out_format == "class":
    out_dim = len(doa_classes.classes)
  else:
    raise ValueError("unsupported out_format: {!r}".format(out_format))

  # `device` is not defined in this function; it comes from module scope.
  if network == "CNN":
    model = ConvNet(device, Dropouts(0, 0, 0), out_dim, doa_classes)
  elif network == "CRNN":
    model = CRNN(device, Dropouts(0, 0, 0), out_dim, doa_classes, lstm_out)
  else:
    raise ValueError("unsupported network: {!r}".format(network))

  model.load_state_dict(torch.load(model_path, map_location=device))
  model.eval()
  model.to(device)

  return model, doa_classes
Ejemplo n.º 11
0
def create_model(formulation, model):
    """Create an untrained DOA network with all dropout rates set to zero.

    Args:
        formulation: ``"Reg"`` (3 outputs) or ``"Class"`` (one output per
            entry in ``DoaClasses().classes``).
        model: ``"CNN"`` or ``"CRNN"``. The CRNN is built with ``"Full"`` as
            its last argument.

    Returns:
        The constructed model, moved to the module-level ``device``.

    Raises:
        ValueError: If ``formulation`` or ``model`` is not a supported value
            (previously this surfaced as an ``UnboundLocalError``).
    """
    dropouts = Dropouts(0, 0, 0)
    doa_classes = None
    # The loss objects the original built here were never used or returned,
    # so only the output dimension is derived from the formulation.
    if formulation == "Reg":
        output_dimension = 3
    elif formulation == "Class":
        doa_classes = DoaClasses()
        output_dimension = len(doa_classes.classes)
    else:
        raise ValueError("unsupported formulation: {!r}".format(formulation))

    if model == "CNN":
        model_choice = ConvNet(device, dropouts, output_dimension, doa_classes).to(device)
    elif model == "CRNN":
        model_choice = CRNN(device, dropouts, output_dimension, doa_classes, "Full").to(device)
    else:
        raise ValueError("unsupported model: {!r}".format(model))
    return model_choice
Ejemplo n.º 12
0
def eval_model():
    """Evaluate the saved CRNN and print per-class accuracy and a metrics table.

    Loads ``./model_EEG.pt``, iterates the module-level ``testloader`` to
    accumulate per-class accuracy, then builds a two-row table (a constant
    baseline and the model) scored against the module-level ``y_test``.
    """
    model = CRNN()
    model.load_state_dict(torch.load('./model_EEG.pt'))
    # Target class names, indexed by label.
    classes = ('True', 'False')

    class_correct = [0.] * num_classes
    class_total = [0.] * num_classes

    model.eval()
    with torch.no_grad():
        for data, target in testloader:
            target = target.long()
            output, _ = model(data)
            # Index of the largest output per sample is the predicted class.
            _, pred = torch.max(output, 1)
            # reshape(-1) instead of squeeze(): squeeze() collapses a batch of
            # one to a 0-d tensor, which cannot be indexed below.
            correct = (pred == target).reshape(-1)
            for i, label in enumerate(target):
                class_correct[label] += correct[i].item()
                class_total[label] += 1

        for i, class_name in enumerate(classes):
            if class_total[i] > 0:
                print('Accuracy of %s : %2d%% out of %d cases' %
                      (class_name, 100 * class_correct[i] / class_total[i], class_total[i]))
            else:
                # Avoid ZeroDivisionError when a class never occurs in the test data.
                print('Accuracy of %s : N/A (no cases)' % class_name)

        # NOTE(review): only the first batch of testloader is scored below, yet
        # it is compared against all of y_test - this presumably assumes the
        # loader yields the whole test set in one batch; confirm.
        inputs, targets = next(iter(testloader))
        targets = targets.long()
        outputs, _ = model(inputs)
        probability, predicted = torch.max(outputs.data, 1)

        eval_metrics = pd.DataFrame(np.empty([2, 4]))
        eval_metrics.index = ["baseline"] + ['RNN']
        eval_metrics.columns = ["Accuracy", "ROC AUC", "PR AUC", "Log Loss"]

        # Row 0: constant baseline (always class 0, probability 0.5).
        pred = np.repeat(0, len(y_test.cpu()))
        pred_proba = np.repeat(0.5, len(y_test.cpu()))
        eval_metrics.iloc[0, 0] = accuracy_score(y_test.cpu(), pred)
        eval_metrics.iloc[0, 1] = roc_auc_score(y_test.cpu(), pred_proba)
        eval_metrics.iloc[0, 2] = average_precision_score(y_test.cpu(), pred_proba)
        eval_metrics.iloc[0, 3] = log_loss(y_test.cpu(), pred_proba)

        # Row 1: the model. Log loss is left at 0 (not computed).
        eval_metrics.iloc[1, 0] = accuracy_score(y_test.cpu(), predicted.cpu())
        eval_metrics.iloc[1, 1] = roc_auc_score(y_test.cpu(), probability.cpu())
        eval_metrics.iloc[1, 2] = average_precision_score(y_test.cpu(), probability.cpu())
        eval_metrics.iloc[1, 3] = 0

        print(eval_metrics)
Ejemplo n.º 13
0
def main():
    """Evaluate a saved CRNN checkpoint on the Synth90k test split.

    All settings are read from the module-level ``config`` mapping. The
    resulting loss and accuracy are printed.
    """
    eval_batch_size = config["eval_batch_size"]
    cpu_workers = config["cpu_workers"]
    reload_checkpoint = config["reload_checkpoint"]
    img_height, img_width = config["img_height"], config["img_width"]

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print(f"device: {device}")

    test_dataset = Synth90kDataset(root_dir=config["data_dir"], mode="test",
                                   img_height=img_height, img_width=img_width)
    test_loader = DataLoader(dataset=test_dataset, batch_size=eval_batch_size,
                             shuffle=False, num_workers=cpu_workers,
                             collate_fn=synth90k_collate_fn)

    # One class more than the label table holds (presumably the CTC blank).
    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(1, img_height, img_width, num_class,
                map_to_seq_hidden=config["map_to_seq_hidden"],
                rnn_hidden=config["rnn_hidden"],
                leaky_relu=config["leaky_relu"])
    state_dict = torch.load(reload_checkpoint, map_location=device)
    crnn.load_state_dict(state_dict)
    crnn.to(device)

    criterion = CTCLoss(reduction="sum")
    criterion.to(device)

    evaluation = evaluate(crnn, test_loader, criterion,
                          decode_method=config["decode_method"],
                          beam_size=config["beam_size"])
    print("test_evaluation: loss={loss}, acc={acc}".format(**evaluation))
Ejemplo n.º 14
0
def black_box_function(lr_pow):
    """Train one DOA model at learning rate ``10**lr_pow`` and return the negated result.

    Every other setting (model name, dropout, batch size, epochs, ...) is read
    from module-level names. The return value is ``-doa_train(config)``, so
    that a maximiser effectively minimises what ``doa_train`` returns.
    """
    learning_rate = 10.0**lr_pow

    # NOTE(review): the directory name reads `args.outputformulation` while the
    # branch below reads the bare `outputformulation`; confirm they agree.
    name_parts = [
        "results",
        '_{}'.format(modelname),
        '_{}'.format(args.outputformulation),
        '_lr{}'.format(learning_rate),
        '_bs{}'.format(batch_size),
        '_drop{}'.format(dropout),
    ]
    results_dir = os.path.join(savedir, "".join(name_parts))
    print('writing results to {}'.format(results_dir))

    dropouts = Dropouts(dropout, dropout, dropout)
    doa_classes = None
    if outputformulation == "Reg":
        loss = nn.MSELoss(reduction='sum')
        output_dimension = 3
    elif outputformulation == "Class":
        loss = nn.CrossEntropyLoss()
        doa_classes = DoaClasses()
        output_dimension = len(doa_classes.classes)

    if modelname == "CNN":
        network = ConvNet(device, dropouts, output_dimension, doa_classes)
        model_choice = network.to(device)
    elif modelname == "CRNN":
        network = CRNN(device, dropouts, output_dimension, doa_classes, lstmout)
        model_choice = network.to(device)

    config = (
        TrainConfig()
        .set_data_folder(inputdir)
        .set_learning_rate(learning_rate)
        .set_batch_size(batch_size)
        .set_num_epochs(epochs)
        .set_test_to_all_ratio(0.1)
        .set_results_dir(results_dir)
        .set_model(model_choice)
        .set_loss_criterion(loss)
        .set_doa_classes(doa_classes)
        .set_lstm_output(lstmout)
    )
    # Negated so that maximising this function minimises the training result.
    return -doa_train(config)
Ejemplo n.º 15
0
    def __init__(self):
        """Create the five sub-models, load the saved weights and freeze everything.

        Only the ResNet and the two DeepSAD networks have weights loaded here;
        the CRNN and UNet keep their freshly constructed parameters. All models
        are moved to CUDA, put in eval mode and have gradients disabled.
        """
        super(EnsembleNetwork, self).__init__()

        # Two-class classifiers.
        self.resnet = Resnet()
        self.crnn = CRNN()
        self.unet = UNet()

        # One-class classifiers.
        self.deep_sad_normal = LG_1DCNN()
        self.deep_sad_abnormal = LG_1DCNN()

        self.models = [
            self.resnet,
            self.crnn,
            self.unet,
            self.deep_sad_normal,
            self.deep_sad_abnormal,
        ]

        # ResNet weights.
        resnet_state = torch.load('/workspace/jinsung/resnet_final-Copy1js.pt')
        self.resnet.load_state_dict(resnet_state)

        # DeepSAD (normal): the checkpoint stores "c" alongside the weights.
        normal_ckpt = torch.load('/workspace/demon/deepSAD_1117_7k_10ep_64batch_normal_flip.tar')
        self.c_normal = normal_ckpt["c"]
        self.deep_sad_normal.load_state_dict(normal_ckpt["net_dict"])

        # DeepSAD (abnormal): same checkpoint layout.
        abnormal_ckpt = torch.load('/workspace/demon/deepSADModel_7k_10ep_64batch_abnormal.tar')
        self.c_abnormal = abnormal_ckpt["c"]
        self.deep_sad_abnormal.load_state_dict(abnormal_ckpt["net_dict"])

        # Move to CUDA, switch to eval mode and freeze every parameter.
        for sub_model in self.models:
            sub_model.to('cuda')
            sub_model.eval()
            for weight in sub_model.parameters():
                weight.requires_grad_(False)
Ejemplo n.º 16
0
  def _multi_models(self, **kwargs):
    """Build one CRNN per GPU and combine their gradients.

    Each CRNN is created under ``/gpu:<i>`` with variable reuse switched on
    after construction. Per-model gradients are averaged with
    ``self._average_gradients`` and applied by ``self._optimizer``.

    Returns:
      ``(models, apply_grad, loss)``: the list of CRNN instances, the op that
      applies the averaged gradients, and the mean of the per-model losses.
    """
    towers, per_tower_losses, per_tower_grads = [], [], []
    with tf.variable_scope(tf.get_variable_scope()):
      for gpu_index in xrange(self._n_gpus):
        with tf.device('/gpu:%d' % gpu_index):
          tower = CRNN(**kwargs)
          # Later models reuse the variables created by the first one.
          tf.get_variable_scope().reuse_variables()
          tower_grads = self._optimizer.compute_gradients(tower.loss)
          towers.append(tower)
          per_tower_losses.append(tower.loss)
          per_tower_grads.append(tower_grads)

    mean_loss = tf.reduce_mean(per_tower_losses)
    # Averaging is the synchronisation point across all models.
    averaged_grads = self._average_gradients(per_tower_grads)
    apply_op = self._optimizer.apply_gradients(averaged_grads)
    return towers, apply_op, mean_loss
Ejemplo n.º 17
0
def main():
    """Resume training the Keras CRNN with checkpointing and early stopping.

    The model's own output is used as the loss value (the loss function just
    returns ``y_pred``). Weights are first loaded from
    ``models/model_synth_2_35.h5`` and a checkpoint is written to ``models/``
    after every epoch.
    """
    crnn = CRNN(is_train=True)

    ada = Adadelta()
    crnn.model.compile(loss=lambda y_true, y_pred: y_pred, optimizer=ada)

    batch_size = 64
    train_gen = DataGenerator(data_path=TRAIN_DIR_NAME, batch_size=batch_size)

    # Portable replacement for `mkdir -p models`: no shell is spawned and a
    # failure raises instead of being silently ignored.
    os.makedirs('models', exist_ok=True)
    early_stop = EarlyStopping(monitor='loss',
                               min_delta=0.01,
                               patience=8,
                               mode='min',
                               verbose=1)
    checkpoint = ModelCheckpoint(
        filepath='models/model_curr_{epoch:02d}_{loss:.3f}.h5',
        monitor='loss',
        verbose=1,
        mode='min',
        period=1,
        save_weights_only=True)

    # Load the previous checkpoint before continuing training.
    crnn.model.load_weights('models/model_synth_2_35.h5')

    crnn.model.fit_generator(
        generator=train_gen,
        steps_per_epoch=len(train_gen.image_paths) // batch_size,
        epochs=100,
        callbacks=[checkpoint, early_stop],
        workers=16,
        use_multiprocessing=True,
        max_queue_size=10,
        verbose=1,
    )
Ejemplo n.º 18
0
def main():
    """Score a saved CRNN checkpoint on the Synth90k test split and print loss/accuracy.

    Every setting is taken from the module-level ``config`` mapping.
    """
    eval_batch_size = config['eval_batch_size']
    cpu_workers = config['cpu_workers']
    reload_checkpoint = config['reload_checkpoint']

    img_height = config['img_height']
    img_width = config['img_width']

    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)
    print(f'device: {device}')

    test_dataset = Synth90kDataset(
        root_dir=config['data_dir'],
        mode='test',
        img_height=img_height,
        img_width=img_width,
    )
    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=eval_batch_size,
        shuffle=False,
        num_workers=cpu_workers,
        collate_fn=synth90k_collate_fn,
    )

    # One class more than the label table holds (presumably the CTC blank).
    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(
        1,
        img_height,
        img_width,
        num_class,
        map_to_seq_hidden=config['map_to_seq_hidden'],
        rnn_hidden=config['rnn_hidden'],
        leaky_relu=config['leaky_relu'],
    )
    weights = torch.load(reload_checkpoint, map_location=device)
    crnn.load_state_dict(weights)
    crnn.to(device)

    criterion = CTCLoss(reduction='sum')
    criterion.to(device)

    evaluation = evaluate(
        crnn,
        test_loader,
        criterion,
        decode_method=config['decode_method'],
        beam_size=config['beam_size'],
    )
    print('test_evaluation: loss={loss}, acc={acc}'.format(**evaluation))
Ejemplo n.º 19
0
def main():
    """Run a saved CRNN on the images given on the command line and show the results.

    Command-line options are parsed with ``docopt`` from the module docstring;
    image size and network hyper-parameters come from the module-level
    ``config`` mapping.
    """
    arguments = docopt(__doc__)

    images = arguments['IMAGE']
    reload_checkpoint = arguments['-m']
    batch_size = int(arguments['-s'])
    decode_method = arguments['-d']
    beam_size = int(arguments['-b'])

    img_height = config['img_height']
    img_width = config['img_width']

    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)
    print(f'device: {device}')

    predict_dataset = Synth90kDataset(
        paths=images,
        img_height=img_height,
        img_width=img_width,
    )
    predict_loader = DataLoader(
        dataset=predict_dataset,
        batch_size=batch_size,
        shuffle=False,
    )

    # One class more than the label table holds (presumably the CTC blank).
    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(
        1,
        img_height,
        img_width,
        num_class,
        map_to_seq_hidden=config['map_to_seq_hidden'],
        rnn_hidden=config['rnn_hidden'],
        leaky_relu=config['leaky_relu'],
    )
    weights = torch.load(reload_checkpoint, map_location=device)
    crnn.load_state_dict(weights)
    crnn.to(device)

    preds = predict(
        crnn,
        predict_loader,
        Synth90kDataset.LABEL2CHAR,
        decode_method=decode_method,
        beam_size=beam_size,
    )

    show_result(images, preds)
Ejemplo n.º 20
0
def main():
    """Denoise one mel spectrogram with a saved CRNN and write the result.

    Reads ``path_to_file``, ``path_to_model`` and ``path_to_save`` from
    ``parse_args()``. The input is padded with ``pad_mel_spectogram``, passed
    through the model, cropped back to its original number of rows and saved
    with ``np.save``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    args = parse_args()

    noisy_mel = np.load(args.path_to_file)
    # The unpack also enforces a 2-D input; only the row count is needed later.
    h, _ = noisy_mel.shape
    noisy_mel = pad_mel_spectogram(noisy_mel)
    noisy_mel = torch.tensor(noisy_mel, dtype=torch.float32)
    # The input must be on the same device as the model, otherwise the
    # forward pass fails on CUDA machines.
    noisy_mel = noisy_mel.unsqueeze(0).to(device)

    model = CRNN().to(device)
    model.load_state_dict(torch.load(args.path_to_model, map_location=device))
    model.eval()

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        clean_mel = model(noisy_mel)
    clean_mel = clean_mel.squeeze(0).cpu().numpy()
    clean_mel = clean_mel[:h]  # drop the rows added by padding

    save_dir = os.path.dirname(args.path_to_save)
    if save_dir:
        # makedirs also creates missing parent directories.
        os.makedirs(save_dir, exist_ok=True)
    np.save(args.path_to_save, clean_mel)
Ejemplo n.º 21
0
        pass


# Script: generate synthetic data, build the Keras CRNN and fit it.
gendata = generatorData()

# Two independent draws from the generator: 60 for training, 40 for testing.
# The test draw is not used further in this part of the script.
train_datas, train_labels = gendata.gen_Data(batch_size=60)
test_datas, test_labels = gendata.gen_Data(batch_size=40)

train_datas_shapes = np.shape(train_datas)

# Regroup the flat label sequence into rows of 4, then one-hot encode them.
train_labels_s = np.reshape(train_labels, (int(len(train_labels) / 4), 4))
train_hot = to_categorical(train_labels_s)

# Per-sample input shape: everything after the leading axis.
input_shapes = np.shape(train_datas)[1:]

model = CRNN(input_shapes).CRNN_model

model.summary()

# The loss for the 'ctc' output simply returns y_pred, i.e. the value of that
# output is itself used as the loss. The lambda's first parameter shadows the
# module-level `train_hot` but is not used.
model.compile(loss={
    'ctc': lambda train_hot, y_pred: y_pred
},
              optimizer=RMSprop())

# NOTE(review): the two length inputs and the target are single-element
# arrays (np.ones(1)) while train_datas comes from a 60-sample draw - confirm
# the shapes are what the model expects.
history = model.fit(
    [train_datas, train_hot,
     np.ones(1) * 15, np.ones(1) * 37],
    np.ones(1),
    batch_size=10,
    epochs=100,
    verbose=2)
Ejemplo n.º 22
0
#!/usr/bin/env python
# encoding: utf-8
'''
@author: Li Huan
@contact: [email protected]
@file: train.py
@time: 2019/5/6 14:00
@desc:
'''
from util import data_generater_test
from model import CRNN
import os
import string
# Script: build the data generator and train a CRNN on digits + lowercase letters.
# Setting CUDA_VISIBLE_DEVICES to "-1" presumably hides all GPUs so the run stays on CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Input shape handed to both the data generator and the model.
in_put = (32,100)
# Character set: the ten digits followed by a-z.
content = list(string.digits)+list(string.ascii_lowercase)
data = data_generater_test(content,batch_size=128,input_shape = in_put)
class_numbers = data.class_numbers
# Read from the generator but not used further in this part of the script.
im_shape = data.im_shape
print('training model on {} samples'.format(data.lenth))
model = CRNN(in_put,class_numbers,batch_size = 128)
model.train(data,content,epoch=60)
Ejemplo n.º 23
0
import json

import cv2
import torch
from torch.autograd import Variable
from torchvision import transforms

from dataset import LabelConverter, Rescale, Normalize
from model import CRNN

# Script: load a trained CRNN and prepare one grayscale image for recognition.
IMAGE_HEIGHT = 32

model_path = './ocr-model/crnn_address.pth'
img_path = './ocr_address.jpg'

# The character set is stored as a JSON list and joined into one string.
# A context manager closes the file handle, which the bare open() call leaked.
with open('./cn-alphabet.json', 'rb') as alphabet_file:
    alphabet = ''.join(json.load(alphabet_file))

# One output class more than the alphabet holds.
model = CRNN(IMAGE_HEIGHT, 1, len(alphabet) + 1, 256)
if torch.cuda.is_available():
    model = model.cuda()
print('loading pretrained model from %s' % model_path)
# map_location='cpu' lets a GPU-saved checkpoint load on CPU-only machines;
# load_state_dict then copies the tensors onto the model's own device.
model.load_state_dict(torch.load(model_path, map_location='cpu'))

converter = LabelConverter(alphabet)

image_transform = transforms.Compose(
    [Rescale(IMAGE_HEIGHT),
     transforms.ToTensor(),
     Normalize()])
# Flag 0 makes OpenCV read the image as single-channel grayscale.
image = cv2.imread(img_path, 0)
image = image_transform(image)
if torch.cuda.is_available():
    image = image.cuda()
Ejemplo n.º 24
0
def train(field):
    """Train a CRNN text recogniser on one dataset field using CTC loss.

    Args:
        field: Name of the field to train on. ``'address'`` and ``'psb'``
            force a batch size of 1 (image length varies); any other value
            uses the module-level ``BATCH_SIZE``.

    Runs 25 epochs. Every 500 iterations the running average loss is printed
    and reset, and the state dict is saved to
    ``{model_path}crnn_{field}_{epoch}_{i}.pth``.
    """
    # Context manager so the alphabet file handle is closed after reading.
    with open('./cn-alphabet.json', 'rb') as alphabet_file:
        alphabet = ''.join(json.load(alphabet_file))
    nclass = len(alphabet) + 1  # +1 for the extra dash ('-') class
    batch_size = BATCH_SIZE
    if field == 'address' or field == 'psb':
        batch_size = 1  # image length varies

    converter = LabelConverter(alphabet)
    criterion = CTCLoss(zero_infinity=True)

    crnn = CRNN(IMAGE_HEIGHT, nc, nclass, number_hidden)
    crnn.apply(weights_init)

    image_transform = transforms.Compose([
        Rescale(IMAGE_HEIGHT),
        transforms.ToTensor(),
        Normalize()
    ])

    dataset = LmdbDataset(db_path, field, image_transform)
    dataloader = DataLoader(dataset, batch_size=batch_size,
                            shuffle=True, num_workers=4)

    # Reusable buffers that utils.load_data fills for each batch.
    image = torch.FloatTensor(batch_size, 3, IMAGE_HEIGHT, IMAGE_HEIGHT)
    text = torch.IntTensor(batch_size * 5)
    length = torch.IntTensor(batch_size)

    image = Variable(image)
    text = Variable(text)
    length = Variable(length)

    loss_avg = utils.averager()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)

    if torch.cuda.is_available():
        crnn.cuda()
        crnn = nn.DataParallel(crnn)
        image = image.cuda()
        criterion = criterion.cuda()

    def train_batch(net, iteration):
        """Run one optimisation step on the next batch and return its cost."""
        # The built-in next() works on every iterator; the `.next()` method is
        # a Python 2 leftover that Python 3 iterators do not provide.
        cpu_images, cpu_texts = next(iteration)
        batch_size = cpu_images.size(0)
        utils.load_data(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.load_data(text, t)
        utils.load_data(length, l)

        preds = net(image)
        # Every sample in the batch gets the same prediction length.
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        net.zero_grad()
        cost.backward()
        optimizer.step()
        return cost

    nepoch = 25
    for epoch in range(nepoch):
        train_iter = iter(dataloader)
        # Nothing in the loop leaves training mode or freezes parameters, so
        # setting these once per epoch is enough.
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()

        i = 0
        while i < len(dataloader):
            cost = train_batch(crnn, train_iter)
            loss_avg.add(cost)
            i += 1

            if i % 500 == 0:
                print('%s [%d/%d][%d/%d] Loss: %f' %
                        (datetime.datetime.now(), epoch, nepoch, i, len(dataloader), loss_avg.val()))
                loss_avg.reset()

                # Checkpoint at the same cadence as the progress report.
                torch.save(
                    crnn.state_dict(), f'{model_path}crnn_{field}_{epoch}_{i}.pth')
Ejemplo n.º 25
0
import matplotlib.pyplot as plt
from model import CRNN
import os
from tqdm import tqdm
import glob
from dataset import CaptchaImagesDataset
from utils import LabelConverter
from tqdm import tqdm

if __name__ == '__main__':
    device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
    label_converter = LabelConverter(char_set=string.ascii_lowercase +
                                     string.digits)
    vocab_size = label_converter.get_vocab_size()

    model = CRNN(vocab_size=vocab_size).to(device)
    model.load_state_dict(torch.load('output/weight.pth', map_location=device))
    model.eval()

    correct = 0.0
    image_list = glob.glob('data/CAPTCHA Images/test/*')
    for image in tqdm(image_list):
        ground_truth = image.split('/')[-1].split('.')[0]
        image = Image.open(image).convert('RGB')
        image = F.to_tensor(image).unsqueeze(0).to(device)

        output = model(image)
        encoded_text = output.squeeze().argmax(1)
        decoded_text = label_converter.decode(encoded_text)

        if ground_truth == decoded_text:
Ejemplo n.º 26
0
import numpy as np
from model import CRNN

# Script: load the CSV training data and train the model with fixed hyper-parameters.
train_X = np.loadtxt("data/train_sequence.csv", delimiter=",")
train_Y = np.loadtxt("data/train_label.csv", delimiter=",")

# hyper-parameters
input_size = 12
number_filter = 12
output_size = 12
rate_drop_dense = 0
validation_split_ratio = 0.1

# The variable is called `cnn`, but the object is a CRNN instance.
cnn = CRNN(input_size, number_filter, output_size, rate_drop_dense,
           validation_split_ratio)

# The return value of train_model is kept as the best model's path;
# './' is passed as the save directory.
best_model_path = cnn.train_model(train_X, train_Y, model_save_directory='./')
Ejemplo n.º 27
0
import torch.nn.functional as F

# Single-file language-identification test: load the saved CRNN checkpoint,
# run it on one audio file and report the softmax of the model output.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
parser = argparse.ArgumentParser(description='LID testing script for single audio')
parser.add_argument('-i', type=str, help='path to input audio', required=True)
parser.add_argument('-o', type=str, help='path to output result file', default='./result.txt')
args = parser.parse_args()

# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved on a GPU still loads when only the CPU is available.
checkpoint = torch.load('model_saves/vgg_lstm_subset_newloader/best.pth',
                        map_location=device)

# The architecture settings are stored in the checkpoint next to the weights.
model = CRNN(hidden_size=checkpoint['hidden_size'],
             only_cnn=checkpoint['only_cnn'],
             cnn_type=checkpoint['cnn_type'],
             recurrent_type=checkpoint['recurrent_type'],
             lstm_layers=checkpoint['lstm_layers'],
             nheads=checkpoint['nheads'],
             nlayers=checkpoint['nlayers'],
             input_shape=checkpoint['input_shape']).double().to(device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

audio, sample_rate = torchaudio.load(args.i)
assert sample_rate == 8000, \
    'expected 8000 Hz audio, got {} Hz'.format(sample_rate)

# Add a leading dimension and match the model's double precision and device.
audio = audio.unsqueeze(0)
audio = audio.double().to(device)
with torch.no_grad():
    pred = model(audio)
probs = F.softmax(pred, dim=1)
print(probs)

# Write the same result to the requested output file; the context manager
# guarantees the handle is flushed and closed.
with open(args.o, 'w') as out_f:
    print(probs, file=out_f)
Ejemplo n.º 28
0
if __name__ == "__main__":
    # Evaluation entry point: build the data loader, restore the most recent
    # checkpoint into a fresh CRNN and time each evaluation step.
    dataloader = OCRDataLoader(args.annotation_paths,
                               args.image_height,
                               args.image_width,
                               table_path=args.table_path,
                               blank_index=BLANK_INDEX,
                               shuffle=True,
                               batch_size=args.batch_size)
    print("Num of eval samples: {}".format(len(dataloader)))
    print("Num of classes: {}".format(NUM_CLASSES))
    print("Blank index is {}".format(BLANK_INDEX))
    localtime = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
    print("Start at {}".format(localtime))

    model = CRNN(NUM_CLASSES, args.backbone)
    model.summary()

    # Resolve the checkpoint path once and verify it before restoring, so a
    # missing checkpoint is reported without attempting a restore.
    checkpoint = tf.train.Checkpoint(model=model)
    latest_checkpoint = tf.train.latest_checkpoint(args.checkpoint)
    if not latest_checkpoint:
        print("Initializing fail, check checkpoint")
        # Non-zero status so that shells and CI see the failure.
        raise SystemExit(1)
    checkpoint.restore(latest_checkpoint)
    print("Restored from {}".format(latest_checkpoint))

    num_correct_samples = 0
    for index, (X, Y) in enumerate(dataloader()):
        # Time a single evaluation step.
        start_time = time.perf_counter()
        decoded, neg_sum_logits = eval_one_step(model, X, Y)
        end_time = time.perf_counter()
Ejemplo n.º 29
0
    # Load the MJSynth index and report the size of each annotation split.
    mj_synth = MjSynth('mnt/ramdisk/max/90kDICT32px')
    print('Num. of images:', len(mj_synth.all_image_paths))
    print('All Train {} / All Val {} / All Test {}'.format(
        len(mj_synth.annotation_train), len(mj_synth.annotation_val),
        len(mj_synth.annotation_test)))

    # NOTE(review): random_choice_rate=0.005 presumably keeps a random 0.5%
    # of each split -- confirm against MjSynth.random_choice.
    X_train, y_train, X_val, y_val, X_test, y_test = mj_synth.random_choice(
        random_choice_rate=0.005)
    print('Train {} / Val {} / Test {}'.format(len(y_train), len(y_val),
                                               len(y_test)))

    # Each image set is paired with its own labels; the test images must be
    # given y_test, not the validation labels.
    train_ds, val_ds, test_ds = mj_synth.create_datasets(
        X_train, y_train, X_val, y_val, X_test, y_test)

    # Model definition: build the CRNN and compile it with the dataset's
    # maximum label length.
    crnn = CRNN()
    crnn.compile(mj_synth.max_label_len)

    # Train the model.
    # The checkpoint callback monitors 'val_loss' with save_best_only=True and
    # targets args.save_model_path (assuming this is Keras' ModelCheckpoint,
    # it saves only when the monitored value improves -- TODO confirm import).
    ckpt = ModelCheckpoint(filepath=args.save_model_path,
                           monitor='val_loss',
                           verbose=1,
                           save_best_only=True,
                           mode='auto')
    callbacks_list = [ckpt]
    # The targets handed to fit are all-zero arrays sized from the first
    # element of each dataset; presumably the real labels travel inside the
    # inputs and the loss is computed within training_model -- TODO confirm.
    # NOTE(review): the training target is a bare array while the validation
    # target is wrapped in a list -- verify that this asymmetry is intended.
    crnn.training_model.fit(x=[*train_ds],
                            y=np.zeros(len(train_ds[0])),
                            batch_size=args.batch_size,
                            epochs=args.epochs,
                            validation_data=([*val_ds],
                                             [np.zeros(len(val_ds[0]))]),
Ejemplo n.º 30
0
def main():
    """Train a CRNN on Synth90k with periodic validation and checkpointing.

    Every setting is read from the module-level ``config`` mapping. A single
    step counter runs across all epochs: every ``show_interval`` steps the
    per-sample batch loss is printed, every ``valid_interval`` steps the model
    is evaluated on the validation loader, and every ``save_interval`` steps
    (which must be a multiple of ``valid_interval``) the weights are written
    to ``config['checkpoints_dir']``.

    Raises:
        AssertionError: if ``save_interval`` is not a multiple of
            ``valid_interval``.
    """
    epochs = config['epochs']
    train_batch_size = config['train_batch_size']
    eval_batch_size = config['eval_batch_size']
    lr = config['lr']
    show_interval = config['show_interval']
    valid_interval = config['valid_interval']
    save_interval = config['save_interval']
    cpu_workers = config['cpu_workers']
    reload_checkpoint = config['reload_checkpoint']
    # NOTE(review): read but not used below -- confirm whether evaluate()
    # is supposed to receive it.
    valid_max_iter = config['valid_max_iter']

    img_width = config['img_width']
    img_height = config['img_height']
    data_dir = config['data_dir']

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'device: {device}')

    train_dataset = Synth90kDataset(root_dir=data_dir,
                                    mode='train',
                                    img_height=img_height,
                                    img_width=img_width)
    valid_dataset = Synth90kDataset(root_dir=data_dir,
                                    mode='dev',
                                    img_height=img_height,
                                    img_width=img_width)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=train_batch_size,
                              shuffle=True,
                              num_workers=cpu_workers,
                              collate_fn=synth90k_collate_fn)
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=eval_batch_size,
                              shuffle=True,
                              num_workers=cpu_workers,
                              collate_fn=synth90k_collate_fn)

    # One class more than the number of labelled characters (presumably the
    # CTC blank -- confirm against the LABEL2CHAR definition).
    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(1,
                img_height,
                img_width,
                num_class,
                map_to_seq_hidden=config['map_to_seq_hidden'],
                rnn_hidden=config['rnn_hidden'],
                leaky_relu=config['leaky_relu'])
    if reload_checkpoint:
        crnn.load_state_dict(torch.load(reload_checkpoint,
                                        map_location=device))
    crnn.to(device)

    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    criterion = CTCLoss(reduction='sum')
    criterion.to(device)

    # Saving happens inside the validation branch, so a save step that is not
    # also a validation step would silently never save.
    assert save_interval % valid_interval == 0, \
        'save_interval must be a multiple of valid_interval'

    i = 1  # global step counter, shared by all epochs
    for epoch in range(1, epochs + 1):
        print(f'epoch: {epoch}')
        tot_train_loss = 0.
        tot_train_count = 0
        for train_data in train_loader:
            loss = train_batch(crnn, train_data, optimizer, criterion, device)
            train_size = train_data[0].size(0)

            tot_train_loss += loss
            tot_train_count += train_size
            if i % show_interval == 0:
                print('train_batch_loss[', i, ']: ', loss / train_size)

            if i % valid_interval == 0:
                evaluation = evaluate(crnn,
                                      valid_loader,
                                      criterion,
                                      decode_method=config['decode_method'],
                                      beam_size=config['beam_size'])
                print('valid_evaluation: loss={loss}, acc={acc}'.format(
                    **evaluation))

                if i % save_interval == 0:
                    prefix = 'crnn'
                    # Separate name so the training loss of this step is not
                    # overwritten by the validation loss.
                    valid_loss = evaluation['loss']
                    checkpoints_dir = config['checkpoints_dir']
                    if checkpoints_dir:
                        # torch.save does not create missing directories.
                        os.makedirs(checkpoints_dir, exist_ok=True)
                    save_model_path = os.path.join(
                        checkpoints_dir,
                        f'{prefix}_{i:06}_loss{valid_loss}.pt')
                    torch.save(crnn.state_dict(), save_model_path)
                    print('save model at ', save_model_path)

            i += 1

        if tot_train_count:
            print('train_loss: ', tot_train_loss / tot_train_count)
        else:
            # An empty loader would otherwise divide by zero.
            print('train_loss: n/a (no training samples)')