def train():
    """Train the captcha CRNN for 50 epochs, checkpointing the best validation loss."""
    print('start training ...........')
    batch_size = 16
    num_epochs = 50
    learning_rate = 0.1

    label_converter = LabelConverter(char_set=string.ascii_lowercase + string.digits)
    vocab_size = label_converter.get_vocab_size()

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = CRNN(vocab_size=vocab_size).to(device)
    # model.load_state_dict(torch.load('output/weight.pth', map_location=device))

    train_loader, val_loader = get_loader('data/CAPTCHA Images/', batch_size=batch_size)

    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    # scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 10, 2)

    train_losses, val_losses = [], []
    for epoch in range(num_epochs):
        epoch_train_loss = fit(epoch, model, optimizer, label_converter, device, train_loader, phase='training')
        epoch_val_loss = fit(epoch, model, optimizer, label_converter, device, val_loader, phase='validation')
        print('-----------------------------------------')

        # Save whenever validation loss ties or beats the best seen so far
        # (val_losses does not yet contain the current epoch here).
        if not val_losses or epoch_val_loss <= np.min(val_losses):
            torch.save(model.state_dict(), 'output/weight.pth')

        train_losses.append(epoch_train_loss)
        val_losses.append(epoch_val_loss)
        write_figure('output', train_losses, val_losses)
        write_log('output', epoch, epoch_train_loss, epoch_val_loss)
        scheduler.step(epoch_val_loss)
def test_model():
    """Smoke-test the CRNN forward pass on a dummy 1x1x32x100 input and print the output shape."""
    net = CRNN(32, 1, 53, 256)
    dummy = torch.randn((1, 1, 32, 100))
    result = net.forward(dummy)
    print(result.shape)
def main():
    """Train the denoising CRNN and write its weights to disk.

    Fixes: the save directory is now created with ``os.makedirs`` (``os.mkdir``
    fails for multi-level paths) and ``model_dir`` truthiness is checked
    before touching the filesystem; ``exist_ok=True`` avoids a race between
    the existence check and creation.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    args = parse_args()

    train_dataset = DenoisingDataset(args.train_dir)
    val_dataset = DenoisingDataset(args.val_dir)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

    model = CRNN().to(device)
    train_model(model, train_loader, val_loader, args.epochs, args.learning_rate, device, log=True)

    model_dir = os.path.dirname(args.model_save_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    # Old-style serialization kept for compatibility with existing loaders.
    torch.save(model.state_dict(), args.model_save_path, _use_new_zipfile_serialization=False)
def main():
    """Build a CRNN from the parsed config file and train it on the chosen device."""
    params = _get_args()
    cfg = read_config(params.config)
    with tf.device(params.device):
        net = CRNN(cfg)
        net.train()
def main():
    """Evaluate a pretrained CRNN on the given dataset and print the results."""
    params = _get_args()
    cfg = read_config(params.config)
    with tf.device(params.device):
        net = CRNN(cfg, pretrained=True)
        preds, labels, cer, macc = net.evaluate(params.dataset)
        print(preds, labels, cer, macc)
def load_model(model_path):
    """Rebuild the TF1 CRNN inference graph and restore weights from a checkpoint.

    Returns (sess, decoded, inputs, seq_len) on success, or None when no
    checkpoint is found under `model_path`.
    """
    with tf.Graph().as_default():
        # Single RGB image, fixed height 32, variable width.
        inputs = tf.placeholder(tf.float32, [1, 32, None, 3])
        crnn = CRNN(inputs)
        seq_len = tf.placeholder(tf.int32, [None], name='seq_len')
        # Flatten CRNN features: (batchsize x (width/32)) * 512.
        logits = tf.reshape(crnn, [-1, 512])
        # Linear projection onto the 7000-entry vocabulary.
        W = tf.Variable(tf.truncated_normal([512, 7000], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0., shape=[7000]), name="b")
        logits = tf.matmul(logits, W) + b
        logits = tf.reshape(logits, [1, -1, 7000], name="reshape_log")
        # CTC decoding expects time-major tensors: (time, batch, classes).
        logits = tf.transpose(logits, (1, 0, 2), name="final_log")
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, seq_len, top_paths=1, merge_repeated=False)

        saver = tf.train.Saver(tf.global_variables())
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        ckpt = tf.train.get_checkpoint_state(model_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("load success")
        else:
            print("no such file")
            return
        return sess, decoded, inputs, seq_len
def main(argv=None):
    """Run an endless CRNN training loop with per-epoch validation and TensorBoard logging."""
    if not os.path.exists(Constants.MODEL_DIR):
        os.makedirs(Constants.MODEL_DIR)
    if not os.path.exists(Constants.TENSORBOARD_DIR):
        os.makedirs(Constants.TENSORBOARD_DIR)

    with open(Constants.CHARLIST_FILE, "rb") as fp:
        charList = pickle.load(fp)
    lenCharList = len(charList)

    # Build the input pipelines on the CPU.
    with tf.device("CPU:0"):
        train_ds, train_image_count = create_datasets(Constants.TRAIN_TFRECORD)
        val_ds, val_image_count = create_datasets(Constants.VAL_TFRECORDS)
    train_batches = int(np.floor(train_image_count / Constants.BATCH_SIZE))
    val_batches = int(np.floor(val_image_count / Constants.BATCH_SIZE))

    model = CRNN(lenCharList)
    global_step_op = tf.Variable(0)
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(
        starter_learning_rate, global_step_op, decay_steps=10000, decay_rate=0.1, staircase=False)
    optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)

    epoch = 1
    summary_writer = tf.contrib.summary.create_file_writer(Constants.TENSORBOARD_DIR, flush_millis=10000)
    with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
        # Loop runs until interrupted; model is saved whenever CER is below 15.
        while True:
            print("Epoch " + str(epoch))
            loss = train_on_batch(model, train_ds, train_batches, charList, optimizer)
            images, recognized, charErrorRate, wordAccuracy = validate_on_batch(
                model, val_ds, val_batches, charList, epoch)
            if charErrorRate < 15:
                save_model(model, epoch)
            write_to_tensorboard(epoch, images, recognized, loss, charErrorRate, wordAccuracy)
            epoch += 1
def main():
    """Parse CLI options, then sweep learning rates and batch sizes for DOA training."""
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    parser = argparse.ArgumentParser(prog='train', description="""Script to train the DOA estimation system""")
    parser.add_argument("--input", "-i", default="data", help="Directory where data and labels are", type=str)
    parser.add_argument("--savedir", "-s", default=".", help="Directory to write results", type=str)
    parser.add_argument("--rate", "-r", type=float, default=None, help="Choose a learning rate, default to sweep")
    parser.add_argument("--batchsize", "-b", type=int, default=None, help="Choose a batchsize, default to sweep")
    parser.add_argument("--epochs", "-e", type=int, default=10, help="Number of epochs")
    parser.add_argument("--dropout", "-dp", type=float, default=0., help="Specify dropout rate")
    # parser.add_argument("--input_dropout", "-id", type=float, default=0., help="Specify input dropout rate")
    # parser.add_argument("--conv_dropout", "-cd", type=float, default=0., help="Specify conv dropout rate (applied at all layers)")
    # parser.add_argument("--lstm_dropout", "-ld", type=float, default=0., help="Specify lstm dropout rate (applied to lstm output)")
    parser.add_argument("--model", "-m", type=str, choices=["CNN", "CRNN"], required=True, help="Choose network model")
    parser.add_argument("--outputformulation", "-of", type=str, choices=["Reg", "Class"], required=True, help="Choose output formulation")
    parser.add_argument("--lstmout", "-lo", type=str, choices=[LSTM_FULL, LSTM_FIRST, LSTM_LAST], required=False, default=LSTM_FULL, help="Choose what to use from LSTM ouput")
    args = parser.parse_args()

    # A single dropout value is applied to the input, conv and lstm stages alike.
    # dropouts = Dropouts(args.input_dropout, args.conv_dropout, args.lstm_dropout)
    dropouts = Dropouts(args.dropout, args.dropout, args.dropout)

    # Sweep defaults unless a single value was pinned on the command line.
    rates = [1e-5, 1e-7, 1e-3, 1e-9, 1e-1] if not args.rate else [args.rate]
    batches = [128, 32, 64] if not args.batchsize else [args.batchsize]

    for learning_rate in rates:
        for batch_size in batches:
            # Directory that stores this experiment's files.
            results_dir = os.path.join(
                args.savedir,
                "results" + '_{}'.format(args.model) + '_{}'.format(args.outputformulation)
                + '_lr{}'.format(learning_rate) + '_bs{}'.format(batch_size)
                + '_drop{}'.format(args.dropout))
            print('writing results to {}'.format(results_dir))

            doa_classes = None
            if args.outputformulation == "Reg":
                criterion = nn.MSELoss(reduction='sum')
                output_dimension = 3
            elif args.outputformulation == "Class":
                criterion = nn.CrossEntropyLoss(reduction="sum")
                doa_classes = DoaClasses()
                output_dimension = len(doa_classes.classes)

            if args.model == "CNN":
                net = ConvNet(device, dropouts, output_dimension, doa_classes).to(device)
            elif args.model == "CRNN":
                net = CRNN(device, dropouts, output_dimension, doa_classes, args.lstmout).to(device)

            config = Config(data_folder=args.input,
                            learning_rate=learning_rate,
                            batch_size=batch_size,
                            num_epochs=args.epochs,
                            test_to_all_ratio=0.1,
                            results_dir=results_dir,
                            model=net,
                            loss_criterion=criterion,
                            doa_classes=doa_classes,
                            lstm_output=args.lstmout,
                            shuffle=True)
            doa_train(config)
def __init__(self, args):
    """Build the dataset and CRNN model from CLI args, optionally resuming weights."""
    self.args = args
    self.dataset = DataSet(self.args.dataset, self.args.batch_size)
    self.model = CRNN(args.lr, self.dataset.img_width, self.dataset.img_height, self.dataset.total_labels)

    # Restore model weights when a checkpoint path was supplied.
    if self.args.resume is not None:
        print(f"Resuming from checkpoint: {self.args.resume}")
        self.model.load_weights(self.args.resume)

    # Maps a single chord symbol to its spoken name; used when decoding chords.
    # NOTE(review): the value for "o" carries a trailing space ("diminished ") —
    # confirm whether that is intentional.
    self.inverse_chord_map = {
        "Δ": "major",
        "M": "major",
        "m": "minor",
        "+": "augmented",
        "-": "diminished",
        "o": "diminished ",
        "ø": "half diminished",
        "#": "sharp",
        "b": "bimol",
        # "<" : "mychord",
    }
def inference_model(network, lstm_out, out_format, model_path):
    """Load a trained DOA model ("CNN" or "CRNN") for inference.

    Returns the network in eval mode on `device`, together with the
    DoaClasses helper used to size the classification head.
    """
    doa_classes = DoaClasses()
    if out_format == "cartesian":
        out_dim = 3
    elif out_format == "class":
        out_dim = len(doa_classes.classes)

    if network == "CNN":
        net = ConvNet(device, Dropouts(0, 0, 0), out_dim, doa_classes)
    elif network == "CRNN":
        net = CRNN(device, Dropouts(0, 0, 0), out_dim, doa_classes, lstm_out)

    net.load_state_dict(torch.load(model_path, map_location=device))
    net.eval()
    net.to(device)
    return net, doa_classes
def create_model(formulation, model):
    """Instantiate a DOA network for the given output formulation ("Reg" or "Class")."""
    zero_dropouts = Dropouts(0, 0, 0)
    doa_classes = None

    if formulation == "Reg":
        loss = nn.MSELoss(reduction='sum')
        output_dimension = 3
    elif formulation == "Class":
        loss = nn.CrossEntropyLoss(reduction="sum")
        doa_classes = DoaClasses()
        output_dimension = len(doa_classes.classes)

    if model == "CNN":
        chosen = ConvNet(device, zero_dropouts, output_dimension, doa_classes).to(device)
    elif model == "CRNN":
        chosen = CRNN(device, zero_dropouts, output_dimension, doa_classes, "Full").to(device)
    return chosen
def eval_model():
    """Evaluate the saved EEG CRNN checkpoint on the test set and print metrics.

    Relies on module-level globals: ``num_classes``, ``testloader`` and
    ``y_test``. Fix applied: removed dead no-op self-assignments
    (``data, target = data, target``, ``inputs = inputs``, ``targets = targets``
    — leftovers of a removed ``.cuda()`` port) and unused locals
    (``test_loss``, ``c``).
    """
    model = CRNN()
    model.load_state_dict(torch.load('./model_EEG.pt'))

    # specify the target classes
    classes = ('True', 'False')

    # per-class accuracy accumulators
    class_correct = [0.] * num_classes
    class_total = [0.] * num_classes

    model.eval()
    with torch.no_grad():
        for data, target in testloader:
            target = target.long()
            output, _ = model(data)
            # convert output probabilities to predicted class
            _, pred = torch.max(output, 1)
            # compare predictions to true label
            correct = (pred == target).squeeze()
            for i, label in enumerate(target):
                class_correct[label] += correct[i].item()
                class_total[label] += 1

    for i in range(len(classes)):
        print('Accuracy of %s : %2d%% out of %d cases' % (classes[i], 100 * class_correct[i] / class_total[i], class_total[i]))

    # Score one batch of the model against a constant-prediction baseline.
    inputs, targets = next(iter(testloader))
    targets = targets.long()
    outputs, _ = model(inputs)
    probability, predicted = torch.max(outputs.data, 1)

    eval_metrics = pd.DataFrame(np.empty([2, 4]))
    eval_metrics.index = ["baseline"] + ['RNN']
    eval_metrics.columns = ["Accuracy", "ROC AUC", "PR AUC", "Log Loss"]
    # Baseline: always predict class 0 with probability 0.5.
    pred = np.repeat(0, len(y_test.cpu()))
    pred_proba = np.repeat(0.5, len(y_test.cpu()))
    eval_metrics.iloc[0, 0] = accuracy_score(y_test.cpu(), pred)
    eval_metrics.iloc[0, 1] = roc_auc_score(y_test.cpu(), pred_proba)
    eval_metrics.iloc[0, 2] = average_precision_score(y_test.cpu(), pred_proba)
    eval_metrics.iloc[0, 3] = log_loss(y_test.cpu(), pred_proba)
    eval_metrics.iloc[1, 0] = accuracy_score(y_test.cpu(), predicted.cpu())
    eval_metrics.iloc[1, 1] = roc_auc_score(y_test.cpu(), probability.cpu())
    eval_metrics.iloc[1, 2] = average_precision_score(y_test.cpu(), probability.cpu())
    eval_metrics.iloc[1, 3] = 0  # log_loss(y_test.cpu(), pred_proba[:, 1])
    print(eval_metrics)
def main():
    """Evaluate the CRNN checkpoint on the Synth90k test split and print loss/accuracy."""
    eval_batch_size = config["eval_batch_size"]
    cpu_workers = config["cpu_workers"]
    reload_checkpoint = config["reload_checkpoint"]
    img_height = config["img_height"]
    img_width = config["img_width"]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"device: {device}")

    test_dataset = Synth90kDataset(root_dir=config["data_dir"], mode="test",
                                   img_height=img_height, img_width=img_width)
    test_loader = DataLoader(dataset=test_dataset, batch_size=eval_batch_size,
                             shuffle=False, num_workers=cpu_workers,
                             collate_fn=synth90k_collate_fn)

    # +1 over the character table — presumably the CTC blank class; confirm in CRNN.
    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(1, img_height, img_width, num_class,
                map_to_seq_hidden=config["map_to_seq_hidden"],
                rnn_hidden=config["rnn_hidden"],
                leaky_relu=config["leaky_relu"])
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
    crnn.to(device)

    criterion = CTCLoss(reduction="sum")
    criterion.to(device)

    evaluation = evaluate(crnn, test_loader, criterion,
                          decode_method=config["decode_method"],
                          beam_size=config["beam_size"])
    print("test_evaluation: loss={loss}, acc={acc}".format(**evaluation))
def black_box_function(lr_pow):
    """Optimisation objective over log10 learning rate.

    Builds the configured DOA model, trains it, and returns the negated
    training metric so a maximising optimiser effectively minimises it.
    """
    learning_rate = 10.0 ** lr_pow
    results_dir = os.path.join(
        savedir,
        "results" + '_{}'.format(modelname) + '_{}'.format(args.outputformulation)
        + '_lr{}'.format(learning_rate) + '_bs{}'.format(batch_size)
        + '_drop{}'.format(dropout))
    print('writing results to {}'.format(results_dir))

    dropouts = Dropouts(dropout, dropout, dropout)
    doa_classes = None
    if outputformulation == "Reg":
        criterion = nn.MSELoss(reduction='sum')
        output_dimension = 3
    elif outputformulation == "Class":
        criterion = nn.CrossEntropyLoss()
        doa_classes = DoaClasses()
        output_dimension = len(doa_classes.classes)

    if modelname == "CNN":
        net = ConvNet(device, dropouts, output_dimension, doa_classes).to(device)
    elif modelname == "CRNN":
        net = CRNN(device, dropouts, output_dimension, doa_classes, lstmout).to(device)

    config = (TrainConfig()
              .set_data_folder(inputdir)
              .set_learning_rate(learning_rate)
              .set_batch_size(batch_size)
              .set_num_epochs(epochs)
              .set_test_to_all_ratio(0.1)
              .set_results_dir(results_dir)
              .set_model(net)
              .set_loss_criterion(criterion)
              .set_doa_classes(doa_classes)
              .set_lstm_output(lstmout))
    # negative sign for minimization
    return -doa_train(config)
def __init__(self):
    """Assemble the ensemble: three dual-class nets plus two one-class DeepSAD nets,
    all loaded onto CUDA, frozen, and switched to eval mode."""
    super(EnsembleNetwork, self).__init__()

    # Dual-class classifiers.
    self.resnet = Resnet()
    self.crnn = CRNN()
    self.unet = UNet()

    # One-class (anomaly) classifiers.
    self.deep_sad_normal = LG_1DCNN()
    self.deep_sad_abnormal = LG_1DCNN()

    self.models = [self.resnet, self.crnn, self.unet,
                   self.deep_sad_normal, self.deep_sad_abnormal]

    # Weights for the non-anomaly detectors.
    self.resnet.load_state_dict(torch.load('/workspace/jinsung/resnet_final-Copy1js.pt'))
    #self.crnn.load_state_dict(torch.load('/workspace/demon/crnn_random700_spectrogram.pt'))
    #self.unet.load_state_dict(torch.load('/workspace/demon/unet_random700_spectrogram.pt'))

    # DeepSAD "normal" head: the checkpoint bundles the center c and the weights.
    model_dict_normal = torch.load('/workspace/demon/deepSAD_1117_7k_10ep_64batch_normal_flip.tar')
    self.c_normal = model_dict_normal["c"]
    self.deep_sad_normal.load_state_dict(model_dict_normal["net_dict"])

    # DeepSAD "abnormal" head.
    model_dict_abnormal = torch.load('/workspace/demon/deepSADModel_7k_10ep_64batch_abnormal.tar')
    self.c_abnormal = model_dict_abnormal["c"]
    self.deep_sad_abnormal.load_state_dict(model_dict_abnormal["net_dict"])

    # Move everything to the GPU and freeze all parameters for inference.
    for model in self.models:
        model.to('cuda')
        model.eval()
        for param in model.parameters():
            param.requires_grad_(False)
def _multi_models(self, **kwargs):
    """Replicate the CRNN across GPUs and build the averaged-gradient apply op.

    Fix: replaced the Python-2-only ``xrange`` with ``range`` (``xrange``
    raises NameError under Python 3); behaviour is otherwise unchanged.

    Returns:
        (models, apply_grad, loss): per-tower CRNN instances, the op that
        applies the averaged gradients, and the mean loss across towers.
    """
    models = []
    tower_losses = []  # one scalar loss per GPU tower
    tower_grads = []   # per-tower gradient lists
    with tf.variable_scope(tf.get_variable_scope()) as scope:
        for i in range(self._n_gpus):
            with tf.device('/gpu:%d' % i):
                crnn = CRNN(**kwargs)
                # Share variables across towers after the first one is built.
                tf.get_variable_scope().reuse_variables()
                ## Calculate the gradients for the batch of data on this tower.
                grads = self._optimizer.compute_gradients(crnn.loss)
                models.append(crnn)
                tower_losses.append(crnn.loss)
                ## Keep track of the gradients across all towers.
                tower_grads.append(grads)
    loss = tf.reduce_mean(tower_losses)
    # Mean of each gradient — this is the synchronization point across towers.
    grads = self._average_gradients(tower_grads)
    # Apply the gradients to adjust the shared variables.
    apply_grad = self._optimizer.apply_gradients(grads)
    return models, apply_grad, loss
def main():
    """Resume CRNN training from a saved checkpoint with periodic saving and early stopping."""
    crnn = CRNN(is_train=True)
    ada = Adadelta()
    # The loss just passes y_pred through (loss value presumably produced
    # inside the model itself — confirm against the CRNN definition).
    crnn.model.compile(loss=lambda y_true, y_pred: y_pred, optimizer=ada)

    batch_size = 64
    train_gen = DataGenerator(data_path=TRAIN_DIR_NAME, batch_size=batch_size)

    os.system('mkdir -p models')
    early_stop = EarlyStopping(monitor='loss', min_delta=0.01, patience=8, mode='min', verbose=1)
    checkpoint = ModelCheckpoint(
        filepath='models/model_curr_{epoch:02d}_{loss:.3f}.h5',
        monitor='loss',
        verbose=1,
        mode='min',
        period=1,
        save_weights_only=True)

    # load previous checkpoints
    crnn.model.load_weights('models/model_synth_2_35.h5')

    crnn.model.fit_generator(
        generator=train_gen,
        steps_per_epoch=len(train_gen.image_paths) // batch_size,
        epochs=100,
        callbacks=[checkpoint, early_stop],
        workers=16,
        use_multiprocessing=True,
        max_queue_size=10,
        verbose=1,
    )
def main():
    """Run CTC evaluation of a saved CRNN on the Synth90k test split."""
    eval_batch_size = config['eval_batch_size']
    cpu_workers = config['cpu_workers']
    reload_checkpoint = config['reload_checkpoint']
    img_height = config['img_height']
    img_width = config['img_width']

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'device: {device}')

    test_dataset = Synth90kDataset(root_dir=config['data_dir'], mode='test',
                                   img_height=img_height, img_width=img_width)
    test_loader = DataLoader(dataset=test_dataset, batch_size=eval_batch_size,
                             shuffle=False, num_workers=cpu_workers,
                             collate_fn=synth90k_collate_fn)

    # +1 over the character table — presumably the CTC blank class; confirm in CRNN.
    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(1, img_height, img_width, num_class,
                map_to_seq_hidden=config['map_to_seq_hidden'],
                rnn_hidden=config['rnn_hidden'],
                leaky_relu=config['leaky_relu'])
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
    crnn.to(device)

    criterion = CTCLoss(reduction='sum')
    criterion.to(device)

    evaluation = evaluate(crnn, test_loader, criterion,
                          decode_method=config['decode_method'],
                          beam_size=config['beam_size'])
    print('test_evaluation: loss={loss}, acc={acc}'.format(**evaluation))
def main():
    """Predict text for the image paths supplied on the docopt command line."""
    arguments = docopt(__doc__)
    images = arguments['IMAGE']
    reload_checkpoint = arguments['-m']
    batch_size = int(arguments['-s'])
    decode_method = arguments['-d']
    beam_size = int(arguments['-b'])

    img_height = config['img_height']
    img_width = config['img_width']

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'device: {device}')

    predict_dataset = Synth90kDataset(paths=images, img_height=img_height, img_width=img_width)
    predict_loader = DataLoader(dataset=predict_dataset, batch_size=batch_size, shuffle=False)

    # +1 over the character table — presumably the CTC blank class; confirm in CRNN.
    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(1, img_height, img_width, num_class,
                map_to_seq_hidden=config['map_to_seq_hidden'],
                rnn_hidden=config['rnn_hidden'],
                leaky_relu=config['leaky_relu'])
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
    crnn.to(device)

    preds = predict(crnn, predict_loader, Synth90kDataset.LABEL2CHAR,
                    decode_method=decode_method, beam_size=beam_size)
    show_result(images, preds)
def main():
    """Denoise a saved mel spectrogram with the trained CRNN and save the result.

    Fixes: the input tensor is now moved to the same device as the model
    (previously a CPU tensor was fed to a possibly-CUDA model), inference
    runs under ``torch.no_grad()``, and nested output directories are
    created with ``os.makedirs(..., exist_ok=True)`` instead of ``os.mkdir``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    args = parse_args()

    noisy_mel = np.load(args.path_to_file)
    h, w = noisy_mel.shape
    noisy_mel = pad_mel_spectogram(noisy_mel)
    noisy_mel = torch.tensor(noisy_mel, dtype=torch.float32)
    noisy_mel = noisy_mel.unsqueeze(0).to(device)  # add batch dim, match model device

    model = CRNN().to(device)
    model.load_state_dict(torch.load(args.path_to_model, map_location=device))
    model.eval()

    with torch.no_grad():
        clean_mel = model(noisy_mel)
    clean_mel = clean_mel.squeeze(0).data.cpu().numpy()
    clean_mel = clean_mel[:h]  # drop the rows added by padding before inference

    save_dir = os.path.dirname(args.path_to_save)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    np.save(args.path_to_save, clean_mel)
# NOTE(review): leading `pass` kept as-is — this fragment may be the body of an
# enclosing block that is outside the visible source.
pass
gendata = generatorData()
train_datas, train_labels = gendata.gen_Data(batch_size=60)
test_datas, test_labels = gendata.gen_Data(batch_size=40)

train_datas_shapes = np.shape(train_datas)
# Labels arrive flat; regroup them 4 per sample before one-hot encoding.
train_labels_s = np.reshape(train_labels, (int(len(train_labels) / 4), 4))
train_hot = to_categorical(train_labels_s)
input_shapes = np.shape(train_datas)[1:]

model = CRNN(input_shapes).CRNN_model
model.summary()
# The 'ctc' loss function just returns y_pred (loss value presumably produced
# inside the model's CTC layer — confirm against the CRNN definition).
model.compile(loss={'ctc': lambda train_hot, y_pred: y_pred}, optimizer=RMSprop())
history = model.fit(
    [train_datas, train_hot, np.ones(1) * 15, np.ones(1) * 37],
    np.ones(1),
    batch_size=10,
    epochs=100,
    verbose=2)
#!/usr/bin/env python
# encoding: utf-8
'''
@author: Li Huan
@contact: [email protected]
@file: train.py
@time: 2019/5/6 14:00
@desc: Train the CRNN captcha model on generated digit/lowercase data.
'''
from util import data_generater_test
from model import CRNN
import os
import string

# Hide all GPUs so everything runs on the CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# 32x100 inputs; vocabulary is digits followed by lowercase letters.
in_put = (32, 100)
content = list(string.digits) + list(string.ascii_lowercase)

data = data_generater_test(content, batch_size=128, input_shape=in_put)
class_numbers = data.class_numbers
im_shape = data.im_shape
print('training model on {} samples'.format(data.lenth))

model = CRNN(in_put, class_numbers, batch_size=128)
model.train(data, content, epoch=60)
# Fixes: removed the duplicated `alphabet = alphabet = ...` assignment and
# added the missing `import json` used to load the alphabet file.
import json

import cv2
from torchvision import transforms
import torch
from torch.autograd import Variable
from dataset import LabelConverter, Rescale, Normalize
from model import CRNN

IMAGE_HEIGHT = 32
model_path = './ocr-model/crnn_address.pth'
img_path = './ocr_address.jpg'

# alphabet = '0123456789X'
alphabet = ''.join(json.load(open('./cn-alphabet.json', 'rb')))

# +1 output class over the alphabet (presumably the CTC blank — confirm in CRNN).
model = CRNN(IMAGE_HEIGHT, 1, len(alphabet) + 1, 256)
if torch.cuda.is_available():
    model = model.cuda()
print('loading pretrained model from %s' % model_path)
model.load_state_dict(torch.load(model_path))

converter = LabelConverter(alphabet)
image_transform = transforms.Compose(
    [Rescale(IMAGE_HEIGHT), transforms.ToTensor(), Normalize()])

image = cv2.imread(img_path, 0)  # flag 0: read as grayscale
image = image_transform(image)
if torch.cuda.is_available():
    image = image.cuda()
def train(field):
    """Train a CRNN OCR model for ``field``, checkpointing every 500 batches.

    Fix: replaced the Python-2-only ``iteration.next()`` with the built-in
    ``next(iteration)`` so the training loop works under Python 3.
    Relies on module-level globals: ``BATCH_SIZE``, ``IMAGE_HEIGHT``, ``nc``,
    ``number_hidden``, ``db_path``, ``lr``, ``model_path`` and ``utils``.
    """
    alphabet = ''.join(json.load(open('./cn-alphabet.json', 'rb')))
    nclass = len(alphabet) + 1  # add the dash -
    batch_size = BATCH_SIZE
    if field == 'address' or field == 'psb':
        batch_size = 1  # image length varies
    converter = LabelConverter(alphabet)
    criterion = CTCLoss(zero_infinity=True)

    crnn = CRNN(IMAGE_HEIGHT, nc, nclass, number_hidden)
    crnn.apply(weights_init)

    image_transform = transforms.Compose([
        Rescale(IMAGE_HEIGHT),
        transforms.ToTensor(),
        Normalize()
    ])
    dataset = LmdbDataset(db_path, field, image_transform)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    # Reusable buffers filled per batch via utils.load_data.
    image = torch.FloatTensor(batch_size, 3, IMAGE_HEIGHT, IMAGE_HEIGHT)
    text = torch.IntTensor(batch_size * 5)
    length = torch.IntTensor(batch_size)
    image = Variable(image)
    text = Variable(text)
    length = Variable(length)

    loss_avg = utils.averager()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)

    if torch.cuda.is_available():
        crnn.cuda()
        crnn = nn.DataParallel(crnn)
        image = image.cuda()
        criterion = criterion.cuda()

    def train_batch(net, iteration):
        # One optimisation step over the next batch from `iteration`.
        data = next(iteration)  # was iteration.next() — Python 2 only
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.load_data(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.load_data(text, t)
        utils.load_data(length, l)
        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        return cost

    nepoch = 25
    for epoch in range(nepoch):
        train_iter = iter(dataloader)
        i = 0
        while i < len(dataloader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            cost = train_batch(crnn, train_iter)
            loss_avg.add(cost)
            i += 1
            if i % 500 == 0:
                print('%s [%d/%d][%d/%d] Loss: %f' %
                      (datetime.datetime.now(), epoch, nepoch, i, len(dataloader), loss_avg.val()))
                loss_avg.reset()
            # do checkpointing
            if i % 500 == 0:
                torch.save(crnn.state_dict(), f'{model_path}crnn_{field}_{epoch}_{i}.pth')
import matplotlib.pyplot as plt
from model import CRNN
import os
from tqdm import tqdm
import glob
from dataset import CaptchaImagesDataset
from utils import LabelConverter
from tqdm import tqdm

if __name__ == '__main__':
    # Evaluate the trained captcha CRNN over every image in the test folder.
    device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
    label_converter = LabelConverter(char_set=string.ascii_lowercase + string.digits)
    vocab_size = label_converter.get_vocab_size()
    model = CRNN(vocab_size=vocab_size).to(device)
    model.load_state_dict(torch.load('output/weight.pth', map_location=device))
    model.eval()
    correct = 0.0
    image_list = glob.glob('data/CAPTCHA Images/test/*')
    for image in tqdm(image_list):
        # The file name (without extension) is the ground-truth captcha text.
        ground_truth = image.split('/')[-1].split('.')[0]
        image = Image.open(image).convert('RGB')
        image = F.to_tensor(image).unsqueeze(0).to(device)
        output = model(image)
        # Greedy decode: most likely character index at each time step.
        encoded_text = output.squeeze().argmax(1)
        decoded_text = label_converter.decode(encoded_text)
        # NOTE(review): the source appears truncated here — the body of this
        # `if` (presumably `correct += 1`) is missing from the visible chunk.
        if ground_truth == decoded_text:
import numpy as np
from model import CRNN

# Load the training sequences and their labels from CSV.
train_X = np.loadtxt("data/train_sequence.csv", delimiter=",")
train_Y = np.loadtxt("data/train_label.csv", delimiter=",")

# hyper-parameters
input_size = 12
number_filter = 12
output_size = 12
rate_drop_dense = 0
validation_split_ratio = 0.1

cnn = CRNN(input_size, number_filter, output_size,
           rate_drop_dense, validation_split_ratio)
best_model_path = cnn.train_model(train_X, train_Y, model_save_directory='./')
import torch.nn.functional as F

device = 'cuda' if torch.cuda.is_available() else 'cpu'

parser = argparse.ArgumentParser(description='LID testing script for single audio')
parser.add_argument('-i', type=str, help='path to input audio', required=True)
parser.add_argument('-o', type=str, help='path to output result file', default='./result.txt')
args = parser.parse_args()

out_f = open(args.o, 'w')

# Rebuild the CRNN with the exact hyper-parameters stored in the checkpoint.
checkpoint = torch.load('model_saves/vgg_lstm_subset_newloader/best.pth')
model = CRNN(hidden_size=checkpoint['hidden_size'],
             only_cnn=checkpoint['only_cnn'],
             cnn_type=checkpoint['cnn_type'],
             recurrent_type=checkpoint['recurrent_type'],
             lstm_layers=checkpoint['lstm_layers'],
             nheads=checkpoint['nheads'],
             nlayers=checkpoint['nlayers'],
             input_shape=checkpoint['input_shape']).double().to(device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Reject anything that is not 8 kHz audio (presumably the training rate).
audio, sample_rate = torchaudio.load(args.i)
assert(sample_rate == 8000)
audio = audio.unsqueeze(0)  # add a batch dimension
audio = audio.double().to(device)

with torch.no_grad():
    pred = model(audio)
    probs = F.softmax(pred, dim=1)
    print(probs)
if __name__ == "__main__":
    # Evaluate a trained CRNN OCR model over the annotated dataset.
    dataloader = OCRDataLoader(args.annotation_paths, args.image_height, args.image_width,
                               table_path=args.table_path, blank_index=BLANK_INDEX,
                               shuffle=True, batch_size=args.batch_size)
    print("Num of eval samples: {}".format(len(dataloader)))
    print("Num of classes: {}".format(NUM_CLASSES))
    print("Blank index is {}".format(BLANK_INDEX))
    localtime = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
    print("Start at {}".format(localtime))
    model = CRNN(NUM_CLASSES, args.backbone)
    model.summary()
    # Restore the latest checkpoint; bail out when none is found.
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint.restore(tf.train.latest_checkpoint(args.checkpoint))
    if tf.train.latest_checkpoint(args.checkpoint):
        print("Restored from {}".format(tf.train.latest_checkpoint(args.checkpoint)))
    else:
        print("Initializing fail, check checkpoint")
        exit(0)
    num_correct_samples = 0
    # NOTE(review): the loop body appears truncated in the visible chunk —
    # num_correct_samples is never updated here.
    for index, (X, Y) in enumerate(dataloader()):
        # Time one decode step per batch.
        start_time = time.perf_counter()
        decoded, neg_sum_logits = eval_one_step(model, X, Y)
        end_time = time.perf_counter()
# Load the MJSynth dataset and train the CRNN on a random 0.5% subset.
mj_synth = MjSynth('mnt/ramdisk/max/90kDICT32px')
print('Num. of images:', len(mj_synth.all_image_paths))
print('All Train {} / All Val {} / All Test {}'.format(
    len(mj_synth.annotation_train), len(mj_synth.annotation_val), len(mj_synth.annotation_test)))
X_train, y_train, X_val, y_val, X_test, y_test = mj_synth.random_choice(
    random_choice_rate=0.005)
print('Train {} / Val {} / Test {}'.format(len(y_train), len(y_val), len(y_test)))
# NOTE(review): `y_val` is passed as the last argument where `y_test` looks
# intended — confirm against MjSynth.create_datasets.
train_ds, val_ds, test_ds = mj_synth.create_datasets(
    X_train, y_train, X_val, y_val, X_test, y_val)

# Model definition
crnn = CRNN()
crnn.compile(mj_synth.max_label_len)

# Train the model
ckpt = ModelCheckpoint(filepath=args.save_model_path, monitor='val_loss', verbose=1,
                       save_best_only=True, mode='auto')
callbacks_list = [ckpt]
# Dummy zero targets are fed as y (the loss is presumably computed inside the
# model — confirm). NOTE(review): source appears truncated inside this call.
crnn.training_model.fit(x=[*train_ds], y=np.zeros(len(train_ds[0])),
                        batch_size=args.batch_size, epochs=args.epochs,
                        validation_data=([*val_ds], [np.zeros(len(val_ds[0]))]),
def main():
    """Train the CRNN on Synth90k; periodically validate and checkpoint."""
    epochs = config['epochs']
    train_batch_size = config['train_batch_size']
    eval_batch_size = config['eval_batch_size']
    lr = config['lr']
    show_interval = config['show_interval']
    valid_interval = config['valid_interval']
    save_interval = config['save_interval']
    cpu_workers = config['cpu_workers']
    reload_checkpoint = config['reload_checkpoint']
    valid_max_iter = config['valid_max_iter']
    img_width = config['img_width']
    img_height = config['img_height']
    data_dir = config['data_dir']

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'device: {device}')

    train_dataset = Synth90kDataset(root_dir=data_dir, mode='train',
                                    img_height=img_height, img_width=img_width)
    valid_dataset = Synth90kDataset(root_dir=data_dir, mode='dev',
                                    img_height=img_height, img_width=img_width)
    train_loader = DataLoader(dataset=train_dataset, batch_size=train_batch_size,
                              shuffle=True, num_workers=cpu_workers,
                              collate_fn=synth90k_collate_fn)
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=eval_batch_size,
                              shuffle=True, num_workers=cpu_workers,
                              collate_fn=synth90k_collate_fn)

    # +1 over the character table — presumably the CTC blank class; confirm in CRNN.
    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(1, img_height, img_width, num_class,
                map_to_seq_hidden=config['map_to_seq_hidden'],
                rnn_hidden=config['rnn_hidden'],
                leaky_relu=config['leaky_relu'])
    if reload_checkpoint:
        crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
    crnn.to(device)

    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    criterion = CTCLoss(reduction='sum')
    criterion.to(device)

    # The save branch reads `evaluation` produced by the validation branch,
    # so every save step must coincide with a validation step.
    assert save_interval % valid_interval == 0
    i = 1
    for epoch in range(1, epochs + 1):
        print(f'epoch: {epoch}')
        tot_train_loss = 0.
        tot_train_count = 0
        for train_data in train_loader:
            loss = train_batch(crnn, train_data, optimizer, criterion, device)
            train_size = train_data[0].size(0)
            tot_train_loss += loss
            tot_train_count += train_size
            if i % show_interval == 0:
                print('train_batch_loss[', i, ']: ', loss / train_size)
            if i % valid_interval == 0:
                evaluation = evaluate(crnn, valid_loader, criterion,
                                      decode_method=config['decode_method'],
                                      beam_size=config['beam_size'])
                print('valid_evaluation: loss={loss}, acc={acc}'.format(
                    **evaluation))
            if i % save_interval == 0:
                prefix = 'crnn'
                loss = evaluation['loss']
                save_model_path = os.path.join(
                    config['checkpoints_dir'],
                    f'{prefix}_{i:06}_loss{loss}.pt')
                torch.save(crnn.state_dict(), save_model_path)
                print('save model at ', save_model_path)
            i += 1
        print('train_loss: ', tot_train_loss / tot_train_count)