def save_ocr():
    """Restore the newest training checkpoint and export the CRNN to ``model/crnn.h5``.

    The network and optimizer are rebuilt the same way they are paired in the
    checkpoint, the latest checkpoint found under ``checkpoints`` is restored
    into them, and the model is then written in HDF5 format. The ``model``
    directory is created when it does not exist yet.
    """
    sample_source = SampleGenerator(10)
    # One output class more than the vocabulary size (presumably the CTC blank).
    network = CRNN(sample_source.vocab_size() + 1)
    adam = tf.keras.optimizers.Adam(1e-4)

    ckpt = tf.train.Checkpoint(model=network, optimizer=adam)
    newest = tf.train.latest_checkpoint('checkpoints')
    ckpt.restore(newest)

    if not exists('model'):
        mkdir("model")
    network.save(join("model", "crnn.h5"))
def start_training(log_dir, optimizer=None, loss="categorical_crossentropy", metrics=None):
    """
    Train the model

    :param log_dir: directory to log the reports
    :param optimizer: optimizer for model (default: a fresh Adam(lr=0.001, decay=1e-6))
    :param loss: loss function for model (default: categorical_crossentropy)
    :param metrics: metrics for model (default: accuracy)
    :return: path of the model checkpoint for the best epoch with regards to validation accuracy
    """
    # Build the default optimizer per call: an instance created in the
    # signature would be shared (together with its state) by every call.
    if optimizer is None:
        optimizer = Adam(lr=0.001, decay=1e-6)
    if metrics is None:
        metrics = ["accuracy"]

    train_data = ImageLoader(os.path.join(args.data_path, "training.csv"))
    val_data = ImageLoader(os.path.join(args.data_path, "validation.csv"))

    # Training Callbacks
    checkpoint_filename = os.path.join(log_dir, "weights.{epoch:02d}.model")
    model_checkpoint_callback = ModelCheckpoint(checkpoint_filename, save_best_only=True,
                                                verbose=1, monitor="val_acc")
    csv_logger_callback = CSVLogger(os.path.join(log_dir, "log.csv"))
    early_stopping_callback = EarlyStopping(monitor='val_loss', min_delta=0, patience=10,
                                            verbose=1, mode="min")

    crnn = CRNN()
    model = crnn.build_model(train_data.get_input_shape())
    model.compile(optimizer, loss, metrics)

    history = model.fit_generator(
        train_data.get_data(),
        steps_per_epoch=train_data.get_num_files() // args.batch_size,
        epochs=args.num_epochs,
        callbacks=[model_checkpoint_callback, csv_logger_callback, early_stopping_callback],
        verbose=1,
        validation_data=val_data.get_data(should_shuffle=False),
        validation_steps=val_data.get_num_files() // args.batch_size,
        max_queue_size=args.batch_size,
        workers=4,
        use_multiprocessing=True
    )

    # Plot the loss and accuracy curves for training and validation
    fig, ax = plt.subplots(2, 1)
    ax[0].plot(history.history['loss'], color='b', label="Training loss")
    ax[0].plot(history.history['val_loss'], color='r', label="validation loss")
    ax[0].legend(loc='best', shadow=True)

    ax[1].plot(history.history['acc'], color='b', label="Training accuracy")
    ax[1].plot(history.history['val_acc'], color='r', label="Validation accuracy")
    ax[1].legend(loc='best', shadow=True)
    plt.savefig(os.path.join(log_dir, "history.png"))
    plt.close(fig)  # release the figure once it is on disk

    # Evaluation on model with best validation accuracy
    best_epoch = int(np.argmax(history.history["val_acc"]))  # 0-based index
    print("Log files: ", log_dir)
    print("Best epoch: ", best_epoch + 1)
    # ModelCheckpoint fills "{epoch}" with the 1-based epoch number, so the
    # file written for history index k is numbered k + 1.
    model_file_name = checkpoint_filename.replace("{epoch:02d}", "{:02d}".format(best_epoch + 1))
    return model_file_name
def construct_model(self, model_name='r1', lr=1e-6):
    """Build the selected model, its loss/train op, and the TF bookkeeping objects.

    Appends the chosen setup to ``<save_path>/setup.txt``, instantiates the
    network for ``model_name`` ('r3', 'alexnet' or 'lstm'), creates an Adam
    train step on the model loss, registers tensors in the 'saved' and
    'placeholder' collections and creates the saver and summary writers.

    :param model_name: architecture key; any other value leaves ``self.model`` untouched
    :param lr: learning rate for the Adam train step (and for the 'r3' model)
    """
    self.model_name = model_name
    tf.summary.image('image_angle_0', self.images, 1)

    setup_file = self.save_path + '/setup.txt'
    with open(setup_file, 'a') as self.out:
        self.out.write('Architecture: ' + str(model_name) + '\n')
        self.out.write('number of channels: ' + str(self.n_channels) + '\n')
        self.out.write('img dimensionality: ' + str(self.img_dimens) + '\n')

    # Note: only 'r3' receives the caller's lr; the other two use a fixed 0.003.
    if model_name == 'r3':
        self.model = Regressor_3(self.images, self.counts, lr=lr)
    elif model_name == 'alexnet':
        self.model = AlexNet(self.images, self.counts, lr=0.003)
    elif model_name == 'lstm':
        self.model = CRNN(self.images, self.counts, lr=0.003)

    self.loss = self.model.loss()
    tf.summary.scalar("loss", self.loss)

    with tf.name_scope('train'):
        adam = tf.train.AdamOptimizer(lr)
        self.train_step = adam.minimize(self.loss)

    # Tensors to fetch again after the graph has been restored.
    tf.add_to_collection(name='saved', value=self.loss)
    tf.add_to_collection(name='saved', value=self.model.pred_counts)
    if self.model_name == 'lstm':
        tf.add_to_collection(name='saved', value=self.reconstruction)

    # Feedable inputs plus the iterator initializer, in a fixed order.
    feedables = (
        self.x,
        self.y,
        self.images,
        self.counts,
        self.model.keep_prob,
        self.model.is_training,
        self.iterator.initializer,
    )
    for feedable in feedables:
        tf.add_to_collection(name='placeholder', value=feedable)

    self.summaries = tf.summary.merge_all()
    self.saver = tf.train.Saver()

    log_root = self.save_path + '/logs/'
    self.writer = tf.summary.FileWriter(log_root + 'train')
    self.writer_test = tf.summary.FileWriter(log_root + 'test')
def trainBatch(net, criterion, optimizer):
    """Run one optimisation step on the next training batch.

    Pulls a batch from the module-level ``train_iter``, copies it into the
    pre-allocated ``image``/``text``/``length`` buffers, and performs a
    forward/backward pass followed by an optimizer step.

    :param net: network to train (now actually used instead of the global ``crnn``)
    :param criterion: loss taking (preds, text, preds_size, length)
    :param optimizer: optimizer stepping the parameters of ``net``
    :return: the batch cost, divided by the batch size
    """
    # next() works on every iterator; the .next() method is Python-2 only.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = net(image)
    # Every sample in the batch gets the same prediction length, preds.size(0).
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    net.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def __init__(self):
    """Create the tokenizer and the CRNN, preferring a saved model when one exists.

    A fresh CRNN is always constructed first; if ``model/crnn.h5`` is present it
    replaces that network with the model loaded from disk (uncompiled).
    """
    self.tokenizer = Tokenizer()
    # NOTE: extra class for blank index
    n_outputs = self.tokenizer.size() + 1
    self.crnn = CRNN(n_outputs)

    saved_model = join('model', 'crnn.h5')
    if exists(saved_model):
        self.crnn = tf.keras.models.load_model(saved_model, compile=False)
def predict_word(self, X):
    """Decode the network output for ``X`` into one string per sample.

    A CRNN is built without dropout, the weights in ``<weight_name>.h5`` are
    loaded, and for each sample the arg-max class of every time step (the first
    two steps are skipped) is taken. Consecutive repeats are merged and class
    indices 0-9 / 10-35 / 36-61 are mapped to digits / upper-case / lower-case
    letters; any other index is dropped.

    :param X: input batch passed to the network's ``test_func``
    :return: list of decoded strings, one per sample
    """
    # Index i of this table is the character emitted for class i (0..61).
    symbols = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

    network = CRNN()
    network.build(dropout=False)
    network.model.load_weights(self.weight_name + ".h5")
    out = network.test_func([X])[0]

    words = []
    for sample_idx in range(out.shape[0]):
        best_path = list(np.argmax(out[sample_idx, 2:], 1))
        # Merge runs of identical consecutive predictions.
        merged = [label for label, _run in itertools.groupby(best_path)]
        chars = [symbols[label] for label in merged if 0 <= label <= 61]
        words.append(''.join(chars))
    return words
def main(opts):
    """Evaluate a CRNN on the image list and print the exact-match accuracy.

    Builds the network for the digit alphabet, optionally restores the weights
    stored at ``opts.model``, runs every sample through ``process_crnn`` in
    evaluation mode and counts predictions equal to the first target string.

    :param opts: options object; reads ``cuda``, ``base_lr`` and ``model``
    """
    import sys
    import traceback

    alphabet = '0123456789.'
    nclass = len(alphabet) + 1  # one extra class on top of the alphabet
    model_name = 'crnn'
    net = CRNN(nclass)
    print("Using {0}".format(model_name))

    if opts.cuda:
        net.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)

    if os.path.exists(opts.model):
        # Load from the same path that was just checked (was a mix of opts/args).
        print('loading model from %s' % opts.model)
        net_utils.load_net(opts.model, net, optimizer)

    # Dataset
    converter = strLabelConverter(alphabet)
    dataset = ImgDataset(
        root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/image',
        csv_root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/train_list.txt',
        transform=None,
        target_transform=converter.encode
    )
    ocrdataloader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, collate_fn=own_collate
    )

    num_count = 0
    net = net.eval()
    ctc_loss = CTCLoss()

    # batch_size is 1, so one pass over the loader visits len(dataset) samples.
    for data in ocrdataloader:
        im_data, gt_boxes, text = data
        if opts.cuda:
            # Only move the data when the network was moved as well.
            im_data = im_data.cuda()

        try:
            pred, target = process_crnn(im_data, gt_boxes, text, net, ctc_loss, converter,
                                        training=False)
            if pred == target[0]:
                num_count += 1
        except Exception:
            # Best effort: report the failing sample and carry on, but let
            # KeyboardInterrupt/SystemExit through.
            traceback.print_exc(file=sys.stdout)

    print('correct/total:%d/%d' % (num_count, len(dataset)))
def val(net, dataset, criterion, max_iter=100):
    """Evaluate ``net`` on up to ``max_iter`` batches of ``dataset`` and print the results.

    Parameters of ``net`` are frozen and the network is put in eval mode. For
    each batch the cost is averaged and the greedy decoding is compared with
    the lower-cased ground truth. A few raw/decoded predictions of the last
    batch are printed, followed by the average loss and the accuracy.

    :param net: network to evaluate (used consistently instead of the global ``crnn``)
    :param dataset: dataset wrapped in a shuffling DataLoader
    :param criterion: loss taking (preds, text, preds_size, length)
    :param max_iter: upper bound on the number of batches evaluated
    """
    print('Start val')

    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    if max_iter <= 0:
        # Nothing to evaluate; avoids a division by zero below.
        print('No validation batches available')
        return

    for _ in range(max_iter):
        # next() works on every iterator; the .next() method is Python-2 only.
        cpu_images, cpu_texts = next(val_iter)
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        # No squeeze(2): it fails when max() has already dropped that dim, and
        # the flattening below yields the same order whether or not a trailing
        # size-1 dim is present.
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    # Show a few samples of the last evaluated batch.
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    # NOTE(review): the denominator assumes every batch is full; a short final
    # batch slightly underestimates the accuracy.
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
def main():
    """Train a CRNN with CTC loss and keep the model with the lowest mean test loss.

    Reads the run configuration from ``hyperparameters()``, trains for
    ``args.epochs`` epochs and, after each epoch, evaluates on the test split.
    Whenever the mean test loss improves, the whole model object is saved to
    ``args.savepath``.

    :raises ValueError: if ``args.optim`` is neither 'adam' nor 'rmsprop'
    """
    args = hyperparameters()

    train_path = os.path.join(args.path, 'train')
    test_path = os.path.join(args.path, 'test')

    # Select GPU or CPU
    device = torch.device(f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu')

    # Load the training dataset
    train_dataset = CRNN_dataset(path=train_path, w=args.img_width, h=args.img_height)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)

    # Load the test dataset
    test_dataset = CRNN_dataset(path=test_path, w=args.img_width, h=args.img_height)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True)

    # Define the model
    model = CRNN(args.img_height, 1, 37, 256)

    # Define the loss
    criterion = nn.CTCLoss()

    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.5, 0.999))
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    else:
        # An assert would vanish under ``python -O`` and leave optimizer unbound.
        raise ValueError("옵티마이저를 다시 입력해주세요. :(")

    model = model.to(device)
    best_test_loss = 100000000

    for i in range(args.epochs):
        print('epochs: ', i)

        print("<----training---->")
        model.train()
        for inputs, targets in tqdm(train_dataloader):
            # TODO: reorder inputs to (batch, channel, h, w) if the dataset
            # yields another layout (hint: a tensor method does this).
            batch_size = inputs.size(0)
            inputs = inputs.to(device)
            target_text, target_length = targets
            target_text, target_length = target_text.to(device), target_length.to(device)

            # CTCLoss expects log-probabilities. log_softmax is idempotent, so
            # this is harmless if the model already normalises its output.
            # Assumes preds is (time, batch, classes) -- TODO confirm.
            preds = model(inputs).log_softmax(2)
            preds_length = Variable(torch.IntTensor([preds.size(0)] * batch_size))

            # CTC = Connectionist Temporal Classification. The labels carry
            # only the character sequence, not positions; one character may
            # span several time steps, so CTC merges the duplicates and uses
            # the negative log of the summed probability of all alignments
            # that yield the ground-truth text.
            loss = criterion(preds, target_text, preds_length, target_length) / batch_size

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("<----evaluation---->")

        # eval() switches layers such as batch-norm and dropout to inference
        # behaviour; train() switches them back.
        model.eval()
        total_loss = 0.0
        with torch.no_grad():
            for inputs, targets in tqdm(test_dataloader):
                batch_size = inputs.size(0)
                inputs = inputs.to(device)
                target_text, target_length = targets
                target_text, target_length = target_text.to(device), target_length.to(device)

                preds = model(inputs).log_softmax(2)
                preds_length = Variable(torch.IntTensor([preds.size(0)] * batch_size))

                batch_loss = criterion(preds, target_text, preds_length, target_length) / batch_size
                # Accumulate over all batches (previously each batch overwrote
                # the last, so only the final batch was compared).
                total_loss += batch_loss.item()

        loss = total_loss / max(len(test_dataloader), 1)
        print("test loss: ", loss)
        if loss < best_test_loss:
            # A lower mean test loss becomes the new best.
            best_test_loss = loss
            # Save the best model to args.savepath
            PATH = args.savepath
            torch.save(model, PATH)
            print("best model 저장 성공")
def get_models():
    """Instantiate the network selected by ``cfg.model``.

    :return: a ``CRNN_STN`` when ``cfg.model`` equals 'CRNN_STN', otherwise a
        plain ``CRNN``; either one is constructed from ``cfg``
    """
    builder = CRNN_STN if cfg.model == 'CRNN_STN' else CRNN
    return builder(cfg)
def main():
    """Train, validate and test the CNN+RNN speech-emotion classifier.

    Parses the command line, builds the ResNet/GRU model, and (in 'train'
    mode) trains for ``--max_epochs`` epochs, saving the weights with the best
    validation accuracy to ``./save_model/best_model.pt``. Those weights are
    evaluated on the test split and the curves are saved and plotted.

    :return: ``None`` when ``--mode`` is not 'train', otherwise ``0``
    """
    parser = argparse.ArgumentParser(description='Speech Emotion Recognition')
    # Help texts state the defaults that are actually configured.
    parser.add_argument('--hidden_size', type=int, default=512,
                        help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size', type=int, default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--n_class', type=int, default=7,
                        help='number of classes of data (default: 7)')
    parser.add_argument('--dropout', type=float, default=0.2,
                        help='dropout rate in training (default: 0.2)')
    # NOTE(review): default=True with store_true means this flag can never
    # switch bidirectionality off; kept for CLI compatibility.
    parser.add_argument('--bidirectional', default=True, action='store_true',
                        help='use bidirectional RNN (default: True)')
    parser.add_argument('--batch_size', type=int, default=8,
                        help='batch size in training (default: 8)')
    parser.add_argument('--workers', type=int, default=4,
                        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=30,
                        help='number of max epochs in training (default: 30)')
    parser.add_argument('--lr', type=float, default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model',
                        help='the name of model')
    parser.add_argument('--mode', type=str, default='train')

    args = parser.parse_args()

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT: defined in loader.py
    # Integer division: a layer size must be an int ("/" yields a float on Python 3).
    feature_size = N_FFT // 2 + 1

    cnn = resnet.ResNet(feature_size, resnet.BasicBlock, [3, 3, 3])
    rnn = RNN.RNN(cnn.feature_size, args.hidden_size, args.n_class,
                  input_dropout_p=args.dropout, dropout_p=args.dropout,
                  n_layers=args.layer_size, bidirectional=args.bidirectional,
                  rnn_cell='gru', variable_lengths=False)

    model = CRNN.CRNN(cnn, rnn)
    model.flatten_parameters()

    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss(reduction='sum').to(device)

    if args.mode != 'train':
        return

    data_download()

    wav_paths = [os.path.join('./dataset/wav', fname)
                 for fname in os.listdir('./dataset/wav')]

    best_acc = 0
    begin_epoch = 0
    save_epoch = None  # epoch whose weights were written to best_model.pt
    best_model_path = './save_model/best_model.pt'
    os.makedirs('./save_model', exist_ok=True)

    # [train loss, train acc, valid loss, valid acc] per epoch
    loss_acc = [[], [], [], []]

    train_batch_num, train_dataset_list, valid_dataset, test_dataset = split_dataset(
        args, wav_paths, dataset_ratio=[0.7, 0.1, 0.2])

    logger.info('start')

    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):
        train_queue = queue.Queue(args.workers * 2)

        train_loader = MultiLoader(train_dataset_list, train_queue, args.batch_size, args.workers)
        train_loader.start()

        train_loss, train_acc = train(model, train_batch_num, train_queue, criterion,
                                      optimizer, device, train_begin, args.workers, 10)
        logger.info('Epoch %d (Training) Loss %0.4f Acc %0.4f' % (epoch, train_loss, train_acc))

        train_loader.join()

        loss_acc[0].append(train_loss)
        loss_acc[1].append(train_acc)

        valid_queue = queue.Queue(args.workers * 2)

        valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
        valid_loader.start()

        eval_loss, eval_acc = evaluate(model, valid_loader, valid_queue, criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f Acc %0.4f' % (epoch, eval_loss, eval_acc))

        valid_loader.join()

        loss_acc[2].append(eval_loss)
        loss_acc[3].append(eval_acc)

        best_model = (eval_acc > best_acc)

        if best_model:
            best_acc = eval_acc
            torch.save(model.state_dict(), best_model_path)
            save_epoch = epoch

    if save_epoch is None:
        # No epoch beat the initial accuracy (or no epoch ran): there is no
        # freshly saved checkpoint, so test the weights currently in memory.
        logger.warning('no epoch improved validation accuracy; testing current weights')
        save_epoch = max(args.max_epochs - 1, begin_epoch)
    else:
        model.load_state_dict(torch.load(best_model_path))

    test_queue = queue.Queue(args.workers * 2)

    test_loader = BaseDataLoader(test_dataset, test_queue, args.batch_size, 0)
    test_loader.start()

    test_loss, test_acc = evaluate(model, test_loader, test_queue, criterion, device)
    logger.info('Epoch %d (Test) Loss %0.4f Acc %0.4f' % (save_epoch, test_loss, test_acc))

    test_loader.join()

    save_data(loss_acc, test_loss, test_acc)
    plot_data(loss_acc, test_loss, test_acc)

    return 0
def modelRun(Path, LoadMatFileName, dataVar, labelVar, numOfClasses, numOfKernels, scaleFactor, BS, checkpoint, SaveMatFileName, numOfEpochs, samplingRate, type, dropoutRate, visibleGPU):
    """Train one EEG network on a .mat dataset, evaluate it and save metrics + Grad-CAM.

    :param Path: directory prefix of the input .mat file
    :param LoadMatFileName: name of the .mat file, appended to ``Path``
    :param dataVar: key of the data array inside the .mat file
    :param labelVar: key of the label array inside the .mat file
    :param numOfClasses: number of target classes
    :param numOfKernels: size of the "kernels" axis the data is reshaped to
    :param scaleFactor: factor the raw data is multiplied with
    :param BS: batch size used for fitting
    :param checkpoint: Keras callbacks passed to ``model.fit``
    :param SaveMatFileName: prefix for the output ``.mat`` / ``.png`` files
    :param numOfEpochs: number of training epochs
    :param samplingRate: sampling rate; half of it is used as the kernel length
    :param type: model name (e.g. "EEGNet", "EnK_DeepConvNet", "CRNN")
    :param dropoutRate: dropout rate handed to the model constructor
    :param visibleGPU: value written to ``CUDA_VISIBLE_DEVICES``
    :raises ValueError: if ``type`` names no known model
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = visibleGPU
    tf.keras.backend.clear_session()
    with tf.Graph().as_default():
        # Load data file
        mat = sio.loadmat(Path + LoadMatFileName)
        labels = mat[labelVar]  # Labels
        data = mat[dataVar]  # Data

        # Variables Initialization
        numOfSamples = data.shape[1]
        numOfChannels = data.shape[0]
        numOfTrials = data.shape[2]
        kernelLength = int(samplingRate / 2)
        EEGNet_F1 = int(2 * numOfChannels)  # twice the number of channels
        EEGNet_F2 = int(4 * EEGNet_F1)  # four times EEGNet_F1

        # Every architecture takes the same arguments; only the constructor
        # and the EnK switch differ.
        common = dict(nb_classes=numOfClasses, Chans=numOfChannels, Samples=numOfSamples,
                      dropoutRate=dropoutRate, kernLength=kernelLength, F1=EEGNet_F1,
                      D=2, F2=EEGNet_F2, dropoutType='Dropout')
        if type == "EnK_EEGNet":
            model = EEGNet(EnK=True, **common)
        elif type == "EEGNet":
            model = EEGNet(EnK=False, **common)
        elif type == "Gau_EEGNet":
            model = Gau_EEGNet(EnK=False, **common)
        elif type == "EnK_ShallowConvNet":
            model = ShallowConvNet(EnK=True, **common)
        elif type == "ShallowConvNet":
            model = ShallowConvNet(EnK=False, **common)
        elif type == "Gau_ShallowConvNet":
            model = Gau_ShallowConvNet(EnK=False, **common)
        elif type == "EnK_DeepConvNet":
            model = DeepConvNet(EnK=True, **common)
        elif type == "DeepConvNet":
            model = DeepConvNet(EnK=False, **common)
        elif type == "Gau_DeepConvNet":
            model = Gau_DeepConvNet(EnK=False, **common)
        elif type == "ConvGau_DeepConvNet":
            model = ConvGau_DeepConvNet(EnK=False, **common)
        elif type == "ConvGau_ShallowConvNet":
            model = ConvGau_ShallowConvNet(EnK=False, **common)
        elif type == "ConvGau_EEGNet":
            model = ConvGau_EEGNet(EnK=False, **common)
        elif type == "CRNN":
            model = CRNN(EnK=False, **common)
        else:
            # Fail here instead of printing and hitting a NameError on `model` later.
            raise ValueError("Error: no such model exist: %r" % (type,))

        # Selecting the class
        f1_avg, pos_label, loss_type, class_weights = getClassInfo(numOfClasses)

        # extract raw data. scale by scaleFactor due to scaling sensitivity in deep learning
        X = data * scaleFactor
        print(X.shape)
        # format should be in (trials, channels, samples)
        # NOTE(review): data is indexed (channels, samples, trials) above, and
        # reshape does not move axes -- a transpose may be intended. Left
        # unchanged because it alters the training data; please confirm.
        X = np.reshape(X, (numOfTrials, numOfChannels, numOfSamples))
        y = np.asarray(labels)
        Y = y.reshape(-1)

        # convert data to NCHW (trials, kernels, channels, samples) format.
        X = X.reshape(numOfTrials, numOfKernels, numOfChannels, numOfSamples)

        print(model.summary())
        model.compile(loss=loss_type, optimizer='adam', metrics=['accuracy'])

        X_train, X_test, Y_train, Y_test = getTrainTestVal(X, Y, testSize=0.2)
        Y_train = oneHot(Y_train, numOfClasses, True)
        Y_test = oneHot(Y_test, numOfClasses, True)

        model.fit(X_train, Y_train, batch_size=BS, epochs=numOfEpochs, verbose=2,
                  class_weight=class_weights, callbacks=checkpoint)

        # time.clock() was removed in Python 3.8; prefer perf_counter and only
        # fall back to clock on interpreters that lack it.
        timer = getattr(time, "perf_counter", None) or time.clock
        tic = timer()
        predicted = model.predict(X_test)
        toc = timer()
        computation_time = toc - tic
        predicted = oneHot(predicted.argmax(axis=-1), numOfClasses, False)

        mse, mae, co_kap_sco, acc, avg_pre_sco, precision, recall, f1_sc = \
            getPerformanceMetricsDL(numOfClasses, pos_label, f1_avg, Y_test, predicted)

        # Grad-Cam method with Test Data (sample index 2, reshaped to a batch
        # of one with a single kernel axis)
        predicted_class = predicted.argmax(axis=-1)
        camTest, heatmapTest = grad_cam(
            model,
            X_test[2, :, :, :].reshape(1, 1, numOfChannels, numOfSamples),
            predicted_class[2], "en_k_layer", numOfClasses)
        cv2.imwrite(SaveMatFileName + 'Test.png', camTest)

        # Grad-Cam method with Train Data (uses the true training labels)
        predicted_class = Y_train.argmax(axis=-1)
        camTrain, heatmapTrain = grad_cam(
            model,
            X_train[2, :, :, :].reshape(1, 1, numOfChannels, numOfSamples),
            predicted_class[2], "en_k_layer", numOfClasses)
        cv2.imwrite(SaveMatFileName + 'Train.png', camTrain)

        print('acc, f1 score, coh kappa is ', acc, ' ', f1_sc, ' ', co_kap_sco)

        #########################################################
        # For classifiers
        # reshape back to (trials, channels, samples)
        X = X.reshape(numOfTrials, numOfChannels, numOfSamples)
        # convert labels to one-hot encodings.
        Y = oneHot(Y, numOfClasses, True)

        # The baseline classifiers are disabled; each list starts with a 0.
        # placeholder and receives the deep model's score as its last entry.
        other_acc, other_mse, other_mae, other_avpc, \
            other_cks, other_pre, other_rec, other_f1, other_ct = \
            [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.]
        other_acc.append(acc)
        other_mse.append(mse)
        other_mae.append(mae)  # was appending mse, so the saved MAE was wrong
        other_avpc.append(avg_pre_sco)
        other_cks.append(co_kap_sco)
        other_pre.append(precision)
        other_rec.append(recall)
        other_f1.append(f1_sc)
        other_ct.append(computation_time)

        print("Classifier ACC for LogRef, LDA, L-SVM, RBF-SVM, NN, Proposed:", other_acc)
        print("Classifier MSE for LogRef, LDA, L-SVM, RBF-SVM, NN, Proposed :", other_mse)
        print(
            "Classifier f1 score for LogRef, LDA, L-SVM, RBF-SVM, NN, Proposed :",
            other_f1)

        sio.savemat(
            SaveMatFileName + '.mat', {
                "acc": other_acc,
                "mse": other_mse,
                "mae": other_mae,
                "avg_pre_recl": other_avpc,
                "cohen_kappa": other_cks,
                "precision": other_pre,
                "recall": other_rec,
                "f1": other_f1,
                "times_prediction": other_ct,
                "pre_labels": predicted.argmax(axis=-1),
                "true_labels": Y_test.argmax(axis=-1),
                "camTest": camTest,
                "camheatmapTest": heatmapTest,
                "camTrain": camTrain,
                "camheatmapTrain": heatmapTrain,
                "camData": X_train[2, :, :, :].reshape(1, 1, numOfChannels, numOfSamples),
                "camLabel": predicted_class
            })
# Fixed hyper-parameters of the networks whose weights are restored below.
HIDDEN_SIZE = 64
KERNEL_SIZE = (20, 5)
STRIDE = (8, 2)
GRU_NUM_LAYERS = 2
NUM_DIRS = 2
NUM_CLASSES = 2

# Second component of the kernel size.
_, kernel_x = KERNEL_SIZE

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The audio file to process is the first command-line argument.
test_audio, sr = torchaudio.load(sys.argv[1])
test_audio = test_audio.squeeze()

# Create models
CRNN_model = CRNN(IN_SIZE, HIDDEN_SIZE, KERNEL_SIZE, STRIDE, GRU_NUM_LAYERS)
attn_layer = AttnMech(HIDDEN_SIZE * NUM_DIRS)
apply_attn = ApplyAttn(HIDDEN_SIZE * 2, NUM_CLASSES)

# Load models: each checkpoint stores its weights under 'model_state_dict'.
for _module, _weights_file in (
        (CRNN_model, 'models/crnn_final'),
        (attn_layer, 'models/attn_final'),
        (apply_attn, 'models/apply_attn_final'),
):
    checkpoint = torch.load(_weights_file, map_location=device)
    _module.load_state_dict(checkpoint['model_state_dict'])

# Create melspec
# NOTE(review): the sample rate is fixed at 48000 and the `sr` returned by
# torchaudio.load above is not used here -- confirm inputs are always 48 kHz.
melspec_test = torchaudio.transforms.MelSpectrogram(
    sample_rate=48000,
    n_mels=N_MELS
).to(device)
def main(args):
    """Train the CRNN with CTC loss on the dataset at ``args.dataroot``.

    :param args: options object; reads ``input_size`` ("HxW" style string),
        ``dataroot``, ``lr``, ``epochs`` and ``batch_size``
    """
    # cuda check
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # argument handling
    input_size = [int(x) for x in args.input_size.split('x')]

    # random seed
    random.seed(random.randint(1, 10000))

    # for faster training ("banchmark" was a typo that silently set nothing useful)
    cudnn.benchmark = True
    cudnn.fastest = True

    # train transformation
    transform = transforms.Compose(
        [Resize(size=(input_size[0], input_size[1])), ToTensor()])

    # train dataset
    data = CrnnDataLoader(data_path=args.dataroot, mode="train", transform=transform)

    # model load; it has to live on the same device as the images fed to it
    nclass = data.cls_len()
    net = CRNN(nclass)
    net.to(device)

    # optimizer
    optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=5e-4)

    # loss_function -> CTCLoss
    criterion = nn.CTCLoss()

    # The loader reshuffles on every pass, so one instance serves all epochs.
    data_loader = torch.utils.data.DataLoader(data, batch_size=args.batch_size,
                                              num_workers=4, shuffle=True)

    epoch = 0
    while epoch < args.epochs:
        iterator = tqdm(data_loader)
        for sample in iterator:
            optimizer.zero_grad()
            imgs = sample["img"].to(device)
            labels = sample["seq"].view(-1)
            label_lens = sample["seq_len"].view(-1)

            preds = net(imgs).cpu()
            # CTCLoss needs one input length per sample. Assumes preds is
            # (time, batch, classes), the layout CTCLoss expects -- TODO confirm.
            batch_size = imgs.size(0)
            pred_lens = torch.IntTensor([preds.size(0)] * batch_size)

            print("preds:", preds.shape)
            print("labels:", labels.shape)
            print("pred_lens", pred_lens.shape)
            print("label_lens", label_lens.shape)

            loss = criterion(preds, labels, pred_lens, label_lens)
            loss.backward()
            optimizer.step()

            # .item() instead of .data[0], which fails on 0-dim loss tensors.
            status = "epoch: {}; loss: {}".format(epoch, loss.item())
            iterator.set_description(status)
        epoch += 1
def train(args):
    """Train the CRNN on generated images with a hand-written CTC training loop.

    Runs epochs 1..999 of 100 iterations each, saves the model to
    ``checkpoints/base_crnn.h5`` every fifth epoch and finally prints the
    network output for one more generated batch.

    :param args: options forwarded to ``FakeImageGenerator``
    """
    import os  # local import: only needed to create the checkpoint directory

    def train_step(x, y):
        """Do one forward/backward pass on batch ``x`` and return the mean CTC loss."""
        # Kept eager (no tf.function) so it behaves exactly like the loop body
        # it replaces; `y` is unused because the labels travel inside `x`.
        with tf.GradientTape() as tape:
            y_pred = model(x["the_input"])
            loss = tf.reduce_mean(ctc_lambda_func((
                y_pred,
                x["the_labels"],
                tf.reshape(x["input_length"], [-1, 1]),
                tf.reshape(x["label_length"], [-1, 1]),
            )))
        # Compute gradients
        trainable_vars = model.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        model.optimizer.apply_gradients(zip(gradients, trainable_vars))
        return loss

    epochs = 1000
    iter_per_epoch = 100

    # NOTE(review): model2/test_func are never used below; the call is kept in
    # case CRNN_model() has side effects -- confirm and remove if not.
    model2, test_func = CRNN_model()
    train_generator = FakeImageGenerator(args).next_gen()

    model = CRNN(ALPHABET)
    model.build()
    model.summary()
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=5))

    os.makedirs("checkpoints", exist_ok=True)  # model.save does not create it

    for epoch in range(1, epochs):
        print(f"Start of epoch {epoch}")
        # stateful_metrics must be a list of names; the bare string "loss"
        # would be read as the single characters 'l', 'o', 's'.
        pb = Progbar(iter_per_epoch, stateful_metrics=["loss"])
        for _ in range(iter_per_epoch):
            x, y = next(train_generator)
            loss = train_step(x, y)
            pb.add(1, values=[('loss', loss)])
        if epoch % 5 == 0:
            model.save("checkpoints/base_crnn.h5")

    x, y = next(train_generator)
    print(model(x["the_input"]))
class Pipeline:
    """TF1 graph-mode pipeline: input dataset, model construction, training and evaluation.

    Expected call order: ``load_data`` -> ``create_batches`` ->
    ``construct_model`` -> ``fit`` (-> ``test``).
    """

    def __init__(self, save_path):
        """Reset the default graph, open a session and create ``save_path`` if missing."""
        tf.reset_default_graph()
        self.save_path = save_path
        self.sess = tf.Session()
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)

    def load_image(self, image, label):
        """Convert one image to float32, resize it to ``self.img_dimens`` and standardise it."""
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize_images(image, [self.img_dimens[0], self.img_dimens[1]])
        image = tf.image.per_image_standardization(image)
        return image, label

    def load_data(self, img_dimension=(5, 5), n_channels=3):
        """Create the input placeholders and the mapped ``tf.data`` dataset.

        :param img_dimension: (height, width) of the images fed in
        :param n_channels: number of image channels (now also used for the
            placeholder shape, which previously hard-coded 3)
        """
        self.x = tf.placeholder('float32',
                                shape=[None, img_dimension[0], img_dimension[1], n_channels],
                                name='input_images')
        self.y = tf.placeholder('int32', shape=[None, 1], name='counts_images')
        self.img_dimens = img_dimension
        self.n_channels = n_channels
        # Use half of the available cores for the preprocessing map.
        n_process = int(multiprocessing.cpu_count() / 2)
        self.dataset_img = tf.data.Dataset.from_tensor_slices((self.x, self.y))
        self.dataset_img = self.dataset_img.map(self.load_image, num_parallel_calls=n_process)

    def create_batches(self, batch_size=32):
        """Batch the dataset and expose the iterator outputs as ``images`` / ``counts``."""
        batches = self.dataset_img.batch(batch_size)
        batches = batches.prefetch(buffer_size=1)
        self.iterator = batches.make_initializable_iterator()
        self.images, self.counts = self.iterator.get_next()

    def construct_model(self, model_name='r1', lr=1e-6):
        """Build the selected model, its train op, collections, saver and writers.

        :param model_name: one of 'r3', 'alexnet', 'lstm'
        :param lr: learning rate for the Adam train step (and for the 'r3' model)
        :raises ValueError: if ``model_name`` is not supported (this includes
            the default 'r1', for which no model is defined here)
        """
        self.model_name = model_name
        tf.summary.image('image_angle_0', self.images, 1)
        with open(self.save_path + '/setup.txt', 'a') as self.out:
            self.out.write('Architecture: ' + str(model_name) + '\n')
            self.out.write('number of channels: ' + str(self.n_channels) + '\n')
            self.out.write('img dimensionality: ' + str(self.img_dimens) + '\n')

        if model_name == 'r3':
            self.model = Regressor_3(self.images, self.counts, lr=lr)
        elif model_name == 'alexnet':
            self.model = AlexNet(self.images, self.counts, lr=0.003)
        elif model_name == 'lstm':
            self.model = CRNN(self.images, self.counts, lr=0.003)
        else:
            # Previously this fell through and failed on ``self.model`` below
            # with an AttributeError that did not name the cause.
            raise ValueError(
                "unsupported model_name %r; expected 'r3', 'alexnet' or 'lstm'" % (model_name,))

        self.loss = self.model.loss()
        tf.summary.scalar("loss", self.loss)

        with tf.name_scope('train'):
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)

        tf.add_to_collection(name='saved', value=self.loss)
        tf.add_to_collection(name='saved', value=self.model.pred_counts)
        if self.model_name == 'lstm':
            # NOTE(review): self.reconstruction is not assigned anywhere in
            # this class -- confirm where it is expected to come from.
            tf.add_to_collection(name='saved', value=self.reconstruction)
        tf.add_to_collection(name='placeholder', value=self.x)
        tf.add_to_collection(name='placeholder', value=self.y)
        tf.add_to_collection(name='placeholder', value=self.images)
        tf.add_to_collection(name='placeholder', value=self.counts)
        tf.add_to_collection(name='placeholder', value=self.model.keep_prob)
        tf.add_to_collection(name='placeholder', value=self.model.is_training)
        tf.add_to_collection(name='placeholder', value=self.iterator.initializer)

        self.summaries = tf.summary.merge_all()
        self.saver = tf.train.Saver()

        self.writer = tf.summary.FileWriter(self.save_path + '/logs/train')
        self.writer_test = tf.summary.FileWriter(self.save_path + '/logs/test')

    def train(self, x_train, y_train, keep_prob=0.5):
        """Run one training pass over the data and return the mean batch loss."""
        epoch_train_loss = []
        self.sess.run(self.iterator.initializer, feed_dict={self.x: x_train, self.y: y_train})
        try:
            # Loop until the iterator signals the end of the data.
            while True:
                train_loss, _, _, _, sm = self.sess.run(
                    [self.loss, self.images, self.counts, self.train_step, self.summaries],
                    feed_dict={self.model.keep_prob: keep_prob, self.model.is_training: True})
                epoch_train_loss.append(train_loss)
                self.writer.add_summary(sm, self.it)
                self.it += 1
        except tf.errors.OutOfRangeError:
            pass
        return np.mean(epoch_train_loss)

    def validation(self, x_val, y_val):
        """Run one pass over the validation data and return the mean batch loss."""
        epoch_val_loss = []
        self.sess.run(self.iterator.initializer, feed_dict={self.x: x_val, self.y: y_val})
        try:
            while True:
                # Batches are not reused here
                val_loss, _, _, sm = self.sess.run(
                    [self.loss, self.images, self.counts, self.summaries],
                    feed_dict={self.model.keep_prob: 1, self.model.is_training: False})
                epoch_val_loss.append(val_loss)
                self.writer_test.add_summary(sm, self.it)
                self.it += 1
        except tf.errors.OutOfRangeError:
            pass
        return np.mean(epoch_val_loss)

    def test(self, x_test, y_test):
        """Return the mean batch loss on the test data and log the best epoch to setup.txt."""
        epoch_test_loss = []
        self.sess.run(self.iterator.initializer, feed_dict={self.x: x_test, self.y: y_test})
        try:
            while True:
                # Batches are not reused here
                test_loss, _, _ = self.sess.run(
                    [self.loss, self.images, self.counts],
                    feed_dict={self.model.keep_prob: 1, self.model.is_training: False})
                epoch_test_loss.append(test_loss)
        except tf.errors.OutOfRangeError:
            pass
        with open(self.save_path + '/setup.txt', 'a') as self.out:
            self.out.write('best model found in iter: ' + str(self.best_model_epoch) + '\n')
        return np.mean(epoch_test_loss)

    def fit(self, x_train, y_train, x_val, y_val, n_epochs=10, stop_step=20, keep_prob=0.5):
        """Train with validation every second epoch, checkpointing the best model.

        Training stops early once ``stop_step`` validations in a row failed to
        improve the best validation loss.
        """
        # init variables
        self.sess.run(tf.local_variables_initializer())
        self.sess.run(tf.global_variables_initializer())

        self.writer.add_graph(self.sess.graph)

        # Variable for early stopping
        best_loss = math.inf
        nochanges = 0  # count to break the train

        # Global train iterations
        self.it = 0
        self.best_model_epoch = n_epochs
        best_model_path = self.save_path + '/model/best_model'

        for epoch in range(n_epochs):
            train_loss = self.train(x_train, y_train, keep_prob)

            if epoch % 2 == 0:
                val_loss = self.validation(x_val, y_val)
                print('Epoch: {0} Train Loss: {1} Val Loss: {2}'.format(epoch, train_loss, val_loss))

                if val_loss < best_loss:
                    print('saving best model on epoch {0}'.format(epoch))
                    best_loss = val_loss
                    nochanges = 0
                    if os.path.exists(best_model_path):
                        # Remove the stale path (the builtin `dir` was passed
                        # here before, which raises a TypeError).
                        shutil.rmtree(best_model_path, ignore_errors=True)
                    self.best_model_epoch = epoch
                    self.saver.save(self.sess, best_model_path)
                else:
                    nochanges += 1

                if nochanges == stop_step:
                    print('Early stopping at epoch: {}'.format(self.best_model_epoch))
                    break
converter = utils.strLabelConverter(opt.alphabet)
criterion = torch.nn.CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    """Initialise ``m`` in place according to its class name.

    Layers whose class name contains 'Conv' get N(0, 0.02) weights; otherwise
    layers whose name contains 'BatchNorm' get N(1, 0.02) weights and a zero
    bias. All other modules are left untouched.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# From here on the name ``crnn`` refers to the network, not the module.
crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
if opt.pretrained != '':
    print('loading pretrained model from %s' % opt.pretrained)
    pretrained_state = torch.load(opt.pretrained)
    crnn.load_state_dict(pretrained_state)
print(crnn)

# Pre-allocated buffers that the train/validation steps copy each batch into.
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn.cuda()
    gpu_ids = range(opt.ngpu)
    crnn = torch.nn.DataParallel(crnn, device_ids=gpu_ids)
    image = image.cuda()
    criterion = criterion.cuda()
def main():
    """Train a CRNN with CTC loss and save the weights of the best epoch.

    Hyper-parameters come from ``hyperparameters()``. The train and test
    splits are read from ``<args.path>/train`` and ``<args.path>/test``.
    After every epoch the summed test loss is compared with the best value
    seen so far; on improvement the model ``state_dict`` is written to
    ``args.savepath``.

    Raises:
        ValueError: if ``args.optim`` is neither ``'adam'`` nor ``'rmsprop'``.
    """
    args = hyperparameters()

    train_path = os.path.join(args.path, 'train')
    test_path = os.path.join(args.path, 'test')

    # Select GPU or CPU.
    device = torch.device(
        f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu')

    # Load the training dataset.
    train_dataset = CRNN_dataset(path=train_path,
                                 w=args.img_width,
                                 h=args.img_height)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True)

    # Load the test dataset.
    test_dataset = CRNN_dataset(path=test_path,
                                w=args.img_width,
                                h=args.img_height)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=True)

    # Define the model.
    model = CRNN(nc=1, nclass=37, nh=256, imgH=args.img_height)

    # Define the loss.
    criterion = nn.CTCLoss()

    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               betas=(0.5, 0.999))
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    else:
        # A real exception rather than ``assert``: asserts vanish under -O.
        raise ValueError("옵티마이저를 다시 입력해주세요. :(")

    model = model.to(device)
    best_test_loss = float('inf')

    for i in range(args.epochs):
        print('epochs: ', i)

        print("<----training---->")
        model.train()
        for inputs, targets in tqdm(train_dataloader):
            # Swap the last two axes; the original note says the model
            # expects (batch, channel, h, w).
            inputs = inputs.permute(0, 1, 3, 2)
            batch_size = inputs.size(0)
            inputs = inputs.to(device)
            target_text, target_length = targets
            target_text, target_length = target_text.to(
                device), target_length.to(device)
            preds = model(inputs)
            preds = preds.log_softmax(2)
            # Every sample uses preds.size(0) as its input length, which
            # assumes preds is time-major (T, N, C) -- TODO confirm against
            # CRNN.forward.
            preds_length = torch.IntTensor([preds.size(0)] * batch_size)

            # CTC loss trains on unsegmented sequences: only the ordered
            # labels are known, not where each label sits in the input. It is
            # widely used for OCR and speech recognition. Inputs: predictions,
            # targets, prediction lengths, target lengths.
            # NOTE(review): nn.CTCLoss defaults to reduction='mean', so the
            # extra division by batch_size normalises a second time.
            loss = criterion(preds, target_text, preds_length,
                             target_length) / batch_size

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("<----evaluation---->")
        # train() / eval() switch layers such as Dropout and
        # BatchNormalization between their training and inference behaviour.
        model.eval()
        test_loss = 0.0

        # No gradients are needed for evaluation; without no_grad every batch
        # would keep its autograd graph alive through the running sum.
        with torch.no_grad():
            for inputs, targets in tqdm(test_dataloader):
                inputs = inputs.permute(0, 1, 3, 2)
                batch_size = inputs.size(0)
                inputs = inputs.to(device)
                target_text, target_length = targets
                target_text, target_length = target_text.to(
                    device), target_length.to(device)
                preds = model(inputs)
                preds = preds.log_softmax(2)
                preds_length = torch.IntTensor([preds.size(0)] * batch_size)

                batch_loss = criterion(preds, target_text, preds_length,
                                       target_length) / batch_size
                test_loss += batch_loss.item()

        # max(..., 1) keeps an empty test loader from dividing by zero.
        print("test loss: ", test_loss / max(len(test_dataloader), 1))
        if test_loss < best_test_loss:
            # A lower loss than the best so far becomes the new best.
            best_test_loss = test_loss
            # Save the best model to args.savepath.
            torch.save(model.state_dict(), args.savepath)
            print("best model 저장 성공")
def main():
    """Train a CRNN with CTC loss and save the weights of the best epoch.

    Hyper-parameters come from ``hyperparameters()``. The train and test
    splits are read from ``<args.path>/train`` and ``<args.path>/test``.
    After every epoch the summed test loss is printed and compared with the
    best value seen so far; on improvement the model ``state_dict`` is
    written to ``args.savepath``.

    Raises:
        ValueError: if ``args.optim`` is neither ``'adam'`` nor ``'rmsprop'``.
    """
    args = hyperparameters()

    train_path = os.path.join(args.path, 'train')
    test_path = os.path.join(args.path, 'test')

    # Select GPU or CPU.
    device = torch.device(
        f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu')

    # Load the training dataset.
    train_dataset = CRNN_dataset(path=train_path,
                                 w=args.img_width,
                                 h=args.img_height)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True)

    # Load the test dataset.
    test_dataset = CRNN_dataset(path=test_path,
                                w=args.img_width,
                                h=args.img_height)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=True)

    # Define the model; per the original comment the trailing arguments are
    # nc=1, nclass=37, nh=256.
    model = CRNN(args.img_height, 1, 37, 256)

    # Define the loss.
    criterion = nn.CTCLoss()

    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               betas=(0.5, 0.999))
    elif args.optim == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    else:
        # A real exception rather than ``assert``: asserts vanish under -O.
        raise ValueError("옵티마이저를 다시 입력해주세요. :(")

    model = model.to(device)
    best_test_loss = float('inf')

    for i in range(args.epochs):
        print('epochs: ', i)

        print("<----training---->")
        model.train()
        for inputs, targets in tqdm(train_dataloader):
            # Swap the last two axes; the original note says the model
            # expects (batch, channel, h, w).
            inputs = inputs.permute(0, 1, 3, 2)
            batch_size = inputs.size(0)
            inputs = inputs.to(device)
            target_text, target_length = targets
            target_text, target_length = target_text.to(
                device), target_length.to(device)
            preds = model(inputs)
            preds = F.log_softmax(preds, dim=-1)
            # Every sample uses preds.size(0) as its input length, which
            # assumes preds is time-major (T, N, C) -- TODO confirm against
            # CRNN.forward.
            preds_length = torch.IntTensor([preds.size(0)] * batch_size)

            # CTC (Connectionist Temporal Classification) trains without an
            # explicit alignment between input frames and target symbols. It
            # sums the likelihood of every path that collapses to the label
            # sequence, using dynamic programming to keep that tractable.
            # The loss consumes the per-step log-probabilities from the
            # recurrent layers together with the targets and both lengths.
            # NOTE(review): nn.CTCLoss defaults to reduction='mean', so the
            # extra division by batch_size normalises a second time.
            loss = criterion(preds, target_text, preds_length,
                             target_length) / batch_size

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("\n<----evaluation---->")
        # train() puts the model in training mode, eval() in test mode.
        model.eval()
        test_loss = 0.0

        # No gradients are needed for evaluation; without no_grad every batch
        # would keep its autograd graph alive through the running sum.
        with torch.no_grad():
            for inputs, targets in tqdm(test_dataloader):
                inputs = inputs.permute(0, 1, 3, 2)
                batch_size = inputs.size(0)
                inputs = inputs.to(device)
                target_text, target_length = targets
                target_text, target_length = target_text.to(
                    device), target_length.to(device)
                preds = model(inputs)
                preds = F.log_softmax(preds, dim=-1)
                preds_length = torch.IntTensor([preds.size(0)] * batch_size)

                # Accumulate a plain float so the printed and stored best
                # loss do not hold on to tensors.
                batch_loss = criterion(preds, target_text, preds_length,
                                       target_length) / batch_size
                test_loss += batch_loss.item()

        print("\ntest loss: ", test_loss)
        if test_loss < best_test_loss:
            # A lower loss than the best so far becomes the new best.
            best_test_loss = test_loss
            # Save the best model to args.savepath.
            torch.save(model.state_dict(), args.savepath)
            print("best model 저장 성공")
def main():
    """Train, validate and test the CNN+RNN spoken-language classifier.

    Parses the command line, builds ``CRNN(cnn, rnn)`` wrapped in
    ``nn.DataParallel``, and returns early unless ``--mode train``.
    Otherwise it trains for ``--max_epochs`` epochs, saving the weights with
    the lowest validation loss to ``./save_model/best_model.pt``. It then
    evaluates those weights on the test split and saves and plots the
    collected metrics.

    Returns:
        0 after a completed training run; None when ``--mode`` is not
        ``'train'``.
    """
    import os  # local import: only needed to create the checkpoint directory

    parser = argparse.ArgumentParser(
        description='Spoken Language Identification')
    # Help texts below were corrected to match the actual defaults.
    parser.add_argument('--hidden_size',
                        type=int,
                        default=512,
                        help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size',
                        type=int,
                        default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--n_class',
                        type=int,
                        default=2,
                        help='number of classes of data (default: 2)')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.2,
                        help='dropout rate in training (default: 0.2)')
    # NOTE(review): store_true combined with default=True means this flag can
    # never switch bidirectionality off; kept as-is to preserve the CLI.
    parser.add_argument('--bidirectional',
                        default=True,
                        action='store_true',
                        help='use bidirectional RNN (default: True)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=2,
                        help='batch size in training (default: 2)')
    parser.add_argument(
        '--workers',
        type=int,
        default=4,
        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=10,
                        help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no_cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name',
                        type=str,
                        default='model',
                        help='the name of model')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument('--nn_type',
                        type=str,
                        default='crnn',
                        help='type of neural networks')

    args = parser.parse_args()

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # Floor division: the value is handed to CNN as a size, and ``/`` would
    # turn it into a float under Python 3.
    feature_size = N_FFT // 2 + 1

    cnn = CNN.CNN(feature_size)
    rnn = RNN.RNN(cnn.feature_size,
                  args.hidden_size,
                  args.n_class,
                  input_dropout_p=args.dropout,
                  dropout_p=args.dropout,
                  n_layers=args.layer_size,
                  bidirectional=args.bidirectional,
                  rnn_cell='gru',
                  variable_lengths=False)

    model = CRNN.CRNN(cnn, rnn)
    model.flatten_parameters()

    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss(reduction='sum').to(device)

    # NOTE(review): source formatting was lost; this is read as a bare early
    # return followed by an unconditional download_data() call.
    if args.mode != 'train':
        return

    download_data()

    kor_db_list = []
    search('dataset/train/train_data', kor_db_list)

    # ``np.unicode`` was only an alias of ``str`` and no longer exists in
    # current NumPy releases.
    train_wav_paths = np.loadtxt("dataset/TRAIN_list.csv",
                                 delimiter=',',
                                 dtype=str)
    valid_wav_paths = np.loadtxt("dataset/TEST_developmentset_list.csv",
                                 delimiter=',',
                                 dtype=str)
    test_wav_paths = np.loadtxt("dataset/TEST_coreset_list.csv",
                                delimiter=',',
                                dtype=str)

    train_wav_paths = [
        "dataset/TIMIT/{}.WAV".format(x) for x in train_wav_paths
    ]
    valid_wav_paths = [
        "dataset/TIMIT/{}.WAV".format(x) for x in valid_wav_paths
    ]
    test_wav_paths = [
        "dataset/TIMIT/{}.WAV".format(x) for x in test_wav_paths
    ]

    min_loss = 100000
    begin_epoch = 0
    # Index 0/1: train loss/accuracy, index 2/3: validation loss/accuracy.
    loss_acc = [[], [], [], []]

    # Track whether a checkpoint was written in this run, so the test phase
    # never reads an unbound epoch or a missing/stale weights file.
    best_model_path = './save_model/best_model.pt'
    save_epoch = begin_epoch - 1
    best_saved = False

    train_batch_num, train_dataset_list, valid_dataset, test_dataset = \
        split_dataset(args, train_wav_paths, valid_wav_paths, test_wav_paths,
                      kor_db_list)

    logger.info('start')

    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):
        train_queue = queue.Queue(args.workers * 2)

        train_loader = MultiLoader(train_dataset_list, train_queue,
                                   args.batch_size, args.workers,
                                   args.nn_type)
        train_loader.start()

        train_loss, train_acc = train(model, train_batch_num, train_queue,
                                      criterion, optimizer, device,
                                      train_begin, args.workers, 10)
        logger.info('Epoch %d (Training) Loss %0.4f Acc %0.4f' %
                    (epoch, train_loss, train_acc))

        train_loader.join()

        loss_acc[0].append(train_loss)
        loss_acc[1].append(train_acc)

        valid_queue = queue.Queue(args.workers * 2)

        valid_loader = BaseDataLoader(valid_dataset, valid_queue,
                                      args.batch_size, 0, args.nn_type)
        valid_loader.start()

        eval_loss, eval_acc = evaluate(model, valid_loader, valid_queue,
                                       criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f Acc %0.4f' %
                    (epoch, eval_loss, eval_acc))

        valid_loader.join()

        loss_acc[2].append(eval_loss)
        loss_acc[3].append(eval_acc)

        if eval_loss < min_loss:
            min_loss = eval_loss
            os.makedirs(os.path.dirname(best_model_path), exist_ok=True)
            torch.save(model.state_dict(), best_model_path)
            save_epoch = epoch
            best_saved = True

    if best_saved:
        model.load_state_dict(torch.load(best_model_path))
    else:
        logger.info('no checkpoint was saved in this run; '
                    'testing the in-memory weights')

    test_queue = queue.Queue(args.workers * 2)

    test_loader = BaseDataLoader(test_dataset, test_queue, args.batch_size, 0,
                                 args.nn_type)
    test_loader.start()

    confusion_matrix = torch.zeros((args.n_class, args.n_class))
    test_loss, test_acc = evaluate(model, test_loader, test_queue, criterion,
                                   device, confusion_matrix)
    logger.info('Epoch %d (Test) Loss %0.4f Acc %0.4f' %
                (save_epoch, test_loss, test_acc))

    test_loader.join()

    save_data(loss_acc, test_loss, test_acc,
              confusion_matrix.to('cpu').numpy())
    plot_data(loss_acc, test_loss, test_acc)

    return 0
def train_net(args):
    """Train the CRNN on the IC2015 dataset with per-epoch checkpointing.

    Seeds the Python, NumPy and torch RNGs with a fixed value, builds or
    restores the model and optimizer, then alternates one epoch of training
    with one pass over the test split. Scalars go to a ``SummaryWriter`` and
    a checkpoint is saved after every epoch.

    Args:
        args: namespace read for ``checkpoint``, ``optimizer``, ``lr``,
            ``mom``, ``weight_decay``, ``beta1``, ``batch_size`` and
            ``end_epoch``.
    """
    manual_seed = 7
    random.seed(manual_seed)
    np.random.seed(manual_seed)
    torch.manual_seed(manual_seed)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_loss = float('inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Custom weights initialization called on crnn.
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    # Initialize / load checkpoint.
    if checkpoint is None:
        model = CRNN(imgH, nc, nclass, nh)
        model.apply(weights_init)
        # model = nn.DataParallel(model)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=args.lr,
                                         betas=(args.beta1, 0.999))
    else:
        # SECURITY: torch.load unpickles whole objects here (model and
        # optimizer), so only load checkpoints from a trusted source.
        # NOTE(review): best_loss is not restored, so the first resumed
        # epoch always counts as an improvement -- confirm this is intended.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    logger = utils.get_logger()

    # Move to GPU, if available.
    model = model.to(device)

    # Loss function.
    criterion = nn.CTCLoss(reduction='mean').to(device)

    # Custom dataloaders.
    train_dataset = data_gen.Ic2015Dataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    test_dataset = data_gen.Ic2015Dataset('test')
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=num_workers)

    # try/finally so buffered events are flushed to disk and the writer is
    # released even when an epoch raises.
    try:
        for epoch in range(start_epoch, args.end_epoch):
            # One epoch's training.
            train_loss, train_acc = train(train_loader=train_loader,
                                          model=model,
                                          criterion=criterion,
                                          optimizer=optimizer,
                                          epoch=epoch,
                                          logger=logger)
            effective_lr = utils.get_learning_rate(optimizer)
            print('\nCurrent effective learning rate: {}\n'.format(
                effective_lr))

            writer.add_scalar('Learning_Rate', effective_lr, epoch)
            writer.add_scalar('Train_Loss', train_loss, epoch)
            writer.add_scalar('Train_Accuracy', train_acc, epoch)

            # One epoch's validation.
            test_loss, test_acc = test(test_loader=test_loader,
                                       model=model,
                                       criterion=criterion,
                                       logger=logger)
            writer.add_scalar('Test_Loss', test_loss, epoch)
            writer.add_scalar('Test_Accuracy', test_acc, epoch)

            # Check if there was an improvement.
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            # Save checkpoint.
            utils.save_checkpoint(epoch, epochs_since_improvement, model,
                                  optimizer, best_loss, is_best)
    finally:
        writer.close()
def train_net(args):
    """Train the CRNN on the MJSynth dataset with per-epoch checkpointing.

    Seeds the Python, NumPy and torch RNGs with a fixed value, builds or
    restores the model and optimizer, then alternates one epoch of training
    with one pass over the validation split. Scalars go to a
    ``SummaryWriter`` and a checkpoint is saved after every epoch.

    Args:
        args: namespace read for ``checkpoint``, ``lr``, ``batch_size`` and
            ``end_epoch``.
    """
    manual_seed = 7
    random.seed(manual_seed)
    np.random.seed(manual_seed)
    torch.manual_seed(manual_seed)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_loss = float('inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Custom weights initialization called on crnn.
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    # Initialize / load checkpoint.
    if checkpoint is None:
        model = CRNN(imgH, num_channels, num_classes, num_hidden)
        model.apply(weights_init)
        # model = nn.DataParallel(model)

        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-09)
        # optimizer = CRNNOptimizer(
        #     torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        #     args.k,
        #     num_hidden,
        #     args.warmup_steps)
    else:
        # SECURITY: torch.load unpickles whole objects here (model and
        # optimizer), so only load checkpoints from a trusted source.
        # NOTE(review): best_loss is not restored, so the first resumed
        # epoch always counts as an improvement -- confirm this is intended.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    logger = utils.get_logger()

    # Move to GPU, if available.
    model = model.to(device)

    # Loss function.
    criterion = nn.CTCLoss(reduction='mean').to(device)

    # Custom dataloaders.
    train_dataset = data_gen.MJSynthDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    valid_dataset = data_gen.MJSynthDataset('val')
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=num_workers)

    # try/finally so buffered events are flushed to disk and the writer is
    # released even when an epoch raises.
    try:
        for epoch in range(start_epoch, args.end_epoch):
            # One epoch's training.
            train_loss, train_acc = train(train_loader=train_loader,
                                          model=model,
                                          criterion=criterion,
                                          optimizer=optimizer,
                                          epoch=epoch,
                                          logger=logger)
            writer.add_scalar('Train_Loss', train_loss, epoch)
            writer.add_scalar('Train_Accuracy', train_acc, epoch)

            # One epoch's validation.
            valid_loss, valid_acc = valid(valid_loader=valid_loader,
                                          model=model,
                                          criterion=criterion,
                                          logger=logger)
            writer.add_scalar('Validation_Loss', valid_loss, epoch)
            writer.add_scalar('Validation_Accuracy', valid_acc, epoch)

            # Check if there was an improvement.
            is_best = valid_loss < best_loss
            best_loss = min(valid_loss, best_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            # Save checkpoint.
            utils.save_checkpoint(epoch, epochs_since_improvement, model,
                                  optimizer, best_loss, is_best)
    finally:
        writer.close()
def main(opts):
    """Train the digit-recognition CRNN with CTC loss, step by step.

    Builds ``CRNN`` for the alphabet ``'0123456789.'`` plus one extra class,
    optionally resumes from ``opts.model``, then runs optimisation steps up
    to ``opts.max_iters``. The data loader is restarted whenever it is
    exhausted, and a checkpoint is written every ``batch_per_epoch`` steps.

    Args:
        opts: options object read for ``cuda``, ``base_lr``, ``model``,
            ``batch_size``, ``max_iters`` and ``save_path``.
    """
    import sys
    import traceback

    alphabet = '0123456789.'
    nclass = len(alphabet) + 1
    model_name = 'crnn'
    net = CRNN(nclass)
    print("Using {0}".format(model_name))

    if opts.cuda:
        net.cuda()
    learning_rate = opts.base_lr
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=opts.base_lr,
                                 weight_decay=weight_decay)

    # Default to a fresh start. The original reset step_start to 0 after
    # loading, which silently discarded the resumed step.
    step_start = 0
    if os.path.exists(opts.model):
        # Use the ``opts`` parameter; the original read ``args``, which is
        # not defined in this function.
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net,
                                                       optimizer)

    # Dataset.
    converter = strLabelConverter(alphabet)
    dataset = ImgDataset(
        root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/image',
        csv_root='/home/yangna/deepblue/OCR/mech_demo2/dataset/imgs/train_list.txt',
        transform=None,
        target_transform=converter.encode
    )
    ocrdataloader = torch.utils.data.DataLoader(
        dataset, batch_size=opts.batch_size, shuffle=True,
        collate_fn=own_collate
    )

    net.train()
    ctc_loss = CTCLoss()

    # Create the iterator up front instead of relying on a bare ``except``
    # catching the NameError of an undefined ``data_iter``.
    data_iter = iter(ocrdataloader)

    for step in range(step_start, opts.max_iters):
        try:
            data = next(data_iter)
        except StopIteration:
            # Loader exhausted: start the next pass over the data.
            data_iter = iter(ocrdataloader)
            data = next(data_iter)

        im_data, gt_boxes, text = data
        if opts.cuda:
            # Only move inputs to the GPU when the model lives there too.
            im_data = im_data.cuda()

        loss = None
        try:
            loss = process_crnn(im_data, gt_boxes, text, net, ctc_loss,
                                converter, training=True)
            # optimizer.zero_grad() already clears every net parameter.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        except Exception:
            # Best effort: report the failing step and keep training.
            traceback.print_exc(file=sys.stdout)

        if step % disp_interval == 0 and loss is not None:
            try:
                print('step:%d || loss %.4f' % (step, loss))
            except Exception:
                traceback.print_exc(file=sys.stdout)

        if step > step_start and (step % batch_per_epoch == 0):
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {'step': step,
                     'learning_rate': learning_rate,
                     'state_dict': net.state_dict(),
                     'optimizer': optimizer.state_dict()}
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))
def train_and_predict(x_train, y_train, x_val, y_val, x_test):
    """Fit a CRNN classifier and return averaged test-set predictions.

    The network is trained for 15 epochs with Adam and a step-decayed
    learning rate. After each epoch it is validated and the logger monitors
    ``val_f1``. The states kept by the logger are then each used to predict
    the test set, and those predictions are averaged.

    Args:
        x_train (np.ndarray): Training instances.
        y_train (np.ndarray): Training labels.
        x_val (np.ndarray): Validation instances.
        y_val (np.ndarray): Validation labels.
        x_test (np.ndarray): Test instances.

    Returns:
        The predictions of the classifier.
    """
    _ensure_reproducibility()

    # Prefer the first GPU, fall back to the CPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Targets become float tensors; inputs additionally swap axes 1 and 2.
    y_train = torch.FloatTensor(y_train)
    y_val = torch.FloatTensor(y_val)
    x_train = torch.FloatTensor(x_train).transpose(1, 2)
    x_val = torch.FloatTensor(x_val).transpose(1, 2)
    x_test = torch.FloatTensor(x_test).transpose(1, 2)

    # Network dimensions are read off the training tensors.
    net = CRNN(y_train.shape[-1], x_train.shape[1]).to(device)

    # Binary cross-entropy loss, Adam, and a decay of the learning rate by
    # 0.9 every 2 epochs.
    criterion = BCELoss()
    optimizer = Adam(net.parameters(), lr=0.01)
    scheduler = StepLR(optimizer, step_size=2, gamma=0.9)

    # Batched iteration over the three splits.
    train_batches = DataLoader(TensorDataset(x_train, y_train),
                               batch_size=128,
                               shuffle=True)
    val_batches = DataLoader(TensorDataset(x_val, y_val), batch_size=512)
    test_batches = DataLoader(TensorDataset(x_test), batch_size=512)

    # The logger records performance and keeps the top 3 model states.
    logger = Logger(net, n_states=3)

    epoch = 0
    while epoch < 15:
        # Training pass with a progress bar.
        progress = tqdm(train_batches)
        progress.set_description('Epoch %d' % epoch)
        train(net.train(), criterion, optimizer, progress, logger, device)

        # Validation pass; the F1 score is the monitored metric.
        validate(net.eval(), criterion, val_batches, logger, device)
        logger.monitor('val_f1')

        # Report results, then advance the learning-rate schedule.
        logger.print_results()
        scheduler.step()
        epoch += 1

    # Ensemble: average the predictions of every stored model state.
    ensemble = []
    for saved_state in logger.state_dicts:
        net.load_state_dict(saved_state)
        flat_pred = _flatten(predict(net, test_batches, device))
        ensemble.append(flat_pred)
    stacked = torch.stack(ensemble)
    return stacked.mean(dim=0).cpu().numpy()
num_workers=1, pin_memory=True) ### Create melspecs # With augmentations melspec_train = nn.Sequential( torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_mels=N_MELS), torchaudio.transforms.FrequencyMasking(freq_mask_param=15), torchaudio.transforms.TimeMasking(time_mask_param=35), ).to(device) # W/o augmentations melspec_val = torchaudio.transforms.MelSpectrogram( sample_rate=16000, n_mels=N_MELS).to(device) ### Create model CRNN_model = CRNN(IN_SIZE, HIDDEN_SIZE, KERNEL_SIZE, STRIDE, GRU_NUM_LAYERS) attn_layer = AttnMech(HIDDEN_SIZE * NUM_DIRS) apply_attn = ApplyAttn(HIDDEN_SIZE * 2, NUM_CLASSES) ### Download ready models # checkpoint = torch.load('crnn_final', map_location=device) # CRNN_model.load_state_dict(checkpoint['model_state_dict']) # checkpoint = torch.load('attn_final', map_location=device) # attn_layer.load_state_dict(checkpoint['model_state_dict']) # checkpoint = torch.load('apply_attn_final', map_location=device) # apply_attn.load_state_dict(checkpoint['model_state_dict']) full_model = FullModel(CRNN_model, attn_layer, apply_attn) print(full_model.to(device)) print(count_parameters(full_model)) #wandb.init()