def _load_crnn(state, model_name="model"):
    crnn_args = state[model_name]["args"]
    crnn_kwargs = state[model_name]["kwargs"]
    crnn = CRNN(*crnn_args, **crnn_kwargs)
    crnn.load_state_dict(state[model_name]["state_dict"])
    crnn.eval()
    crnn = to_cuda_if_available(crnn)
    logger.info("Model loaded at epoch: {}".format(state["epoch"]))
    logger.info(crnn)
    return crnn
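# Usage sketch (assumption, not from the original source): restoring a CRNN from a saved
# experiment checkpoint with _load_crnn. The checkpoint path is hypothetical; the
# "model"/"epoch" key layout simply mirrors the accesses made inside _load_crnn above.
import torch

state = torch.load("path/to/model_checkpoint.pth", map_location="cpu")  # hypothetical path
crnn = _load_crnn(state, model_name="model")  # returns the model in eval mode, on GPU if available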
def test_model(state, reference_tsv_path, reduced_number_of_data=None, store_predictions_fname=None):
    dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                    base_feature_dir=os.path.join(cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)

    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])

    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    LOG.info(reference_tsv_path)
    df = dataset.initialize_and_get_df(reference_tsv_path, reduced_number_of_data)

    # Strong labels: frame-level predictions and event-based metrics
    strong_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                 transform=transforms_valid)
    predictions = get_predictions(crnn, strong_dataload, many_hot_encoder.decode_strong, pooling_time_ratio,
                                  save_predictions=store_predictions_fname)
    compute_strong_metrics(predictions, df)

    # Weak labels: clip-level F1 per class
    weak_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                               transform=transforms_valid)
    weak_metric = get_f_measure_by_class(crnn, len(classes),
                                         DataLoader(weak_dataload, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
def main():
    ctpn = CTPN(cfg)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    ctpn.load_ckpt(sess)  # load CTPN detector weights

    # Load the CRNN recognizer on GPU if available, otherwise on CPU
    if torch.cuda.is_available() and cfg.ALL_GPU:
        crnn = CRNN(cfg, 32, 1, len(cfg.KEYS) + 1, 256, 1).cuda()
        map_location = None
    else:
        crnn = CRNN(cfg, 32, 1, len(cfg.KEYS) + 1, 256, 1).cpu()
        map_location = "cpu"
    crnn.load_state_dict(torch.load(cfg.CRNN_MODEL, map_location=map_location))  # crnn load
    crnn.eval()

    if cfg.ADJUST_ANGLE:
        angle_detector = VGG(cfg)  # vgg load
        angle_detector.load_weights()

    data = DataLoader(cfg)
    text = TextGenerator(cfg)

    # image_path = raw_input("please input your image path and name:")  # get image path interactively
    image_path = '/home/jwm/Desktop/OCR-standard/images/xuanye.jpg'
    img = data.load_data(image_path)

    t = time.time()
    if cfg.ADJUST_ANGLE:
        img = rotate(img, angle_detector)  # rotate image if necessary
    # img = cv2.resize(img, (2000, 3000), interpolation=cv2.INTER_CUBIC)
    text_recs, detected_img, img = detect(img, data, ctpn, sess)  # detect text regions
    results = recognize(img, text_recs, crnn, text, adjust=True)  # recognize text
    print("It takes time:{}s".format(time.time() - t))
    for key in results:
        print(results[key][1])
for epoch in range(cfg.n_epoch):
    crnn.train()
    crnn_ema.train()
    crnn, crnn_ema = to_cuda_if_available(crnn, crnn_ema)

    loss_value, meters = train(training_loader, crnn, optim, epoch,
                               ema_model=crnn_ema, mask_weak=weak_mask, mask_strong=strong_mask,
                               adjust_lr=cfg.adjust_lr)

    # Validation
    crnn = crnn.eval()
    logger.info("\n ### Valid synthetic metric ### \n")
    predictions = get_predictions(crnn, valid_synth_loader, many_hot_encoder.decode_strong,
                                  pooling_time_ratio, median_window=median_window,
                                  save_predictions=None)
    # Validation with synthetic data (dropping feature_filename for psds)
    valid_synth = dfs["valid_synthetic"].drop("feature_filename", axis=1)
    valid_synth_f1, psds_m_f1 = compute_metrics(predictions, valid_synth, durations_synth)

    # ---------------
    # Save the trainable PCEN parameters (if any)
    if crnn.trainable_pcen is not None:
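# Sketch (assumption, not from the original excerpt): the mean-teacher EMA update that
# train() presumably applies to ema_model (crnn_ema) after each optimiser step. The
# function name and the alpha/global_step parameters are hypothetical.
def update_ema(model, ema_model, alpha, global_step):
    # Ramp up the decay so early steps track the student weights more closely
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)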
        line_acc_arr.append(line_acc)
        min_dis += min_dis_
        total_dis += total_dis_

        if i % 100 == 0:
            batch100_loss = np.mean(loss_arr)
            print(
                f'\nepoch:{epoch} step:{i} loss:{batch100_loss} time:{time.time()-start_time} '
                f'line_acc:{np.mean(line_acc_arr)} acc:{min_dis}/{total_dis} '
                f'{(total_dis-min_dis)/total_dis if not total_dis == 0 else 0}'
            )
            print(f'pred:{pred_text[0]}')
            print(f'true:{text[0]}')
            start_time = time.time()
            loss_arr = []
            line_acc_arr = []
            min_dis = 0
            total_dis = 0

        if i % train_cfg['save_step'] == 0 and i > 0:
            model.eval()
            test_loader_dict = dataloader.get_test_loader(data_cfg, char2index)
            Eval.valid(model, model_cfg, test_loader_dict, index2char, device)
            model.train()
            save_path = os.path.join(train_cfg['workdir'], 'checkpoints')
            utils.mkdir(save_path)
            torch.save(model.state_dict(), f'{save_path}/{model_cfg["method"]}_{epoch}_{i}.pth')

        if i % train_cfg['decay_steps'] == 0:
            adjust_learning_rate(optimizer, global_step)
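# Sketch (assumption, not from the original source): a step-decay implementation of
# adjust_learning_rate consistent with the 'decay_steps' call above. Only the
# (optimizer, global_step) arguments match the call site; base_lr, decay_rate and
# decay_steps defaults are hypothetical.
def adjust_learning_rate(optimizer, global_step, base_lr=1e-3, decay_rate=0.9, decay_steps=10000):
    # Scale the base learning rate down by decay_rate once per decay interval
    lr = base_lr * (decay_rate ** (global_step // decay_steps))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr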
def test_model(state, reduced_number_of_data, store_predictions_fname=None):
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])

    # ##############
    # Validation
    # ##############
    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    # # 2018
    # LOG.info("Eval 2018")
    # eval_2018_df = dataset.initialize_and_get_df(cfg.eval2018, reduced_number_of_data)
    # # Strong
    # eval_2018_strong = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                               transform=transforms_valid)
    # predictions = get_predictions(crnn, eval_2018_strong, many_hot_encoder.decode_strong)
    # compute_strong_metrics(predictions, eval_2018_df, pooling_time_ratio)
    # # Weak
    # eval_2018_weak = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                             transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(eval_2018_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # Validation 2019
    # LOG.info("Validation 2019 (original code)")
    # b_dataset = B_DatasetDcase2019Task4(cfg.workspace,
    #                                     base_feature_dir=os.path.join(cfg.workspace, 'dataset', 'features'),
    #                                     save_log_feature=False)
    # b_validation_df = b_dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    # b_validation_df.to_csv('old.csv')
    # b_validation_strong = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file,
    #                                    many_hot_encoder.encode_strong_df, transform=transforms_valid)
    # predictions2 = get_predictions(crnn, b_validation_strong, many_hot_encoder.decode_strong,
    #                                save_predictions=store_predictions_fname)
    # compute_strong_metrics(predictions2, b_validation_df, pooling_time_ratio)
    # b_validation_weak = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                                  transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(b_validation_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
    # ============================================================================================

    dataset = DatasetDcase2019Task4(feature_dir=cfg.feature_dir, local_path=cfg.workspace,
                                    exp_tag=cfg.exp_tag, save_log_feature=False)

    # Validation 2019
    LOG.info("Validation 2019")
    validation_df = dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    validation_strong = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                   transform=transforms_valid)
    predictions = get_predictions(crnn, validation_strong, many_hot_encoder.decode_strong,
                                  save_predictions=store_predictions_fname)

    # Strong metrics: map feature filenames (.npy) back to audio filenames (.wav)
    vdf = validation_df.copy()
    vdf.filename = vdf.filename.str.replace('.npy', '.wav')
    pdf = predictions.copy()
    pdf.filename = pdf.filename.str.replace('.npy', '.wav')
    compute_strong_metrics(pdf, vdf, pooling_time_ratio)

    # Weak metrics: clip-level F1 per class
    validation_weak = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                                 transform=transforms_valid)
    weak_metric = get_f_measure_by_class(crnn, len(classes),
                                         DataLoader(validation_weak, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))