def __init__(self, base_model_list=bagging_config.base_model_list):
    self.base_model_list = base_model_list.split("-")
    self.num_random = len(self.base_model_list)
    self.dataDir = general_config.data_dir + "/random"
    createRandomData(self.num_random)
    self.models = []
    self.models_name = []
    for i in range(self.num_random):
        base_model = self.base_model_list[i]
        assert base_model in ["1", "2", "3", "4", "5"], "Invalid base model type!"
        if base_model == "1":
            model = TextCNN()
        elif base_model == "2":
            model = TextRNN()
        elif base_model == "3":
            model = CRNN()
        elif base_model == "4":
            model = RCNN()
        else:
            model = HAN()
        self.models.append(model)
        self.models_name.append(modelDict[base_model])
    self.logDir = ensure_dir_exist(general_config.log_dir + "/bagging/" + "-".join(self.models_name))
    self.saveDir = ensure_dir_exist(general_config.save_dir + "/bagging/" + "-".join(self.models_name))
    self.logger = my_logger(self.logDir + "/log.txt")
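# Note (assumption, not shown in this snippet): modelDict is expected to map each
# id to a readable model name, e.g. {"1": "TextCNN", "2": "TextRNN", "3": "CRNN",
# "4": "RCNN", "5": "HAN"}, so that logDir/saveDir end up looking like
# <log_dir>/bagging/TextCNN-CRNN-HAN.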
def test_model(state, reference_tsv_path, reduced_number_of_data=None, store_predictions_fname=None):
    dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                    base_feature_dir=os.path.join(cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])

    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    LOG.info(reference_tsv_path)
    df = dataset.initialize_and_get_df(reference_tsv_path, reduced_number_of_data)

    # Strong (frame-level) predictions and metrics
    strong_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                 transform=transforms_valid)
    predictions = get_predictions(crnn, strong_dataload, many_hot_encoder.decode_strong,
                                  pooling_time_ratio, save_predictions=store_predictions_fname)
    compute_strong_metrics(predictions, df)

    # Weak (clip-level) metrics
    weak_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                               transform=transforms_valid)
    weak_metric = get_f_measure_by_class(crnn, len(classes),
                                         DataLoader(weak_dataload, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
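# Usage sketch (assumption: the checkpoint path below is hypothetical, not taken
# from the original source; the state dict is expected to come from torch.save):
# state = torch.load(os.path.join(cfg.save_dir, "baseline_best"), map_location="cpu")
# test_model(state, cfg.validation,
#            store_predictions_fname=os.path.join(cfg.save_dir, "validation_predictions.tsv"))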
def _load_crnn(state, model_name="model"):
    crnn_args = state[model_name]["args"]
    crnn_kwargs = state[model_name]["kwargs"]
    crnn = CRNN(*crnn_args, **crnn_kwargs)
    crnn.load_state_dict(state[model_name]["state_dict"])
    crnn.eval()
    crnn = to_cuda_if_available(crnn)
    logger.info("Model loaded at epoch: {}".format(state["epoch"]))
    logger.info(crnn)
    return crnn
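# Usage sketch (assumption: the checkpoint path is hypothetical; the state dict is
# expected to have been written with torch.save(state, path) by the training code):
# state = torch.load("stored_data/simple_CRNN/model/baseline_best", map_location="cpu")
# crnn = _load_crnn(state)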
def main():
    ctpn = CTPN(cfg)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    ctpn.load_ckpt(sess)  # ctpn load

    if torch.cuda.is_available() and cfg.ALL_GPU:
        crnn = CRNN(cfg, 32, 1, len(cfg.KEYS) + 1, 256, 1).cuda()
    else:
        crnn = CRNN(cfg, 32, 1, len(cfg.KEYS) + 1, 256, 1).cpu()
    crnn.load_state_dict(torch.load(cfg.CRNN_MODEL))  # crnn load
    crnn.eval()

    if cfg.ADJUST_ANGLE:
        angle_detector = VGG(cfg)  # vgg load
        angle_detector.load_weights()

    data = DataLoader(cfg)
    text = TextGenerator(cfg)

    # image_path = raw_input("please input your image path and name:")  # get image path
    image_path = '/home/jwm/Desktop/OCR-standard/images/xuanye.jpg'
    img = data.load_data(image_path)

    t = time.time()
    if cfg.ADJUST_ANGLE:
        img = rotate(img, angle_detector)  # rotate image if necessary
    # img = cv2.resize(img, (2000, 3000), interpolation=cv2.INTER_CUBIC)
    text_recs, detected_img, img = detect(img, data, ctpn, sess)  # detect text
    results = recognize(img, text_recs, crnn, text, adjust=True)  # recognize text
    print("It takes time:{}s".format(time.time() - t))
    for key in results:
        print(results[key][1])
def __init__(self, base_model_list=stacking_config.base_model_list, num_cv=stacking_config.num_cv):
    self.base_model_list = base_model_list.split("-")
    self.num_models = len(self.base_model_list)
    self.num_cv = num_cv
    self.dataDir = general_config.data_dir + "/cv/" + str(self.num_cv)
    if not os.path.exists(self.dataDir):
        createCrossValidationData(self.num_cv)
    self.models = []
    self.models_name = []
    for n in range(self.num_models):
        base_model = self.base_model_list[n]
        assert base_model in ["1", "2", "3", "4", "5"], "Invalid base model type!"
        if base_model == "1":
            model = TextCNN()
        elif base_model == "2":
            model = TextRNN()
        elif base_model == "3":
            model = CRNN()
        elif base_model == "4":
            model = RCNN()
        else:
            model = HAN()
        self.models.append(model)
        self.models_name.append(modelDict[base_model])
    self.logDir = ensure_dir_exist(general_config.log_dir + "/stacking/" + "-".join(self.models_name)
                                   + "/" + str(self.num_cv))
    self.saveDir = ensure_dir_exist(general_config.save_dir + "/stacking/" + "-".join(self.models_name)
                                    + "/" + str(self.num_cv))
    self.classifier = LogisticRegression()
    self.logger = my_logger(self.logDir + "/log.txt")
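# Usage sketch (assumption: the enclosing class name StackingModel is hypothetical,
# only its __init__ is shown above). The "-"-separated ids pick the base learners,
# which are stacked with the LogisticRegression meta-classifier over num_cv folds:
# ensemble = StackingModel(base_model_list="1-3-5", num_cv=5)  # TextCNN, CRNN, HAN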
weak_mask = slice(batch_sizes[0])  # Assume weak data is always the first one
concat_dataset = ConcatDataset(list_dataset)
sampler = MultiStreamBatchSampler(concat_dataset, batch_sizes=batch_sizes)
training_loader = DataLoader(concat_dataset, batch_sampler=sampler, num_workers=cfg.num_workers)
valid_synth_loader = DataLoader(valid_synth_data, batch_size=cfg.batch_size, num_workers=cfg.num_workers)

# ##############
# Model
# ##############
crnn = CRNN(**crnn_kwargs)
pytorch_total_params = sum(p.numel() for p in crnn.parameters() if p.requires_grad)
logger.info(crnn)
logger.info("number of parameters in the model: {}".format(pytorch_total_params))
crnn.apply(weights_init)

crnn_ema = CRNN(**crnn_kwargs)
crnn_ema.apply(weights_init)
for param in crnn_ema.parameters():
    param.detach_()

optim_kwargs = {"lr": cfg.default_learning_rate, "betas": (0.9, 0.999)}
optim = torch.optim.Adam(filter(lambda p: p.requires_grad, crnn.parameters()), **optim_kwargs)
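# Mean-teacher sketch (assumption): crnn_ema is the teacher whose parameters are
# detached above and updated as an exponential moving average of the student crnn.
# The original update function is not shown in this snippet; a standard EMA update
# looks like the following:
def ema_update_sketch(student, teacher, alpha, global_step):
    # ramp the decay up from 0 towards alpha during the first steps
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(teacher.parameters(), student.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)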
index2char, char2index = utils.get_dict(data_cfg['dict_file'])  # load the char <-> index mapping dictionaries
os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu
torch.backends.cudnn.benchmark = True
nclass = len(index2char)  # total number of character classes
print('the dict length is {}'.format(len(index2char)))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_loader = dataloader.get_train_loader(data_cfg, char2index)  # build the training data loader

if model_cfg['method'] == 'CRNN':
    criterion = nn.CTCLoss(zero_infinity=True)
    model = CRNN(nclass, model_cfg).to(device)
elif model_cfg['method'] == 'SAR':
    criterion = torch.nn.CrossEntropyLoss(ignore_index=char2index['PAD'])
    model = SAR(nclass, model_cfg).to(device)

if not model_cfg['load_model_path'] == '':
    model.load_state_dict(torch.load(model_cfg['load_model_path']))
optimizer = optim.Adam(model.parameters(), lr=train_cfg['learning_rate'])


def adjust_learning_rate(optimizer, global_step, init_lr=train_cfg['learning_rate'],
                         decay_rate=train_cfg['decay_rate'],
def test_model(state, reduced_number_of_data, store_predictions_fname=None):
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])

    # ##############
    # Validation
    # ##############
    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    # # 2018
    # LOG.info("Eval 2018")
    # eval_2018_df = dataset.initialize_and_get_df(cfg.eval2018, reduced_number_of_data)
    # # Strong
    # eval_2018_strong = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                               transform=transforms_valid)
    # predictions = get_predictions(crnn, eval_2018_strong, many_hot_encoder.decode_strong)
    # compute_strong_metrics(predictions, eval_2018_df, pooling_time_ratio)
    # # Weak
    # eval_2018_weak = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                             transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(eval_2018_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # Validation 2019
    # LOG.info("Validation 2019 (original code)")
    # b_dataset = B_DatasetDcase2019Task4(cfg.workspace,
    #                                     base_feature_dir=os.path.join(cfg.workspace, 'dataset', 'features'),
    #                                     save_log_feature=False)
    # b_validation_df = b_dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    # b_validation_df.to_csv('old.csv')
    # b_validation_strong = B_DataLoadDf(b_validation_df,
    #                                    b_dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                                    transform=transforms_valid)
    # predictions2 = get_predictions(crnn, b_validation_strong, many_hot_encoder.decode_strong,
    #                                save_predictions=store_predictions_fname)
    # compute_strong_metrics(predictions2, b_validation_df, pooling_time_ratio)
    # b_validation_weak = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                                  transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(b_validation_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # ============================================================================================
    dataset = DatasetDcase2019Task4(feature_dir=cfg.feature_dir,
                                    local_path=cfg.workspace,
                                    exp_tag=cfg.exp_tag,
                                    save_log_feature=False)

    # Validation 2019
    LOG.info("Validation 2019")
    validation_df = dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    validation_strong = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                   transform=transforms_valid)
    predictions = get_predictions(crnn, validation_strong, many_hot_encoder.decode_strong,
                                  save_predictions=store_predictions_fname)

    # Features are stored as .npy; the metrics expect the original .wav filenames
    vdf = validation_df.copy()
    vdf.filename = vdf.filename.str.replace('.npy', '.wav')
    pdf = predictions.copy()
    pdf.filename = pdf.filename.str.replace('.npy', '.wav')
    compute_strong_metrics(pdf, vdf, pooling_time_ratio)

    validation_weak = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                                 transform=transforms_valid)
    weak_metric = get_f_measure_by_class(crnn, len(classes),
                                         DataLoader(validation_weak, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
else:
    add_dir_path = "_synthetic_only"
store_dir = os.path.join("stored_data", "simple_CRNN" + add_dir_path)
saved_model_dir = os.path.join(store_dir, "model")
saved_pred_dir = os.path.join(store_dir, "predictions")
create_folder(store_dir)
create_folder(saved_model_dir)
create_folder(saved_pred_dir)

# ##############
# Model
# ##############
crnn_kwargs = cfg.crnn_kwargs
crnn = CRNN(**crnn_kwargs)
crnn.apply(weights_init)
pooling_time_ratio = cfg.pooling_time_ratio
LOG.info(crnn)

# ##############
# DATA
# ##############
dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                base_feature_dir=os.path.join(cfg.workspace, "dataset", "features"),
                                save_log_feature=False)
weak_df = dataset.initialize_and_get_df(cfg.weak, reduced_number_of_data)
synthetic_df = dataset.initialize_and_get_df(cfg.synthetic,
transforms = get_transforms(cfg.max_frames, scaler, augment_type="noise")
transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)
for i in range(len(list_dataset)):
    list_dataset[i].set_transform(transforms)
validation_data.set_transform(transforms_valid)
test_data.set_transform(transforms_valid)

concat_dataset = ConcatDataset(list_dataset)
sampler = MultiStreamBatchSampler(concat_dataset, batch_sizes=batch_sizes)
training_data = DataLoader(concat_dataset, batch_sampler=sampler)

# ##############
# Model
# ##############
crnn_kwargs = cfg.crnn_kwargs
crnn = CRNN(**crnn_kwargs)
crnn_ema = CRNN(**crnn_kwargs)
if path.exists(cfg.load_weights_fn):
    model_cfg = torch.load(cfg.load_weights_fn)
    crnn.load(parameters=model_cfg['model']['state_dict'])
    update_ema_variables(crnn, crnn_ema, 0.999, 0)
else:
    crnn.apply(weights_init)
    crnn_ema.apply(weights_init)
LOG.info(crnn)

for param in crnn_ema.parameters():
    param.detach_()

optim_kwargs = {"lr": 0.001, "betas": (0.9, 0.999)}
transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)
valid_synth_data = DataLoadDf(valid_synth_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                              transform=transforms_valid)
valid_weak_data = DataLoadDf(valid_weak_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                             transform=transforms_valid)

# Eval 2018
eval_2018_df = dataset.initialize_and_get_df(cfg.eval2018, reduced_number_of_data)
eval_2018 = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                       transform=transforms_valid)

# ##############
# Model
# ##############
crnn_kwargs = cfg.crnn_kwargs
crnn = CRNN(**crnn_kwargs)
crnn_ema = CRNN(**crnn_kwargs)
crnn.apply(weights_init)
crnn_ema.apply(weights_init)
LOG.info(crnn)

for param in crnn_ema.parameters():
    param.detach_()

optim_kwargs = {"lr": 0.001, "betas": (0.9, 0.999)}
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, crnn.parameters()), **optim_kwargs)
bce_loss = nn.BCELoss()

state = {
    'model': {"name": crnn.__class__.__name__,