Example #1
    def __init__(self, base_model_list=bagging_config.base_model_list):
        self.base_model_list = base_model_list.split("-")
        self.num_random = len(self.base_model_list)
        self.dataDir = general_config.data_dir + "/random"
        createRandomData(self.num_random)

        self.models = []
        self.models_name = []
        for i in range(self.num_random):
            base_model = self.base_model_list[i]
            assert base_model in ["1", "2", "3", "4", "5"], "Invalid base model type!"
            if base_model == "1":
                model = TextCNN()
            elif base_model == "2":
                model = TextRNN()
            elif base_model == "3":
                model = CRNN()
            elif base_model == "4":
                model = RCNN()
            else:
                model = HAN()
            self.models.append(model)
            self.models_name.append(modelDict[base_model])
        self.logDir = ensure_dir_exist(general_config.log_dir + "/bagging/" + "-".join(self.models_name))
        self.saveDir = ensure_dir_exist(general_config.save_dir + "/bagging/" + "-".join(self.models_name))
        self.logger = my_logger(self.logDir + "/log.txt")
Example #2
def test_model(state,
               reference_tsv_path,
               reduced_number_of_data=None,
               store_predictions_fname=None):
    dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                    base_feature_dir=os.path.join(
                                        cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)

    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(
        state["many_hot_encoder"])

    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    LOG.info(reference_tsv_path)
    df = dataset.initialize_and_get_df(reference_tsv_path,
                                       reduced_number_of_data)
    strong_dataload = DataLoadDf(df,
                                 dataset.get_feature_file,
                                 many_hot_encoder.encode_strong_df,
                                 transform=transforms_valid)

    predictions = get_predictions(crnn,
                                  strong_dataload,
                                  many_hot_encoder.decode_strong,
                                  pooling_time_ratio,
                                  save_predictions=store_predictions_fname)
    compute_strong_metrics(predictions, df)

    weak_dataload = DataLoadDf(df,
                               dataset.get_feature_file,
                               many_hot_encoder.encode_weak,
                               transform=transforms_valid)
    weak_metric = get_f_measure_by_class(
        crnn, len(classes), DataLoader(weak_dataload,
                                       batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(
        pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
Example #3
def _load_crnn(state, model_name="model"):
    crnn_args = state[model_name]["args"]
    crnn_kwargs = state[model_name]["kwargs"]
    crnn = CRNN(*crnn_args, **crnn_kwargs)
    crnn.load_state_dict(state[model_name]["state_dict"])
    crnn.eval()
    crnn = to_cuda_if_available(crnn)
    logger.info("Model loaded at epoch: {}".format(state["epoch"]))
    logger.info(crnn)
    return crnn
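
A minimal usage sketch for the loader above; the checkpoint path is hypothetical, and the file is assumed to have been written with torch.save in the layout _load_crnn reads (state[model_name]["args"], ["kwargs"], ["state_dict"] and state["epoch"]):

import torch

# Hypothetical path; any checkpoint saved in the expected layout works here.
state = torch.load("stored_data/model/baseline_best", map_location="cpu")
crnn = _load_crnn(state)  # returns the CRNN in eval mode, moved to GPU when available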
Example #4
def main():
    ctpn = CTPN(cfg)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    ctpn.load_ckpt(sess)  # ctpn load

    if torch.cuda.is_available() and cfg.ALL_GPU:
        crnn = CRNN(cfg, 32, 1, len(cfg.KEYS) + 1, 256, 1).cuda()
    else:
        crnn = CRNN(cfg, 32, 1, len(cfg.KEYS) + 1, 256, 1).cpu()
    crnn.eval()
    crnn.load_state_dict(torch.load(cfg.CRNN_MODEL))  # crnn load

    if cfg.ADJUST_ANGLE:
        angle_detector = VGG(cfg)  # vgg load
        angle_detector.load_weights()

    data = DataLoader(cfg)
    text = TextGenerator(cfg)

    #        image_path = raw_input("please input your image path and name:") # get image path
    image_path = '/home/jwm/Desktop/OCR-standard/images/xuanye.jpg'
    img = data.load_data(image_path)
    t = time.time()
    if cfg.ADJUST_ANGLE:
        img = rotate(img, angle_detector)  # rotate image if necessary


#    img = cv2.resize(img, (2000,3000),interpolation=cv2.INTER_CUBIC)
    text_recs, detected_img, img = detect(img, data, ctpn, sess)  # detect text
    results = recognize(img, text_recs, crnn, text,
                        adjust=True)  # recognize text
    print("It takes time:{}s".format(time.time() - t))
    for key in results:
        print(results[key][1])
Example #5
    def __init__(self,
                 base_model_list=stacking_config.base_model_list,
                 num_cv=stacking_config.num_cv):
        self.base_model_list = base_model_list.split("-")
        self.num_models = len(self.base_model_list)
        self.num_cv = num_cv
        self.dataDir = general_config.data_dir + "/cv/" + str(self.num_cv)
        if not os.path.exists(self.dataDir):
            createCrossValidationData(self.num_cv)

        self.models = []
        self.models_name = []
        for n in range(self.num_models):
            base_model = self.base_model_list[n]
            assert base_model in ["1", "2", "3", "4",
                                  "5"], "Invalid base model type!"
            if base_model == "1":
                model = TextCNN()
            elif base_model == "2":
                model = TextRNN()
            elif base_model == "3":
                model = CRNN()
            elif base_model == "4":
                model = RCNN()
            else:
                model = HAN()
            self.models.append(model)
            self.models_name.append(modelDict[base_model])
        self.logDir = ensure_dir_exist(general_config.log_dir + "/stacking/" +
                                       "-".join(self.models_name) + "/" +
                                       str(self.num_cv))
        self.saveDir = ensure_dir_exist(general_config.save_dir +
                                        "/stacking/" +
                                        "-".join(self.models_name) + "/" +
                                        str(self.num_cv))
        self.classifier = LogisticRegression()
        self.logger = my_logger(self.logDir + "/log.txt")
Example #6
    weak_mask = slice(
        batch_sizes[0])  # Assume weak data is always the first one

    concat_dataset = ConcatDataset(list_dataset)
    sampler = MultiStreamBatchSampler(concat_dataset, batch_sizes=batch_sizes)
    training_loader = DataLoader(concat_dataset,
                                 batch_sampler=sampler,
                                 num_workers=cfg.num_workers)
    valid_synth_loader = DataLoader(valid_synth_data,
                                    batch_size=cfg.batch_size,
                                    num_workers=cfg.num_workers)

    # ##############
    # Model
    # ##############
    crnn = CRNN(**crnn_kwargs)
    pytorch_total_params = sum(p.numel() for p in crnn.parameters()
                               if p.requires_grad)
    logger.info(crnn)
    logger.info(
        "number of parameters in the model: {}".format(pytorch_total_params))
    crnn.apply(weights_init)

    crnn_ema = CRNN(**crnn_kwargs)
    crnn_ema.apply(weights_init)
    for param in crnn_ema.parameters():
        param.detach_()

    optim_kwargs = {"lr": cfg.default_learning_rate, "betas": (0.9, 0.999)}
    optim = torch.optim.Adam(
        filter(lambda p: p.requires_grad, crnn.parameters()), **optim_kwargs)
Example #7
index2char, char2index = utils.get_dict(
    data_cfg['dict_file'])  # get the char <-> index mapping dictionaries

os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu
torch.backends.cudnn.benchmark = True

nclass = len(index2char)  # total number of classes
print('the dict length is {}'.format(len(index2char)))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_loader = dataloader.get_train_loader(data_cfg, char2index)  # get the training data loader

if model_cfg['method'] == 'CRNN':
    criterion = nn.CTCLoss(zero_infinity=True)
    model = CRNN(nclass, model_cfg).to(device)

elif model_cfg['method'] == 'SAR':
    criterion = torch.nn.CrossEntropyLoss(ignore_index=char2index['PAD'])
    model = SAR(nclass, model_cfg).to(device)

if not model_cfg['load_model_path'] == '':
    model.load_state_dict(torch.load(model_cfg['load_model_path']))

optimizer = optim.Adam(model.parameters(), lr=train_cfg['learning_rate'])


def adjust_learning_rate(optimizer,
                         global_step,
                         init_lr=train_cfg['learning_rate'],
                         decay_rate=train_cfg['decay_rate'],
Example #8
def test_model(state, reduced_number_of_data, store_predictions_fname=None):
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(
        state["many_hot_encoder"])

    # ##############
    # Validation
    # ##############
    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    # # 2018
    # LOG.info("Eval 2018")
    # eval_2018_df = dataset.initialize_and_get_df(cfg.eval2018, reduced_number_of_data)
    # # Strong
    # eval_2018_strong = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                               transform=transforms_valid)
    # predictions = get_predictions(crnn, eval_2018_strong, many_hot_encoder.decode_strong)
    # compute_strong_metrics(predictions, eval_2018_df, pooling_time_ratio)
    # # Weak
    # eval_2018_weak = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                             transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(eval_2018_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # Validation 2019
    # LOG.info("Validation 2019 (original code)")
    # b_dataset = B_DatasetDcase2019Task4(cfg.workspace,
    #                                   base_feature_dir=os.path.join(cfg.workspace, 'dataset', 'features'),
    #                                   save_log_feature=False)
    # b_validation_df = b_dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    # b_validation_df.to_csv('old.csv')
    # b_validation_strong = B_DataLoadDf(b_validation_df,
    #                                  b_dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                                  transform=transforms_valid)

    # predictions2 = get_predictions(crnn, b_validation_strong, many_hot_encoder.decode_strong,
    #                               save_predictions=store_predictions_fname)
    # compute_strong_metrics(predictions2, b_validation_df, pooling_time_ratio)

    # b_validation_weak = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                              transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(b_validation_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # ============================================================================================
    # ============================================================================================
    # ============================================================================================

    dataset = DatasetDcase2019Task4(feature_dir=cfg.feature_dir,
                                    local_path=cfg.workspace,
                                    exp_tag=cfg.exp_tag,
                                    save_log_feature=False)
    # Validation 2019
    LOG.info("Validation 2019")
    validation_df = dataset.initialize_and_get_df(cfg.validation,
                                                  reduced_number_of_data)
    validation_strong = DataLoadDf(validation_df,
                                   dataset.get_feature_file,
                                   many_hot_encoder.encode_strong_df,
                                   transform=transforms_valid)

    predictions = get_predictions(crnn,
                                  validation_strong,
                                  many_hot_encoder.decode_strong,
                                  save_predictions=store_predictions_fname)
    vdf = validation_df.copy()
    vdf.filename = vdf.filename.str.replace('.npy', '.wav')
    pdf = predictions.copy()
    pdf.filename = pdf.filename.str.replace('.npy', '.wav')
    compute_strong_metrics(pdf, vdf, pooling_time_ratio)

    validation_weak = DataLoadDf(validation_df,
                                 dataset.get_feature_file,
                                 many_hot_encoder.encode_weak,
                                 transform=transforms_valid)
    weak_metric = get_f_measure_by_class(
        crnn, len(classes),
        DataLoader(validation_weak, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(
        pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
Example #9
    else:
        add_dir_path = "_synthetic_only"

    store_dir = os.path.join("stored_data", "simple_CRNN" + add_dir_path)
    saved_model_dir = os.path.join(store_dir, "model")
    saved_pred_dir = os.path.join(store_dir, "predictions")
    create_folder(store_dir)
    create_folder(saved_model_dir)
    create_folder(saved_pred_dir)

    # ##############
    # Model
    # ##############

    crnn_kwargs = cfg.crnn_kwargs
    crnn = CRNN(**crnn_kwargs)
    crnn.apply(weights_init)
    pooling_time_ratio = cfg.pooling_time_ratio

    LOG.info(crnn)

    # ##############
    # DATA
    # ##############
    dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                    base_feature_dir=os.path.join(
                                        cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)

    weak_df = dataset.initialize_and_get_df(cfg.weak, reduced_number_of_data)
    synthetic_df = dataset.initialize_and_get_df(cfg.synthetic,
Example #10
    transforms = get_transforms(cfg.max_frames, scaler, augment_type="noise")
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)
    for i in range(len(list_dataset)):
        list_dataset[i].set_transform(transforms)
    validation_data.set_transform(transforms_valid)
    test_data.set_transform(transforms_valid)

    concat_dataset = ConcatDataset(list_dataset)
    sampler = MultiStreamBatchSampler(concat_dataset, batch_sizes=batch_sizes)
    training_data = DataLoader(concat_dataset, batch_sampler=sampler)

    # ##############
    # Model
    # ##############
    crnn_kwargs = cfg.crnn_kwargs
    crnn = CRNN(**crnn_kwargs)
    crnn_ema = CRNN(**crnn_kwargs)

    if path.exists(cfg.load_weights_fn):
        model_cfg = torch.load(cfg.load_weights_fn)
        crnn.load(parameters=model_cfg['model']['state_dict'])
        update_ema_variables(crnn, crnn_ema, 0.999, 0)
    else:
        crnn.apply(weights_init)
        crnn_ema.apply(weights_init)
    LOG.info(crnn)

    for param in crnn_ema.parameters():
        param.detach_()

    optim_kwargs = {"lr": 0.001, "betas": (0.9, 0.999)}
Example #11
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)
    valid_synth_data = DataLoadDf(valid_synth_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                  transform=transforms_valid)
    valid_weak_data = DataLoadDf(valid_weak_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                                  transform=transforms_valid)

    # Eval 2018
    eval_2018_df = dataset.initialize_and_get_df(cfg.eval2018, reduced_number_of_data)
    eval_2018 = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                           transform=transforms_valid)

    # ##############
    # Model
    # ##############
    crnn_kwargs = cfg.crnn_kwargs
    crnn = CRNN(**crnn_kwargs)
    crnn_ema = CRNN(**crnn_kwargs)

    crnn.apply(weights_init)
    crnn_ema.apply(weights_init)
    LOG.info(crnn)

    for param in crnn_ema.parameters():
        param.detach_()

    optim_kwargs = {"lr": 0.001, "betas": (0.9, 0.999)}
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, crnn.parameters()), **optim_kwargs)
    bce_loss = nn.BCELoss()

    state = {
        'model': {"name": crnn.__class__.__name__,