Exemple #1
0
def test():
    """Evaluate one saved checkpoint on the dev-clean, dev-other, test-clean and test-other lists.

    The work proceeds stage by stage across all four sets: feature handles,
    then utterance keys, then ``us.load_ctc`` data. After that the model is
    built from ``model_file``, the sub-model returned by
    ``mdl.sub_model(model, 'inputs', 'ctc_pred')`` is used to predict every
    set, and finally one line per set is printed with the ``us.ctc_eval``
    result.

    Relies on module-level names that are not defined in this function: the
    four ``*_file`` scp paths, ENCODER_LEN, MAX_INPUT_LEN, MAX_LABEL_LEN,
    FEAT_DIM, BPE_CLASSES, CNN and BATCH_SIZE, plus the ``us`` and ``mdl``
    modules.
    """
    model_file = "/disc1/ARNet/exp/libri/res18_gru/012.h5"

    # (printed label, scp path) per evaluation set, in processing order.
    eval_sets = (
        ("dev-clean-wer:", dev_clean_file),
        ("dev-other-wer:", dev_other_file),
        ("test-clean-wer:", test_clean_file),
        ("test-other-wer:", test_other_file),
    )

    # Stage 1: one feature handle per scp file.
    feats_per_set = [us.kaldiio.load_scp(scp_path) for _, scp_path in eval_sets]

    # Stage 2: utterance keys listed in each scp file.
    keys_per_set = [us.scp2key(us.read_lines(scp_path)) for _, scp_path in eval_sets]

    # Stage 3: CTC data for each set, all loaded with the same limits.
    data_per_set = [
        us.load_ctc(keys, feats,
                    encoder_len=ENCODER_LEN,
                    max_input_len=MAX_INPUT_LEN,
                    max_label_len=MAX_LABEL_LEN,
                    trans_ids=us.LIBRI_TRANS_IDS)
        for keys, feats in zip(keys_per_set, feats_per_set)
    ]

    # Stage 4: build the network from the checkpoint and extract the decoding sub-model.
    model = mdl.model_res_gru_ctc(
        shapes=(MAX_INPUT_LEN, FEAT_DIM, 1),
        bpe_classes=BPE_CLASSES,
        max_label_len=MAX_LABEL_LEN,
        cnn=CNN,
        raw_model=model_file,
    )
    ctc_decode_model = mdl.sub_model(model, 'inputs', 'ctc_pred')

    # Stage 5: predictions for every set; element [0] of each loaded item is what gets decoded.
    preds_per_set = [
        mdl.ctc_pred(ctc_decode_model, loaded[0],
                     input_len=ENCODER_LEN, batch_size=BATCH_SIZE)
        for loaded in data_per_set
    ]

    # Stage 6: report, one line per set, in the same order as above.
    for (label, _), loaded, pred in zip(eval_sets, data_per_set, preds_per_set):
        inputs = loaded[0]
        print(label,
              us.ctc_eval(inputs["ctc_labels"], inputs["ctc_label_len"], pred, True))
Exemple #2
0
           %(RES_TYPE, ASR_EN, MANY_TO_ONE, METRIC_LOSS, MARGIN,BN_DIM,1 if RAW_MODEL else 0)
    # Output directory for this run, named after `task` (expected to be set by
    # the statement just above, which starts outside this view).
    MODEL_DIR = "exp/aesrc/%s/" % task
    # makedirs also creates any missing parent ("exp", "exp/aesrc"), which
    # os.mkdir cannot do, and exist_ok=True removes the check-then-create race
    # of the former isdir()/mkdir() pair.
    os.makedirs(MODEL_DIR, exist_ok=True)



    # scp files listing the training and dev features
    train_file = "/disc1/AESRC2020/data/aesrc_fbank/train.scp"
    dev_file = "/disc1/AESRC2020/data/aesrc_fbank/dev.scp"

    # feature handles, one per scp file, as returned by us.kaldiio.load_scp
    FEATS = us.kaldiio.load_scp(train_file)
    FEATS_DEV = us.kaldiio.load_scp(dev_file)

    # utterance key lists derived from the lines of each scp file
    train_lst = us.scp2key(us.read_lines(train_file))
    dev_lst = us.scp2key(us.read_lines(dev_file))
    # whole batches per pass over train_lst (floor division drops the remainder)
    N_BATCHS = len(train_lst) // BATCH_SIZE

    # callbacks: both watch 'val_accent_labels_acc' in 'max' mode with
    # min_delta=0.001. The LR reducer uses factor 0.3, patience 1 and a floor
    # of 1e-5; the early stopper uses patience 3.
    # NOTE(review): presumably the Keras callbacks of the same names — the
    # import is outside this view, confirm.
    lr_reducer = ReduceLROnPlateau(factor=0.3, cooldown=0, patience=1, min_lr=1e-5,
                                   monitor='val_accent_labels_acc', mode='max', min_delta=0.001, verbose=1)
    early_stopper = EarlyStopping(patience=3,
                                  monitor='val_accent_labels_acc', mode='max', min_delta=0.001, verbose=1)
    # CSV log written to train.csv under MODEL_DIR
    csv_logger = CSVLogger('%s/train.csv' % MODEL_DIR)


    # generator
    generator = us.generator_sarnet(train_lst, FEATS, BATCH_SIZE,
                                    encoder_len=ENCODER_LEN,
                                    max_input_len=MAX_INPUT_LEN,
                                    max_label_len=MAX_LABEL_LEN,
Exemple #3
0
    # Settings for the res18_gru2 experiment.
    # Presumably the backbone name handed to the model builder — the call site
    # is outside this view, confirm.
    CNN = 'res18'
    # .h5 file located inside MODEL_DIR; presumably the checkpoint to start
    # from — confirm where it is consumed.
    RAW_MODEL = '/disc1/ARNet/exp/aesrc/res18_gru2/002.h5'
    # CSV file located inside MODEL_DIR; presumably the training log — confirm.
    LOG_FILE = '/disc1/ARNet/exp/aesrc/res18_gru2/model.csv'
    # Directory that contains both files above.
    MODEL_DIR = '/disc1/ARNet/exp/aesrc/res18_gru2/'

    # scp files listing the training and dev features ("aesrc_fbank_sp" set)
    train_file = "/disc1/AESRC2020/data/aesrc_fbank_sp/train.scp"
    dev_file = "/disc1/AESRC2020/data/aesrc_fbank_sp/dev.scp"

    # feature handles, one per scp file, as returned by us.kaldiio.load_scp
    FEATS = us.kaldiio.load_scp(train_file)
    FEATS_DEV = us.kaldiio.load_scp(dev_file)

    # Training keys: read from the scp file, then passed through
    # us.limit_trans_utts (with AESRC_TRANS_IDS and MAX_LABEL_LEN) and
    # us.limit_time_utts (with AESRC_UTT2FRAMES and MAX_INPUT_LEN).
    # Presumably these drop utterances whose transcript or frame count exceeds
    # the given limit — confirm in `us`.
    train_lst = us.limit_time_utts(
        us.limit_trans_utts(us.scp2key(us.read_lines(train_file)),
                            us.AESRC_TRANS_IDS, MAX_LABEL_LEN),
        us.AESRC_UTT2FRAMES, MAX_INPUT_LEN)
    # Dev keys: a random subset of at most 1000 utterances. random.sample
    # raises ValueError when asked for more items than the population holds,
    # so the sample size is capped at the list length.
    dev_lst = us.scp2key(us.read_lines(dev_file))
    dev_lst = random.sample(dev_lst, min(1000, len(dev_lst)))
    # whole batches per pass over train_lst (floor division drops the remainder)
    N_BATCHS = len(train_lst) // BATCH_SIZE

    # callbacks
    # Learning-rate reducer: multiplies the LR by 0.5, never below 0.5e-6,
    # watching 'accent_labels_acc' in 'max' mode.
    # NOTE(review): the monitored name has no 'val_' prefix, so this presumably
    # tracks the training metric rather than a validation one — confirm intent.
    # NOTE(review): patience=0 combined with min_delta=0.5 presumably makes the
    # reduction fire after almost every epoch (an accuracy gain of 0.5 is
    # rarely reached) — confirm this is the intended schedule.
    lr_reducer = ReduceLROnPlateau(factor=0.5,
                                   cooldown=0,
                                   patience=0,
                                   min_lr=0.5e-6,
                                   monitor='accent_labels_acc',
                                   mode='max',
                                   min_delta=0.5,
                                   verbose=1)
Exemple #4
0
    7: "<RU>"
}

# Settings shared by the data loading and model construction below.
# Passed as bpe_classes= to the model builder.
BPE_CLASSES = 1000
# Passed as accent_classes= to both the data loader and the model builder.
ACCENT_CLASSES = 8
# First entry of the model input shape; also max_input_len= for the data loader.
MAX_INPUT_LEN = 1200
# Passed as encoder_len= to the data loader.
ENCODER_LEN = 114
# Passed as max_label_len= to the data loader.
MAX_LABEL_LEN = 72
# Second entry of the model input shape.
FEAT_DIM = 80
# EPOCHS and BATCH_SIZE are not referenced in the visible part of this script.
EPOCHS = 10
BATCH_SIZE = 32
# Path to an .h5 file; where it is consumed is outside this view.
RAW_MODEL = "/disc1/ARNet/exp/aesrc/007.h5"

# scp file listing the dev features, the feature handle loaded from it, and
# the utterance keys derived from its lines.
dev_file = "/disc1/AESRC2020/data/aesrc_fbank_sp/dev.scp"
FEATS_DEV = us.kaldiio.load_scp(dev_file)
dev_lst = us.scp2key(us.read_lines(dev_file))
# Disabled option: restrict the run to a random subset of 100 dev keys.
# dev_lst = random.sample(dev_lst,100)
# Load the dev data for the joint CTC + accent setup; the transcript ids and
# accent lookup tables come from `us`.
# NOTE(review): the structure of the returned value is not visible here —
# check us.load_ctc_accent before indexing into dev_data.
dev_data = us.load_ctc_accent(dev_lst,
                              FEATS_DEV,
                              encoder_len=ENCODER_LEN,
                              max_input_len=MAX_INPUT_LEN,
                              max_label_len=MAX_LABEL_LEN,
                              trans_ids=us.AESRC_TRANS_IDS,
                              accent_classes=ACCENT_CLASSES,
                              accent_dct=us.AESRC_ACCENT,
                              accent_ids=us.AESRC_ACCENT2INT)

print("==== test ====")
model = mdl.model_ctc_accent(shapes=(MAX_INPUT_LEN, FEAT_DIM, 1),
                             accent_classes=ACCENT_CLASSES,
                             bpe_classes=BPE_CLASSES,