def test():
    model_file = "/disc1/ARNet/exp/libri/res18_gru/012.h5"

    # feats
    FEATS_DEV_CLEAN = us.kaldiio.load_scp(dev_clean_file)
    FEATS_DEV_OTHER = us.kaldiio.load_scp(dev_other_file)
    FEATS_TEST_CLEAN = us.kaldiio.load_scp(test_clean_file)
    FEATS_TEST_OTHER = us.kaldiio.load_scp(test_other_file)

    # list
    dev_clean_lst = us.scp2key(us.read_lines(dev_clean_file))
    dev_other_lst = us.scp2key(us.read_lines(dev_other_file))
    test_clean_lst = us.scp2key(us.read_lines(test_clean_file))
    test_other_lst = us.scp2key(us.read_lines(test_other_file))

    # data
    dev_clean_data = us.load_ctc(dev_clean_lst, FEATS_DEV_CLEAN, encoder_len=ENCODER_LEN,
                                 max_input_len=MAX_INPUT_LEN, max_label_len=MAX_LABEL_LEN,
                                 trans_ids=us.LIBRI_TRANS_IDS)
    dev_other_data = us.load_ctc(dev_other_lst, FEATS_DEV_OTHER, encoder_len=ENCODER_LEN,
                                 max_input_len=MAX_INPUT_LEN, max_label_len=MAX_LABEL_LEN,
                                 trans_ids=us.LIBRI_TRANS_IDS)
    test_clean_data = us.load_ctc(test_clean_lst, FEATS_TEST_CLEAN, encoder_len=ENCODER_LEN,
                                  max_input_len=MAX_INPUT_LEN, max_label_len=MAX_LABEL_LEN,
                                  trans_ids=us.LIBRI_TRANS_IDS)
    test_other_data = us.load_ctc(test_other_lst, FEATS_TEST_OTHER, encoder_len=ENCODER_LEN,
                                  max_input_len=MAX_INPUT_LEN, max_label_len=MAX_LABEL_LEN,
                                  trans_ids=us.LIBRI_TRANS_IDS)

    # model: load the trained checkpoint and expose the CTC prediction branch
    model = mdl.model_res_gru_ctc(shapes=(MAX_INPUT_LEN, FEAT_DIM, 1), bpe_classes=BPE_CLASSES,
                                  max_label_len=MAX_LABEL_LEN, cnn=CNN, raw_model=model_file)
    ctc_decode_model = mdl.sub_model(model, 'inputs', 'ctc_pred')

    # decode each LibriSpeech split
    dev_clean_pred = mdl.ctc_pred(ctc_decode_model, dev_clean_data[0], input_len=ENCODER_LEN, batch_size=BATCH_SIZE)
    dev_other_pred = mdl.ctc_pred(ctc_decode_model, dev_other_data[0], input_len=ENCODER_LEN, batch_size=BATCH_SIZE)
    test_clean_pred = mdl.ctc_pred(ctc_decode_model, test_clean_data[0], input_len=ENCODER_LEN, batch_size=BATCH_SIZE)
    test_other_pred = mdl.ctc_pred(ctc_decode_model, test_other_data[0], input_len=ENCODER_LEN, batch_size=BATCH_SIZE)

    # WER per split
    print("dev-clean-wer:", us.ctc_eval(dev_clean_data[0]["ctc_labels"], dev_clean_data[0]["ctc_label_len"], dev_clean_pred, True))
    print("dev-other-wer:", us.ctc_eval(dev_other_data[0]["ctc_labels"], dev_other_data[0]["ctc_label_len"], dev_other_pred, True))
    print("test-clean-wer:", us.ctc_eval(test_clean_data[0]["ctc_labels"], test_clean_data[0]["ctc_label_len"], test_clean_pred, True))
    print("test-other-wer:", us.ctc_eval(test_other_data[0]["ctc_labels"], test_other_data[0]["ctc_label_len"], test_other_pred, True))
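# The WER numbers above come from us.ctc_eval, whose body is not shown in this
# file. The helper below is an illustrative sketch (an assumption, not the
# repo's implementation) of what a greedy CTC evaluation typically does:
# argmax the frame posteriors, collapse repeats, drop the blank symbol, and
# score against the reference labels with edit distance. BLANK_ID and the
# function names here are made up for illustration.
import numpy as np

BLANK_ID = 0  # assumed blank index

def _edit_distance(ref, hyp):
    # standard Levenshtein distance via dynamic programming
    d = np.zeros((len(ref) + 1, len(hyp) + 1), dtype=np.int32)
    d[:, 0] = np.arange(len(ref) + 1)
    d[0, :] = np.arange(len(hyp) + 1)
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            d[i, j] = min(d[i - 1, j] + 1, d[i, j - 1] + 1, d[i - 1, j - 1] + cost)
    return d[len(ref), len(hyp)]

def greedy_ctc_error_rate(labels, label_lens, probs):
    """labels: (N, max_label_len) int array; label_lens: (N,) true lengths;
    probs: (N, T, classes) softmax outputs of the CTC branch.
    Returns the token-level error rate over the label units."""
    errors, tokens = 0, 0
    for y, n, p in zip(labels, label_lens, probs):
        best = np.argmax(p, axis=-1)
        # collapse repeated symbols, then remove blanks
        prev = np.r_[BLANK_ID - 1, best[:-1]]
        hyp = [int(s) for s, q in zip(best, prev) if s != q and s != BLANK_ID]
        ref = [int(t) for t in y[:int(n)]]
        errors += _edit_distance(ref, hyp)
        tokens += len(ref)
    return errors / max(tokens, 1)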
        % (RES_TYPE, ASR_EN, MANY_TO_ONE, METRIC_LOSS, MARGIN, BN_DIM, 1 if RAW_MODEL else 0)
MODEL_DIR = "exp/aesrc/%s/" % task
if not os.path.isdir(MODEL_DIR):
    os.mkdir(MODEL_DIR)

# file
train_file = "/disc1/AESRC2020/data/aesrc_fbank/train.scp"
dev_file = "/disc1/AESRC2020/data/aesrc_fbank/dev.scp"

# feats
FEATS = us.kaldiio.load_scp(train_file)
FEATS_DEV = us.kaldiio.load_scp(dev_file)

# list
train_lst = us.scp2key(us.read_lines(train_file))
dev_lst = us.scp2key(us.read_lines(dev_file))
N_BATCHS = len(train_lst) // BATCH_SIZE

# callbacks
lr_reducer = ReduceLROnPlateau(factor=0.3, cooldown=0, patience=1, min_lr=1e-5,
                               monitor='val_accent_labels_acc', mode='max', min_delta=0.001, verbose=1)
early_stopper = EarlyStopping(patience=3, monitor='val_accent_labels_acc', mode='max',
                              min_delta=0.001, verbose=1)
csv_logger = CSVLogger('%s/train.csv' % MODEL_DIR)

# generator
generator = us.generator_sarnet(train_lst, FEATS, BATCH_SIZE, encoder_len=ENCODER_LEN,
                                max_input_len=MAX_INPUT_LEN, max_label_len=MAX_LABEL_LEN,
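# The fragment above is cut off inside the generator call, and the training
# call itself is not shown. The helper below is a sketch of how such a batch
# generator and these callbacks are usually consumed with the Keras 2
# fit_generator API; it is an assumption about typical usage, not this
# script's verbatim code, and `dev_gen` / `fit_with_callbacks` are hypothetical.
def fit_with_callbacks(model, train_gen, dev_gen):
    return model.fit_generator(train_gen,
                               steps_per_epoch=N_BATCHS,
                               epochs=EPOCHS,
                               validation_data=dev_gen,
                               validation_steps=len(dev_lst) // BATCH_SIZE,
                               callbacks=[lr_reducer, early_stopper, csv_logger],
                               verbose=1)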
CNN = 'res18'
RAW_MODEL = '/disc1/ARNet/exp/aesrc/res18_gru2/002.h5'
LOG_FILE = '/disc1/ARNet/exp/aesrc/res18_gru2/model.csv'
MODEL_DIR = '/disc1/ARNet/exp/aesrc/res18_gru2/'

# file
train_file = "/disc1/AESRC2020/data/aesrc_fbank_sp/train.scp"
dev_file = "/disc1/AESRC2020/data/aesrc_fbank_sp/dev.scp"

# feats
FEATS = us.kaldiio.load_scp(train_file)
FEATS_DEV = us.kaldiio.load_scp(dev_file)

# list
train_lst = us.limit_time_utts(
    us.limit_trans_utts(us.scp2key(us.read_lines(train_file)), us.AESRC_TRANS_IDS, MAX_LABEL_LEN),
    us.AESRC_UTT2FRAMES, MAX_INPUT_LEN)
dev_lst = us.scp2key(us.read_lines(dev_file))
dev_lst = random.sample(dev_lst, 1000)
N_BATCHS = len(train_lst) // BATCH_SIZE

# callbacks
lr_reducer = ReduceLROnPlateau(factor=0.5, cooldown=0, patience=0, min_lr=0.5e-6,
                               monitor='accent_labels_acc', mode='max', min_delta=0.5, verbose=1)
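# us.limit_trans_utts / us.limit_time_utts are repo helpers whose bodies are
# not shown here. Judging from their arguments, they drop utterances whose
# transcript length or frame count exceeds the fixed model sizes. The function
# below is an illustrative stand-in under that assumption, not the repo's code.
def filter_utts_by_length(utts, trans_ids, max_label_len, utt2frames, max_input_len):
    """Keep utterances whose label sequence and feature length both fit."""
    kept = []
    for utt in utts:
        if utt not in trans_ids or utt not in utt2frames:
            continue  # skip utterances with missing transcript or duration info
        if len(trans_ids[utt]) <= max_label_len and utt2frames[utt] <= max_input_len:
            kept.append(utt)
    return kept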
7: "<RU>" } BPE_CLASSES = 1000 ACCENT_CLASSES = 8 MAX_INPUT_LEN = 1200 ENCODER_LEN = 114 MAX_LABEL_LEN = 72 FEAT_DIM = 80 EPOCHS = 10 BATCH_SIZE = 32 RAW_MODEL = "/disc1/ARNet/exp/aesrc/007.h5" dev_file = "/disc1/AESRC2020/data/aesrc_fbank_sp/dev.scp" FEATS_DEV = us.kaldiio.load_scp(dev_file) dev_lst = us.scp2key(us.read_lines(dev_file)) # dev_lst = random.sample(dev_lst,100) dev_data = us.load_ctc_accent(dev_lst, FEATS_DEV, encoder_len=ENCODER_LEN, max_input_len=MAX_INPUT_LEN, max_label_len=MAX_LABEL_LEN, trans_ids=us.AESRC_TRANS_IDS, accent_classes=ACCENT_CLASSES, accent_dct=us.AESRC_ACCENT, accent_ids=us.AESRC_ACCENT2INT) print("==== test ====") model = mdl.model_ctc_accent(shapes=(MAX_INPUT_LEN, FEAT_DIM, 1), accent_classes=ACCENT_CLASSES, bpe_classes=BPE_CLASSES,