def test_utt2spk(self, prep_org_data):
    """Check speaker filtering when building the BKW corpus.

    The corpus is built twice: once restricted to a single named speaker
    and once with no speaker restriction. Each time the number of speakers
    and the length of ``get_train_fns()`` are compared against the
    module-level expectations.

    Args:
        prep_org_data: Not used in the body; presumably a pytest fixture
            requested so the source data is prepared first -- confirm.
    """
    # NOTE(review): test_deterministic_2 indexes ``get_train_fns()[0]``
    # before using the result; confirm that ``len()`` of the raw return
    # value is really the utterance count these assertions intend.
    single_speaker_corpus = bkw.create_corpus(
        tgt_dir=tgt_dir,
        speakers=["Mark Djandiomerr"],
    )
    assert len(single_speaker_corpus.speakers) == 1
    assert len(single_speaker_corpus.get_train_fns()) < NUM_UTTERS / 2

    unrestricted_corpus = bkw.create_corpus(tgt_dir=tgt_dir)
    assert len(unrestricted_corpus.speakers) == NUM_SPEAKERS
    assert len(unrestricted_corpus.get_train_fns()) == NUM_UTTERS
def test_deterministic_2(self, prep_org_data):
    """Check that rebuilding the corpus reproduces the same data split.

    A first corpus is created, its train/valid/test prefix files are
    deleted from disk, and a second corpus is created from the same target
    directory. Both corpora must then agree on their utterances and on the
    first element of each of the train, valid and test file-name results.

    Args:
        prep_org_data: Not used in the body; presumably a pytest fixture
            requested so the source data is prepared first -- confirm.
    """
    corp_1 = bkw.create_corpus(tgt_dir=tgt_dir)

    # Remove the prefix files before the second corpus is built.
    for prefix_fn in (
        corp_1.train_prefix_fn,
        corp_1.valid_prefix_fn,
        corp_1.test_prefix_fn,
    ):
        os.remove(str(prefix_fn))

    corp_2 = bkw.create_corpus(tgt_dir=tgt_dir)

    # Identity comparison with None (PEP 8); ``!= None`` can be hijacked by
    # a custom ``__ne__`` and is flagged by linters.
    assert corp_1.utterances is not None
    assert corp_1.utterances == corp_2.utterances
    assert len(corp_1.utterances) == NUM_UTTERS

    # Compare as sets so that ordering differences are ignored.
    assert set(corp_1.get_train_fns()[0]) == set(corp_2.get_train_fns()[0])
    assert set(corp_1.get_valid_fns()[0]) == set(corp_2.get_valid_fns()[0])
    assert set(corp_1.get_test_fns()[0]) == set(corp_2.get_test_fns()[0])
def train_bkw(num_layers: int) -> None:
    """Train an ``rnn_ctc.Model`` on the BKW test corpus.

    Prepares the experiment directory from ``config.TEST_EXP_PATH``, builds
    the corpus under ``config.TEST_DATA_PATH / "bkw"``, wraps it in a
    ``CorpusReader`` and trains a model with a hidden size of 250 for at
    least 40 epochs.

    Args:
        num_layers: Forwarded to the model as its ``num_layers`` argument.
    """
    # The experiment directory is prepared first, before the corpus is built.
    experiment_dir = prep_exp_dir(directory=config.TEST_EXP_PATH)

    corpus_dir = Path(config.TEST_DATA_PATH) / "bkw"
    corpus = bkw.create_corpus(tgt_dir=corpus_dir)
    reader = CorpusReader(corpus)

    network = rnn_ctc.Model(
        experiment_dir,
        reader,
        num_layers=num_layers,
        hidden_size=250,
    )
    network.train(min_epochs=40)
def preprocessed_corpus(prep_org_data):
    """Build the BKW corpus up front for the tests that rely on it.

    Args:
        prep_org_data: Not used in the body; presumably a pytest fixture
            requested so the source data is prepared first -- confirm.

    Returns:
        The corpus object produced by ``bkw.create_corpus`` for ``tgt_dir``.
    """
    corpus = bkw.create_corpus(tgt_dir=tgt_dir)
    return corpus