temp_tags = [] return transcripts, tags if __name__ == '__main__': torch.manual_seed(0) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # test Parliament features_test = prepare_data.load_features_combined('../../TSD/augmented_labels/data/normalized/features/test.npy') target_test = prepare_data.load_transcripts('../../TSD/augmented_labels/data/normalized/augmented/parliament/test.txt') # compare againt conventional NER #features_test = prepare_data.load_features_combined('../augmented_labels/data/normalized/features/test.npy') #target_test = prepare_data.load_transcripts('output/parliament/e2e_asr_combined.txt') #tags_test = prepare_data.load_tags('output/parliament/conventional_ner.txt') features_test = features_test[:50] target_test = target_test[:50] print('Loading embeddings...') embeddings = fasttext.load_model('weights/embeddings/cc.fi.300.bin') print('Done...')
# NOTE(review): this fragment starts part-way through a data-loading section;
# the enclosing block header (if any) is outside the visible chunk, so the
# statements are written at column 0 -- re-indent to match the enclosing scope.
#target_train = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/swedish/train.txt')
#features_dev = prepare_data.load_features_combined('../augmented_labels/data/normalized/features/swedish/dev.npy')
#target_dev = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/swedish/dev.txt')

# LibriSpeech ASR data
#features_train = prepare_data.load_features('../augmented_labels/data/normalized/features/libri/train')
#target_train = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/libri/train.txt')
#features_dev = prepare_data.load_features_combined('../augmented_labels/data/normalized/features/libri/dev.npy')
#target_dev = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/libri/dev.txt')

# LibriSpeech data
# Same feature paths as the commented-out ASR variant above; the transcripts
# are read from the augmented/ directory instead of transcripts/.
features_train = prepare_data.load_features(
    '../augmented_labels/data/normalized/features/libri/train')
target_train = prepare_data.load_transcripts(
    '../augmented_labels/data/normalized/augmented/libri/train.txt')

features_dev = prepare_data.load_features_combined(
    '../augmented_labels/data/normalized/features/libri/dev.npy')
target_dev = prepare_data.load_transcripts(
    '../augmented_labels/data/normalized/augmented/libri/dev.txt')
print('Done...')

print('Loading embeddings...')
#embeddings = fasttext.load_model('weights/embeddings/cc.sv.300.bin')
embeddings = fasttext.load_model(
    'weights/embeddings/crawl-300d-2M-subword.bin')
print('Done...')

# generate index dictionaries
if __name__ == '__main__':
    # Visible part of the script entry point: seed torch, pick a device, load
    # the LibriSpeech test data, the fastText embeddings, the tag vocabulary
    # and the pickled character index.
    torch.manual_seed(0)
    # First CUDA device when one is available, CPU otherwise.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # test English-Gold
    #features_train = prepare_data.load_features_combined('../../TSD/augmented_labels/data/normalized/features/eng_ood/test.npy')
    #target_train = prepare_data.load_transcripts('../../TSD/augmented_labels/data/normalized/transcripts/eng_ood/test.txt')
    #tags_train = prepare_data.load_tags('../../TSD/augmented_labels/data/normalized/ner/eng_ood/ner_test.txt')

    # test LibriSpeech
    features_test = prepare_data.load_features_combined(
        '../../TSD/augmented_labels/data/normalized/features/libri/test_clean.npy'
    )
    target_test = prepare_data.load_transcripts(
        '../../TSD/augmented_labels/data/normalized/augmented/libri/test_clean.txt'
    )

    # Keep only the first 50 entries of the features and of the targets.
    features_test = features_test[:50]
    target_test = target_test[:50]

    print('Loading embeddings...')
    embeddings = fasttext.load_model(
        'weights/embeddings/crawl-300d-2M-subword.bin')
    print('Done...')

    # Tag vocabulary. Indices start at 1; index 0 is presumably reserved
    # (e.g. for padding) -- confirm against the model code.
    tag2idx = {'O': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}
    # Derive the inverse mapping so the two tables cannot drift apart.
    idx2tag = {idx: tag for tag, idx in tag2idx.items()}

    # NOTE: pickle.load can execute arbitrary code while unpickling; only load
    # weight files that come from a trusted source.
    with open('weights/char2idx_libri.pkl', 'rb') as f:
        char2idx = pickle.load(f)
print(device) # load features and labels print('Loading data..') # Parliament data ASR #features_train = prepare_data.load_features('data/normalized/features/train') #target_train = prepare_data.load_transcripts('data/normalized/transcripts/train.txt') #features_dev = prepare_data.load_features_combined('data/normalized/features/dev.npy') #target_dev = prepare_data.load_transcripts('data/normalized/transcripts/dev.txt') # Parlaiament data augmented features_train = prepare_data.load_features('data/normalized/features/train') target_train = prepare_data.load_transcripts( 'data/normalized/augmented/parliament/train.txt') features_dev = prepare_data.load_features_combined( 'data/normalized/features/dev.npy') target_dev = prepare_data.load_transcripts( 'data/normalized/augmented/parliament/dev.txt') print('Done...') print('Loading embeddings...') embeddings = fasttext.load_model('weights/embeddings/cc.fi.300.bin') print('Done...') with open('weights/char2idx_augmented.pkl', 'rb') as f: char2idx = pickle.load(f) with open('weights/idx2char_augmented.pkl', 'rb') as f: