コード例 #1
0
        temp_tags = []

    return transcripts, tags






if __name__ == '__main__':
    # Seed torch's RNG with a fixed value before anything else runs.
    torch.manual_seed(0)

    # Use the first CUDA device when one is available, otherwise the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    # test Parliament
    features_test = prepare_data.load_features_combined(
        '../../TSD/augmented_labels/data/normalized/features/test.npy'
    )
    target_test = prepare_data.load_transcripts(
        '../../TSD/augmented_labels/data/normalized/augmented/parliament/test.txt'
    )

    # compare against conventional NER
    #features_test = prepare_data.load_features_combined('../augmented_labels/data/normalized/features/test.npy')
    #target_test = prepare_data.load_transcripts('output/parliament/e2e_asr_combined.txt')
    #tags_test = prepare_data.load_tags('output/parliament/conventional_ner.txt')

    # Keep only the first 50 entries of each. This stays below the
    # commented-out alternative above so that it is truncated too when
    # it is switched on.
    features_test, target_test = features_test[:50], target_test[:50]

    # Load the fastText model used for word embeddings.
    print('Loading embeddings...')
    embeddings = fasttext.load_model('weights/embeddings/cc.fi.300.bin')
    print('Done...')

コード例 #2
0
#target_train = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/swedish/train.txt')

#features_dev = prepare_data.load_features_combined('../augmented_labels/data/normalized/features/swedish/dev.npy')
#target_dev = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/swedish/dev.txt')

# LibriSpeech ASR data
#features_train = prepare_data.load_features('../augmented_labels/data/normalized/features/libri/train')
#target_train = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/libri/train.txt')

#features_dev = prepare_data.load_features_combined('../augmented_labels/data/normalized/features/libri/dev.npy')
#target_dev = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/libri/dev.txt')

# LibriSpeech data
# Training features are read with `load_features` from a path with no file
# extension (presumably a directory of per-utterance files -- confirm in
# prepare_data), while the dev features come from one combined .npy file.
# The transcripts for both splits are read from the `augmented/libri`
# directory.
features_train = prepare_data.load_features(
    '../augmented_labels/data/normalized/features/libri/train')
target_train = prepare_data.load_transcripts(
    '../augmented_labels/data/normalized/augmented/libri/train.txt')

features_dev = prepare_data.load_features_combined(
    '../augmented_labels/data/normalized/features/libri/dev.npy')
target_dev = prepare_data.load_transcripts(
    '../augmented_labels/data/normalized/augmented/libri/dev.txt')

print('Done...')

# Load the fastText model used for word embeddings. The commented-out line
# is an alternative model file (presumably Swedish, going by the `sv` in
# its name -- confirm); the active one is crawl-300d-2M-subword.
print('Loading embeddings...')
#embeddings = fasttext.load_model('weights/embeddings/cc.sv.300.bin')
embeddings = fasttext.load_model(
    'weights/embeddings/crawl-300d-2M-subword.bin')
print('Done...')

# generate index dictionaries
コード例 #3
0
if __name__ == '__main__':
    # Seed torch's RNG with a fixed value before anything else runs.
    torch.manual_seed(0)

    # Use the first CUDA device when one is available, otherwise the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    # test English-Gold
    #features_train = prepare_data.load_features_combined('../../TSD/augmented_labels/data/normalized/features/eng_ood/test.npy')
    #target_train = prepare_data.load_transcripts('../../TSD/augmented_labels/data/normalized/transcripts/eng_ood/test.txt')
    #tags_train = prepare_data.load_tags('../../TSD/augmented_labels/data/normalized/ner/eng_ood/ner_test.txt')

    # test LibriSpeech
    features_test = prepare_data.load_features_combined(
        '../../TSD/augmented_labels/data/normalized/features/libri/test_clean.npy'
    )
    target_test = prepare_data.load_transcripts(
        '../../TSD/augmented_labels/data/normalized/augmented/libri/test_clean.txt'
    )

    # Keep only the first 50 entries of each.
    features_test, target_test = features_test[:50], target_test[:50]

    # Load the fastText model used for word embeddings.
    print('Loading embeddings...')
    embeddings = fasttext.load_model('weights/embeddings/crawl-300d-2M-subword.bin')
    print('Done...')

    # Tag <-> index lookup tables; idx2tag is the exact inverse of tag2idx.
    tag2idx = {'O': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}
    idx2tag = {index: tag for tag, index in tag2idx.items()}

    # Restore the pickled char2idx object saved for the LibriSpeech setup.
    with open('weights/char2idx_libri.pkl', 'rb') as f:
        char2idx = pickle.load(f)
コード例 #4
0
# Report which device was selected.
print(device)

# load features and labels
print('Loading data..')

# Parliament data ASR
# (alternative configuration, disabled: same feature paths as below, but
# the targets come from the `transcripts` directory)
#features_train = prepare_data.load_features('data/normalized/features/train')
#target_train = prepare_data.load_transcripts('data/normalized/transcripts/train.txt')

#features_dev = prepare_data.load_features_combined('data/normalized/features/dev.npy')
#target_dev = prepare_data.load_transcripts('data/normalized/transcripts/dev.txt')

# Parliament data augmented
# Active configuration: targets are read from `augmented/parliament`.
# Training features are read with `load_features` from a path with no file
# extension (presumably a directory -- confirm in prepare_data), while the
# dev features come from one combined .npy file.
features_train = prepare_data.load_features('data/normalized/features/train')
target_train = prepare_data.load_transcripts(
    'data/normalized/augmented/parliament/train.txt')

features_dev = prepare_data.load_features_combined(
    'data/normalized/features/dev.npy')
target_dev = prepare_data.load_transcripts(
    'data/normalized/augmented/parliament/dev.txt')

print('Done...')

# Load the fastText model used for word embeddings (cc.fi.300 --
# presumably the Finnish vectors, going by the file name; confirm).
print('Loading embeddings...')
embeddings = fasttext.load_model('weights/embeddings/cc.fi.300.bin')
print('Done...')

# Restore the pickled char2idx object saved for the augmented setup.
# NOTE(review): pickle.load can execute arbitrary code from the file, so
# this path must only ever point at a trusted, locally produced artifact.
with open('weights/char2idx_augmented.pkl', 'rb') as f:
    char2idx = pickle.load(f)
with open('weights/idx2char_augmented.pkl', 'rb') as f: