예제 #1
0
                    '_' + lang + '.pkl', 'wb'))
    # Write the kmeans object to
    # '<num_phones>_<lang>/kmeans_<num_phones>_<lang>.pkl'. The with-block
    # guarantees the file handle is flushed and closed, even if pickling
    # raises (the bare open() used before was never closed).
    kmeans_path = str(num_phones) + '_' + lang + \
        '/kmeans_' + str(num_phones) + \
        '_' + lang + '.pkl'
    with open(kmeans_path, 'wb') as kmeans_file:
        pickle.dump(kmeans, kmeans_file)


# Load every .wav file (stimuli) found under WAV_FOLDER, keyed by file name.
all_features = {}
for root, dirs, files in os.walk(WAV_FOLDER):
    for wav_file in files:
        if wav_file.endswith(".wav"):
            # os.walk yields directory paths without a trailing separator,
            # so the parts must be joined rather than concatenated.
            audio = Audio.load(os.path.join(root, wav_file))
            all_features[wav_file] = audio

# Extract bottleneck features for all loaded audio in one pass.
processor = BottleneckProcessor(weights='BabelMulti')
corpus_features = processor.process_all(all_features)

# Gather the .data array of every features object.
open_feats = [corpus_features[key].data for key in corpus_features]

# Stack all arrays along the first axis. Concatenating the list directly
# avoids the intermediate np.asarray(), which builds a ragged object array
# (an error on recent NumPy) when the utterances differ in length.
flattened_feats = np.concatenate(open_feats, axis=0)
# idx = (3,6,9)
# flattened_feats = np.insert(flattened_feats, idx, 0, axis=1)
# Parenthesised form prints the same text under Python 2 and Python 3.
print(flattened_feats.shape)
# For each entry of NUM_PHONES_LIST, store softmax_stim at the path
# to_folder + utterance_n.
# NOTE(review): NUM_PHONES is never referenced in the visible body and the
# body is indented by eight spaces -- statements between the loop header and
# np.save() may be missing here; confirm against the full script.
for NUM_PHONES in list(NUM_PHONES_LIST):
        # clustered = kmeans_corpus.fit_predict(stim_pqcode)
        # np.save appends '.npy' to the file name if it is not already there.
        np.save(to_folder + utterance_n, softmax_stim)
        # Disabled alternative: export the same data as CSV through pandas.
        #dist_df = pd.DataFrame(softmax_stim)
        #dist_df.to_csv(to_folder + utterance_n + '.csv')


# Load every .wav file (stimuli) found under WAV_FOLDER, keyed by file name.
all_features = {}
for root, dirs, files in os.walk(WAV_FOLDER):
    for wav_file in files:
        if wav_file.endswith(".wav"):
            # os.walk yields directory paths without a trailing separator,
            # so the parts must be joined rather than concatenated.
            audio = Audio.load(os.path.join(root, wav_file))
            all_features[wav_file] = audio

# Extract bottleneck features for all loaded stimuli in one pass.
processor = BottleneckProcessor(weights='BabelMulti')
stim_features = processor.process_all(all_features)

# dict_feats = {}
# for key in stim_features:
#     # access every features object
#     feats = stim_features[key].data
#     # put them all together
#     dict_feats[key] = feats

# Visit every sub-directory listed by pickles_folder, ignoring the one named
# 'softmax_dist'.
# NOTE(review): pickles_folder is unpacked as (root, dirs, files) triples, so
# it is presumably an os.walk() result -- confirm where it is defined.
for root_p, dirs_p, files_ in pickles_folder:
    for dir_p in dirs_p:
        if dir_p == 'softmax_dist':
            pass
        else:
            # NOTE(review): plain concatenation only yields a valid path if
            # root_p already ends with a path separator -- confirm, or switch
            # to os.path.join.
            PATH = root_p + dir_p
            # Text of the directory name before its first underscore
            # (presumably the number of phones -- TODO confirm).
            NUM = dir_p.split('_')[0]