Example #1
__author__ = "Thomas Pellegrini - 2020"

import pickle

from clotho_dataloader.data_handling.my_clotho_data_loader import create_dictionaries
# modify_vocab and read_csv_prediction_file are further project-local helpers;
# their module is not shown in this excerpt

data_dir = '../clotho-dataset/data'

LETTER_LIST = pickle.load(open(data_dir + "/characters_list.p", "rb"))
LETTER_FREQ = pickle.load(open(data_dir + "/characters_frequencies.p", "rb"))
# ['<pad>', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', \
#                'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-', "'", '.', '_', '+', ' ','<sos>','<eos>']
WORD_LIST = pickle.load(open(data_dir + "/words_list.p", "rb"))  # 4367 word types
WORD_FREQ = pickle.load(open(data_dir + "/words_frequencies.p", "rb"))

# WORD_COUNT_THRESHOLD = 10
WORD_COUNT_THRESHOLD = None
print("\n !!! WORD_COUNT_THRESHOLD = ", WORD_COUNT_THRESHOLD, " !!!\n")

letter2index, index2letter = create_dictionaries(LETTER_LIST)
word2index, index2word = create_dictionaries(WORD_LIST)
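# For reference, a minimal sketch of the contract create_dictionaries is
# assumed to satisfy (the real implementation lives in
# clotho_dataloader.data_handling.my_clotho_data_loader; this hypothetical
# stand-in only illustrates the symbol<->index mapping):
def _create_dictionaries_sketch(symbol_list):
    # map each symbol to its position in the list, and back
    symbol2index = {s: i for i, s in enumerate(symbol_list)}
    index2symbol = {i: s for i, s in enumerate(symbol_list)}
    return symbol2index, index2symbol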

if WORD_COUNT_THRESHOLD is not None:
    print("WORD_COUNT_THRESHOLD =", WORD_COUNT_THRESHOLD)
    word2index, index2word, WORD_LIST, mapping_index_dict = modify_vocab(WORD_LIST, WORD_FREQ, WORD_COUNT_THRESHOLD)
else:
    mapping_index_dict = None
print("Vocab:", len(WORD_LIST))
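# A minimal sketch of what modify_vocab is assumed to do, based only on its
# call site above: drop words whose count falls below the threshold, rebuild
# the index dictionaries, and return a mapping from old to new word indices.
# Everything here (the '<unk>' fallback, WORD_FREQ being a word->count dict)
# is an assumption, not the project's actual implementation:
def _modify_vocab_sketch(word_list, word_freq, count_threshold):
    kept = [w for w in word_list if word_freq.get(w, 0) >= count_threshold]
    if '<unk>' not in kept:
        kept.append('<unk>')  # bucket for pruned rare words (assumed)
    word2index = {w: i for i, w in enumerate(kept)}
    index2word = {i: w for i, w in enumerate(kept)}
    # old index -> new index; pruned words collapse onto '<unk>'
    mapping_index_dict = {old_i: word2index.get(w, word2index['<unk>'])
                          for old_i, w in enumerate(word_list)}
    return word2index, index2word, kept, mapping_index_dict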


# fpath1 = 'checkpoints/seq2seq/clotho/best_model/4367_red_2_2__128_64_0.98_False_False_0.0005_1e-06/val_predicted_captions_greedy.csv'
fpath1 = 'checkpoints/seq2seq/clotho/best_model/4367_red_2_2__128_64_0.98_False_False_0.0005_1e-06/val_predicted_captions_beamsearch_lm_0.50_2g.csv'
fpath2 = 'checkpoints/seq2seq/clotho/best_model/4367_red_2_2__128_64_0.98_False_False_0.0005_1e-06/val_predicted_captions_beamsearch_nolm_bs25_alpha_12.csv'

wav_id_list, captions_dict_pred1 = read_csv_prediction_file(fpath1, add_sos_eos=False)
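# A minimal sketch of read_csv_prediction_file, assuming one
# "file_name,caption" row per clip (the CSV layout and column names are
# assumptions; only the return contract is visible from the call above):
import csv

def _read_csv_prediction_file_sketch(fpath, add_sos_eos=False):
    wav_id_list, captions_dict = [], {}
    with open(fpath, newline='') as fin:
        for row in csv.DictReader(fin):
            caption = row['caption']
            if add_sos_eos:
                # optionally wrap the caption with sentence delimiters
                caption = '<sos> ' + caption + ' <eos>'
            wav_id_list.append(row['file_name'])
            captions_dict[row['file_name']] = caption
    return wav_id_list, captions_dict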
Example #2
# host_name = socket.gethostname()
# print(host_name)

from pynlpl.lm import lm

from clotho_dataloader.data_handling.my_clotho_data_loader import create_dictionaries

import glob
import pickle

from numpy import load as np_load  # used below to read the .npy caption files

__author__ = "Thomas Pellegrini - 2020"

data_dir = '../clotho-dataset/data'
WORD_LIST = pickle.load(open(data_dir + "/words_list.p", "rb"))  # 4367 word types
WORD_FREQ = pickle.load(open(data_dir + "/words_frequencies.p", "rb"))

word2index, index2word = create_dictionaries(WORD_LIST)

def gather_captions_to_text(caption_dir, out_fpath):
    """Write every predicted caption found in caption_dir to out_fpath, one per line."""
    fh = open(out_fpath, 'wt')
    i = 0
    for npy_fpath in glob.glob(caption_dir + '/*.npy'):
        # each .npy file holds a record array with the predicted word indices
        recarray = np_load(str(npy_fpath), allow_pickle=True)
        word_indices_list = recarray['words_ind'][0]
        # print(word_indices_list)
        word_str_list = [index2word[w] for w in word_indices_list]
        # word_str = ' '.join(word_str_list).replace('<sos> ', '')
        word_str = ' '.join(word_str_list)
        # print(npy_fpath, word_str)
        fh.write(word_str + '\n')
        i += 1
    fh.close()
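
# Example call (both paths are hypothetical, for illustration only):
if __name__ == '__main__':
    gather_captions_to_text('checkpoints/seq2seq/clotho/val_captions',
                            'val_predicted_captions.txt')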