# --- DailyDialog: pad, persist per-split arrays, and (train only) build vocab ---
# Pad every sentence to `max_sent_len` tokens and every conversation to
# `max_conv_len` sentences; `sentence_length` holds the true (pre-padding)
# token counts per sentence.
sentences, sentence_length = pad_sentences(
    conversations,
    max_sentence_length=max_sent_len,
    max_conversation_length=max_conv_len)

print('Saving preprocessed data at', split_data_dir)
to_pickle(conversation_length,
          split_data_dir.joinpath('conversation_length.pkl'))
to_pickle(sentences, split_data_dir.joinpath('sentences.pkl'))
to_pickle(sentence_length, split_data_dir.joinpath('sentence_length.pkl'))
to_pickle(emotions, split_data_dir.joinpath('labels.pkl'))

if split_type == 'train':
    # The vocabulary and GloVe-initialised embedding matrix are built from
    # the training split only; validation/test reuse the pickled files.
    print('Save Vocabulary...')
    vocab = Vocab(tokenizer)
    vocab.add_dataframe(conversations)
    # `assert` is stripped under `python -O`; validate the path explicitly.
    if not GLOVE_DIR:
        raise ValueError('GLOVE_DIR must be set before building the vocabulary')
    vocab.update(GLOVE_DIR, max_size=max_vocab_size, min_freq=min_freq)
    print('Vocabulary size: ', len(vocab))
    vocab.pickle(dailydialog_dir.joinpath('word2id.pkl'),
                 dailydialog_dir.joinpath('id2word.pkl'),
                 dailydialog_dir.joinpath('word_emb.pkl'))

print('Done!')
# --- IEMOCAP: pad, persist per-split arrays, and (train only) build vocab ---
# Pad every sentence to `max_sent_len` tokens and every conversation to
# `max_conv_len` sentences; `sentence_length` holds the true (pre-padding)
# token counts per sentence.
sentences, sentence_length = pad_sentences(
    conv_sentences,
    max_sentence_length=max_sent_len,
    max_conversation_length=max_conv_len)

# Sanity check: every conversation must carry exactly one label per sentence.
# (Explicit raise instead of `assert`, which is stripped under `python -O`.)
for conv_len, labels in zip(conversation_length, conv_labels):
    if conv_len != len(labels):
        raise ValueError(
            'conversation length {} does not match label count {}'.format(
                conv_len, len(labels)))

print('Saving preprocessed data at', split_data_dir)
to_pickle(conversation_length,
          split_data_dir.joinpath('conversation_length.pkl'))
to_pickle(sentences, split_data_dir.joinpath('sentences.pkl'))
to_pickle(conv_labels, split_data_dir.joinpath('labels.pkl'))
to_pickle(sentence_length,
          split_data_dir.joinpath('sentence_length.pkl'))
# IEMOCAP additionally records which source video each conversation came from.
to_pickle(iemocap.vids[split_type], split_data_dir.joinpath('video_id.pkl'))

if split_type == 'train':
    # The vocabulary and GloVe-initialised embedding matrix are built from
    # the training split only; validation/test reuse the pickled files.
    print('Save Vocabulary...')
    vocab = Vocab(tokenizer)
    vocab.add_dataframe(conv_sentences)
    # Validate explicitly rather than via `assert` (stripped under `-O`).
    if not GLOVE_DIR:
        raise ValueError('GLOVE_DIR must be set before building the vocabulary')
    vocab.update(GLOVE_DIR, max_size=max_vocab_size, min_freq=min_freq)
    print('Vocabulary size: ', len(vocab))
    vocab.pickle(iemocap_dir.joinpath('word2id.pkl'),
                 iemocap_dir.joinpath('id2word.pkl'),
                 iemocap_dir.joinpath('word_emb.pkl'))