def create_batches(self, contexts):
    """Convert (target, context) word pairs into parallel lists of indices.

    Each element of ``contexts`` is unpacked into a target and a context
    item, and both are looked up with ``Vocabulary.getIndex``. A pair is
    kept only when both lookups return something other than ``None``.

    Args:
        contexts: Iterable of 2-item ``(target, context)`` entries.

    Returns:
        A ``(batch_data, label_data)`` tuple of equal-length lists, where
        ``batch_data[i]`` is the target index and ``label_data[i]`` is the
        matching context index of the i-th retained pair.
    """
    targets = []
    labels = []
    for target_word, context_word in contexts:
        # Both lookups are performed unconditionally, in this order.
        t_idx = Vocabulary.getIndex(target_word)
        c_idx = Vocabulary.getIndex(context_word)
        if t_idx is None or c_idx is None:
            # Drop the whole pair when either side has no index.
            continue
        targets.append(t_idx)
        labels.append(c_idx)
    return targets, labels
def createDataFrameOutput(self):
    """Attach one summed embedding vector per text to ``self.dataframe``.

    For every entry in ``self.texts``, the embedding rows of its tokens
    (those for which ``Vocabulary.getIndex`` returns a non-``None`` index)
    are added together, starting from a zero vector of length
    ``self.embedding_size``. The resulting vectors are stored in the
    ``'VECTOR'`` column of ``self.dataframe``.

    Returns:
        ``self.dataframe``, modified in place with the ``'VECTOR'`` column.
    """
    # Materialise the embedding matrix once, before the loops.
    # NOTE(review): presumably a TensorFlow variable whose eval() needs an
    # active default session -- confirm against the caller.
    lookup_table = self.embeddings.eval()

    text_vectors = []
    for text in self.texts:
        summed = np.zeros([self.embedding_size])
        for token in text.tokens:
            row = Vocabulary.getIndex(token)
            if row is None:
                # Tokens without a vocabulary index contribute nothing.
                continue
            summed += lookup_table[row, :]
        # One vector per text, even if no token had an index (all zeros).
        text_vectors.append(summed)

    vector_frame = pd.DataFrame({'VECTOR': text_vectors})
    # NOTE(review): this assignment aligns on index labels; if
    # self.dataframe does not use a default 0..n-1 index, rows may end up
    # NaN -- verify how self.dataframe is built.
    self.dataframe['VECTOR'] = vector_frame['VECTOR']
    return self.dataframe