def __init__(self, texts, num_words=10000):
    """
    Build a tokenizer vocabulary from the given texts.

    :param texts: List of strings with the data-set.
    :param num_words: Max number of words to keep. Defaults to 10000,
        matching the previously hard-coded value, so existing callers
        are unaffected.
    """
    Tokenizer.__init__(self, num_words=num_words)

    # Create the vocabulary from the texts.
    self.fit_on_texts(texts)

    # Create inverse lookup from integer-tokens to words.
    self.index_to_word = dict(
        zip(self.word_index.values(), self.word_index.keys()))
def __init__(self, texts, num_words=None):
    """
    Fit a vocabulary on the data-set and build a reverse lookup table.

    :param texts: List of strings with the data-set.
    :param num_words: Max number of words to use.
    """
    Tokenizer.__init__(self, num_words=num_words)

    # Learn the word -> integer-token vocabulary from the texts.
    self.fit_on_texts(texts)

    # Reverse mapping: integer token -> word.
    self.index_to_word = {token: word
                          for word, token in self.word_index.items()}
def __init__(self, options):
    """
    Tokenizer for caption data with start/end/pad markers.

    :param options: Config object; this reads ``options.num_words``,
        ``options.temporal_length`` and ``options.caption_path`` —
        TODO confirm the full expected interface against callers.
    """
    Tokenizer.__init__(self, num_words=options.num_words)

    # Marker strings wrapped around / padded onto captions.
    # NOTE: leading/trailing spaces are significant.
    self.mark_start = 'ssss '
    self.mark_end = ' eeee'
    self.pad = ' pppp'

    self.temporal_length = options.temporal_length
    self.mode_dict = {0: 'validation', 1: 'test', 2: 'train'}

    # Load all captions and fit the vocabulary on them.
    # (Removed dead commented-out call to self.get_caption_dict.)
    self.caption_dictionary = self.get_full_caption_dict(
        options.caption_path)
    self.texts = self.create_tokenizer(self.caption_dictionary)
    self.fit_on_texts(self.texts)

    # Bidirectional lookups between integer tokens and words.
    self.index_to_word = dict(
        zip(self.word_index.values(), self.word_index.keys()))
    # word_to_index is simply a copy of word_index; the original
    # dict(zip(keys, values)) round-trip was redundant.
    self.word_to_index = dict(self.word_index)
def __init__(self, text, num_words=None):
    """
    Fit the tokenizer vocabulary on *text*.

    :param text: List of strings with the data-set.
    :param num_words: Max number of words to use (None = unlimited).
    """
    Tokenizer.__init__(self, num_words=num_words)
    self.fit_on_texts(text)

    # Inverse vocabulary: integer token -> word.
    self.index_to_word = {idx: word
                          for word, idx in self.word_index.items()}
def __init__(self, **tokenizer_params):
    """
    Thin wrapper: forward all keyword arguments unchanged to the base
    ``Tokenizer`` constructor.
    """
    Tokenizer.__init__(self, **tokenizer_params)