def __init__(self, texts, num_words=10000):
    """Fit the tokenizer vocabulary on *texts*.

    :param texts: List of strings with the data-set.
    :param num_words: Max number of words to keep in the vocabulary
        (default 10000, the value previously hard-coded here).
    """
    Tokenizer.__init__(self, num_words=num_words)

    # Create the vocabulary from the texts.
    self.fit_on_texts(texts)

    # Inverse lookup from integer-tokens to words.
    self.index_to_word = {token: word for word, token in self.word_index.items()}
Example #2
0
    def __init__(self, texts, num_words=None):
        """Build the vocabulary from *texts* and a reverse token lookup.

        :param texts: List of strings with the data-set.
        :param num_words: Max number of words to use.
        """
        Tokenizer.__init__(self, num_words=num_words)

        # Learn the vocabulary.
        self.fit_on_texts(texts)

        # Reverse mapping: integer token -> word.
        self.index_to_word = {token: word
                              for word, token in self.word_index.items()}
Example #3
0
    def __init__(self, options):
        """Set up caption markers, load captions, and fit the vocabulary.

        :param options: Config object providing ``num_words``,
            ``temporal_length`` and ``caption_path`` attributes.
        """
        Tokenizer.__init__(self, num_words=options.num_words)
        # Sentinel tokens wrapped around each caption (note deliberate spacing).
        self.mark_start = 'ssss '
        self.mark_end = ' eeee'
        self.pad = ' pppp'
        self.temporal_length = options.temporal_length
        self.mode_dict = {0: 'validation', 1: 'test', 2: 'train'}

        # Load all captions and fit the tokenizer vocabulary on them.
        self.caption_dictionary = self.get_full_caption_dict(
            options.caption_path)
        self.texts = self.create_tokenizer(self.caption_dictionary)
        self.fit_on_texts(self.texts)

        # Token -> word reverse lookup; word -> token is a plain copy of
        # word_index (the original re-zipped it back into the same mapping).
        self.index_to_word = {token: word
                              for word, token in self.word_index.items()}
        self.word_to_index = dict(self.word_index)
Example #4
0
 def __init__(self, text, num_words=None):
     """Fit the vocabulary on *text* and build a token-to-word lookup.

     :param text: Iterable of strings to fit on.
     :param num_words: Max number of words to use.
     """
     Tokenizer.__init__(self, num_words=num_words)
     self.fit_on_texts(text)
     # Reverse mapping: integer token -> word.
     self.index_to_word = {token: word
                           for word, token in self.word_index.items()}
Example #5
0
 def __init__(self, **tokenizer_params):
     # Thin wrapper: forward all keyword arguments unchanged to the base
     # Tokenizer (presumably keras.preprocessing.text.Tokenizer — confirm).
     Tokenizer.__init__(self, **tokenizer_params)