Example 1
    def __init__(self, texts, padding, reverse=False, num_words=None):
        # Call the constructor of the parent Keras Tokenizer class.
        Tokenizer.__init__(self, num_words=num_words, char_level=False)

        self.fit_on_texts(texts)

        self.index_to_words = dict(
            zip(self.word_index.values(), self.word_index.keys()))
        self.tokens = self.texts_to_sequences(texts)

        if reverse:
            for count, g in enumerate(self.tokens, 0):
                self.tokens[count] = list(reversed(g))
            truncating = 'pre'

        else:
            truncating = 'post'

        self.num_tokens = [len(g) for g in self.tokens]
        # Fixed maximum sequence length used for padding / truncation.
        self.max_tokens = 24

        self.tokens_padded = pad_sequences(self.tokens,
                                           maxlen=self.max_tokens,
                                           padding=padding,
                                           truncating=truncating)
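
All of the examples on this page show only the constructor of a wrapper class that subclasses the Keras Tokenizer. A minimal, self-contained sketch of the scaffolding they assume follows; the class name TokenizerWrap, the tensorflow.keras import paths and the sample data are illustrative assumptions, not taken from the original sources.

# Assumed scaffolding -- the snippets above and below only show __init__.
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


class TokenizerWrap(Tokenizer):
    """Wrap the Keras Tokenizer so texts are tokenized and padded on init."""

    def __init__(self, texts, padding, reverse=False, num_words=None):
        # Call the constructor of the parent Keras Tokenizer class.
        Tokenizer.__init__(self, num_words=num_words, char_level=False)
        self.fit_on_texts(texts)                      # build the vocabulary
        self.tokens = self.texts_to_sequences(texts)  # texts -> integer ids
        if reverse:
            self.tokens = [list(reversed(x)) for x in self.tokens]
        self.tokens_padded = pad_sequences(self.tokens,
                                           maxlen=24,
                                           padding=padding,
                                           truncating='pre' if reverse else 'post')


texts = ["the cat sat on the mat", "a short text"]
tokenizer = TokenizerWrap(texts, padding='pre', reverse=True)
print(tokenizer.tokens_padded.shape)  # (2, 24)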
Example 2
    def __init__(self, texts, num_words=None):
        Tokenizer.__init__(self, num_words=num_words)
        self.fit_on_texts(texts)
        self.tokens = self.texts_to_sequences(texts)
        self.tokens_length = [len(x) for x in self.tokens]
        self.max_tokens = np.mean(self.tokens_length) + \
            2 * np.std(self.tokens_length)
        self.max_tokens = int(self.max_tokens)

        self.tokens_padded = pad_sequences(
            self.tokens, maxlen=self.max_tokens, truncating='post')
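
The mean + 2 * std rule above chooses a padded length that covers most sequences while keeping the padded matrix small; sequences longer than that are truncated. A quick self-contained check of the arithmetic, with made-up sequence lengths:

# Illustration of the length heuristic; these sequence lengths are made up.
import numpy as np

tokens_length = [8, 12, 15, 9, 30, 11, 10, 14]
max_tokens = int(np.mean(tokens_length) + 2 * np.std(tokens_length))
print(max_tokens)  # 26 -> sequences longer than 26 tokens are truncated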
Example 3
    def __init__(self, texts, padding,
                 reverse=False, num_words=None):
        """
        :param texts: List of strings. This is the data-set.
        :param padding: Either 'post' or 'pre' padding.
        :param reverse: Boolean whether to reverse token-lists.
        :param num_words: Max number of words to use.
        """

        Tokenizer.__init__(self, num_words=num_words)

        # Create the vocabulary from the texts.
        self.fit_on_texts(texts)

        # Create inverse lookup from integer-tokens to words.
        self.index_to_word = dict(zip(self.word_index.values(),
                                      self.word_index.keys()))

        # Convert all texts to lists of integer-tokens.
        # Note that the sequences may have different lengths.
        self.tokens = self.texts_to_sequences(texts)

        if reverse:
            # Reverse the token-sequences.
            self.tokens = [list(reversed(x)) for x in self.tokens]
        
            # Sequences that are too long should now be truncated
            # at the beginning, which corresponds to the end of
            # the original sequences.
            truncating = 'pre'
        else:
            # Sequences that are too long should be truncated
            # at the end.
            truncating = 'post'

        # The number of integer-tokens in each sequence.
        self.num_tokens = [len(x) for x in self.tokens]

        # Max number of tokens to use in all sequences.
        # We will pad / truncate all sequences to this length.
        # This is a compromise so we save a lot of memory and
        # only have to truncate maybe 5% of all the sequences.
        self.max_tokens = np.mean(self.num_tokens) \
                          + 2 * np.std(self.num_tokens)
        self.max_tokens = int(self.max_tokens)

        # Pad / truncate all token-sequences to the given length.
        # This creates a 2-dim numpy matrix that is easier to use.
        self.tokens_padded = pad_sequences(self.tokens,
                                           maxlen=self.max_tokens,
                                           padding=padding,
                                           truncating=truncating)
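
For reference, a small self-contained illustration of how the padding and truncating arguments of pad_sequences interact; the token values are made up:

# 'pre' padding adds zeros at the front; 'post' truncating cuts from the end.
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokens = [[5, 8, 2], [3, 9, 4, 7, 1, 6]]
print(pad_sequences(tokens, maxlen=4, padding='pre', truncating='post'))
# [[0 5 8 2]
#  [3 9 4 7]]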
Example 4
    def __init__(self, texts, num_words=None):
        """
        :param texts: List of strings with the data-set.
        :param num_words: Max number of words to use.
        """

        Tokenizer.__init__(self, num_words=num_words)

        # Create the vocabulary from the texts.
        self.fit_on_texts(texts)

        # Create inverse lookup from integer-tokens to words.
        self.index_to_word = dict(
            zip(self.word_index.values(), self.word_index.keys()))
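
The index_to_word lookup built here is typically used to turn integer tokens back into text. The helper below is not part of the example; it is a hedged sketch added for illustration:

# Hypothetical helper built on the index_to_word lookup shown above.
from tensorflow.keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(num_words=None)
tokenizer.fit_on_texts(["the cat sat on the mat"])
index_to_word = dict(zip(tokenizer.word_index.values(),
                         tokenizer.word_index.keys()))


def tokens_to_string(tokens):
    """Convert a list of integer tokens back into a space-joined string."""
    # Token 0 is the padding value and has no word, so it is skipped.
    return " ".join(index_to_word[t] for t in tokens if t != 0)


print(tokens_to_string([1, 2, 3]))  # "the cat sat"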
Example 5
    def __init__(self, nt):

        Tokenizer.__init__(self)
        if nt == 3:
            self.dic = [
                a + b + c for a in 'ATCG' for b in 'ATCG' for c in 'ATCG'
            ]
        elif nt == 2:
            self.dic = [a + b for a in 'ATCG' for b in 'ATCG']
        elif nt == 1:
            self.dic = [a for a in 'ATCG']
        else:
            self.dic = []
        self.fit_on_texts(self.dic)
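
Example 5 fits the tokenizer on every possible 1-, 2- or 3-letter combination of the nucleotides A, T, C and G, so each k-mer gets a stable integer id. A self-contained sketch of the idea for nt = 3; splitting a DNA sequence into k-mers is an assumption here, not shown in the example:

# Illustration of the k-mer vocabulary idea (nt = 3); the sequence is made up.
from tensorflow.keras.preprocessing.text import Tokenizer

kmers = [a + b + c for a in 'ATCG' for b in 'ATCG' for c in 'ATCG']  # 64 3-mers
tokenizer = Tokenizer()
tokenizer.fit_on_texts(kmers)

# Split a DNA sequence into non-overlapping 3-mers (assumed preprocessing).
sequence = "ATGCGTACC"
text = " ".join(sequence[i:i + 3] for i in range(0, len(sequence), 3))
print(tokenizer.texts_to_sequences([text]))  # one integer id per 3-mer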
Example 6
    def __init__(self, texts, padding, reverse=False, num_words=None):

        Tokenizer.__init__(self, num_words=num_words)
        self.fit_on_texts(texts)
        self.index_to_word = dict(
            zip(self.word_index.values(), self.word_index.keys()))
        self.tokens = self.texts_to_sequences(texts)
        if reverse:
            self.tokens = [list(reversed(x)) for x in self.tokens]
            truncating = 'pre'
        else:
            truncating = 'post'
        self.num_tokens = [len(x) for x in self.tokens]
        self.max_tokens = np.mean(
            self.num_tokens) + 2 * np.std(self.num_tokens)
        self.max_tokens = int(self.max_tokens)
        self.tokens_padded = pad_sequences(self.tokens,
                                           maxlen=self.max_tokens,
                                           padding=padding,
                                           truncating=truncating)
Example 7
    def __init__(self, texts, num_words=None):
        Tokenizer.__init__(self, num_words=num_words)
        self.fit_on_texts(texts)
        self.index_to_word = dict(zip(self.word_index.values(),
                                      self.word_index.keys()))
Example 8
    def __init__(self, **tokenizer_params):
        Tokenizer.__init__(self, **tokenizer_params)
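
Example 8 simply forwards arbitrary keyword arguments to the Keras Tokenizer. A hedged usage sketch; the class name TokenizerWrapper and the parameter values are illustrative:

# Forwarding standard Keras Tokenizer parameters through a thin wrapper.
from tensorflow.keras.preprocessing.text import Tokenizer


class TokenizerWrapper(Tokenizer):
    def __init__(self, **tokenizer_params):
        Tokenizer.__init__(self, **tokenizer_params)


tok = TokenizerWrapper(num_words=10000, oov_token='<UNK>', lower=True)
tok.fit_on_texts(["some training text", "another line of text"])
print(tok.texts_to_sequences(["totally unknown words"]))  # unseen words map to the OOV index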