def test_filter_tokens(self):
    t = Tokenizer()
    tokens = t.tokenize(string1)  # string1 is a module-level test fixture (not shown here)
    filtered = t.filter_tokens(tokens)
    # assertEquals is a deprecated alias; use assertEqual
    self.assertEqual(filtered, [
        'microscopy', 'use', 'microscopes', 'see', 'micro', 'sized', 'objects'
    ])
def getData():
    """Tokenize every labelled file and split its vectors into train/test sets."""
    TRAIN_DATA, TRAIN_POS, TRAIN_NEG = [], 0, 0
    TEST_DATA, TEST_POS, TEST_NEG = [], 0, 0
    for i in range(1, TOTAL_FILE_COUNT + 1):
        fname = get_file_name(i)
        F = Tokenizer("labelled/" + fname + ".txt")
        F.tokenize()
        F.filter_tokens()
        # F.print_tokens()
        d, p, n = F.vectorize()
        # Every vector from a single file carries the same 'fid', so the
        # train/test decision can be made once per file instead of re-checking
        # it for the counts afterwards.
        in_train = bool(d) and int(d[0]['fid']) in train_files
        if in_train:
            TRAIN_DATA.extend(d)
            TRAIN_POS += p
            TRAIN_NEG += n
        else:
            TEST_DATA.extend(d)
            TEST_POS += p
            TEST_NEG += n
    print("Token generation completed.")
    row = '{0: <10} {1: <10} {2: <10}'
    print('Train data:')
    print(row.format("Total", "Positive", "Negative"))
    print(row.format(len(TRAIN_DATA), TRAIN_POS, TRAIN_NEG))
    print('Test data:')
    print(row.format("Total", "Positive", "Negative"))
    print(row.format(len(TEST_DATA), TEST_POS, TEST_NEG))
    return TRAIN_DATA, TEST_DATA
def getData(startIndex, endIndex):
    """Tokenize labelled files startIndex..endIndex and return all their vectors."""
    all_data = []
    all_pos = 0
    all_neg = 0
    for i in range(startIndex, endIndex + 1):
        fname = get_file_name(i)
        F = Tokenizer("labelled/" + fname + ".txt")
        F.tokenize()
        F.filter_tokens()
        # F.print_tokens()
        d, p, n = F.vectorize()
        all_data.extend(d)  # extend(), not a side-effecting list comprehension
        all_pos += p
        all_neg += n
    print("Token generation completed.")
    print('{0: <10} {1: <10} {2: <10}'.format("Total", "Positive", "Negative"))
    print('{0: <10} {1: <10} {2: <10}'.format(len(all_data), all_pos, all_neg))
    return all_data
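# Usage sketch (hypothetical, not in the original source). The two getData
# definitions above have different signatures and appear to come from different
# revisions; in a single module the second would shadow the first. Assuming the
# ranged variant and a module-level TOTAL_FILE_COUNT, a minimal driver:
if __name__ == "__main__":
    all_vectors = getData(1, TOTAL_FILE_COUNT)
    print("total vectors loaded:", len(all_vectors))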
def test_tokenizer_long(self):
    # Smoke test: `str` here is a module-level fixture (a long input string)
    # that shadows the builtin; the test only checks that tokenize/filter run
    # without raising.
    t = Tokenizer()
    tokens = t.tokenize(str)
    filtered = t.filter_tokens(tokens)
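# A hedged suggestion (not in the original tests): the smoke test above could
# assert basic invariants without pinning exact tokens, e.g.:
#     self.assertIsInstance(filtered, list)
#     self.assertGreater(len(filtered), 0)
# (Avoid isinstance(tok, str) checks here, since the fixture shadows the
# builtin `str` at module scope.)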