from nltk.corpus import inaugural  # requires the NLTK 'inaugural' corpus


def run():
    # Dataset generation
    cls = ['Obama', 'Trump']
    obama_sentences = inaugural.sents('2009-Obama.txt')
    trump_sentences = inaugural.sents('2017-Trump.txt')
    labelled_obama = [(s, cls[0]) for s in obama_sentences]
    labelled_trump = [(s, cls[1]) for s in trump_sentences]
    labelled_data = labelled_obama + labelled_trump

    # Pre-tokenized sample sentences from the two speeches
    trump_test = ['We', ',', 'the', 'citizens', 'of', 'America', ',', 'are', 'now',
                  'joined', 'in', 'a', 'great', 'national', 'effort', 'to', 'rebuild',
                  'our', 'country', 'and', 'restore', 'its', 'promise', 'for', 'all',
                  'of', 'our', 'people', '.']
    obama_test = ['I', 'stand', 'here', 'today', 'humbled', 'by', 'the', 'task',
                  'before', 'us', ',', 'grateful', 'for', 'the', 'trust', 'you',
                  'have', 'bestowed', ',', 'mindful', 'of', 'the', 'sacrifices',
                  'borne', 'by', 'our', 'ancestors', '.']

    # Model and tokenize are assumed to be defined elsewhere in this module
    model = Model(labelled_data, cls)
    model.train()

    # Classify user input interactively
    while True:
        inp = input("Input a string to test: ")
        doc = tokenize(inp)
        print(model.test_doc(doc))
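# The trump_test / obama_test lists above are built but never used in run();
# a hedged usage sketch that feeds them to the trained model (assuming
# Model.test_doc accepts a list of tokens, as the interactive loop suggests):
#
#     model = Model(labelled_data, cls)
#     model.train()
#     print(model.test_doc(trump_test))   # should lean 'Trump'
#     print(model.test_doc(obama_test))   # should lean 'Obama'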
def words_per_user_mapper(status_update):
    """For each word in a status update, emit (user, (word, 1))."""
    user = status_update["username"]
    for word in tokenize(status_update["text"]):
        yield (user, (word, 1))
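# A hedged sketch of a reducer that could pair with words_per_user_mapper:
# it takes one user's stream of (word, 1) pairs and yields that user's most
# frequent word. The name most_popular_word_reducer and the output shape
# (user, (word, count)) are assumptions, not taken from the snippets here.
from collections import Counter

def most_popular_word_reducer(user, words_and_counts):
    """Given (word, count) pairs for one user, yield the user's
    most frequent word together with its count."""
    word_counts = Counter()
    for word, count in words_and_counts:
        word_counts[word] += count
    word, count = word_counts.most_common(1)[0]
    yield (user, (word, count))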
from collections import Counter


def word_count_old(documents):
    """Word count not using MapReduce."""
    return Counter(word
                   for document in documents
                   for word in tokenize(document))
def wc_mapper(document): """for each word in the document, emit (word,1)""" for word in tokenize(document): yield (word, 1)
import naive_bayes


def wc_mapper(document):
    """for each word in the doc, emit (word, 1)"""
    for word in naive_bayes.tokenize(document):
        yield (word, 1)
def test_tokenize(self):
    self.assertEqual({'hello', "it's", 'time', 'for', '1000s', 'of'},
                     naive_bayes.tokenize("Hello it's time for 1000s of HELLO"))
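# The test above pins down the behavior expected of naive_bayes.tokenize:
# lowercase the text and return the *set* of word-like tokens, keeping
# digits and apostrophes. A minimal sketch consistent with that test;
# the exact regex is an assumption:
import re

def tokenize(message):
    """Lowercase, extract word-like tokens, and de-duplicate."""
    message = message.lower()
    all_words = re.findall("[a-z0-9']+", message)  # letters, digits, apostrophes
    return set(all_words)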
def word_count_mapper(document):
    """For each word in the document, emit (word, 1)."""
    for word in tokenize(document):
        yield (word, 1)
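# word_count_mapper fits the usual generic MapReduce driver; a hedged sketch
# (the map_reduce name and signature are assumptions), reusing a reducer like
# the wc_reducer sketched earlier:
from collections import defaultdict

def map_reduce(inputs, mapper, reducer):
    """Run MapReduce on the inputs using the given mapper and reducer."""
    collector = defaultdict(list)
    for input_ in inputs:
        for key, value in mapper(input_):
            collector[key].append(value)
    return [output
            for key, values in collector.items()
            for output in reducer(key, values)]

# e.g. word_counts = map_reduce(documents, word_count_mapper, wc_reducer)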