def sendData():
    """Tokenize user-submitted content into sentences and render them.

    Reads either 'input-url' or 'input-text' from the submitted form
    ('input-url' takes precedence), tokenizes it into a sentence list,
    stores that list in the session, and renders the ask.html template.
    """
    if request.form.get('input-url'):
        value = request.form.get('input-url')
        session['sentence_list'] = tokenize('url', value)
    elif request.form.get('input-text'):
        value = request.form.get('input-text')
        session['sentence_list'] = tokenize('text', value)
    # BUG FIX: if neither form field was submitted and the session has no
    # prior value, session['sentence_list'] raised KeyError. Fall back to
    # an empty list so the template always renders.
    return render_template('ask.html', sentence_list=session.get('sentence_list', []))
def updatefactalways(self, fact, reading):
    """Rewrite the fact's reading field from the user's freeform input.

    Skips facts that have no reading field at all, since the update
    would fail on them anyway.
    """
    if 'reading' not in fact:
        return
    # Identify probable pinyin in the freeform input, reformat it per the
    # current configuration rules, and write the result back to the field.
    tokens = model.tokenize(reading)
    fact['reading'] = preparetokens(self.config, [model.Word(*tokens)])
def reformataudio(self, audio): output = u"" for recognised, match in utils.regexparse( re.compile(ur"\[sound:([^\]]*)\]"), audio): if recognised: # Must be a sound tag - leave it well alone output += match.group(0) else: # Process as if this non-sound tag were a reading, in order to turn it into some tags output += generateaudio(self.notifier, self.mediamanager, self.config, [model.Word(*model.tokenize(match))])
def reformataudio(self, audio): output = u"" for recognised, match in utils.regexparse(re.compile(ur"\[sound:([^\]]*)\]"), audio): if recognised: # Must be a sound tag - leave it well alone output += match.group(0) else: # Process as if this non-sound tag were a reading, in order to turn it into some tags output += generateaudio(self.notifier, self.mediamanager, self.config, [model.Word(*model.tokenize(match))])
def reformatreading(self, reading):
    """Return the reading reformatted according to the current config rules."""
    tokens = model.tokenize(reading)
    word = model.Word(*tokens)
    return preparetokens(self.config, [word])
def unpreparetokens(flat):
    """Strip HTML from flat text and re-tokenize it into a one-Word list."""
    plain = striphtml(flat)
    tokens = model.tokenize(plain)
    return [model.Word(*tokens)]
# Training driver: load the news dataset, tokenize it, train the
# classifier from the local `model` module, then save and plot results.
plt.switch_backend('agg')  # headless backend so plotting works without a display
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers
#%matplotlib inline
import pickle
import model

texts = []
labels = []

# Load and clean the dataset (drop missing rows, reset the index).
df = pd.read_csv('../dataset/dataset.csv')
df = df.dropna()
df = df.reset_index(drop=True)

print("Information on the dataset")
print('Shape of dataset ', df.shape)
print(df.columns)
print('No. of unique news types: ', len(set(df['Type'])))
print(df.head())

texts, labels, sorted_type, indexed_type = model.df_to_list(df, texts, labels)

# Persist the label index; a context manager guarantees the handle is closed
# (the original left the file object from open() unclosed).
with open('indexed_type.sav', 'wb') as fh:
    pickle.dump(indexed_type, fh)

word_index, embedding_matrix, data, labels, sequences = model.tokenize(texts, labels)

# BUG FIX: the original wrote `model, history = model.model(...)`, rebinding
# the name `model` from the imported module to the trained network. The
# subsequent `model.save_model(model)` and `model.plot(history)` then looked
# up those functions on the Keras model object instead of the module.
trained_model, history = model.model(word_index, embedding_matrix, sorted_type, data, labels)
model.save_model(trained_model)
model.plot(history)
def predict(text: str):
    """Run the classifier over *text* and wrap the result for the API response."""
    # Tokenize once, then feed both tensors to the model.
    input_id, attention_mask = model.tokenize(text)
    label = model.predict(input_id, attention_mask)
    return {"prediction": label}