from keras.optimizers import RMSprop
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from six.moves import cPickle
import os
import string

import deepctxt_util
from deepctxt_util import DCTokenizer
import encode_category_vector

maxlen = 25  # cut texts after this number of words (among top max_features most common words)
batch_size = 100
epoch = 30

tokenizer = DCTokenizer()
print('Loading tokenizer')
tokenizer.load('./glove.6B.100d.txt')
#tokenizer.load('./glove.42B.300d.txt')
print('Done')

max_features = tokenizer.n_symbols
vocab_dim = tokenizer.vocab_dim

# initialize class name to Id mapping table
className2Id = dict()
className2Id['O'] = 0
className2Id['B_ORGANIZATION'] = 1
className2Id['I_ORGANIZATION'] = 2
className2Id['B_LOCATION'] = 3
className2Id['I_LOCATION'] = 4
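
The mapping above turns BIO tag names into integer ids. A minimal sketch of how those ids might be padded and one-hot encoded for training, assuming per-token BIO labels and the standard pad_sequences/to_categorical helpers from the same Keras generation; the sample labels are illustrative only and this is not taken from the original code:

import numpy as np
from keras.preprocessing import sequence
from keras.utils import np_utils

# Hypothetical per-token BIO labels for one tokenized sentence.
labels = ['B_ORGANIZATION', 'O', 'O', 'O', 'B_LOCATION', 'I_LOCATION']

# Map tag names to integer ids via the table above.
y_ids = [className2Id[name] for name in labels]

# Pad/truncate to maxlen, filling with the 'O' (outside) tag.
y_padded = sequence.pad_sequences([y_ids], maxlen=maxlen, value=className2Id['O'])

# One-hot encode each tag id into a len(className2Id)-dimensional vector.
y_onehot = np.array([np_utils.to_categorical(row, len(className2Id)) for row in y_padded])
print(y_onehot.shape)  # (1, 25, 5)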
Example #3
from keras.models import model_from_json
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from six.moves import cPickle

import deepctxt_util
from deepctxt_util import DCTokenizer
import encode_category_vector
import model_utils

maxlen = 25  # cut texts after this number of words (among top max_features most common words)
batch_size = 100
epoch = 50

tokenizer = DCTokenizer()
print('Loading tokenizer')
tokenizer.load('./glove.6B.100d.txt')
#tokenizer.load('./glove.42B.300d.txt')
print('Done')

max_features = tokenizer.n_symbols
vocab_dim = tokenizer.vocab_dim

# initialize class name to Id mapping table
className2Id = dict()
className2Id['O'] = 0
className2Id['B_ORGANIZATION'] = 1
className2Id['I_ORGANIZATION'] = 2
className2Id['B_LOCATION'] = 3
className2Id['I_LOCATION'] = 4
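
The Embedding/LSTM/Dense imports suggest a sequence-labeling network over the five tag classes. A minimal sketch of one plausible architecture, not the author's exact model; the TimeDistributed wrapper, the layer sizes, and the idea of seeding the Embedding layer from the tokenizer's GloVe table are all assumptions:

from keras.models import Sequential
from keras.layers.wrappers import TimeDistributed

model = Sequential()
# Vocabulary and embedding sizes come from the loaded GloVe table; a
# weights=[...] argument could seed this layer with the GloVe vectors
# if DCTokenizer exposes them (the attribute name would be a guess).
model.add(Embedding(input_dim=max_features, output_dim=vocab_dim, input_length=maxlen))
model.add(LSTM(128, return_sequences=True))  # one hidden state per token
model.add(Dropout(0.5))
# Per-token softmax over the five BIO classes defined above.
model.add(TimeDistributed(Dense(len(className2Id), activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
# model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=epoch)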
Example #4
from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from keras.preprocessing.text import Tokenizer
from six.moves import cPickle
from keras.models import model_from_json

import deepctxt_util
from deepctxt_util import DCTokenizer

maxlen = 25  # cut texts after this number of words (among top max_features most common words)

tokenizer = DCTokenizer()
print('Loading tokenizer')
tokenizer.load('./glove.6B.100d.txt')
#tokenizer.load('./glove.42B.300d.txt')
print('Done')

print('Loading model')
with open("./coarse_type_model_lstm_glove_100b.json", "r") as f:
    json_string = f.readline()
    model = model_from_json(json_string)
print('Done')

print('Compile model')
model.compile(loss='categorical_crossentropy', optimizer='adam')
print('Done')
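
The JSON file restores only the architecture, so trained weights would still need to be loaded before inference. A minimal sketch of that step plus a prediction pass; the weights file name and the dummy word-id encoding are purely illustrative, since DCTokenizer's real text-to-id method is not shown here:

import numpy as np

# Hypothetical weights file saved alongside the architecture JSON.
model.load_weights('./coarse_type_model_lstm_glove_100b_weights.h5')

# Stand-in for DCTokenizer's actual text-to-id conversion.
word_ids = [[12, 7, 3041, 25]]  # dummy GloVe vocabulary ids for one query
x = sequence.pad_sequences(word_ids, maxlen=maxlen)

# Class probabilities for the query; argmax picks the coarse type.
probs = model.predict(x, batch_size=1)
print(np.argmax(probs, axis=-1))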