from keras.optimizers import RMSprop
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from six.moves import cPickle
import os
import string

import deepctxt_util
from deepctxt_util import DCTokenizer
import encode_category_vector

maxlen = 25       # cut texts after this number of words (among top max_features most common words)
batch_size = 100
epoch = 30

tokenizer = DCTokenizer()
print('Loading tokenizer')
tokenizer.load('./glove.6B.100d.txt')
#tokenizer.load('./glove.42B.300d.txt')
print('Done')

max_features = tokenizer.n_symbols
vocab_dim = tokenizer.vocab_dim

# initialize class name to Id mapping table (BIO tagging scheme)
className2Id = dict()
className2Id['O'] = 0
className2Id['B_ORGANIZATION'] = 1
className2Id['I_ORGANIZATION'] = 2
className2Id['B_LOCATION'] = 3
className2Id['I_LOCATION'] = 4
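# encode_category_vector's implementation is not included in these snippets; the
# sketch below is a guess at the label encoding it performs on the BIO tags above.
# The helper name encode_labels and the all-zero padding for positions past the
# sentence end are assumptions, not the repo's actual API.
import numpy as np

def encode_labels(tag_sequences, className2Id, maxlen):
    """Map BIO tag sequences to one-hot arrays of shape
    (n_samples, maxlen, n_classes), truncating at maxlen tokens."""
    n_classes = len(className2Id)
    y = np.zeros((len(tag_sequences), maxlen, n_classes), dtype='float32')
    for i, tags in enumerate(tag_sequences):
        for t, tag in enumerate(tags[:maxlen]):
            y[i, t, className2Id[tag]] = 1.0
    return y

# Example: encode_labels([['B_ORGANIZATION', 'I_ORGANIZATION', 'O']],
#                        className2Id, maxlen)[0, :3].argmax(axis=-1) -> [1, 2, 0]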
from keras.models import model_from_json
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from six.moves import cPickle

import deepctxt_util
from deepctxt_util import DCTokenizer
import encode_category_vector
import model_utils

maxlen = 25       # cut texts after this number of words (among top max_features most common words)
batch_size = 100
epoch = 50

tokenizer = DCTokenizer()
print('Loading tokenizer')
tokenizer.load('./glove.6B.100d.txt')
#tokenizer.load('./glove.42B.300d.txt')
print('Done')

max_features = tokenizer.n_symbols
vocab_dim = tokenizer.vocab_dim

# initialize class name to Id mapping table (BIO tagging scheme)
className2Id = dict()
className2Id['O'] = 0
className2Id['B_ORGANIZATION'] = 1
className2Id['I_ORGANIZATION'] = 2
className2Id['B_LOCATION'] = 3
className2Id['I_LOCATION'] = 4
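# The layer stack for this tagger isn't shown in these snippets; below is a
# minimal sketch of a sequence-labeling model consistent with the imports above.
# The GloVe weight attribute (tokenizer.embedding_weights), the LSTM width, and
# the dropout rate are assumptions, not the repo's actual configuration.
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from keras.layers.wrappers import TimeDistributed
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM

n_classes = len(className2Id)
model = Sequential()
# Embedding row i holds the GloVe vector for vocabulary index i; mask_zero
# lets the LSTM skip padded positions.
model.add(Embedding(input_dim=max_features, output_dim=vocab_dim,
                    input_length=maxlen, mask_zero=True,
                    weights=[tokenizer.embedding_weights]))
model.add(LSTM(128, return_sequences=True))   # one hidden state per token
model.add(Dropout(0.5))
# per-token softmax over the BIO classes
model.add(TimeDistributed(Dense(n_classes, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')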
from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.models import Sequential, model_from_json
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from keras.preprocessing.text import Tokenizer
from six.moves import cPickle

import deepctxt_util
from deepctxt_util import DCTokenizer

maxlen = 25  # cut texts after this number of words (among top max_features most common words)

tokenizer = DCTokenizer()
print('Loading tokenizer')
tokenizer.load('./glove.6B.100d.txt')
#tokenizer.load('./glove.42B.300d.txt')
print('Done')

print('Loading model')
with open("./coarse_type_model_lstm_glove_100b.json", "r") as f:
    json_string = f.readline()
model = model_from_json(json_string)
print('Done')

print('Compile model')
model.compile(loss='categorical_crossentropy', optimizer='adam')
print('Done')
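# model_from_json restores only the architecture; the trained weights must be
# loaded before the model can predict. Everything below is an assumed usage
# sketch: the weights filename is hypothetical, and texts_to_sequences is
# assumed to exist on DCTokenizer by analogy with keras.preprocessing.text.Tokenizer.
model.load_weights('./coarse_type_model_lstm_glove_100b_weights.h5')

query = ['who founded microsoft']
X = tokenizer.texts_to_sequences(query)        # words -> GloVe vocabulary ids
X = sequence.pad_sequences(X, maxlen=maxlen)   # pad/truncate to 25 tokens
probs = model.predict(X, batch_size=1)
print(probs.argmax(axis=-1))                   # index of the predicted coarse type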