Example #1
0
# Persist the vocabulary lookup tables (word2idx, idx2word, idx2tag) as JSON
import json

# (destination path, mapping) pairs, written in this order
for vocab_path, vocab_map in (
        ('model4_js/vocab/word2idx.json', word2idx),
        ('model4_js/vocab/idx2word.json', idx2word),
        ('model4_js/vocab/idx2tag.json', idx2tag)):
    with open(vocab_path, 'w') as fp:
        json.dump(vocab_map, fp)

from keras.preprocessing.sequence import pad_sequences

# Encode every sentence as the list of word indices of its tokens
# (element 0 of each token is the key looked up in word2idx)
X = [[word2idx[token[0]] for token in sentence] for sentence in sentences]
# Bring every sentence to the same length (dataProcessor.getMaxLength()),
# filling with the index of the 'pad' word; padding='post' is requested
X = pad_sequences(
    X,
    maxlen=dataProcessor.getMaxLength(),
    padding='post',
    value=word2idx['pad'],
)

# Encode every sentence as the list of tag indices of its tokens
# (element 1 of each token is the key looked up in tag2idx)
y = [[tag2idx[token[1]] for token in sentence] for sentence in sentences]
# Bring every tag sequence to the same length (dataProcessor.getMaxLength()),
# filling with the index of the 'pad' tag; padding='post' is requested
y = pad_sequences(
    y,
    maxlen=dataProcessor.getMaxLength(),
    padding='post',
    value=tag2idx['pad'],
)

from keras.utils import to_categorical

# One-Hot encode