Exemple #1
0
def test_word_one_hot_vector_convertor():
    """Print-based smoke test for ``word_one_hot_vector_convertor``.

    Builds a ``SimpleChainEngine`` over the characters ``0-9a-f``, prints a
    few samples, then pushes the engine's dictionary and the sampled chains
    through each encode/decode pair of the convertor:

    * word <-> one-hot vector,
    * sentence <-> one-hot matrix (without and with a ``maxlen``/mask),
    * list of sentences <-> one-hot tensor plus mask.

    Nothing is asserted; the printed ``original -> decoded`` lines are meant
    to be inspected by eye.

    Every ``print`` uses the single-argument parenthesised form so the
    function parses and prints identically under Python 2 and Python 3.
    """
    from data.simple_chain_engine import SimpleChainEngine
    engine = SimpleChainEngine('0123456789abcdef')

    # One sample, then a small dataset of five (start, chain) pairs.
    s, c = engine.get_data()
    print("%s -> %s" % (s, c))
    ss, cs = engine.get_dataset(5)
    for (s, c) in zip(ss, cs):
        print("%s -> %s" % (s, c))
    print(engine.get_dictionary())

    convertor = word_one_hot_vector_convertor(engine.get_dictionary())

    # Word -> vector (cast to int8 only to keep the printout compact).
    for word in engine.get_dictionary():
        print("%s -> %s" % (word, convertor.word2one_hot_vector(word).astype('int8')))

    # Word -> vector -> word round trip.
    for word in engine.get_dictionary():
        print("%s -> %s" % (word, convertor.one_hot_vector2word(convertor.word2one_hot_vector(word).astype('int8'))))

    # Sentence -> matrix, called without a maximum length.
    matrixs = []
    for c in cs:
        matrixs.append(convertor.sentence2one_hot_matrix(c))

    for c, matrix in zip(cs, matrixs):
        print("%s -> " % (c))
        print(matrix.astype('int8'))

    for c, matrix in zip(cs, matrixs):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix)))

    # Same conversion with an explicit maxlen; this call form returns a
    # (matrix, mask) pair, and the mask is handed back when decoding.
    maxlen = len(engine.get_dictionary()) + 10
    matrixs = []
    masks = []
    for c in cs:
        matrix, mask = convertor.sentence2one_hot_matrix(c, maxlen)
        matrixs.append(matrix)
        masks.append(mask)

    for c, matrix, mask in zip(cs, matrixs, masks):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix, mask)))

    # Whole dataset -> tensor + mask -> sentences round trip.
    tensor, mask = convertor.sentences2one_hot_tensor(cs, len(engine.get_dictionary()))
    recs = convertor.one_hot_tensor2sentences(tensor, mask)
    for c, rec in zip(cs, recs):
        print("%s -> %s" % (c, rec))
Exemple #2
0
    s = ''
    for word in sentence:
        s += word + ' '
    return s


# Vocabulary: the digit strings '0'..'9' followed by chr(65)..chr(74) ('A'..'J').
words = list(map(str, range(10))) + list(map(chr, range(65, 75)))
print('words:', words)

# Dataset
DATA_SIZE = 1000
HIDDEN_SIZE = 128
BATCH_SIZE = 33
MAXLEN = len(words)

engine = SimpleChainEngine(words)
starts, sentences = engine.get_dataset(DATA_SIZE)

# Preview at most the first six (start, sentence) pairs.
for i, (start, sentence) in enumerate(zip(starts, sentences)):
    print("%s -> %s" % (sentence2str(start), sentence2str(sentence)))
    if i >= 5:
        break

# Inputs drop the last element of each sentence, outputs drop the first,
# so soutputs[k] is sinputs[k] shifted by one position.
sinputs = [chain[:-1] for chain in sentences]
soutputs = [chain[1:] for chain in sentences]
for i, (sinput, soutput) in enumerate(zip(sinputs, soutputs)):
    print("%s -> %s" % (sentence2str(sinput), sentence2str(soutput)))
    if i >= 5:
        break

# Encode the input sentences as a one-hot tensor plus mask.
convertor = word_one_hot_vector_convertor(engine.get_dictionary())
D_X, D_mask = convertor.sentences2one_hot_tensor(sinputs, MAXLEN)
Exemple #3
0
from data.simple_chain_engine import SimpleChainEngine
from data.character_data_engine import CharacterDataEngine
from keras import backend as K
from keras.layers.core import Activation, TimeDistributedDense
from keras_layer.shift import Shift
from keras.layers.recurrent import SimpleRNN
import numpy as np
from keras.layers.containers import Graph

TRAINING_SIZE = 100
chars = "0123456789abcdef"

print("Generating data...")
engine = SimpleChainEngine(chars)
starts, chains = engine.get_dataset(TRAINING_SIZE)
print("Total number of data:", len(starts))

print("Vectorization...")
convertor = CharacterDataEngine(chars, maxlen=len(chars) - 1)
# Start symbols are encoded with maxlen=1; chains use the engine's default.
initial_value = convertor.encode_dataset(starts, maxlen=1)
y = convertor.encode_dataset(chains)
# Hold out the last len(y) // 10 samples for validation. Floor division
# keeps split_at an int: with "/" it becomes a float on Python 3 and the
# slices below raise TypeError. On Python 2 both operators agree for ints.
split_at = len(y) - len(y) // 10
(y_train, y_val) = (y[:split_at], y[split_at:])
(i_train, i_val) = (initial_value[:split_at], initial_value[split_at:])
# The model input is the target sequence itself.
(X_train, X_val) = (y_train, y_val)
print(i_train.shape)
print(y_train.shape)

print("Build model...")
HIDDEN_SIZE = 128
BATCH_SIZE = 50
Exemple #4
0
def test_word_one_hot_vector_convertor():
    """Exercise ``word_one_hot_vector_convertor`` and print the results.

    A ``SimpleChainEngine`` built over the characters ``0-9a-f`` supplies a
    dictionary and five sample chains. Each encode/decode pair offered by
    the convertor is then run and its ``original -> result`` line printed:
    word/vector, sentence/matrix (without and with a mask) and
    sentences/tensor. There are no assertions; the output is checked by eye.

    ``print`` is always called with one parenthesised argument, which is
    valid and prints the same text on both Python 2 and Python 3.
    """
    from data.simple_chain_engine import SimpleChainEngine
    engine = SimpleChainEngine('0123456789abcdef')

    # Show a single sample followed by a five-element dataset.
    s, c = engine.get_data()
    print("%s -> %s" % (s, c))
    ss, cs = engine.get_dataset(5)
    for (s, c) in zip(ss, cs):
        print("%s -> %s" % (s, c))
    print(engine.get_dictionary())

    convertor = word_one_hot_vector_convertor(engine.get_dictionary())

    # Encode every dictionary word; int8 keeps the printed vector short.
    for word in engine.get_dictionary():
        print("%s -> %s" % (word,
                            convertor.word2one_hot_vector(word).astype('int8')))

    # Encode then decode every dictionary word.
    for word in engine.get_dictionary():
        print("%s -> %s" % (
            word,
            convertor.one_hot_vector2word(
                convertor.word2one_hot_vector(word).astype('int8'))))

    # Encode each chain without specifying a maximum length.
    matrixs = []
    for c in cs:
        matrixs.append(convertor.sentence2one_hot_matrix(c))

    for c, matrix in zip(cs, matrixs):
        print("%s -> " % (c))
        print(matrix.astype('int8'))

    for c, matrix in zip(cs, matrixs):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix)))

    # With an explicit maxlen the encoder returns (matrix, mask); the mask
    # is passed back to the decoder.
    maxlen = len(engine.get_dictionary()) + 10
    matrixs = []
    masks = []
    for c in cs:
        matrix, mask = convertor.sentence2one_hot_matrix(c, maxlen)
        matrixs.append(matrix)
        masks.append(mask)

    for c, matrix, mask in zip(cs, matrixs, masks):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix, mask)))

    # Encode all chains at once, then decode the tensor back to sentences.
    tensor, mask = convertor.sentences2one_hot_tensor(
        cs, len(engine.get_dictionary()))
    recs = convertor.one_hot_tensor2sentences(tensor, mask)
    for c, rec in zip(cs, recs):
        print("%s -> %s" % (c, rec))
Exemple #5
0
    s = ''
    for word in sentence:
        s += word + ' '
    return s
    

# Vocabulary: digit strings '0'..'9' plus the characters chr(65)..chr(74).
words = list(map(str, range(10))) + list(map(chr, range(65, 75)))
print('words:', words)

# Dataset
DATA_SIZE = 1000
HIDDEN_SIZE = 128
BATCH_SIZE = 33
MAXLEN = len(words)

engine = SimpleChainEngine(words)
starts, sentences = engine.get_dataset(DATA_SIZE)

# Print a short preview: stop once index 5 has been shown.
for i, (start, sentence) in enumerate(zip(starts, sentences)):
    print("%s -> %s" % (sentence2str(start), sentence2str(sentence)))
    if i >= 5:
        break

# Build shifted pairs: the input is everything but the last element and
# the output is everything but the first.
sinputs = [chain[:-1] for chain in sentences]
soutputs = [chain[1:] for chain in sentences]
for i, (sinput, soutput) in enumerate(zip(sinputs, soutputs)):
    print("%s -> %s" % (sentence2str(sinput), sentence2str(soutput)))
    if i >= 5:
        break

# One-hot encode the inputs; the call also returns a mask.
convertor = word_one_hot_vector_convertor(engine.get_dictionary())
D_X, D_mask = convertor.sentences2one_hot_tensor(sinputs, MAXLEN)