def test_word_one_hot_vector_convertor():
    """Exercise ``word_one_hot_vector_convertor`` and print each round-trip.

    Builds a ``SimpleChainEngine`` from the string ``'0123456789abcdef'``,
    draws samples from it, and prints the result of every conversion the
    convertor offers (word <-> vector, sentence <-> matrix with and without
    an explicit ``maxlen``, sentences <-> tensor) so the output can be
    inspected by eye. Nothing is asserted and nothing is returned.

    Every ``print`` is called with exactly one pre-formatted argument, so the
    text emitted is the same whether ``print`` is the Python 2 statement or
    the Python 3 function.
    """
    # Kept as a function-local import, as in the original.
    from data.simple_chain_engine import SimpleChainEngine

    engine = SimpleChainEngine('0123456789abcdef')

    # A single sample first, then whatever get_dataset(5) yields.
    start, sentence = engine.get_data()
    print("%s -> %s" % (start, sentence))

    dataset_starts, dataset_sentences = engine.get_dataset(5)
    for start, sentence in zip(dataset_starts, dataset_sentences):
        print("%s -> %s" % (start, sentence))
    print(engine.get_dictionary())

    convertor = word_one_hot_vector_convertor(engine.get_dictionary())

    # word -> one-hot vector
    for word in engine.get_dictionary():
        vector = convertor.word2one_hot_vector(word).astype('int8')
        print("%s -> %s" % (word, vector))

    # word -> one-hot vector -> word
    for word in engine.get_dictionary():
        vector = convertor.word2one_hot_vector(word).astype('int8')
        print("%s -> %s" % (word, convertor.one_hot_vector2word(vector)))

    # sentence -> matrix; called without maxlen, a single value is returned.
    matrices = [
        convertor.sentence2one_hot_matrix(sentence)
        for sentence in dataset_sentences
    ]
    for sentence, matrix in zip(dataset_sentences, matrices):
        # 1-tuple on purpose: a bare ``(sentence)`` would unpack a
        # tuple-valued sentence as separate format arguments.
        print("%s -> " % (sentence,))
        print(matrix.astype('int8'))
    for sentence, matrix in zip(dataset_sentences, matrices):
        print("%s -> %s" % (sentence,
                            convertor.one_hot_matrix2sentence(matrix)))

    # sentence -> (matrix, mask); with an explicit maxlen the call is
    # unpacked into two values.
    maxlen = len(engine.get_dictionary()) + 10
    encoded = [
        convertor.sentence2one_hot_matrix(sentence, maxlen)
        for sentence in dataset_sentences
    ]
    for sentence, (matrix, mask) in zip(dataset_sentences, encoded):
        print("%s -> %s" % (
            sentence, convertor.one_hot_matrix2sentence(matrix, mask)))

    # sentences -> (tensor, mask) -> sentences
    tensor, mask = convertor.sentences2one_hot_tensor(
        dataset_sentences, len(engine.get_dictionary()))
    # Debug aids, left disabled as in the original:
    # print('tensor:')
    # print(tensor)
    # print('mask:')
    # print(mask)
    recovered = convertor.one_hot_tensor2sentences(tensor, mask)
    for sentence, rec in zip(dataset_sentences, recovered):
        print("%s -> %s" % (sentence, rec))
def sentence2str(sentence):
    """Return the words of ``sentence`` concatenated, each followed by a space.

    A non-empty result therefore ends with a trailing space; an empty
    ``sentence`` gives ``''``.

    NOTE(review): the ``def`` line of this helper was not present in the
    reviewed text. Its name and single parameter were reconstructed from the
    ``sentence2str(...)`` calls below and from the body's use of
    ``sentence`` -- confirm against the full file.
    """
    return ''.join(word + ' ' for word in sentence)


# Vocabulary: the digit strings '0'..'9' followed by chr(65)..chr(74),
# i.e. 'A'..'J'.
words = [str(i) for i in range(10)] + [chr(i) for i in range(65, 75)]
print('words:', words)

# Dataset
DATA_SIZE = 1000
HIDDEN_SIZE = 128
BATCH_SIZE = 33
MAXLEN = len(words)

# Number of rows echoed by the preview loops (the original loops stopped
# after index 5, i.e. after six rows).
PREVIEW_COUNT = 6

engine = SimpleChainEngine(words)
starts, sentences = engine.get_dataset(DATA_SIZE)
for _, start, sentence in zip(range(min(DATA_SIZE, PREVIEW_COUNT)),
                              starts, sentences):
    print("%s -> %s" % (sentence2str(start), sentence2str(sentence)))

# Shift each sentence by one position: the input drops the last word and the
# output drops the first.
sinputs = [sentence[:-1] for sentence in sentences]
soutputs = [sentence[1:] for sentence in sentences]
for _, sinput, soutput in zip(range(min(DATA_SIZE, PREVIEW_COUNT)),
                              sinputs, soutputs):
    print("%s -> %s" % (sentence2str(sinput), sentence2str(soutput)))

convertor = word_one_hot_vector_convertor(engine.get_dictionary())
D_X, D_mask = convertor.sentences2one_hot_tensor(sinputs, MAXLEN)