def test_word_one_hot_vector_convertor():
    from data.simple_chain_engine import SimpleChainEngine

    engine = SimpleChainEngine('0123456789abcdef')

    # One sample: a start word and the chain generated from it.
    s, c = engine.get_data()
    print("%s -> %s" % (s, c))

    # A small dataset of (start, chain) pairs.
    ss, cs = engine.get_dataset(5)
    for (s, c) in zip(ss, cs):
        print("%s -> %s" % (s, c))
    print(engine.get_dictionary())

    convertor = word_one_hot_vector_convertor(engine.get_dictionary())

    # Word -> one-hot vector, then the inverse mapping back to the word.
    for word in engine.get_dictionary():
        print("%s -> %s" % (word, convertor.word2one_hot_vector(word).astype('int8')))
    for word in engine.get_dictionary():
        print("%s -> %s" % (
            word,
            convertor.one_hot_vector2word(
                convertor.word2one_hot_vector(word).astype('int8'))))

    # Sentence -> one-hot matrix (one row per word), and back.
    matrixs = []
    for c in cs:
        matrixs.append(convertor.sentence2one_hot_matrix(c))
    for c, matrix in zip(cs, matrixs):
        print("%s ->" % (c))
        print(matrix.astype('int8'))
    for c, matrix in zip(cs, matrixs):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix)))

    # Padded variant: with an explicit maxlen the convertor also returns a
    # mask marking which rows are real words rather than padding.
    maxlen = len(engine.get_dictionary()) + 10
    matrixs = []
    masks = []
    for c in cs:
        matrix, mask = convertor.sentence2one_hot_matrix(c, maxlen)
        matrixs.append(matrix)
        masks.append(mask)
    for c, matrix, mask in zip(cs, matrixs, masks):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix, mask)))

    # Whole dataset -> 3D tensor (sample x time step x vocabulary) plus mask,
    # then back to sentences.
    tensor, mask = convertor.sentences2one_hot_tensor(cs, len(engine.get_dictionary()))
    # print('tensor:')
    # print(tensor)
    # print('mask:')
    # print(mask)
    recs = convertor.one_hot_tensor2sentences(tensor, mask)
    for c, rec in zip(cs, recs):
        print("%s -> %s" % (c, rec))
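# Minimal sketch of a direct entry point (an addition, not part of the
# original test file): runs the round-trip test above when the module is
# executed as a script.
if __name__ == '__main__':
    test_word_one_hot_vector_convertor()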
import numpy as np

from data.simple_chain_engine import SimpleChainEngine

# words, DATA_SIZE, MAXLEN, sentence2str and word_one_hot_vector_convertor
# are assumed to be defined earlier in the script.
engine = SimpleChainEngine(words)
starts, sentences = engine.get_dataset(DATA_SIZE)
for (i, start, sentence) in zip(range(DATA_SIZE), starts, sentences):
    print("%s -> %s" % (sentence2str(start), sentence2str(sentence)))
    if i >= 5:
        break

# Shift each sentence by one step: at every position the model's target
# is the next word of the same sentence.
sinputs = [sentence[:-1] for sentence in sentences]
soutputs = [sentence[1:] for sentence in sentences]
for (i, sinput, soutput) in zip(range(DATA_SIZE), sinputs, soutputs):
    print("%s -> %s" % (sentence2str(sinput), sentence2str(soutput)))
    if i >= 5:
        break

convertor = word_one_hot_vector_convertor(engine.get_dictionary())
D_X, D_mask = convertor.sentences2one_hot_tensor(sinputs, MAXLEN)
D_Y, _ = convertor.sentences2one_hot_tensor(soutputs, MAXLEN)
print(D_X.shape, D_Y.shape, D_mask.shape)

# Shuffle (X, Y) with a shared permutation so inputs, targets and masks
# stay aligned.
indices = np.arange(DATA_SIZE)
np.random.shuffle(indices)
D_X = D_X[indices]
D_Y = D_Y[indices]
D_mask = D_mask[indices]

# Explicitly set apart 10% for validation data that we never train over.
# Integer division keeps split_at a valid slice index on Python 3 as well.
split_at = DATA_SIZE - DATA_SIZE // 10
(D_X_train, D_X_val) = (D_X[:split_at], D_X[split_at:])
(D_Y_train, D_Y_val) = (D_Y[:split_at], D_Y[split_at:])
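# Hedged sanity check (an addition; D_mask_train and D_mask_val are
# hypothetical names): split the mask the same way as X and Y, then decode
# the validation inputs back through the convertor to confirm that padding
# and shuffling preserved the sentences.
(D_mask_train, D_mask_val) = (D_mask[:split_at], D_mask[split_at:])
for sentence in convertor.one_hot_tensor2sentences(D_X_val, D_mask_val)[:5]:
    print(sentence2str(sentence))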