# Flat script: generate addition questions, vectorize them, shuffle, and
# hold out 10% for validation.
# NOTE(review): TRAINING_SIZE, DIGITS, recurrent, np, NumberDataEngine,
# CharacterDataEngine and slice_X must be defined/imported earlier in the file.
INVERT = True
# Try replacing GRU, or SimpleRNN
RNN = recurrent.SimpleRNN
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1
# Input width: two DIGITS-digit operands plus the '+' sign.
MAXLEN = DIGITS + 1 + DIGITS

print("Generating data...")
engine = NumberDataEngine()
questions, expected = engine.get_dataset(TRAINING_SIZE)
print("Total addition questions:", len(questions))

print("Vectorization...")
convertor = CharacterDataEngine(engine.get_character_set(), maxlen=MAXLEN)
# Fix: honour the INVERT flag instead of hard-coding True.
X = convertor.encode_dataset(questions, invert=INVERT)
# Answers are at most DIGITS + 1 characters long (carry digit).
y = convertor.encode_dataset(expected, maxlen=DIGITS + 1)

# Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
indices = np.arange(len(y))
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

# Explicitly set apart 10% for validation data that we never train over.
# Fix: floor division — `len(X) / 10` is a float on Python 3 and breaks slicing.
split_at = len(X) - len(X) // 10
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
(y_train, y_val) = (y[:split_at], y[split_at:])
print(X_train.shape)
print(y_train.shape)
# Flat script (D_-prefixed variant): generate DIGITS-digit addition questions,
# one-hot encode them, shuffle, and hold out 10% for validation.
# NOTE(review): np, NumberDataEngine, CharacterDataEngine and slice_X must be
# defined/imported earlier in the file.
TRAINING_SIZE = 50000
DIGITS = 3
INVERT = True
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1
# Input width: two DIGITS-digit operands plus the '+' sign.
MAXLEN = DIGITS + 1 + DIGITS

print('Generating data...')
engine = NumberDataEngine(min_digits=1, max_digits=DIGITS)
questions, expected = engine.get_dataset(TRAINING_SIZE)
print('Total addition questions:', len(questions))

print('Vectorization...')
convertor = CharacterDataEngine(engine.get_character_set(), maxlen=MAXLEN)
D_X = convertor.encode_dataset(questions, invert=INVERT)
# Answers are at most DIGITS + 1 characters long (carry digit).
D_y = convertor.encode_dataset(expected, maxlen=DIGITS + 1)

# Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
indices = np.arange(len(D_y))
np.random.shuffle(indices)
D_X = D_X[indices]
D_y = D_y[indices]

# Explicitly set apart 10% for validation data that we never train over.
# Fix: floor division — `len(D_X) / 10` is a float on Python 3 and breaks slicing.
split_at = len(D_X) - len(D_X) // 10
(D_X_train, D_X_val) = (slice_X(D_X, 0, split_at), slice_X(D_X, split_at))
(D_y_train, D_y_val) = (D_y[:split_at], D_y[split_at:])
print(D_X_train.shape)
print(D_y_train.shape)
# Flat script: generate character-chain data, vectorize it, split 90/10,
# and build the RNN + Shift layers for next-character prediction.
# NOTE(review): SimpleChainEngine and CharacterDataEngine must be
# defined/imported earlier in the file.
from keras_layer.shift import Shift
from keras.layers.recurrent import SimpleRNN
import numpy as np
from keras.layers.containers import Graph

TRAINING_SIZE = 100
chars = "0123456789abcdef"

print("Generating data...")
engine = SimpleChainEngine(chars)
starts, chains = engine.get_dataset(TRAINING_SIZE)
print("Total number of data:", len(starts))

print("Vectorization...")
# A chain starting from one character can be at most len(chars) - 1 long.
convertor = CharacterDataEngine(chars, maxlen=len(chars) - 1)
initial_value = convertor.encode_dataset(starts, maxlen=1)
y = convertor.encode_dataset(chains)

# Explicitly set apart 10% for validation.
# Fix: floor division — `len(y) / 10` is a float on Python 3 and breaks slicing.
split_at = len(y) - len(y) // 10
(y_train, y_val) = (y[:split_at], y[split_at:])
(i_train, i_val) = (initial_value[:split_at], initial_value[split_at:])
# The model predicts the chain from its own target sequence (teacher forcing),
# so inputs equal targets here.
(X_train, X_val) = (y_train, y_val)
print(i_train.shape)
print(y_train.shape)

print("Build model...")
HIDDEN_SIZE = 128
BATCH_SIZE = 50
MAXLEN = len(chars) - 1
input_dim = convertor.get_dim()
rnn_layer = SimpleRNN(HIDDEN_SIZE, input_shape=(MAXLEN, convertor.get_dim()), return_sequences=True)
shift_layer = Shift(rnn_layer, initial_value)
# Flat script: generate big-number additions as separate operand streams
# (D_A, D_B), vectorize, shuffle, and hold out 10% for validation.
# NOTE(review): TRAINING_SIZE, DIGITS, np, BigNumberDataEngine,
# CharacterDataEngine and slice_X must be defined/imported earlier in the file.
INVERT = True
HIDDEN_SIZE = 128
BATCH_SIZE = 256
LAYERS = 1
# Output width: DIGITS digits plus a possible carry digit.
MAXLEN = DIGITS + 1

print('Generating data...')
# NOTE(review): min_digits is hard-coded to 12 here — presumably meant to track
# DIGITS (or a MIN_DIGITS constant); confirm against the other variants.
engine = BigNumberDataEngine(min_digits=12, max_digits=DIGITS)
# (sic) "get_seperate_dataset" is the project API's spelling.
As, Bs, expected = engine.get_seperate_dataset(TRAINING_SIZE)
print('Total additions:', len(As))

print('Vectorization...')
convertor = CharacterDataEngine(engine.get_character_set(), maxlen=MAXLEN, soldier=' ')
# Fix: honour the INVERT flag instead of hard-coding True.
D_A = convertor.encode_dataset(As, invert=INVERT, index=True)
D_B = convertor.encode_dataset(Bs, invert=INVERT, index=True)
D_y = convertor.encode_dataset(expected, maxlen=MAXLEN, invert=INVERT)

# Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
indices = np.arange(len(D_y))
np.random.shuffle(indices)
D_A = D_A[indices]
D_B = D_B[indices]
D_y = D_y[indices]

# Explicitly set apart 10% for validation data that we never train over.
# Fix: floor division — `len(D_A) / 10` is a float on Python 3 and breaks slicing.
split_at = len(D_A) - len(D_A) // 10
(D_A_train, D_A_val) = (slice_X(D_A, 0, split_at), slice_X(D_A, split_at))
(D_B_train, D_B_val) = (slice_X(D_B, 0, split_at), slice_X(D_B, split_at))
(D_y_train, D_y_val) = (D_y[:split_at], D_y[split_at:])
# Flat script (variant): generate big-number additions as separate operand
# streams (D_A, D_B), vectorize, shuffle, and hold out 10% for validation.
# NOTE(review): TRAINING_SIZE, np, BigNumberDataEngine, CharacterDataEngine
# and slice_X must be defined/imported earlier in the file.
MIN_DIGITS = 10
DIGITS = 12
INVERT = True
HIDDEN_SIZE = 128
BATCH_SIZE = 256
LAYERS = 1
# Output width: DIGITS digits plus a possible carry digit.
MAXLEN = DIGITS + 1

print("Generating data...")
# Fix: use the MIN_DIGITS constant — it was defined above but the call
# hard-coded 12, leaving the constant dead.
engine = BigNumberDataEngine(min_digits=MIN_DIGITS, max_digits=DIGITS)
# (sic) "get_seperate_dataset" is the project API's spelling.
As, Bs, expected = engine.get_seperate_dataset(TRAINING_SIZE)
print("Total additions:", len(As))

print("Vectorization...")
convertor = CharacterDataEngine(engine.get_character_set(), maxlen=MAXLEN, soldier=" ")
# Fix: honour the INVERT flag instead of hard-coding True.
D_A = convertor.encode_dataset(As, invert=INVERT, index=True)
D_B = convertor.encode_dataset(Bs, invert=INVERT, index=True)
D_y = convertor.encode_dataset(expected, maxlen=MAXLEN, invert=INVERT)

# Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
indices = np.arange(len(D_y))
np.random.shuffle(indices)
D_A = D_A[indices]
D_B = D_B[indices]
D_y = D_y[indices]

# Explicitly set apart 10% for validation data that we never train over.
# Fix: floor division — `len(D_A) / 10` is a float on Python 3 and breaks slicing.
split_at = len(D_A) - len(D_A) // 10
(D_A_train, D_A_val) = (slice_X(D_A, 0, split_at), slice_X(D_A, split_at))
(D_B_train, D_B_val) = (slice_X(D_B, 0, split_at), slice_X(D_B, split_at))
(D_y_train, D_y_val) = (D_y[:split_at], D_y[split_at:])