Example #1
data_size = len(data)

train_split_index = data_size * 90 // 100

training_input  = data_input[:train_split_index]
training_output = data_output[:train_split_index]
validation_input = data_input[train_split_index:]
validation_output = data_output[train_split_index:]

# Encoding the data ----------------------

input_encoding, input_decoding, input_dict_size = encoding.build_characters_encoding(data_input)
output_encoding, output_decoding, output_dict_size = encoding.build_characters_encoding(data_output)

encoded_training_input = encoding.transform(input_encoding, training_input, vector_size=MAX_ENGLISH_INPUT_LENGTH)
encoded_training_output = encoding.transform(output_encoding, training_output, vector_size=MAX_KATAKANA_OUTPUT_LENGTH)
encoded_validation_input = encoding.transform(input_encoding, validation_input, vector_size=MAX_ENGLISH_INPUT_LENGTH)
encoded_validation_output = encoding.transform(output_encoding, validation_output, vector_size=MAX_KATAKANA_OUTPUT_LENGTH)
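
# The encoding helpers used above are not part of this snippet. Below is a
# minimal sketch of what encoding.build_characters_encoding and
# encoding.transform plausibly do, assuming code 0 is reserved for padding
# and CHAR_CODE_START for the start-of-sequence marker. This is an
# illustration, not the repository's exact code.

import numpy as np

CHAR_CODE_START = 1  # assumed reserved codes: 0 = padding, 1 = start of sequence

def build_characters_encoding(texts):
    # Map every character seen in the corpus to an integer, keeping 0 and 1 reserved.
    chars = sorted(set(c for text in texts for c in text))
    encoding_map = {c: i + 2 for i, c in enumerate(chars)}
    decoding_map = {i + 2: c for i, c in enumerate(chars)}
    return encoding_map, decoding_map, len(chars) + 2

def transform(encoding_map, texts, vector_size=20):
    # Encode each string as a fixed-length vector of character codes, zero-padded.
    transformed = np.zeros((len(texts), vector_size), dtype='int32')
    for i, text in enumerate(texts):
        for j, char in enumerate(text[:vector_size]):
            transformed[i][j] = encoding_map.get(char, 0)
    return transformed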

# Preparing the model data ----------------------

training_encoder_input, training_decoder_input, training_decoder_output = \
    model.create_model_data(encoded_training_input, encoded_training_output, output_dict_size)

validation_encoder_input, validation_decoder_input, validation_decoder_output = \
    model.create_model_data(encoded_validation_input, encoded_validation_output, output_dict_size)
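
# model.create_model_data is not shown here, but Example #3 below inlines the
# same transformation. Wrapped as a helper, it would look roughly like this:
# teacher forcing, where the decoder input is the target shifted right behind
# a start code and the decoder output is the one-hot target. A sketch, not
# necessarily the repository's exact code.

import numpy as np

def create_model_data(encoded_input, encoded_output, output_dict_size):
    encoder_input = encoded_input

    # Shift the target right by one step and prepend the start code.
    decoder_input = np.zeros_like(encoded_output)
    decoder_input[:, 1:] = encoded_output[:, :-1]
    decoder_input[:, 0] = encoding.CHAR_CODE_START

    # One-hot encode the unshifted target as the prediction objective.
    decoder_output = np.eye(output_dict_size)[encoded_output.astype('int')]
    return encoder_input, decoder_input, decoder_output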

# Building the model ----------------------

seq2seq_model = model.create_model(
    input_dict_size, output_dict_size,
    MAX_ENGLISH_INPUT_LENGTH, MAX_KATAKANA_OUTPUT_LENGTH)  # argument list assumed; the snippet was cut off mid-call
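
# A plausible Keras implementation of create_model for this character-level
# sequence-to-sequence setup: an embedding + LSTM encoder whose final state
# seeds an LSTM decoder, followed by a per-timestep softmax. Layer sizes and
# structure here are assumptions, not the repository's exact architecture.

from keras.models import Model
from keras.layers import Input, Embedding, LSTM, TimeDistributed, Dense

def create_model(input_dict_size, output_dict_size,
                 input_length, output_length, hidden_size=64):
    encoder_input = Input(shape=(input_length,))
    decoder_input = Input(shape=(output_length,))

    # Encoder: embed the input characters and compress them into one state vector.
    encoder = Embedding(input_dict_size, hidden_size, mask_zero=True)(encoder_input)
    encoder = LSTM(hidden_size, return_sequences=False)(encoder)

    # Decoder: embed the shifted target and initialize the LSTM from the encoder state.
    decoder = Embedding(output_dict_size, hidden_size, mask_zero=True)(decoder_input)
    decoder = LSTM(hidden_size, return_sequences=True)(decoder, initial_state=[encoder, encoder])

    # Per-timestep softmax over the output character dictionary.
    output = TimeDistributed(Dense(output_dict_size, activation='softmax'))(decoder)

    seq2seq = Model(inputs=[encoder_input, decoder_input], outputs=[output])
    seq2seq.compile(optimizer='adam', loss='categorical_crossentropy')
    return seq2seq
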
Example #2
import pandas as pd

print('Evaluating the model on random testing dataset...')

data = pd.read_csv('./dataset/data.csv', header=None)  # header=None so data[0]/data[1] select columns by position
data = data.sample(frac=1, random_state=11)

data_input = [s.lower() for s in data[0]]
data_output = [s.lower() for s in data[1]]

data_size = len(data)
test_split = data_size * 10 // 100

test_input  = data_input[:test_split]
test_output = data_output[:test_split]

encoded_testing_input = encoding.transform(input_encoding, test_input, vector_size=MAX_ENGLISH_INPUT_LENGTH)
encoded_testing_output = encoding.transform(output_encoding, test_output, vector_size=MAX_KATAKANA_OUTPUT_LENGTH)

test_encoder_input, test_decoder_input, test_decoder_output = \
    model.create_model_data(encoded_testing_input, encoded_testing_output, len(output_decoding) + 1)

testing_model.evaluate(x=[test_encoder_input, test_decoder_input], y=test_decoder_output)

# ===============================================================

print('Evaluating the model on random names...')


def to_katakana(english_text):
    return model.to_katakana(english_text, testing_model, input_encoding, output_decoding)
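
# model.to_katakana is the inference helper. Since the trained network emits
# one character distribution per timestep, decoding is typically greedy: start
# from CHAR_CODE_START, predict, take the argmax, and feed it back in. A sketch
# under that assumption, reusing the helper names from the snippets above:

import numpy as np

def to_katakana_greedy(text, trained_model, input_encoding, output_decoding,
                       input_length=MAX_ENGLISH_INPUT_LENGTH,
                       output_length=MAX_KATAKANA_OUTPUT_LENGTH):
    encoder_input = encoding.transform(input_encoding, [text.lower()], vector_size=input_length)

    # Grow the decoder input one predicted character at a time.
    decoder_input = np.zeros((1, output_length), dtype='int32')
    decoder_input[:, 0] = encoding.CHAR_CODE_START
    for i in range(1, output_length):
        prediction = trained_model.predict([encoder_input, decoder_input]).argmax(axis=2)
        decoder_input[:, i] = prediction[:, i - 1]

    # Drop the start code and padding, then map codes back to characters.
    return ''.join(output_decoding.get(int(code), '')
                   for code in decoder_input[0, 1:] if int(code) > 0)
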
Example #3
import numpy as np

data_output = [s.decode('utf-8') for s in data[1]]

data_size = len(data)

train_split_index = data_size * 90 // 100  # integer division; float slice indices raise TypeError in Python 3

training_input  = data_input[:train_split_index]
training_output = data_output[:train_split_index]

validation_input = data_input[train_split_index:]
validation_output = data_output[train_split_index:]

# Encoding the data ----------------------

input_encoding, input_decoding, input_dict_size = encoding.build_characters_encoding(data_input)
output_encoding, output_decoding, output_dict_size = encoding.build_characters_encoding(data_output)

encoded_training_input = encoding.transform(input_encoding, training_input, vector_size=MAX_ENGLISH_INPUT_LENGTH)
encoded_training_output = encoding.transform(output_encoding, training_output, vector_size=MAX_KATAKANA_OUTPUT_LENGTH)
encoded_validation_input = encoding.transform(input_encoding, validation_input, vector_size=MAX_ENGLISH_INPUT_LENGTH)
encoded_validation_output = encoding.transform(output_encoding, validation_output, vector_size=MAX_KATAKANA_OUTPUT_LENGTH)

# Building the model ----------------------

training_encoder_input = encoded_training_input

# Decoder input: the target sequence shifted right by one step, with the
# reserved start-of-sequence code in the first slot (teacher forcing).
training_decoder_input = np.zeros_like(encoded_training_output)
training_decoder_input[:, 1:] = encoded_training_output[:, :-1]
training_decoder_input[:, 0] = encoding.CHAR_CODE_START

# Decoder output: the unshifted target, one-hot encoded over the output dictionary.
training_decoder_output = np.eye(output_dict_size)[encoded_training_output.astype('int')]

validation_encoder_input = encoded_validation_input

# Same shift-right construction for the validation split.
validation_decoder_input = np.zeros_like(encoded_validation_output)
validation_decoder_input[:, 1:] = encoded_validation_output[:, :-1]
validation_decoder_input[:, 0] = encoding.CHAR_CODE_START
validation_decoder_output = np.eye(output_dict_size)[encoded_validation_output.astype('int')]
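
# With encoder/decoder inputs and one-hot targets prepared for both splits,
# training would be a standard Keras fit call, assuming a compiled model such
# as seq2seq_model from Example #1 (batch size and epoch count are assumptions):

seq2seq_model.fit(
    [training_encoder_input, training_decoder_input], training_decoder_output,
    validation_data=([validation_encoder_input, validation_decoder_input],
                     validation_decoder_output),
    batch_size=64, epochs=20)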