Ejemplo n.º 1
0
def main():
    """Train a language model, then report perplexity on the train, valid
    and test splits.

    Relies on module-level names defined elsewhere in the file:
    `m_config` (file paths and evaluation step counts),
    `RawStringDatasetReader`, and `LanguageModel`.
    """
    def _iterators(relative_path):
        # Build a reader for `relative_path` (resolved against the current
        # working directory) and return its (x, y) data iterators.
        reader = RawStringDatasetReader(os.getcwd() + relative_path)
        return reader.itera_x, reader.itera_y

    itera_x_train, itera_y_train = _iterators(m_config.train_file)
    itera_x_valid, itera_y_valid = _iterators(m_config.valid_file)
    itera_x_test, itera_y_test = _iterators(m_config.test_file)

    train_model = LanguageModel()
    k_model, ppl = train_model.build_lm_model(itera_x_train, itera_y_train)

    # evaluate() returns [loss, *metrics]; index 0 is the loss, and
    # perplexity = exp(cross-entropy loss).
    valid_loss = k_model.evaluate(x=itera_x_valid, y=itera_y_valid,
                                  steps=m_config.valid_steps)[0]
    test_loss = k_model.evaluate(x=itera_x_test, y=itera_y_test,
                                 steps=m_config.test_steps)[0]

    # Fixed typo in the printed label: "trainning" -> "training".
    print("training's ppl:", ppl)
    print("valid's ppl:", math.exp(valid_loss))
    print("test's ppl:", math.exp(test_loss))
Ejemplo n.º 2
0
# Build the dataset and persist its state for later reuse.
dataset = Dataset(data_dir, num_words)
dataset.set_batch_size(batch_size)
dataset.set_seq_len(seq_len)
dataset.save('./checkpoints/')

# Hyper-parameters handed to the model constructor.
params = {
    'vocab_size': dataset.vocab_size,
    'num_classes': dataset.vocab_size,
    'batch_size': batch_size,
    'seq_len': seq_len,
    'hidden_dim': hidden_size,
    'num_layers': num_layers,
    'embed_size': embed_size,
}

model = LanguageModel(params)
model.compile()
eval_softmax = 5
for epoch in range(num_epochs):
    # Re-point the dataset at the training data at the start of each epoch.
    dataset.set_data_dir(data_dir)
    dataset.set_batch_size(batch_size)
    progbar = generic_utils.Progbar(dataset.token.document_count)
    for X_batch, Y_batch in dataset:
        start = time.time()
        loss = model.train_on_batch(X_batch, Y_batch)
        # Perplexity is exp(loss); cast to float32 first as the original did.
        perp = np.exp(np.float32(loss))
        elapsed = time.time() - start
        wps = np.round((batch_size * seq_len) / elapsed)
        progbar.add(len(X_batch),
                    values=[("loss", loss), ("perplexity", perp),
                            ("words/sec", wps)])
Ejemplo n.º 3
0

if __name__ == "__main__":
    # Claim a unique D-Bus name so only one instance of the app runs.
    uniqueService = UniqueService(APP_DBUS_NAME, APP_OBJECT_NAME)

    app = QApplication(sys.argv)
    # System-tray icon loaded from the package's "image" directory.
    tray_icon = SystemTrayIcon(
        QIcon(os.path.join(get_parent_dir(__file__), "image", "trayicon.png")),
        app)
    tray_icon.show()
    # Publish the tray area's bounds into shared module-level constants.
    (constant.TRAYAREA_TOP,
     constant.TRAYAREA_BOTTOM) = tray_icon.get_trayarea()

    plugin = Plugin()

    source_lang_model = LanguageModel()
    dest_lang_model = LanguageModel()

    # Translation engines are loaded dynamically as plugin modules via
    # imp.load_source: one engine for single words, one for longer text.
    word_engine_name = setting_config.get_translate_config("word_engine")
    words_engine_name = setting_config.get_translate_config("words_engine")
    translate_simple = imp.load_source(
        "translate_simple",
        plugin.get_plugin_file(word_engine_name)).Translate()
    translate_long = imp.load_source(
        "translate_long",
        plugin.get_plugin_file(words_engine_name)).Translate()
    word_translate_model = plugin.get_word_model(
        setting_config.get_translate_config("src_lang"),
        setting_config.get_translate_config("dst_lang"))
    # NOTE(review): this snippet is truncated mid-call — the remaining
    # argument(s) and closing paren of get_words_model are missing from
    # the source; presumably it mirrors the get_word_model call above.
    words_translate_model = plugin.get_words_model(
        setting_config.get_translate_config("src_lang"),
Ejemplo n.º 4
0
from model import LanguageModel
import argparse

# Train a language model on the corpus file named on the command line,
# then print whatever fit() returns.
new = LanguageModel()

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("source", type=str, help="file to train the model")
cli_args = arg_parser.parse_args()

result = new.fit(cli_args.source)
print(result)