def load_data():
    """Load the dev, eval and train datasets via ``rnn.load_data``.

    Relies on the module-level ``config``. Dev and eval are loaded with
    deterministic options (sorted sequence order, no frame shuffling) so
    evaluation results are reproducible; train keeps the loader defaults.

    :return: tuple ``(dev_data, eval_data, train_data)``
    """
    # Local import, renamed so it does not shadow this function's own name.
    from rnn import load_data as _rnn_load_data

    # Dev and eval share identical deterministic loading options — build
    # them once instead of repeating the argument list.
    eval_kwargs = dict(
        chunking=config.value("chunking", ""),
        seq_ordering="sorted",
        shuffle_frames_of_nseqs=0,
    )
    dev_data, _ = _rnn_load_data(config, 0, 'dev', **eval_kwargs)
    eval_data, _ = _rnn_load_data(config, 0, 'eval', **eval_kwargs)
    train_data, _ = _rnn_load_data(config, 0, 'train')
    return dev_data, eval_data, train_data
def create_engine():
    """Build and fully initialize a training Engine.

    Loads the datasets via :func:`load_data`, initializes the engine from the
    module-level ``config``, prepares the first training epoch's batches, and
    initializes the optimizer state so the returned engine is ready to train.

    :return: the initialized Engine
    """
    dev_data, eval_data, train_data = load_data()
    engine = Engine()
    engine.init_train_from_config(config, train_data, dev_data, eval_data)
    # Advance the engine into the first training epoch before generating
    # batches — batching parameters below are only valid after this call.
    engine.init_train_epoch()
    engine.train_batches = engine.train_data.generate_batches(recurrent_net=engine.network.recurrent, batch_size=engine.batch_size, max_seqs=engine.max_seqs, max_seq_length=engine.max_seq_length, seq_drop=engine.seq_drop, shuffle_batches=engine.shuffle_batches, used_data_keys=engine.network.used_data_keys)
    # Set the learning rate and create the optimizer's variables inside the
    # engine's TF session so training can start immediately.
    engine.updater.set_learning_rate(engine.learning_rate, session=engine.tf_session)
    engine.updater.init_optimizer_vars(session=engine.tf_session)
    return engine
def hello_world():
    """Main index page of the site.

    On POST, reads the form (model type, history flag, seed text), loads the
    matching dataset vocabulary, and generates text from the saved model(s).

    :return: rendered ``main.html`` template with the generated text, if any
    """
    text = ""
    if request.method == "POST":
        # Retrieve form data. Keep the form value under its own name so the
        # loaded torch models below cannot shadow it.
        model_type = request.form.get("model_type")
        history = bool(request.form.get("history"))
        seed_text = request.form.get("seed-text")
        if seed_text == "":
            # Default seed character; '<' presumably marks start-of-text in
            # the corpus — confirm against rnn.load_data's preprocessing.
            seed_text = "<"
        # Load the data for the chosen model type.
        datapath = WINE_DATA if model_type == "wine" else JOKE_DATA
        seq, itoc, ctoi = rnn.load_data(datapath)
        if history:
            # Generate text from the same model at different points in
            # training. Load each checkpoint as needed and join the pieces
            # once, instead of repeated string concatenation.
            filepaths = WINES if model_type == "wine" else JOKES
            parts = []
            for filepath in filepaths:
                checkpoint = torch.load(filepath)
                parts.append(checkpoint.generate_text(ctoi, itoc, seq.shape[1], start_phrase=seed_text))
                parts.append("\n\n")
            text = "".join(parts)
        else:
            # Generate text from the best model of the given type.
            path = WINE if model_type == "wine" else JOKE
            best_model = torch.load(path)
            text = best_model.generate_text(ctoi, itoc, seq.shape[1], start_phrase=seed_text)
    return render_template("main.html", text=text)
# Model hyperparameters.
NUM_LAYERS = 1
EMB_SIZE = 128
HID_SIZE = 256
PAD_CHAR = '#'

# Corpus locations.
JOKES = "jokes/jokes.txt"
WINE = "wine_data/wine_reviews_short.txt"

# Train on the GPU when one is available.
dev = 'cuda' if torch.cuda.is_available() else 'cpu'

# Corpus to train on and generate from (set to WINE for wine reviews).
filename = JOKES

seq, itoc, ctoi = rnn.load_data(filename)

if filename == JOKES:
    # Hold out a quarter of the sequences for testing, preserving order.
    X_train, X_test = train_test_split(seq, test_size=0.25, shuffle=False)
else:
    # Decrease the text set even more to decrease training time.
    X_train, X_test = train_test_split(seq, test_size=0.8, shuffle=False)
    X_test, _ = train_test_split(X_test, test_size=0.98, shuffle=False)

charnn = rnn.CharNN(
    len(itoc),
    num_layers=NUM_LAYERS,
    emb_size=EMB_SIZE,
    hid_size=HID_SIZE,
    pad_idx=ctoi[PAD_CHAR],
    max_len=seq.shape[1],
    device=dev,
)