def gen_epochs(num_epochs, num_steps, batch_size):
    """Yield `num_epochs` iterators, one after the other; each yields
    (x, y) batches of shape (batch_size, num_steps)."""
    for i in range(num_epochs):
        yield reader.ptb_iterator(data, batch_size, num_steps)
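# For reference, the iterator these examples consume follows the contract of
# the classic `ptb_iterator` from the TensorFlow PTB tutorial's reader.py.
# Below is a minimal sketch of that generator (the name `ptb_iterator_sketch`
# is ours; `raw_data` is assumed to be a flat list of integer word ids):
import numpy as np

def ptb_iterator_sketch(raw_data, batch_size, num_steps):
    """Yield (x, y) int32 pairs of shape (batch_size, num_steps), where y is
    x shifted one token to the right."""
    raw_data = np.array(raw_data, dtype=np.int32)
    batch_len = len(raw_data) // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]
    epoch_size = (batch_len - 1) // num_steps
    if epoch_size == 0:
        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
    for i in range(epoch_size):
        x = data[:, i * num_steps:(i + 1) * num_steps]
        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
        yield (x, y)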
def train_model_from_save():
    print 'loading and inference ...'
    train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)
    data = train_data
    saver = tf.train.import_meta_graph(
        os.path.join(MODEL_SAVE_DIR, MODEL_SAVE_NAME + '-1300.meta'))
    with tf.Session() as session:
        saver.restore(session, tf.train.latest_checkpoint(MODEL_SAVE_DIR))
        graph = tf.get_default_graph()
        m_cost = tf.get_collection('m_cost')[0]
        m_final_state = graph.get_tensor_by_name(
            'language_model/RNN/RNN/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/add_137:0')
        m_train_op = tf.get_collection('m_train_op')[0]
        m_embedding = tf.get_collection('m_embedding')[0]
        m_output = tf.get_collection('m_output')[0]
        m_input_data = tf.get_collection('m_input_data')[0]
        m_targets = tf.get_collection('m_targets')[0]
        m_initial_state = graph.get_tensor_by_name(
            'language_model/MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0')
        for i in range(NUM_EPOCH):
            print 'In iteration: %d' % (i + 1)
            total_costs = 0.0
            iters = 0
            state = session.run(m_initial_state)
            for step, (x, y) in enumerate(
                    reader.ptb_iterator(data, TRAIN_BATCH_SIZE, TRAIN_NUM_STEP)):
                cost, state, _, embedding, output = session.run(
                    [m_cost, m_final_state, m_train_op, m_embedding, m_output],
                    {m_input_data: x, m_targets: y, m_initial_state: state})
                total_costs += cost
                iters += TRAIN_NUM_STEP
                if step % 100 == 0:
                    print 'After %d steps, perplexity is %.3f' % (
                        step, np.exp(total_costs / iters))
def run_epoch(session, model, data, train_op, output_log):
    total_costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        cost, state, _ = session.run(
            [model.cost, model.final_state, train_op],
            {model.input_data: x, model.targets: y, model.initial_state: state})
        total_costs += cost
        iters += model.num_steps
        step += 1  # shift so logging counts steps from 1
        if output_log and step % 100 == 0:
            print("After %d steps, perplexity is %.3f" %
                  (step, np.exp(total_costs / iters)))
    return np.exp(total_costs / iters)
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(m.initial_state)
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run(
            [m.cost, m.final_state, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        iters += m.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
def run_one_epoch(m, data, is_training=True, verbose=False):
    # Define the epoch size based on the length of the data, batch size and
    # the number of steps.
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.
    iters = 0
    m._model.reset_states()
    # For each step and data point ...
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        if is_training:
            loss = m.train_batch(x, y)
        else:
            loss = m.test_batch(x, y)
        # Add the returned loss to costs (total cost for this epoch).
        costs += loss
        # Add number of steps to the iteration counter.
        iters += m.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("Itr %d of %d, perplexity: %.3f speed: %.0f wps" %
                  (step, epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    # Return the perplexity so we can track how the model is evolving.
    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, max_steps=None, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    num_batch_steps_completed = 0
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                     {m.input_data: x, m.targets: y})
        costs += cost
        iters += m.num_steps
        num_batch_steps_completed += 1
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
        # Guard against the max_steps=None default before comparing.
        if max_steps is not None and iters > max_steps:
            break
    return costs / iters
def run_epoch(session, model, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((sum([len(i) for i in data]) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = model.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        cost, state, _ = session.run(
            [model.cost, model.final_state, eval_op],
            {model.input_data: x, model.targets: y, model.initial_state: state})
        costs += cost
        iters += model.num_steps
        if verbose and step % (epoch_size // 100) == 100:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
def run_one_epoch(session, m, data, eval_op, verbose=False):
    # Define the epoch size based on the length of the data, batch size and
    # the number of steps.
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(m.initial_state)
    # For each step and data point ...
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        # Run cost, final_state, the output words, and the op passed in.
        cost, state, out_words, _ = session.run(
            [m.cost, m.final_state, m.final_output_words, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        # Add the returned cost to costs (total cost for this epoch).
        costs += cost
        # Add number of steps to the iteration counter.
        iters += m.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("Itr %d of %d, perplexity: %.3f speed: %.0f wps" %
                  (step, epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    # Return the perplexity so we can track how the model is evolving.
    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, save_models=False, verbose=False):
    saver = tf.train.Saver()
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    perps = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        perp, cost, state, _ = session.run(
            [m.perp, m.cost, m.final_state, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        perps += perp
        iters += m.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%d / %d exp_cost: %.3f perplexity: %.3f speed: %.0f wps" %
                  (step, epoch_size, np.exp(costs / iters), np.exp(perps / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    if save_models:
        saver.save(session, 'saved_models/%s.temp' % m.model_name,
                   write_meta_graph=False)
    return np.exp(perps / iters)
def run_epoch(session, model, data, is_train=False, verbose=False):
    """Runs the model on the given data."""
    print_('valid data size:', len(data))
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        if is_train:
            fetches = [model.cost, model.final_state, model.train_op]
        else:
            fetches = [model.cost, model.final_state]
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y
        for layer_num, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[layer_num].c
            feed_dict[h] = state[layer_num].h
        if is_train:
            cost, state, _ = session.run(fetches, feed_dict)
        else:
            cost, state = session.run(fetches, feed_dict)
        costs += cost
        iters += model.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print_(nowStr() + ':', "%.3f perplexity: %.3f speed: %.0f wps" %
                   (step * 1.0 / epoch_size, np.exp(costs / iters),
                    iters * model.batch_size / (time.time() - start_time)))
    print('Cost:', costs, 'iter:', iters)
    return np.exp(costs / iters)
def run_epoch(model, data, is_train=False, lr=1.0):
    """Runs the model on the given data."""
    if is_train:
        model.train()
    else:
        model.eval()
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    hidden = model.init_hidden()
    costs = 0.0
    iters = 0
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        inputs = Variable(
            torch.from_numpy(x.astype(np.int64)).transpose(0, 1).contiguous()).cuda()
        model.zero_grad()
        hidden = repackage_hidden(hidden)
        outputs, hidden = model(inputs, hidden)
        targets = Variable(
            torch.from_numpy(y.astype(np.int64)).transpose(0, 1).contiguous()).cuda()
        tt = torch.squeeze(targets.view(-1, model.batch_size * model.num_steps))
        loss = criterion(outputs.view(-1, model.vocab_size), tt)
        costs += loss.data[0] * model.num_steps
        iters += model.num_steps
        if is_train:
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
            for p in model.parameters():
                p.data.add_(-lr, p.grad.data)
        if step % (epoch_size // 10) == 10:
            print("{} perplexity: {:8.2f} speed: {} wps".format(
                step * 1.0 / epoch_size, np.exp(costs / iters),
                iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
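# The PyTorch run_epoch above (and the similar variant later in this listing)
# calls `repackage_hidden`, which is not shown here. A common definition,
# sketched below under the assumption that `h` is a Tensor or a nested tuple
# of Tensors, detaches the hidden state from the autograd graph so that
# backprop does not reach back through earlier batches (truncated BPTT):
import torch

def repackage_hidden(h):
    """Detach hidden state from its history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)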
def run_epoch(session, m, data, eval_op, verbose=False, id_to_word=None):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _, logits = session.run(
            [m.cost, m.final_state, eval_op, m.logits],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        if id_to_word:
            print("========================================")
            for i in xrange(m.num_steps):
                word = id_to_word[logits[i, :].argmax()]
                print(word.replace("<eos>", "\n"), end=" ")
            print(" ")
            print(" ")
        costs += cost
        iters += m.num_steps
        mod = (epoch_size // 10) + 1
        if verbose and step % mod == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
def run_eval_traversing(model, session, data, batch_size=1, num_steps=120):
    """Evaluate `model` on `data`, traversing it in (batch_size, num_steps)
    batches. Returns (eval_cost, eval_ppx)."""
    costs = 0.0
    eval_ppx = 1.0  # running product; must start at 1.0, not 0.0
    iters = 0
    # Original read `model.initial_state_train.eval()`, which looks like a
    # typo since everything else here uses the validation graph.
    state = model.initial_state_valid.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, batch_size, num_steps)):
        cost, ppx, state, _ = session.run(
            [model.cost_valid, model.ppx_valid, model.final_state_valid,
             model.valid_op],
            {model.input_data_valid: x,
             model.targets_valid: y,
             model.initial_state_valid: state,
             model.dropout_feed: 0.0})
        costs += cost
        iters += num_steps  # count the steps actually fed to the iterator
        eval_ppx *= ppx
    eval_cost = costs / iters
    return eval_cost, eval_ppx
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    print('m.initial_state:', m.initial_state)
    state = session.run(m.initial_state)
    step = 0
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run(
            [m.cost, m.final_state, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        iters += m.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    print("Time for one epoch, %d iters: %.4f seconds" %
          (step + 1, time.time() - start_time))
    average_batch_time = (time.time() - start_time) / (step + 1)
    print("Average time per minibatch in this epoch: %.4f seconds" %
          average_batch_time)
    return np.exp(costs / iters), average_batch_time
def run_epoch(session, model, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    accs = 0.0
    iters = 0
    # Run the zero_state op once per full pass over the data set:
    # a tuple of num_layers * [batch_size, size].
    lstm_state_value = session.run(model.initial_state)
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y
        # One (c, h) pair per layer.
        for i, (c, h) in enumerate(model.initial_state):
            # Each feed has shape [batch_size=20, size=200].
            feed_dict[c] = lstm_state_value[i].c
            feed_dict[h] = lstm_state_value[i].h
        # feed_dict now holds {x, y, c1, h1, c2, h2}.
        cost, acc, lstm_state_value, _ = session.run(
            [model.cost, model.accuracy, model.final_state, eval_op], feed_dict)
        accs += acc
        costs += cost  # average cost per sample in the batch
        iters += model.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))
    print("Accuracy:", accs / iters)
    return np.exp(costs / iters), accs / iters
def _run_epoch(self, session, data, eval_op, verbose=False):
    epoch_size = ((len(data) // self._batch_size) - 1) // self._num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, self._batch_size, self._num_steps)):
        fetches, feed_dict = self._one_loop_setup(eval_op)
        feed_dict[self._input_data] = x
        feed_dict[self._targets] = y
        res = session.run(fetches, feed_dict)
        self.train_writer.add_summary(res[2], step / 13)
        cost = res[0]
        costs += cost
        iters += self._num_steps
        if verbose and step % (epoch_size // 10) == 10:
            sys.stdout.write(
                "%.3f perplexity: %.3f speed: %.0f wps\n" %
                (step * 1.0 / epoch_size, np.exp(costs / iters),
                 iters * self._batch_size * self._num_steps /
                 (time.time() - start_time)))
            sys.stdout.flush()
    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run(
            [m.cost, m.final_state, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        iters += m.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
def __run_epoch(self, session, model, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    print(epoch_size)
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        fetches = [model.cost, model.final_state, eval_op]
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y
        # c is the previous cell's memory cell; h is the hidden (history) state.
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        cost, state, _ = session.run(fetches, feed_dict)
        costs += cost
        iters += model.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, ITERS, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(m.initial_state)
    step = 0
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run(
            [m.cost, m.final_state, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        iters += m.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
        # Run only a few iterations for profiling; remove this check if
        # complete training is needed.
        if step > ITERS - 1:
            break
    print("Time for %d iterations %.4f seconds" %
          (ITERS, time.time() - start_time))
    return np.exp(costs / iters)
def run_epoch(model, data, is_train=False, lr=1.0, device=torch.device('cpu')):
    """Runs the model on the given data."""
    if is_train:
        model.train()
    else:
        model.eval()
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    hidden = model.init_hidden()
    costs = 0.0
    iters = 0
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps,
                                model.direction)):
        # x is the input to the LSTM; y is the expected output.
        inputs = Variable(
            torch.from_numpy(x.astype(np.int64)).transpose(0, 1).contiguous()).to(device)
        model.zero_grad()
        hidden = repackage_hidden(hidden)
        outputs, hidden = model.forward(inputs=inputs, hidden=hidden)
        # Transpose and flatten the target words into a 1-D tensor.
        targets = Variable(
            torch.from_numpy(y.astype(np.int64)).transpose(0, 1).contiguous()).to(device)
        tt = torch.squeeze(targets.view(-1, model.batch_size * model.num_steps))
        # Cross-entropy loss; loss.item() replaces the deprecated loss.data[0].
        loss = criterion(outputs.view(-1, model.vocab_size), tt)
        costs += loss.item() * model.num_steps
        iters += model.num_steps
        if is_train:
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
            for p in model.parameters():
                # Plain SGD step; newer PyTorch prefers p.data.add_(p.grad.data, alpha=-lr).
                p.data.add_(-lr, p.grad.data)
        if step % (epoch_size // 10) == 10:
            print("{} perplexity: {:8.2f} speed: {} wps".format(
                step * 1.0 / epoch_size, np.exp(costs / iters),
                iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    num_batch_steps_completed = 0
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                     {m.input_data: x, m.targets: y})
        if verbose and step % 100 == 0:
            print('you successfully completed one entire batch -- cost', cost,
                  'time is', time.ctime(),
                  'num_batch_steps_completed:', num_batch_steps_completed)
        costs += cost
        iters += m.num_steps
        num_batch_steps_completed += 1
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    return costs / iters
def run_epoch(session, model, data, is_train=False, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        if is_train:
            fetches = [model.cost, model.final_state, model.train_op]
        else:
            fetches = [model.cost, model.final_state]
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y
        for layer_num, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[layer_num].c
            feed_dict[h] = state[layer_num].h
        if is_train:
            cost, state, _ = session.run(fetches, feed_dict)
        else:
            cost, state = session.run(fetches, feed_dict)
        costs += cost
        iters += model.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
def add_phrase(self, phrase, confidence):
    # Parse the phrase into word ids, penalizing unknown words.
    input_ids = []
    penalty = 1.0
    for word in phrase.lower().split():
        if word in self.word_to_id:
            input_ids.append(self.word_to_id[word])
        else:
            input_ids.append(self.word_to_id["<unk>"])
            penalty *= 1.5
    input_ids.append(self.word_to_id["<eos>"])
    # Calculate perplexity.
    costs = 0.0
    iters = 0
    state = self.pmm_model.initial_state.eval(session=self.pmm_session)
    for step, (x, y) in enumerate(
            reader.ptb_iterator(input_ids, self.pmm_model.batch_size,
                                self.pmm_model.num_steps)):
        cost, state = self.pmm_session.run(
            [self.pmm_model.cost, self.pmm_model.final_state],
            {self.pmm_model.input_data: x,
             self.pmm_model.targets: y,
             self.pmm_model.initial_state: state})
        costs += cost
        iters += self.pmm_model.num_steps
    perplexity = np.exp(costs / iters) ** penalty
    self.phrases.append((phrase, confidence, perplexity))
    self.best_phrase = None
    return perplexity
def run_epoch(session, m, data, eval_op, epoch=None, writer=None, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    logging.info('epoch size: {}'.format(epoch_size))
    start_time = time.time()
    neg_ELBOs = 0.0
    KLs = 0.0
    NLLs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        # Write summaries, but only when we're training!
        if m.is_training:
            global_step = step + epoch_size * (epoch - 1)
        if m.is_training and FLAGS.debug and verbose and step % 10 == 0:
            merged, neg_ELBO, KL_scalar, NLL_scalar, state, _ = session.run(
                [m.merged, m.neg_ELBO, m.KL_scalar, m.NLL_scalar,
                 m.final_state, eval_op],
                {m.input_data: x, m.targets: y, m.initial_state: state})
            logging.info('adding summary, global step {}'.format(global_step))
            writer.add_summary(merged, global_step=global_step)
        elif (m.is_training and not FLAGS.debug and verbose
              and step % (epoch_size // 10) == 10):
            merged, neg_ELBO, KL_scalar, NLL_scalar, state, _ = session.run(
                [m.merged, m.neg_ELBO, m.KL_scalar, m.NLL_scalar,
                 m.final_state, eval_op],
                {m.input_data: x, m.targets: y, m.initial_state: state})
            logging.info('adding summary (non-debug), global step {}'.format(global_step))
            writer.add_summary(merged, global_step=global_step)
        else:
            neg_ELBO, KL_scalar, NLL_scalar, state, _ = session.run(
                [m.neg_ELBO, m.KL_scalar, m.NLL_scalar, m.final_state, eval_op],
                {m.input_data: x, m.targets: y, m.initial_state: state})
        neg_ELBOs += neg_ELBO
        KLs += KL_scalar
        NLLs += NLL_scalar
        iters += m.num_steps
        normalization = iters * m.batch_size
        info = ("%.3f ELBO: %.3f KL: %.3f NLL: %.3f perplexity: %.3f speed: %.0f wps" %
                (step * 1.0 / epoch_size, neg_ELBOs / normalization,
                 KLs / normalization, NLLs / normalization,
                 np.exp(NLLs / normalization),
                 iters * m.batch_size / (time.time() - start_time)))
        if FLAGS.debug and verbose and step % 10 == 0:
            logging.info(info)
        elif not FLAGS.debug and verbose and step % (epoch_size // 10) == 10:
            logging.info(info)
    return (neg_ELBOs / normalization, KLs / normalization,
            NLLs / normalization, np.exp(NLLs / normalization))
def testPtbIterator(self):
    raw_data = [4, 3, 2, 1, 0, 5, 6, 1, 1, 1, 1, 0, 3, 4, 1]
    batch_size = 3
    num_steps = 2
    output = list(reader.ptb_iterator(raw_data, batch_size, num_steps))
    self.assertEqual(len(output), 2)
    o1, o2 = (output[0], output[1])
    self.assertEqual(o1[0].shape, (batch_size, num_steps))
    self.assertEqual(o1[1].shape, (batch_size, num_steps))
    self.assertEqual(o2[0].shape, (batch_size, num_steps))
    self.assertEqual(o2[1].shape, (batch_size, num_steps))
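# Why the test above expects exactly 2 batches: with 15 tokens and
# batch_size=3, each row holds 15 // 3 = 5 tokens, so
# epoch_size = (5 - 1) // 2 = 2 windows of num_steps=2. Assuming the classic
# reader sketched near the top of this listing, the concrete batches would be:
#
#   x0 = [[4, 3], [5, 6], [1, 0]]   y0 = [[3, 2], [6, 1], [0, 3]]
#   x1 = [[2, 1], [1, 1], [3, 4]]   y1 = [[1, 0], [1, 1], [4, 1]]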
def on_batch_end(self, batch, logs={}):
    if self.batch % self.number_of_batches_between_eval == 0:
        result = self.model.evaluate_generator(
            reader.ptb_iterator(valid_data, args.batch_size, args.num_steps,
                                vocab_size, with_jumps=True),
            steps=len(valid_data) // args.batch_size // 3,
            max_queue_size=10,
            workers=1)
        num_chars_to_generate = 300
        # Generate sentences every 10 epochs.
        seed = "וּמֵת אַחַד מֵהֶם וּבֵן אֵין לוֹ לֹא תִהְיֶה אֵשֶׁת הַמֵּת הַחוּצָה לְאִישׁ זָר יְבָמָהּ יָבֹא עָלֶיהָ וּלְקָחָהּ"
        assert len(seed) < num_chars_to_generate
        seed = list(seed)
        seed = [word_to_id[char] for char in seed]
        seed = np.asarray(seed, dtype=np.float32)
        gen = generate_seq2(self.model, seed, vocab_size, num_chars_to_generate)
        # Note: this builds a tuple, not a string.
        formatted_generated_text = ('*** [', decode(seed, id_to_word), '] ',
                                    decode(gen[len(seed):], id_to_word))
        print(formatted_generated_text)
        print()
        with open(self.logging_file, mode='a') as train_log_file:
            fieldnames = ['batch', 'val_BPC', 'val_loss', 'train_BPC',
                          'train_loss', 'training_time (seconds)',
                          'generated_text']
            writer = csv.DictWriter(train_log_file, fieldnames=fieldnames)
            if batch == 0:
                writer.writeheader()
            writer.writerow({
                'batch': self.batch,
                'val_BPC': result[1],
                'val_loss': result[0],
                'train_BPC': logs['BPC'],
                'train_loss': logs['loss'],
                'training_time (seconds)': time.time() - starting_time,
                'generated_text': formatted_generated_text
            })
        print(f'Batch #{self.batch}')
        print(f'loss: {logs["loss"]} ----- BPC: {logs["BPC"]} ----- '
              f'val_loss: {result[0]} ----- val_BPC {result[1]}')
    self.batch += 1
def test():
    output = list(reader.ptb_iterator(data, 3, 2))
    print len(output)
    print len(output[0]), len(output[0][0]), len(output[0][1])
    print output[0][0], "\n"
    print output[0][1], "\n"
    print type(output)
    print output
def do_eval(session, m, data):
    costs = 0.0
    iters = 0
    state = session.run(m.initial_state)
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        feed = {m.input_data: x, m.targets: y, m.initial_state: state}
        cost, state = session.run([m.cost, m.final_state], feed_dict=feed)
        costs += cost
        iters += m.num_steps
    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run(
            [m.cost, m.final_state, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        iters += m.num_steps
    return np.exp(costs / iters)
def run_epoch(session, m, data, eval_op):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m._batch_size) - 1) // m._num_steps
    costs = 0.0
    iters = 0
    state = m._init_state.eval()
    # Enumerate every step, with training inputs x and targets y.
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m._batch_size, m._num_steps)):
        cost, state, _ = session.run(
            [m._cost, m._final_state, eval_op],
            {m._input_data: x, m._targets: y, m._init_state: state})
        costs += cost
        iters += m._num_steps
        if step % 10 == 0:
            print "%.3f perplexity: %.3f " % (step * 1.0 / epoch_size,
                                              np.exp(costs / iters))
    return np.exp(costs / iters)
def train(self):
    raw_data = reader.ptb_raw_data(
        "/home/kevin/Documents/Datasets/simple-examples/data")
    train_data, valid_data, test_data, vocabsize = raw_data
    print vocabsize
    saver = tf.train.Saver(max_to_keep=2)
    for epoch in xrange(10000):
        total_genloss = 0
        total_latentloss = 0
        steps = 0
        for step, x in enumerate(
                reader.ptb_iterator(test_data, self.batchsize, self.sentence_length)):
            x2 = np.copy(x)
            c = np.zeros((self.batchsize, 1), dtype=np.int32)
            c.fill(10001)
            x = np.hstack((x[:, 1:], c))
            # x: input; x2: desired output
            gen_loss, _ = self.sess.run(
                [self.generation_loss, self.update],
                feed_dict={self.sentences_in: x, self.sentences_in_decoded: x2})
            gl = np.mean(gen_loss) / self.sentence_length
            total_genloss += gl
            steps = steps + 1
        print "epoch %d genloss %f perplexity %f" % (
            epoch, total_genloss / steps, np.exp(total_genloss / steps))
        total_validloss = 0
        validsteps = 0
        for step, x in enumerate(
                reader.ptb_iterator(valid_data, self.batchsize, self.sentence_length)):
            x2 = np.copy(x)
            c = np.zeros((self.batchsize, 1), dtype=np.int32)
            c.fill(10001)
            x = np.hstack((x[:, 1:], c))
            # x: input; x2: desired output
            gen_loss, _ = self.sess.run(
                [self.generation_loss, self.update],
                feed_dict={self.sentences_in: x, self.sentences_in_decoded: x2})
            gl = np.mean(gen_loss) / self.sentence_length
            total_validloss += gl
            validsteps = validsteps + 1
        print "valid %d genloss %f perplexity %f" % (
            epoch, total_validloss / validsteps,
            np.exp(total_validloss / validsteps))
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run(
            [m.cost, m.final_state, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        iters += m.num_steps
    return np.exp(costs / iters)
def model_run_epoch(sess, model, data, eval_op, verbose=True):
    """Runs the model for one epoch on the given data."""
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = sess.run(model.initial_state)
    for step, (x, y) in enumerate(
            ptb_iterator(data, model.batch_size, model.num_steps)):
        feed_dict = {model.input: x, model.target: y,
                     model.initial_state: state}
        cost, state, _ = sess.run([model.cost, model.final_state, eval_op],
                                  feed_dict=feed_dict)
        costs += cost
        iters += model.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            # Original referenced `m.batch_size` here, a NameError: the
            # parameter is named `model`.
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
def run_epoch(session, model, data, eval_op):
    header_pre = []
    pre = []
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps,
                                FLAGS.embedding_size)):
        _, prediction = session.run([eval_op, model.prediction],
                                    {model.input_data: x, model.targets: y})
        if step == 0:
            header_pre.append(prediction[:model.num_steps - 1, :])
        for i in range(model.batch_size):
            pre.append(prediction[(i + 1) * model.num_steps - 1, :])
    return np.concatenate(
        (np.array(header_pre).reshape(-1, FLAGS.embedding_size), np.array(pre)),
        axis=0)
def predict(self, session, data, word_to_id):
    def _get_word_fromid(word_to_id, search_id):
        for word, wid in word_to_id.items():
            if wid == search_id:
                return word

    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, self._batch_size, self._num_steps)):
        fetches, feed_dict = self._one_loop_setup(self._logits)
        feed_dict[self._input_data] = x
        feed_dict[self._targets] = y
        res = session.run(fetches, feed_dict)
        label = res[1]
        label = np.argmax(label, 1)
        y = np.reshape(y, (self._batch_size * self._num_steps))
        for pre, real in zip(label, y):
            sys.stdout.write("Predict %s : Real %s\n" %
                             (_get_word_fromid(word_to_id, pre),
                              _get_word_fromid(word_to_id, real)))
def run_epoch(session, m, data, eval_op, verbose=False, vocabulary=None):
    """Runs the model on the given data.

    :param session: session for computation
    :param m: model object
    :param data: input data
    :param eval_op: op to run on each batch
    :param verbose: print progress if True
    :param vocabulary: optional word -> id mapping used to print the
        predicted next word
    """
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, probs, logits, _ = session.run(
            [m.cost, m.final_state, m.probabilities, m.logits, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        iters += m.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
            chosen_word = np.argmax(probs, 1)
            print("Probabilities shape: %s, Logits shape: %s" %
                  (probs.shape, logits.shape))
            print(chosen_word)
            if vocabulary is not None:
                next_word_id = chosen_word[-1]
                for word_, word_id_ in vocabulary.iteritems():
                    if word_id_ == next_word_id:
                        print(word_)
            print("Batch size: %s, Num steps: %s" % (m.batch_size, m.num_steps))
    return np.exp(costs / iters)
def run_epoch(session, model, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((sum([len(i) for i in data]) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = model.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        cost, state, _ = session.run(
            [model.cost, model.final_state, eval_op],
            {model.input_data: x, model.targets: y, model.initial_state: state})
        costs += cost
        iters += model.num_steps
        if verbose and step % (epoch_size // 100) == 100:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
def run_epoch_eval(session, m, data, words, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            reader.ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, logits, _ = session.run(
            [m.cost, m.final_state, m.logits, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        iters += m.num_steps
        print(words[x], words[y], words[np.argmax(logits)])
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
def train(self):
    fakedata = np.zeros((2, 4))
    fakedata[0, :] = [1, 1, 0, 0]
    fakedata[1, :] = [2, 2, 0, 0]
    np.set_printoptions(threshold=np.inf)
    raw_data = reader.ptb_raw_data(
        "/home/kevin/Documents/Datasets/simple-examples/data")
    train_data, valid_data, test_data, vocabsize = raw_data
    print vocabsize
    list(reader.ptb_iterator(valid_data, self.batchsize, self.sentence_length))
    saver = tf.train.Saver(max_to_keep=2)
    # saver.restore(self.sess, tf.train.latest_checkpoint(os.getcwd()+"/training/"))
    ls = 0.1
    for epoch in xrange(10000):
        if epoch > 20:
            ls = min(1, epoch / 50.0)
        total_genloss = 0
        total_latentloss = 0
        steps = 0
        for step, x in enumerate(
                reader.ptb_iterator(test_data, self.batchsize, self.sentence_length)):
            x2 = np.copy(x)
            c = np.zeros((self.batchsize, 1), dtype=np.int32)
            c.fill(10001)
            x = np.hstack((x[:, 1:], c))
            # x: input; x2: desired output
            gen_loss, latent_loss, _ = self.sess.run(
                [self.generation_loss, self.latent_loss, self.update],
                feed_dict={self.sentences_in: x,
                           self.sentences_in_decoded: x2,
                           self.latentscale: ls})
            gl = np.mean(gen_loss) / self.sentence_length
            total_genloss += gl
            total_latentloss += np.mean(latent_loss)
            steps = steps + 1
        print "epoch %d genloss %f perplexity %f latentloss %f" % (
            epoch, total_genloss / steps, np.exp(total_genloss / steps),
            total_latentloss)
        total_validloss = 0
        validsteps = 0
        for step, x in enumerate(
                reader.ptb_iterator(valid_data, self.batchsize, self.sentence_length)):
            x2 = np.copy(x)
            c = np.zeros((self.batchsize, 1), dtype=np.int32)
            c.fill(10001)
            x = np.hstack((x[:, 1:], c))
            # x: input; x2: desired output
            gen_loss, latent_loss = self.sess.run(
                [self.generation_loss, self.latent_loss],
                feed_dict={self.sentences_in: x,
                           self.sentences_in_decoded: x2,
                           self.latentscale: ls})
            gl = np.mean(gen_loss) / self.sentence_length
            total_validloss += gl
            validsteps = validsteps + 1
        print "valid %d genloss %f perplexity %f" % (
            epoch, total_validloss / validsteps,
            np.exp(total_validloss / validsteps))
        if epoch % 10 == 0:
            saver.save(self.sess, os.getcwd() + "/training-reg/train",
                       global_step=epoch)
def train(dim_word=100,  # word vector dimensionality
          dim=1000,  # the number of GRU units
          encoder='gru',
          max_epochs=5000,
          finish_after=10000000,  # finish after this many updates
          dispFreq=100,
          decay_c=0.,  # L2 weight decay penalty
          lrate=0.01,
          n_words=100000,  # vocabulary size
          maxlen=100,  # maximum length of the description
          batch_size=16,
          valid_batch_size=16,
          max_grad_norm=5,
          nlayers=1,
          data_path=None,
          use_dropout=False,
          platoon=False,
          name=""):
    # Model options
    model_options = locals().copy()

    print 'Loading data'
    raw_data = reader.ptb_raw_data(data_path)
    train_data, valid_data, test_data, _ = raw_data
    pprint.pprint(model_options)

    print 'Building model'
    params = init_params(model_options)
    # create shared variables for parameters
    tparams = init_tparams(params)

    if platoon:
        print "PLATOON: Init ...",
        from platoon.channel import Worker
        from platoon.param_sync import ASGD
        worker = Worker(control_port=5567)
        print "DONE"
        print "PLATOON: Initializing shared params ...",
        worker.init_shared_params(tparams.values(), param_sync_rule=ASGD())
        print "DONE"
        worker.send_req({"type": name})

    # build the symbolic computational graph
    trng, use_noise, \
        x, \
        opt_ret, \
        cost, ups = \
        build_model(tparams, model_options)
    inps = [x]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, updates=ups)
    print 'Done'

    # before any regularizer - will be used to compute ppl
    print 'Building f_cost...',
    cost_sum = cost.sum()
    f_cost = theano.function(inps, cost_sum, updates=ups)
    print 'Done'

    cost = cost.mean()

    # apply L2 regularization on weights
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    print 'Computing gradient...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    print 'Done'

    # compile the optimizer; the actual computational graph is compiled here
    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    f_grad_shared, f_update = sgd(lr, tparams, grads, inps, cost, max_grad_norm)
    print 'Done'

    print 'Optimization'
    history_errs = []
    history_ppls = []
    wpss = []
    best_p = None

    # Training loop
    uidx = 0
    estop = False
    bad_counter = 0
    try:
        for eidx in xrange(max_epochs):
            n_samples = 0
            tlen = 0
            start_time = time.time()
            for x, y in reader.ptb_iterator(train_data, batch_size, maxlen):
                if platoon:
                    worker.copy_to_local()
                n_samples += len(x)
                uidx += 1
                use_noise.set_value(1.)
                tlen += (x.shape[0] * x.shape[1])

                # pad batch and create mask
                if x is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                # compute cost, grads and copy grads to shared variables
                cost = f_grad_shared(x)
                # do the update on parameters
                f_update(lrate)
                ud = time.time() - ud_start

                if platoon:
                    worker.sync_params(synchronous=True)

                # check for bad numbers
                if numpy.isnan(cost) or numpy.isinf(cost):
                    print 'NaN detected'
                    return 1.

                # verbose
                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

                # finish after this many updates
                if uidx >= finish_after:
                    print 'Finishing after %d iterations!' % uidx
                    estop = True
                    break

            current_time = time.time()
            wps = int(tlen // (current_time - start_time))
            print "Current wps", wps
            wpss.append(wps)
            print 'Seen %d samples' % n_samples
            if platoon:
                print "PLATOON: Sending wps to controller ...",
                worker.send_req({'wps': wps, 'epoch': eidx})
                print "DONE"

        print "Avg wps, ", numpy.mean(wpss)
        print "Std avgs,", numpy.std(wpss)
        use_noise.set_value(0.)
    finally:
        if platoon:
            print "PLATOON: Closing worker ...",
            worker.send_req('done')
            worker.close()
            print "DONE"
    return 0