def pretrain(self, gen_seq_len=None, save_weights=True):
    """Pre-train the generator for ``self.pretrain_epochs`` steps.

    Every step draws one training batch via ``get_batch`` and passes it to
    ``self.train_step`` as both arguments. The resulting loss is written to
    TensorBoard when ``self.tb_writer`` is set and printed otherwise.

    On every 17th step (step 0 included) a batch of samples is generated,
    BLEU-scored and the first sample printed; weights are optionally
    checkpointed and the loss from ``self.gen.test_step()`` is logged.

    Args:
        gen_seq_len: Length handed to ``self.gen.generate``. Falls back to
            ``self.seq_len`` when ``None``.
        save_weights: Checkpoint the generator weights at each evaluation
            step when true.
    """
    sample_len = self.seq_len if gen_seq_len is None else gen_seq_len

    for epoch in range(self.pretrain_epochs):
        batch, _ = get_batch(
            self.seq_len,
            self.batch_size,
            start_with_song=False,
            training=True,
        )
        # The same batch is used for both arguments of train_step.
        train_loss = self.train_step(tf.constant(batch), tf.constant(batch))

        if self.tb_writer is None:
            print(train_loss)
        else:
            with self.tb_writer.as_default():
                tf.summary.scalar('gen_pre_train_loss', train_loss, step=epoch)

        # Periodic evaluation only; epoch 0 already satisfies the modulus.
        if epoch % 17 != 0:
            continue

        samples = self.gen.generate(sample_len)
        bleu_score = get_bleu_score(extract_songs(samples))
        print(self.idx2char[samples[0]])

        if save_weights:
            self.gen.model.save_weights(self.gen.checkpoint_prefix)

        eval_loss = self.gen.test_step()
        if self.tb_writer is not None:
            with self.tb_writer.as_default():
                tf.summary.scalar(
                    'gen_pre_test_loss', tf.reduce_mean(eval_loss), step=epoch)
                tf.summary.scalar(
                    'bleu_score', tf.reduce_mean(bleu_score), step=epoch)
def test_step(self):
    """Compute the pre-training loss on one non-training batch.

    Draws a batch with ``training=False``, runs ``self.gen_predictions`` on
    it and scores the predictions against that same batch.

    Returns:
        Whatever ``self.get_pretrain_loss`` returns for the batch.
    """
    # The second value returned by get_batch is not needed here.
    batch, _ = get_batch(
        self.sequence_length,
        self.batch_size,
        start_with_song=False,
        training=False,
    )
    predictions = self.gen_predictions(tf.constant(batch))
    return self.get_pretrain_loss(labels=batch, samples=predictions)
def train(model):
    """Run one pass over ``train_data`` and update ``model`` in place.

    Iterates over ``train_data`` in steps of ``dh.bptt``, back-propagates the
    loss for each chunk, clips the gradient norm to 0.5 and applies an
    optimizer step. Progress is printed every 200 batches and the learning
    rate scheduler is stepped once at the end of the pass.

    Relies on the module-level names ``dh``, ``train_data``, ``optimizer``,
    ``criterion``, ``scheduler``, ``device``, ``n_tokens`` and ``epoch``.

    Args:
        model: Model exposing ``train()``, ``parameters()`` and
            ``generate_square_subsequent_mask(size)``.
    """
    log_interval = 200
    num_batches = len(train_data) // dh.bptt

    model.train()
    total_loss = 0.
    start_time = time.time()
    src_mask = model.generate_square_subsequent_mask(dh.bptt).to(device)

    for batch, i in enumerate(range(0, train_data.size(0) - 1, dh.bptt)):
        data, targets = dh.get_batch(train_data, i)
        optimizer.zero_grad()
        # A chunk shorter than bptt needs a mask of matching size.
        if data.size(0) != dh.bptt:
            src_mask = model.generate_square_subsequent_mask(
                data.size(0)).to(device)
        output = model(data, src_mask)
        loss = criterion(output.view(-1, n_tokens), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        # Accumulate a plain float: adding the tensor itself would keep
        # autograd history alive across batches.
        total_loss += loss.item()

        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | '
                  'lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, num_batches,
                      scheduler.get_last_lr()[0],
                      elapsed * 1000 / log_interval,
                      cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

    # NOTE(review): assumed to run once per call, after the batch loop.
    scheduler.step()
def chat():
    """Run an interactive chat loop on user input.

    In test mode the backward path is not needed, so the model is created
    with its first argument set to ``True`` and a batch size of 1. Each
    exchange is appended to ``config.OUTPUT_FILE`` under
    ``config.PROCESSED_PATH``. An empty input line ends the session.
    """
    _, enc_vocab = data_handler.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data_handler.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.dec'))

    model = ChatBotModel(True, batch_size=1)
    model.build_graph()

    saver = tf.train.Saver()
    transcript_path = os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)

        # The context manager closes the transcript even if a step raises.
        with open(transcript_path, 'a+') as output_file:
            # Decode from standard input.
            max_length = config.BUCKETS[-1][0]
            print(
                'Welcome to TensorBro. Say something. Enter to exit. Max length is',
                max_length)
            while True:
                line = _get_user_input()
                if len(line) > 0 and line[-1] == '\n':
                    line = line[:-1]
                if line == '':
                    break
                output_file.write('HUMAN ++++ ' + line + '\n')

                # Get token-ids for the input sentence.
                token_ids = data_handler.sentence2id(enc_vocab, str(line))
                if len(token_ids) > max_length:
                    print('Max length I can handle is:', max_length)
                    # Do not read here: the top of the loop fetches the next
                    # line, and an extra read would silently drop it.
                    continue

                # Which bucket does it belong to?
                bucket_id = _find_right_bucket(len(token_ids))
                # Get a 1-element batch to feed the sentence to the model.
                encoder_inputs, decoder_inputs, decoder_masks = (
                    data_handler.get_batch(
                        [(token_ids, [])], bucket_id, batch_size=1))
                # Get output logits for the sentence.
                _, _, output_logits = run_step(
                    sess, model, encoder_inputs, decoder_inputs,
                    decoder_masks, bucket_id, True)
                response = _construct_response(output_logits, inv_dec_vocab)
                print(response)
                output_file.write('BOT ++++ ' + response + '\n')

            output_file.write(
                '=============================================\n')
def _eval_test_set(sess, model, test_buckets):
    """Run one step per non-empty test bucket and print its loss and timing.

    Args:
        sess: Session passed through to ``run_step``.
        model: Model passed through to ``run_step``.
        test_buckets: Indexable collection with one entry per element of
            ``config.BUCKETS``.
    """
    for bucket_id, _ in enumerate(config.BUCKETS):
        bucket = test_buckets[bucket_id]
        if len(bucket) == 0:
            print(" Test: empty bucket %d" % (bucket_id))
            continue

        # Timing covers batch construction as well as the step itself.
        started_at = time.time()
        enc_in, dec_in, dec_masks = data_handler.get_batch(
            bucket, bucket_id, batch_size=config.BATCH_SIZE)
        _, step_loss, _ = run_step(
            sess, model, enc_in, dec_in, dec_masks, bucket_id, True)
        print('Test bucket {}: loss {}, time {}'.format(
            bucket_id, step_loss, time.time() - started_at))
def pretrain(self, save_weights=True):
    """Pre-train the discriminator for ``self.pretrain_epochs`` steps.

    Every step trains the discriminator on freshly generated samples against
    a batch from ``get_batch``. The loss is written to TensorBoard when
    ``self.tb_writer`` is set and printed otherwise. On every 17th step
    (step 0 included) the weights are optionally checkpointed and a test
    loss on a ``training=False`` batch is logged.

    Args:
        save_weights: Checkpoint the discriminator weights at each
            evaluation step when true.
    """
    for epoch in range(self.pretrain_epochs):
        generated = self.gen.generate()
        # NOTE(review): other call sites in this file unpack get_batch into
        # two values; here the whole return value is forwarded - confirm
        # that disc.train_step expects that.
        real = get_batch(self.seq_len, self.batch_size)
        train_loss = self.disc.train_step(generated, real)

        if self.tb_writer is None:
            print(train_loss)
        else:
            with self.tb_writer.as_default():
                tf.summary.scalar(
                    'disc_pre_train_loss',
                    tf.reduce_mean(train_loss),
                    step=epoch)

        # Periodic evaluation only; epoch 0 already satisfies the modulus.
        if epoch % 17 != 0:
            continue

        if save_weights:
            self.disc.model.save_weights(self.disc.checkpoint_prefix)

        generated = self.gen.generate()
        real = get_batch(self.seq_len, self.batch_size, training=False)
        eval_loss = self.disc.test_step(
            real_samples=real, fake_samples=generated)
        if self.tb_writer is not None:
            with self.tb_writer.as_default():
                tf.summary.scalar(
                    'disc_pre_test_loss',
                    tf.reduce_mean(eval_loss),
                    step=epoch)
def evaluate(eval_model, data_source):
    """Return the length-weighted average loss of ``eval_model``.

    Walks ``data_source`` in steps of ``dh.bptt`` with gradients disabled,
    weights each chunk's loss by the chunk length and divides the total by
    ``len(data_source) - 1``.

    Relies on the module-level names ``dh``, ``criterion``, ``device`` and
    ``n_tokens``.

    Args:
        eval_model: Model to evaluate; must expose ``eval()`` and
            ``generate_square_subsequent_mask(size)``.
        data_source: Tensor-like data consumed through ``dh.get_batch``.

    Returns:
        The accumulated loss divided by ``len(data_source) - 1``.
    """
    eval_model.eval()  # Turn on the evaluation mode
    total_loss = 0.
    # Build masks from the model under evaluation, not a module-level one.
    src_mask = eval_model.generate_square_subsequent_mask(
        dh.bptt).to(device)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, dh.bptt):
            data, targets = dh.get_batch(data_source, i)
            # A chunk shorter than bptt needs a mask of matching size.
            if data.size(0) != dh.bptt:
                src_mask = eval_model.generate_square_subsequent_mask(
                    data.size(0)).to(device)
            output = eval_model(data, src_mask)
            output_flat = output.view(-1, n_tokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)
def train():
    """Train the bot.

    Builds the model with its first argument set to ``False`` (the backward
    path is needed for training), restores parameters if available and then
    loops forever: pick a random bucket, run one step, and every
    ``skip_step`` iterations report the mean loss and save a checkpoint.
    Every ``10 * skip_step`` iterations the test buckets are evaluated too.
    """
    test_buckets, data_buckets, train_buckets_scale = _get_buckets()

    # In train mode the backward path is required, so forward_only is False.
    model = ChatBotModel(False, config.BATCH_SIZE)
    model.build_graph()

    saver = tf.train.Saver()
    checkpoint_path = os.path.join(config.CPT_PATH, 'chatbot')

    with tf.Session() as sess:
        print('Running session')
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)

        iteration = model.global_step.eval()
        print(iteration)
        running_loss = 0

        while True:
            skip_step = _get_skip_step(iteration)
            bucket_id = _get_random_bucket(train_buckets_scale)
            enc_in, dec_in, dec_masks = data_handler.get_batch(
                data_buckets[bucket_id],
                bucket_id,
                batch_size=config.BATCH_SIZE)

            # Only the step itself is timed; the value is reset each pass.
            start = time.time()
            _, step_loss, _ = run_step(
                sess, model, enc_in, dec_in, dec_masks, bucket_id, False)
            running_loss += step_loss
            iteration += 1

            if iteration % skip_step != 0:
                continue

            print('Iter {}: loss {}, time {}'.format(
                iteration, running_loss / skip_step, time.time() - start))
            running_loss = 0
            saver.save(sess, checkpoint_path, global_step=model.global_step)

            if iteration % (10 * skip_step) == 0:
                # Run evals on development set and print their loss
                _eval_test_set(sess, model, test_buckets)
            sys.stdout.flush()
def run(epoch=0):
    """Run one epoch of CNN+LSTM training, or a full test pass.

    The branch is chosen by ``args.mode[0]``: ``0`` trains (and optionally
    validates), anything else runs per-video testing.

    Relies on module-level names such as ``sess``, ``g``, ``params``,
    ``args``, the placeholders ``cnn_X``/``cnn_Y``/``X``/``Y``/``init_state``
    and the graph ops fetched below.

    Args:
        epoch: Epoch index, used only to offset the TensorBoard step.

    Returns:
        Train mode: ``(loss, acc, test_loss, test_acc)``.
        Test mode: ``(confusion_matrix, accuracy, precision, recall)``.
    """
    loss = 0
    acc = 0
    test_loss = 0
    test_acc = 0

    def init_state_lstm(batch_size):
        """Repeat the graph's zero state ``batch_size`` times along axis 2."""
        init_state_zeros = g.get_tensor_by_name(
            "LSTM_MODEL/init_state_zeros:0")
        return np.repeat(init_state_zeros.eval(), batch_size, axis=2)

    def reshape_data(frames, labels):
        """Flatten clips to single frames and repeat labels per frame."""
        _cnn_inputs = np.reshape(
            frames, (-1, params['INPUT_WIDTH'], params['INPUT_HEIGHT'],
                     params['INPUT_CHANNEL']))
        _cnn_labels = np.repeat(labels, params['N_FRAMES'], axis=0)
        return _cnn_inputs, _cnn_labels

    def extract_feature(frames, labels, cur_state, fetches):
        """Run ``fetches`` with an uninitialised array fed as LSTM input."""
        _cnn_inputs, _cnn_labels = reshape_data(frames, labels)
        # Presumably X must be fed even though the CNN fetches do not use
        # it, hence the placeholder-shaped empty array - TODO confirm.
        _lstm_input = np.empty(
            [1, params['N_FRAMES'], params['N_FEATURES']], dtype=np.float32)
        _cnn_feed_dict = {
            cnn_X: _cnn_inputs,
            cnn_Y: _cnn_labels,
            X: _lstm_input,
            Y: labels,
            init_state: cur_state
        }
        return sess.run(fetches, feed_dict=_cnn_feed_dict)

    def run_session(frames, labels, features, cur_state, fetches):
        """Run ``fetches`` with the extracted CNN features as LSTM input."""
        _cnn_inputs, _cnn_labels = reshape_data(frames, labels)
        _lstm_input = np.reshape(
            features, (-1, params['N_FRAMES'], params['N_FEATURES']))
        _lstm_feed_dict = {
            cnn_X: _cnn_inputs,
            cnn_Y: _cnn_labels,
            X: _lstm_input,
            Y: labels,
            init_state: cur_state
        }
        return sess.run(fetches, feed_dict=_lstm_feed_dict)

    if args.mode[0] == 0:  # train mode
        total_batch = (train_len // batch_size) + 1
        tmp_loss = 0
        tmp_acc = 0
        cnn_tmp_loss = 0
        cnn_tmp_acc = 0
        for batch, clips in get_batch(train_set, batch_size):
            if batch == 0:
                continue
            if batch > total_batch or len(clips) == 0:
                break
            frames, labels = get_clip(clips)
            _current_state = init_state_lstm(len(clips))
            _cnn_loss, _cnn_acc, _cnn_features = extract_feature(
                frames, labels, _current_state,
                [cnn_loss_op, cnn_acc, cnn_features])
            _loss, _acc, _current_state, _summ, _ = run_session(
                frames, labels, _cnn_features, _current_state,
                [loss_op, accuracy, current_state, summary_train, train_op])

            # Sum loss and acc
            tmp_loss += _loss
            tmp_acc += _acc
            cnn_tmp_loss += _cnn_loss
            cnn_tmp_acc += _cnn_acc
            loss += _loss
            acc += _acc

            # summary data to tensor board
            tf_writer.add_summary(_summ, epoch * total_batch + batch)

            # Display if necessary
            if batch % display_step == 0:
                print(
                    "Batch {}/{}: Loss {} - Acc {} - CNN_loss {} - CNN_acc {}"
                    .format(batch, total_batch, tmp_loss / display_step,
                            tmp_acc / display_step,
                            cnn_tmp_loss / display_step,
                            cnn_tmp_acc / display_step))
                tmp_loss = 0
                tmp_acc = 0
                cnn_tmp_loss = 0
                cnn_tmp_acc = 0

        # Average over the last batch index yielded by get_batch.
        loss /= batch
        acc /= batch

        # Valid model
        if params['USE_VALIDATION_DATASET']:
            total_batch = (valid_len // batch_size) + 1
            for batch, clips in get_batch(valid_set, batch_size):
                if batch == 0:
                    continue
                if batch > total_batch or len(clips) == 0:
                    break
                frames, labels = get_clip(clips)
                _current_state = init_state_lstm(len(clips))
                _cnn_features = extract_feature(frames, labels,
                                                _current_state, cnn_features)
                _loss, _acc, _summ = run_session(
                    frames, labels, _cnn_features, _current_state,
                    [loss_op, accuracy, summary_val])

                # Sum loss and acc
                test_loss += _loss
                test_acc += _acc
                tf_writer.add_summary(_summ, epoch * total_batch + batch)

            # compute the average of loss and acc
            test_loss /= batch
            test_acc /= batch

        return loss, acc, test_loss, test_acc
    else:  # Test mode
        lbs = []
        preds = []
        video_records = get_video_test()
        y_true = []
        y_score = []
        for path, label in video_records:
            print("\nvideo: {}".format(path.strip()))
            _vote_label = [0] * params['N_CLASSES']
            _t = time.time()
            # Reset per video so a video that yields no clips is skipped
            # below instead of reusing the previous video's frames.
            frames = None
            for i, frames in enumerate(get_frames_from_video(path)):
                # Identity test: `== None` is elementwise on numpy arrays.
                if frames is None:
                    continue
                if len(frames) == 0:
                    break
                _current_state = init_state_lstm(1)
                _cnn_features = extract_feature([frames], [label],
                                                _current_state, cnn_features)
                _pred_label = run_session([frames], [label], [_cnn_features],
                                          _current_state, prediction)
                _idx_label = np.argmax(_pred_label[0])
                # vote with own weight
                _vote_label[_idx_label] += _pred_label[0][_idx_label]
                print("clip {}: {}".format(i, _pred_label))

            # Skip the video when its last clip was unusable.
            if frames is None or len(frames) == 0:
                continue
            elapsed_time = time.time() - _t
            print(
                "Result: \n--- score: {} \n--- y_true: {} \n--- y_pred: {} \n--- time: {}(s)"
                .format(_vote_label, params['CLASSES'][np.argmax(label)],
                        params['CLASSES'][np.argmax(_vote_label)],
                        elapsed_time))
            lbs.append(np.argmax(label))
            preds.append(np.argmax(_vote_label))
            y_true.append(label)
            y_score.append(_vote_label)

        # compute confusion matrix
        _confuse_matrix = tf.confusion_matrix(lbs, preds).eval()

        # save all y_score
        y_dict = {'y_true': y_true, 'y_score': y_score}
        np.save('y_pred.npy', y_dict)

        # compute accuracy, precision, recall
        with tf.Session() as metric_sess:
            metric_label = tf.placeholder(tf.float32, [None])
            metric_pred = tf.placeholder(tf.float32, [None])
            metric_accuracy, acc_update_op = tf.metrics.accuracy(
                metric_label, metric_pred)
            metric_precision, prec_update_op = tf.metrics.precision(
                metric_label, metric_pred)
            metric_recall, recall_update_op = tf.metrics.recall(
                metric_label, metric_pred)
            metric_sess.run(tf.local_variables_initializer())
            metric_sess.run(
                [acc_update_op, prec_update_op, recall_update_op],
                feed_dict={
                    metric_label: lbs,
                    metric_pred: preds
                })
            _accuracy, _precision, _recall = metric_sess.run(
                [metric_accuracy, metric_precision, metric_recall])
        return _confuse_matrix, _accuracy, _precision, _recall
# Adversarial training: each epoch trains the generator once on
# rollout rewards, then the discriminator `disc_steps` times.
rollout = Rollout(
    generator=gen,
    discriminator=disc,
    batch_size=batch_size,
    embedding_size=embedding_dim,
    sequence_length=seq_len,
    start_token=start_token,
    rollout_num=rollout_num)

for epoch in range(EPOCHS):
    fake_samples = gen.generate()
    rewards = rollout.get_reward(samples=fake_samples)
    gen_loss = gen.train_step(fake_samples, rewards)

    real_samples, _ = get_batch(seq_len, batch_size)
    # Average the discriminator loss over its repeated steps.
    disc_loss = 0
    for _ in range(disc_steps):
        disc_loss += disc.train_step(fake_samples, real_samples) / disc_steps

    with train_summary_writer.as_default():
        tf.summary.scalar('gen_train_loss', gen_loss, step=epoch)
        tf.summary.scalar('disc_train_loss', disc_loss, step=epoch)
        tf.summary.scalar('total_train_loss', disc_loss + gen_loss,
                          step=epoch)

    # Periodic checkpoint and sampling; epoch 0 satisfies the modulus.
    if epoch % 7 == 0:
        disc.model.save_weights(disc.checkpoint_prefix)
        # Each model saves to its own prefix; writing the generator to the
        # discriminator's prefix would overwrite the checkpoint just saved.
        gen.model.save_weights(gen.checkpoint_prefix)
        samples = gen.generate(gen_seq_len)
        genned_songs = extract_songs(samples)
        bleu_score = get_bleu_score(genned_songs)
def run(epoch=0):
    """Run one training epoch (with optional validation) or a test pass.

    ``args.mode[0] == 0`` selects training and evaluates on ``valid_set``
    when ``params['USE_VALIDATION_DATASET']`` is set; ``args.mode[0] == 1``
    evaluates on ``test_set``.

    Args:
        epoch: Epoch index, used only to offset the TensorBoard step.

    Returns:
        ``(loss, acc, test_loss, test_acc)`` in train mode, otherwise
        ``(test_loss, test_acc)``.
    """
    loss = 0
    acc = 0
    test_loss = 0
    test_acc = 0
    eval_set = test_set
    mode = args.mode[0]

    def to_model_inputs(frames, labels):
        """Flatten clips to single frames and repeat labels to match."""
        frame_shape = (-1, params['INPUT_WIDTH'], params['INPUT_HEIGHT'],
                       params['INPUT_CHANNEL'])
        repeats = params['N_FRAMES'] // clip_stride
        return np.reshape(frames, frame_shape), np.repeat(
            labels, repeats, axis=0)

    def run_session(frames, labels, fetches):
        """Feed one batch into the graph and evaluate ``fetches``."""
        inputs, targets = to_model_inputs(frames, labels)
        return sess.run(fetches, feed_dict={X: inputs, Y: targets})

    if mode == 0:  # train mode
        eval_set = valid_set
        total_batch = (train_len // batch_size) + 1
        window_loss = 0
        window_acc = 0
        train_fetches = [loss_op, accuracy, summary_train, train_op]

        for batch, clips in get_batch(train_set, batch_size):
            if batch == 0:
                continue
            if batch > total_batch or len(clips) == 0:
                break

            # Get frames from clips - a small sequence in a video.
            frames, labels = get_clip(clips, clip_stride)
            step_loss, step_acc, summary, _ = run_session(
                frames, labels, train_fetches)

            window_loss += step_loss
            window_acc += step_acc
            loss += step_loss
            acc += step_acc

            # Write summary data to TensorBoard.
            tf_writer.add_summary(summary, epoch * total_batch + batch)

            # Report and reset the running window every display_step.
            if batch % display_step == 0:
                print("Batch {}/{}: Loss {} - Acc {}".format(
                    batch, total_batch, window_loss / display_step,
                    window_acc / display_step))
                window_loss = 0
                window_acc = 0

        # Average over the last batch index yielded by get_batch.
        loss /= batch
        acc /= batch

    # Validate (train mode) or test.
    if params['USE_VALIDATION_DATASET'] or mode == 1:
        total_batch = (len(eval_set) // batch_size) + 1
        eval_fetches = [loss_op, accuracy, summary_val]

        for batch, clips in get_batch(eval_set, batch_size):
            if batch == 0:
                continue
            if batch > total_batch or len(clips) == 0:
                break

            frames, labels = get_clip(clips, clip_stride)
            step_loss, step_acc, summary = run_session(
                frames, labels, eval_fetches)

            test_loss += step_loss
            test_acc += step_acc

            # Summaries are only written while training.
            if mode == 0:
                tf_writer.add_summary(summary, epoch * total_batch + batch)

        test_loss /= batch
        test_acc /= batch

    if mode == 0:  # train mode
        return loss, acc, test_loss, test_acc
    return test_loss, test_acc