def get_processed_dataset(self, mode="train"):
    if not os.path.exists("temp"):
        os.mkdir("temp")
    dataset_pkl = "temp/" + self.conf.dataset + "_" + self.conf.split_data + ".pkl"
    if os.path.exists(dataset_pkl):
        return pickle.load(open(dataset_pkl, 'rb'))

    if mode == "train":
        # waby : gap-development.tsv + gap-test.tsv
        filename = os.path.join(self.opt.dataset_dir, "gap-development.tsv")
        dataset = data_reader.load_data(filename, mode="train")
    elif mode == "dev":
        filename = os.path.join(self.opt.dataset_dir, "gap-validation.tsv")
        dataset = data_reader.load_data(filename, mode="train")
    else:
        filename = os.path.join(self.opt.dataset_dir, "test_stage_2.tsv")
        dataset = data_reader.load_data(filename, mode="test")  # validation data

    # waby : this may not be the right way to return the processed train dataset
    test_gene_texts = dataset[:, 1]
    ids = dataset[:, 2]

    # sequentializing validation data
    word_index, test_docs = data_reader.tokenizer(test_gene_texts, 20000)
    pickle.dump([ids, test_docs], open('test2_id2doc.pkl', 'wb'))
    return [ids, test_docs]
def load_data(self):
    """Load train and validation data."""
    self.train_images, self.train_templates = data_reader.load_data(
        self.data_dir, self.train_images, self.train_templates,
        self.num_train_images, self.image_dim, self.template_dim)
    self.valid_images, self.valid_templates = data_reader.load_data(
        self.data_dir, self.valid_images, self.valid_templates,
        self.num_valid_images, self.image_dim, self.template_dim)
def train_model(self, args):
    """Train model.

    Args:
        dataset, batch_size, epochs.
    """
    X_train, X_test, y_train, y_test = dr.load_data(args.dataset)

    self._model.compile(loss='categorical_crossentropy', optimizer=Adam(),
                        metrics=['accuracy'])
    self._model.summary()

    learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=3,
                                                verbose=1, factor=0.5,
                                                min_lr=0.000001)
    res = self._model.fit(X_train, y_train,
                          batch_size=args.batch_size,
                          nb_epoch=args.epochs,
                          verbose=1,
                          validation_data=(X_test, y_test),
                          callbacks=[learning_rate_reduction])

    print(self._model.evaluate(X_test, y_test))
    self._update_history(res)
    self.plot_history()
def main(_):
    ''' Loads trained model and evaluates it on test split '''
    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1
    if not os.path.exists(FLAGS.load_model + ".index"):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    word_vocab, word_tensors, max_doc_length, label_tensors = \
        load_data(FLAGS.data_dir, FLAGS.max_doc_length, FLAGS.max_sen_length)

    test_reader = DataReader(word_tensors['test'], label_tensors['test'], FLAGS.batch_size)
    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build inference graph '''
        with tf.variable_scope("Model"):
            m = build_model(word_vocab)
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model)
        print('Loaded model from', FLAGS.load_model, 'saved at global step', global_step.eval())

        ''' training starts here '''
        count = 0
        start_time = time.time()
        result_scores = None
        for x, y in test_reader.iter():
            count += 1
            logits = session.run(m.logits, {m.input: x, m.targets: y})
            total_scores = []
            for tid, tlogits in enumerate(logits):
                scores = softmax(tlogits)
                weights = np.array([0, 1, 0.5])
                scores = np.dot(scores, weights)
                total_scores.append(scores)
            total_scores = np.transpose(np.asarray(total_scores))
            if result_scores is None:
                result_scores = total_scores
            else:
                result_scores = np.vstack((result_scores, total_scores))

        save_as = '%s/scores' % (FLAGS.train_dir)
        np.savetxt(save_as, result_scores, delimiter=' ')

        time_elapsed = time.time() - start_time
        print("test samples:", count * FLAGS.batch_size,
              "time elapsed:", time_elapsed,
              "time per one batch:", time_elapsed / count)
def load_data(fn, batch_size, token_type, min_count=3, split_size=0.8, debug=False):
    data = data_reader.load_data(fn)
    if debug:
        data = data[:10]

    # Split data
    num_train = int(split_size * len(data))
    train_data = data[:num_train]

    encode, decode, vocab_size = data_reader.make_encoder(train_data, token_type, min_count)
    encoded_data = data_reader.encode_data(data, encode)
    encoded_data = [(d, cs) for d, cs in encoded_data if len(d) <= 50]
    encoded_train = encoded_data[:num_train]
    encoded_valid = encoded_data[num_train:]

    # Padding width
    d_len = max([len(d) for d, _ in encoded_data])
    c_len = max([max([len(c) for c in cs]) for _, cs in encoded_data]) + 1
    print('Padding to {} and {}'.format(d_len, c_len))

    train_producer, num_train = data_reader.get_producer(encoded_train, batch_size, d_len, c_len)
    valid_producer, num_valid = data_reader.get_producer(encoded_valid, batch_size, d_len, c_len)

    return (train_producer, valid_producer, num_train, num_valid,
            encode, decode, vocab_size, d_len, c_len)
def __init__(self):
    # Keep a reference to the "close" line in the data[0] dataseries
    self.dataclose = self.datas[0].close
    self.dataopen = self.datas[0].open
    self.config = parameters()
    self.data_path = self.config['data_path']
    base_price_needed = False

    # Get all the data for currency conversion if the target pair does not contain
    # your account currency. TODO: Should change how this is done
    if self.config['trading_mode'] == 'forex':
        if self.config['account_base'] not in self.config['target_symbol']:
            self.config['base_target_symbol'] = \
                self.config['account_base'] + '_' + self.config['target_symbol'][4:]
            data_loader = load_data(self.config)
            self.all_symbols, self.all_data = data_loader.load_all_data()
            self.base_symbol_data = data_loader.load_symbol(
                self.all_data, self.all_symbols, self.config['base_target_symbol'])
            self.target_symbol_data = data_loader.load_symbol(
                self.all_data, self.all_symbols, self.config['target_symbol'])
            base_price_needed = True

    print('ping pong long')

    # To keep track of pending orders and buy price/commission
    self.order = None
    self.buyprice = None
    self.buycomm = None
def main(data, label_col, max_depth, n_trees, lr,
         mlflow_tracking_url, experiment_name, build_number):
    test_data, train_data = reader.load_data(data)
    pipeline_model = model.train_model(train_data, label_col, max_depth, n_trees, lr)
    rmse, mae, r2 = model.evaluate_model(pipeline_model, test_data, label_col)

    print("Model tree model (max_depth=%f, trees=%f, lr=%f):" % (max_depth, n_trees, lr))
    print(" RMSE: %s" % rmse)
    print(" MAE: %s" % mae)
    print(" R2: %s" % r2)

    with tracking.TrackML(mlflow_tracking_url, experiment_name, build_number) as track:
        track.log_params({"max_depth": max_depth, "n_trees": n_trees, "lr": lr})
        track.log_metrics({"RMSE": rmse, "R2": r2, "MAE": mae})
        track.log_model("sklearn", pipeline_model, "retail_model")
def test(self):
    batch_size = 4
    num_unroll_steps = 3
    char_vocab_size = 51
    max_word_length = 11
    char_embed_size = 3

    _, _, word_data, char_data, _ = load_data('data/', max_word_length)
    dataset = char_data['train']
    self.assertEqual(dataset.shape, (929589, max_word_length))

    reader = DataReader(word_data['train'], char_data['train'],
                        batch_size=batch_size, num_unroll_steps=num_unroll_steps)
    for x, y in reader.iter():
        assert x.shape == (batch_size, num_unroll_steps, max_word_length)
        break

    self.assertAllClose(X, x)
    self.assertAllClose(Y, y)

    with self.test_session() as session:
        input_ = tf.placeholder(tf.int32,
                                shape=[batch_size, num_unroll_steps, max_word_length],
                                name="input")

        ''' First, embed characters '''
        with tf.variable_scope('Embedding'):
            char_embedding = tf.get_variable('char_embedding',
                                             [char_vocab_size, char_embed_size])
            # [batch_size x max_word_length, num_unroll_steps, char_embed_size]
            input_embedded = tf.nn.embedding_lookup(char_embedding, input_)
            input_embedded = tf.reshape(input_embedded, [-1, max_word_length, char_embed_size])

        session.run(tf.assign(char_embedding, EMBEDDING))
        ie = session.run(input_embedded, {input_: x})
        #print(x.shape)
        #print(np.transpose(x, (1, 0, 2)))
        #print(ie.shape)
        ie = ie.reshape([batch_size, num_unroll_steps, max_word_length, char_embed_size])
        ie = np.transpose(ie, (1, 0, 2, 3))
        #print(ie[0,:,:,:])
        self.assertAllClose(IE3, ie[0, :, :, :])
def run():
    ''' Loads trained model and evaluates it on test split '''
    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1
    if not os.path.exists(FLAGS.load_model + '.meta'):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length, words_list = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, FLAGS.num_unroll_steps, eos=FLAGS.EOS)

    fasttext_model = FasttextModel(fasttext_path=FLAGS.fasttext_model_path).get_fasttext_model()
    print('initialized test dataset reader')

    session = tf.Session()
    # tensorflow seed must be inside graph
    tf.set_random_seed(FLAGS.seed)
    np.random.seed(seed=FLAGS.seed)

    ''' build inference graph '''
    with tf.variable_scope("Model"):
        m = model.inference_graph(
            char_vocab_size=char_vocab.size, word_vocab_size=word_vocab.size,
            char_embed_size=FLAGS.char_embed_size, batch_size=FLAGS.batch_size,
            num_highway_layers=FLAGS.highway_layers, num_rnn_layers=FLAGS.rnn_layers,
            rnn_size=FLAGS.rnn_size, max_word_length=max_word_length,
            kernels=eval(FLAGS.kernels), kernel_features=eval(FLAGS.kernel_features),
            num_unroll_steps=FLAGS.num_unroll_steps, dropout=0,
            embedding=FLAGS.embedding, fasttext_word_dim=300, acoustic_features_dim=4)

        # we need global step only because we want to read it from the model
        global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

    saver = tf.train.Saver()
    saver.restore(session, FLAGS.load_model)
    print('Loaded model from', FLAGS.load_model)

    ''' training starts here '''
    return session, m, fasttext_model, max_word_length, char_vocab, word_vocab
def predict_test_and_save(filename, model, data_dir, test_images, valid_templates,
                          num_test_images, image_dim, template_dim):
    """Predict test templates and save them to binary file."""
    test_images, _ = data_reader.load_data(data_dir, test_images, valid_templates,
                                           num_test_images, image_dim, template_dim)
    predictions = model.predict(test_images, batch_size=1000)

    f = open(os.path.join(data_dir, filename), 'wb')
    for i in range(num_test_images):
        f.write(predictions[i, :])
    f.close()
    print("Predictions saved at " + os.path.join(data_dir, filename))
def load_data(self, task):
    self.encoder_in, self.decoder_in, self.decoder_out = data_reader.load_data(task=self.data)

    # TODO: change max_len = 10, larger than y.shape[1]
    self.src_max_len = self.encoder_in.shape[1]
    self.tgt_max_len = self.decoder_out.shape[1]

    if task == "task1":
        self.src_token_size = np.max(self.encoder_in) + 1
    elif task == "control_length":
        self.src_token_size = np.max(self.encoder_in) + 1  # TODO: Remove this if/else.
    self.tgt_token_size = np.max(self.decoder_out) + 1
    print("(Load data) token_size =", self.src_token_size, self.tgt_token_size)

    self.cut_validation()
    self.target = np.zeros([self.batch_size, self.decoder_out.shape[1], self.tgt_token_size])
def run_test2(session, m, reader):
    state = session.run(m.initial_rnn_state)
    tokenNum = 0

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)
    train_reader = DataReader(word_tensors['train'], char_tensors['train'], 1, 1)

    i = 1
    for x, y in train_reader.iter():
        state = session.run([m.final_rnn_state], {
            m.input: x,
            m.targets: y,
            m.initial_rnn_state: state
        })

        # constructs the word_embedding (which is the input node to the LSTM)
        # NOTE: each element is an index to a character_embedding. thus, it's
        # actually a matrix
        word_embedding = x[0][0]
        output = ""
        for w in word_embedding:
            output = output + str(w) + " "
        output = output.rstrip() + ","
        #print ("char_embedding[1]:" + str(session.run(m.char_embedding[1])))
        i = i + 1

        layer1 = state[0][0]
        layer2 = state[0][1]
        layer1_hiddens = layer1[1][0]
        layer2_hiddens = layer2[1][0]
        for x in layer1_hiddens:
            output = output + str(x) + " "
        output = output.rstrip() + ","
        for x in layer2_hiddens:
            output = output + str(x) + " "
        output = output.rstrip() + "\n"
        print(output)
def get_sample(task, N=100):
    """Store samples based on their labels and time steps."""
    # TODO: Implement reading N from outside and only read N samples for each class.
    # TODO: Read src as the same way in other functions in this file.
    x, y, y_output = data_reader.load_data(task)  # , mode = 'analysis')

    # TODO: Change variable's name.
    split_result = data_reader.data_split(x, y, y_output)
    x_test = split_result[6]
    y_test = split_result[7]
    print('(Get sample) sample =', x_test.shape, y_test.shape)

    if task == 'task1':
        # Only 5 classes in this task.
        container = {0: [], 1: [], 2: [], 3: [], 4: []}
        position = np.argmax(y, axis=-1)
        for i, p in enumerate(list(position)):
            if len(container[p]) < N:
                container[p].append(i)
        print('container', [len(container[i]) for i in range(5)])
    elif task == "control_length":
        # Container (dict) stores sample index of each class. Map class to sample index.
        container = defaultdict(list)
        # The token of [length = 1] is 8.
        basic_length = 3 + 5 - 1
        for i, sample in enumerate(x_test):
            length = sample[-1] - basic_length
            if len(container[length]) < N:
                container[length].append(i)
        for key in container.keys():
            if len(container[key]) < N:
                print("Error: Samples for key %d is not enough (%d < N = %d)."
                      % (key, len(container[key]), N))
                # TODO: Return error here, or remove this key.
                # pdb.set_trace()

    # Example: container.keys() = dict_keys([6, 5, 10, 1, 7, 4, 3, 2, 8, 9])
    print('(Get sample) Check container: ', [len(container[key]) for key in container.keys()])
    return container
import tensorflow as tf
from sklearn.metrics import roc_auc_score
from tensorflow.python.platform import gfile
import numpy as np
import os
import data_reader
from train_model import model

x, y, test_data, vocabulary = data_reader.load_data()

# Oversample the first 26319 examples ten times; the slice 26319:26349 plus the
# last 300 examples form the validation set.
x_train, x_val = np.concatenate((x[:26319],) * 10 + (x[26349:-300],)), \
                 np.concatenate((x[26319:26349],) * 10 + (x[-300:],))
y_train, y_val = np.concatenate((y[:26319],) * 10 + (y[26349:-300],)), \
                 np.concatenate((y[26319:26349],) * 10 + (y[-300:],))

_index = np.random.permutation(np.arange(len(x_train)))
x_train = x_train[_index]
y_train = y_train[_index]
article_length = x_train.shape[1]

out_dir = "logs/"
sess = tf.Session()
with sess.as_default():
    model_ = model(article_length=article_length, vocab_size=len(vocabulary))
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(2e-4)
    train_op = optimizer.apply_gradients(optimizer.compute_gradients(model_.loss),
                                         global_step=global_step)
    saver = tf.train.Saver(tf.all_variables())
    sess.run(tf.initialize_all_variables())
    ckpt = tf.train.get_checkpoint_state(os.path.join(out_dir, 'checkpoints'))
def main(_):
    ''' Loads trained model and evaluates it on test split '''
    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1
    if not os.path.exists(FLAGS.load_model + '.meta'):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)
    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build inference graph '''
        with tf.variable_scope("Model"):
            m = model.inference_graph(
                char_vocab_size=char_vocab.size, word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size, batch_size=1,
                num_highway_layers=FLAGS.highway_layers, num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size, max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels), kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=1, dropout=0)

            # we need global step only because we want to read it from the model
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model)
        print('Loaded model from', FLAGS.load_model, 'saved at global step', global_step.eval())

        ''' training starts here '''
        rnn_state = session.run(m.initial_rnn_state)
        logits = np.ones((word_vocab.size,))
        rnn_state = session.run(m.initial_rnn_state)
        for i in range(FLAGS.num_samples):
            logits = logits / FLAGS.temperature
            prob = np.exp(logits)
            prob /= np.sum(prob)
            prob = prob.ravel()
            ix = np.random.choice(range(len(prob)), p=prob)

            word = word_vocab.token(ix)
            if word == '|':  # EOS
                print('<unk>', end=' ')
            elif word == '+':
                print('\n')
            else:
                print(word, end=' ')

            char_input = np.zeros((1, 1, max_word_length))
            for i, c in enumerate('{' + word + '}'):
                char_input[0, 0, i] = char_vocab[c]

            logits, rnn_state = session.run([m.logits, m.final_rnn_state], {
                m.input: char_input,
                m.initial_rnn_state: rnn_state
            })
            logits = np.array(logits)
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
#from dataset.data_reader import load_data
from data_reader import load_data
from vgg16 import VGG16
from trainer import Trainer

data_path = 'dataset/hdf5_file/dataset_s299_fold_4.hdf5'
(x_train, t_train), (x_valid, t_valid) = load_data(image_size=224,
                                                   normalize=True,
                                                   one_hot_label=False,
                                                   hdf5_path=data_path)

network = VGG16()
trainer = Trainer(network, x_train, t_train, x_valid, t_valid,
                  epochs=20, mini_batch_size=5,
                  optimizer='Adam', optimizer_param={'lr': 5e-5})
trainer.train()

network.save_params(file_name="Records/params.pkl")
print("\nSaved Network Parameters!\n")
def train(config):
    # Load the data
    print("Loading data...")
    data = data_reader.load_data(config.data_fn)
    if args.debug:
        data = sorted(data, key=lambda d: len(d[0]))
        data = data[:10]

    # Split data
    num_train = int(0.8 * len(data))
    train_data = data[:num_train]

    if config.use_glove:
        config.token_type = "glove"
        config.embed_size = 50
        encode, decode, vocab_size, L = data_reader.glove_encoder(config.glove_dir)
    else:
        L = None
        encode, decode, vocab_size = data_reader.make_encoder(train_data, config.token_type)

    config.encode = encode
    config.decode = decode
    config.vocab_size = vocab_size

    if config.token_type == "chars":
        max_c_len = 100
        max_d_len = 200
    else:
        max_c_len = 15 if args.debug else 25
        max_d_len = 50

    encoded_data = data_reader.encode_data(data, encode, max_c_len)
    encoded_data = [(d, cs) for d, cs in encoded_data if len(d) <= max_d_len]
    encoded_train = encoded_data[:num_train]
    encoded_valid = encoded_data[num_train:]

    # Padding width
    config.d_len = d_len = max([len(d) for d, _ in encoded_data])
    config.c_len = c_len = max([max([len(c) for c in cs]) for _, cs in encoded_data]) + 1
    print('Padding to {} and {}'.format(d_len, c_len))

    train_producer, num_train = data_reader.get_producer(encoded_train, config.batch_size, d_len, c_len)
    valid_producer, num_valid = data_reader.get_producer(encoded_valid, config.batch_size, d_len, c_len)

    print("Done. Building model...")
    if config.token_type == "chars":
        config.embed_size = vocab_size

    # Create a duplicate of the training model for generating text
    gen_config = deepcopy(config)
    gen_config.batch_size = 1
    gen_config.dropout = 1.0

    # Save gen_model config so we can sample later
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    path_to_model = os.path.join(args.save_dir, "config")
    with open(path_to_model, "wb") as f:
        pickle.dump(gen_config, f)
    path_to_index = os.path.join(args.save_dir, "index")
    with open(path_to_index, "w") as f:
        f.write("loss per epoch:\n")
        f.write("---------------\n")

    # Create training model
    with tf.variable_scope("LSTM") as scope:
        model = lstm_ops.seq2seq_model(
            encoder_seq_length=d_len, decoder_seq_length=c_len,
            num_layers=config.num_layers, embed_size=config.embed_size,
            batch_size=config.batch_size, hidden_size=config.hidden_size,
            vocab_size=vocab_size, dropout=config.dropout,
            max_grad_norm=config.max_grad_norm, use_attention=args.use_attention,
            embeddings=L, is_training=True, is_gen_model=False,
            token_type=config.token_type, reuse=False)

        gen_model = lstm_ops.seq2seq_model(
            encoder_seq_length=d_len, decoder_seq_length=1,
            num_layers=gen_config.num_layers, embed_size=gen_config.embed_size,
            batch_size=gen_config.batch_size, hidden_size=config.hidden_size,
            vocab_size=vocab_size, dropout=gen_config.dropout,
            max_grad_norm=gen_config.max_grad_norm, use_attention=args.use_attention,
            embeddings=L, is_training=False, is_gen_model=True,
            token_type=config.token_type, reuse=True)
    print("Done.")

    def generate():
        return lstm_ops.generate_text_beam_search(
            session=session, model=gen_model,
            encode=gen_config.encode, decode=gen_config.decode,
            description=test_description, d_len=gen_config.d_len,
            beam=5, stop_length=gen_config.c_len, temperature=args.temperature)

    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    with tf.Session(config=sess_config) as session:
        if args.resume_from is not None:
            reload_saver = tf.train.Saver()
            reload_saver.restore(session, tf.train.latest_checkpoint('./' + args.resume_from))

        best_val_pp = float('inf')
        best_val_epoch = 0

        session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        # Sample some text
        print(generate())

        for epoch in range(config.max_epochs):
            print('Epoch {}'.format(epoch))
            start = timer()

            # Train on the epoch and validate
            train_pp = lstm_ops.run_epoch(session, model, train_producer, num_train,
                                          args.log_every, args.sample_every, generate)
            print("Validating:")
            valid_pp = lstm_ops.run_epoch(session, model, valid_producer, num_valid,
                                          args.log_every, args.sample_every, generate,
                                          is_training=False)
            print("Validation loss: {}".format(valid_pp))

            # Save the model if validation loss has dropped
            if valid_pp < best_val_pp:
                with open(path_to_index, "a") as f:
                    f.write("{}: {}*\n".format(epoch, valid_pp))
                best_val_pp = valid_pp
                best_val_epoch = epoch
                path_to_ckpt = os.path.join(args.save_dir, "epoch.ckpt")
                print("Saving model to " + path_to_ckpt)
                saver.save(session, "./" + path_to_ckpt)
            # Otherwise just record validation loss in save_dir/index
            else:
                with open(path_to_index, "a") as f:
                    f.write("{}: {}\n".format(epoch, valid_pp))

            # Stop early if validation loss is getting worse
            if epoch - best_val_epoch > args.early_stopping:
                print("Stopping early")
                break

            print('Total time: {}\n'.format(timer() - start))

        print(generate())
        print(generate())
import numpy as np
import data_reader
import sys
import scipy.signal as sig

if __name__ == "__main__":
    for f in sys.argv[1:]:
        decimate = 6
        output_file = f"{f}_decimate_{decimate}.bin"

        a = data_reader.load_data(f)
        a = a.reshape(16, 16, -1)

        # create offset of 100, as the sent impulse is part of the signal
        offset = 100
        new_shape = list(a.shape)
        new_shape[-1] -= offset
        new_shape = tuple(new_shape)

        new_a = np.zeros(new_shape)
        new_a[:] = a[:, :, offset:]
        new_a = sig.decimate(new_a, decimate, ftype='fir', axis=-1)

        print(f"Created file <<{output_file}>> with shape {new_a.shape}")
        new_a.astype(np.float64).tofile(output_file)
def main(_):
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", help="Model to load")
    args = parser.parse_args()

    ''' Loads trained model and evaluates it on test split '''
    if args.model is None:
        print('Please specify checkpoint file to load model from')
        return -1
    if not os.path.exists(args.model + '.meta'):
        print('Checkpoint file not found', args.model)
        return -1
    model_path = args.model

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length)
    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build inference graph '''
        with tf.variable_scope("Model"):
            model = Model(FLAGS, char_vocab, word_vocab, max_word_length, ModelUsage.USE)
            # we need global step only because we want to read it from the model
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, model_path)
        print('Loaded model from', model_path, 'saved at global step', global_step.eval())

        ''' test starts here '''
        rnn_state = session.run(model.initial_rnn_state)
        logits = np.ones((word_vocab.size,))

        while True:
            word = input('Enter a word : ')
            if len(word) > max_word_length:
                print('Invalid word, maximum word size is ' + str(max_word_length))
                continue

            char_input = np.zeros((1, 1, max_word_length))
            for i, c in enumerate(word):
                char_input[0, 0, i] = char_vocab[c]

            logits, rnn_state = session.run([model.logits, model.final_rnn_state], {
                model.input: char_input,
                model.initial_rnn_state: rnn_state
            })

            prob = np.exp(logits)
            prob /= np.sum(prob)
            for i in range(5):
                ix = np.argmax(prob)
                print(str(i) + " - " + word_vocab.token(ix) + ' : ' + str(prob[0][0][ix]))
                prob[0][0][ix] = 0.0
def main(_):
    ''' Trains model from data '''
    print("we in main")
    print(sys.argv[2])
    print(FLAGS)

    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory', FLAGS.train_dir)

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    train_reader = DataReader(word_tensors['train'], char_tensors['train'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)
    valid_reader = DataReader(word_tensors['valid'], char_tensors['valid'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)
    test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                             FLAGS.batch_size, FLAGS.num_unroll_steps)
    print('initialized all dataset readers')

    with tf.Graph().as_default(), tf.Session() as session:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build training graph '''
        initializer = tf.random_uniform_initializer(-FLAGS.param_init, FLAGS.param_init)
        with tf.variable_scope("Model", initializer=initializer):
            train_model = model.inference_graph(
                char_vocab_size=char_vocab.size, word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size, batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers, num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size, max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels), kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps, dropout=FLAGS.dropout)
            train_model.update(model.loss_graph(train_model.logits, FLAGS.batch_size,
                                                FLAGS.num_unroll_steps))

            # scaling loss by FLAGS.num_unroll_steps effectively scales gradients by the same factor.
            # we need it to reproduce how the original Torch code optimizes. Without this, our gradients
            # will be much smaller (i.e. 35 times smaller) and to get system to learn we'd have to scale
            # learning rate and max_grad_norm appropriately. Thus, scaling gradients so that this trainer
            # is exactly compatible with the original
            train_model.update(model.training_graph(train_model.loss * FLAGS.num_unroll_steps,
                                                    FLAGS.learning_rate, FLAGS.max_grad_norm))

        # create saver before creating more graph nodes, so that we do not save any vars defined below
        saver = tf.train.Saver(max_to_keep=50)

        ''' build graph for validation and testing (shares parameters with the training graph!) '''
        with tf.variable_scope("Model", reuse=True):
            valid_model = model.inference_graph(
                char_vocab_size=char_vocab.size, word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size, batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers, num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size, max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels), kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps, dropout=0.0)
            valid_model.update(model.loss_graph(valid_model.logits, FLAGS.batch_size,
                                                FLAGS.num_unroll_steps))

        with tf.variable_scope("Model", reuse=True):
            test_model = model.inference_graph(
                char_vocab_size=char_vocab.size, word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size, batch_size=1,
                num_highway_layers=FLAGS.highway_layers, num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size, max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels), kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=1, dropout=0.0)
            test_model.update(model.loss_graph(test_model.logits, 1, 1))

        if FLAGS.load_model:
            saver.restore(session, FLAGS.load_model)
            print('Loaded model from', FLAGS.load_model,
                  'saved at global step', train_model.global_step.eval())
        else:
            tf.initialize_all_variables().run()
            print('Created and initialized fresh model. Size:', model.model_size())

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph=session.graph)

        ''' take learning rate from CLI, not from saved graph '''
        session.run(tf.assign(train_model.learning_rate, FLAGS.learning_rate), )

        def clear_char_embedding_padding():
            char_embedding = session.run(train_model.char_embedding)
            char_embedding[0, :] = 0.0
            session.run(tf.assign(train_model.char_embedding, char_embedding))
            char_embedding = session.run(train_model.char_embedding)

        clear_char_embedding_padding()

        run_test2(session, test_model, train_reader)
        #exit(1)

        ''' training starts here '''
        best_valid_loss = None
        rnn_state = session.run(train_model.initial_rnn_state)
        for epoch in range(FLAGS.max_epochs):
            avg_train_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()
                print(x)
                exit(1)

                loss, _, rnn_state, gradient_norm, step = session.run([
                    train_model.loss,
                    train_model.train_op,
                    train_model.final_rnn_state,
                    train_model.global_norm,
                    train_model.global_step,
                ], {
                    train_model.input: x,
                    train_model.targets: y,
                    train_model.initial_rnn_state: rnn_state
                })
                clear_char_embedding_padding()

                avg_train_loss += 0.05 * (loss - avg_train_loss)
                time_elapsed = time.time() - start_time

                if count % FLAGS.print_every == 0:
                    print('%6d: %d [%5d/%5d], train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs, grad.norm=%6.8f' %
                          (step, epoch, count, train_reader.length, loss, np.exp(loss), time_elapsed, gradient_norm))

            # epoch done: time to evaluate
            avg_valid_loss = 0.0
            count = 0
            rnn_state = session.run(valid_model.initial_rnn_state)
            for x, y in valid_reader.iter():
                count += 1
                start_time = time.time()

                loss, rnn_state = session.run([
                    valid_model.loss,
                    valid_model.final_rnn_state
                ], {
                    valid_model.input: x,
                    valid_model.targets: y,
                    valid_model.initial_rnn_state: rnn_state,
                })

                if count % FLAGS.print_every == 0:
                    print("\t> validation loss = %6.8f, perplexity = %6.8f" % (loss, np.exp(loss)))
                avg_valid_loss += loss / valid_reader.length

            print("at the end of epoch:", epoch)
            print("train loss = %6.8f, perplexity = %6.8f" % (avg_train_loss, np.exp(avg_train_loss)))
            print("validation loss = %6.8f, perplexity = %6.8f" % (avg_valid_loss, np.exp(avg_valid_loss)))

            save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_valid_loss)
            saver.save(session, save_as)
            print('Saved model', save_as)

            ''' write out summary events '''
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="train_loss", simple_value=avg_train_loss),
                tf.Summary.Value(tag="valid_loss", simple_value=avg_valid_loss)
            ])
            summary_writer.add_summary(summary, step)

            ''' decide if need to decay learning rate '''
            if best_valid_loss is not None and \
                    np.exp(avg_valid_loss) > np.exp(best_valid_loss) - FLAGS.decay_when:
                print('** validation perplexity did not improve enough, decay learning rate')
                current_learning_rate = session.run(train_model.learning_rate)
                print('learning rate was:', current_learning_rate)
                current_learning_rate *= FLAGS.learning_rate_decay
                if current_learning_rate < 1.e-5:
                    print('learning rate too small - stopping now')
                    break
                session.run(train_model.learning_rate.assign(current_learning_rate))
                print('new learning rate is:', current_learning_rate)
            else:
                best_valid_loss = avg_valid_loss

        run_test2(session, test_model, train_reader)
        print("AGAIN")
        run_test2(session, test_model, train_reader)
loss_op = slim.losses.softmax_cross_entropy(prediction, Y)
tf.summary.scalar('loss', loss_op)
optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
tf.summary.scalar('accuracy', accuracy)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

data_X, data_Y = load_data()
indices = np.random.permutation(np.arange(data_X.shape[0]))
data_X = data_X[indices, :, :]
data_Y = data_Y[indices]

merged = tf.summary.merge_all()
saver = tf.train.Saver()

with tf.Session() as sess:
    train_writer = tf.summary.FileWriter("cnn_logs_8/",
def main():
    ''' Trains model from data '''
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory', FLAGS.train_dir)

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    train_reader = DataReader(word_tensors['train'], char_tensors['train'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)
    valid_reader = DataReader(word_tensors['valid'], char_tensors['valid'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)
    test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                             FLAGS.batch_size, FLAGS.num_unroll_steps)
    print('initialized all dataset readers')

    args = FLAGS
    args.max_word_length = max_word_length
    args.char_vocab_size = char_vocab.size
    args.word_vocab_size = word_vocab.size

    g = tf.Graph()
    with tf.device("/gpu:0"), g.as_default():
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build training graph '''
        initializer = tf.random_uniform_initializer(-FLAGS.param_init, FLAGS.param_init)
        with tf.variable_scope("Model", initializer=initializer):
            train_model = Model(args)

        saver = tf.train.Saver(tf.all_variables())  # hjq, max_to_keep=50

        ''' build graph for validation and testing (shares parameters with the training graph!) '''
        args.dropout = 0.0
        with tf.variable_scope("Model", reuse=True):
            valid_model = Model(args)

    with tf.Session(graph=g) as session:
        if FLAGS.load_model:
            saver.restore(session, FLAGS.load_model)
            print('Loaded model from', FLAGS.load_model,
                  'saved at global step', train_model.global_step.eval())
        else:
            tf.initialize_all_variables().run()
            print('Created and initialized fresh model.')

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph=session.graph)

        ''' take learning rate from CLI, not from saved graph '''
        session.run(tf.assign(train_model.learning_rate, FLAGS.learning_rate), )
        session.run(train_model.char_embedding)

        ''' training starts here '''
        best_valid_loss = None
        for epoch in range(FLAGS.max_epochs):
            start = time.time()
            rnn_state = session.run(train_model.initial_rnn_state)  # hjq
            avg_train_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()

                rnn_state, loss, step, grad_norm, _ = session.run(
                    [  # sequence order change
                        train_model.final_rnn_state,
                        train_model.loss,
                        train_model.global_step,
                        train_model.global_norm,
                        train_model.train_op,
                    ], {
                        train_model.input_: x,
                        train_model.targets: y,
                        train_model.initial_rnn_state: rnn_state
                    })
                session.run(train_model.char_embedding)

                avg_train_loss += 0.05 * (loss - avg_train_loss)
                time_elapsed = time.time() - start_time

                if count % FLAGS.print_every == 0:
                    print('%6d: %d [%5d/%5d], train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs, grad.norm=%6.8f' %
                          (step, epoch, count, train_reader.length, loss, np.exp(loss), time_elapsed, grad_norm))

            # epoch done: time to evaluate
            avg_valid_loss = 0.0
            count = 0
            # rnn_state = session.run(valid_model.initial_rnn_state)
            for x, y in valid_reader.iter():
                count += 1
                rnn_state = session.run(valid_model.initial_rnn_state)
                loss, rnn_state = session.run(
                    [valid_model.loss, valid_model.final_rnn_state], {
                        valid_model.input_: x,
                        valid_model.targets: y,
                        valid_model.initial_rnn_state: rnn_state,
                    })

                if count % FLAGS.print_every == 0:
                    print("\t> validation loss = %6.8f, perplexity = %6.8f" % (loss, np.exp(loss)))
                avg_valid_loss += loss / valid_reader.length

            print("at the end of epoch:", epoch)
            print("train loss = %6.8f, perplexity = %6.8f" % (avg_train_loss, np.exp(avg_train_loss)))
            print("validation loss = %6.8f, perplexity = %6.8f" % (avg_valid_loss, np.exp(avg_valid_loss)))
            print("epoch time: %6.4f s" % (time.time() - start))  # hjq

            save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_valid_loss)
            saver.save(session, save_as)
            print('Saved model', save_as)

            ''' write out summary events '''
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="train_loss", simple_value=avg_train_loss),
                tf.Summary.Value(tag="valid_loss", simple_value=avg_valid_loss)
            ])
            summary_writer.add_summary(summary, step)

            ''' decide if need to decay learning rate '''
            if best_valid_loss is not None and \
                    np.exp(avg_valid_loss) > np.exp(best_valid_loss) - FLAGS.decay_when:
                print('validation perplexity did not improve enough, decay learning rate')
                current_learning_rate = session.run(train_model.learning_rate)
                print('learning rate was:', current_learning_rate)
                current_learning_rate *= FLAGS.learning_rate_decay
                if current_learning_rate < 1.e-5:
                    print('learning rate too small - stopping now')
                    break
                session.run(train_model.learning_rate.assign(current_learning_rate))
                print('new learning rate is:', current_learning_rate)
            else:
                best_valid_loss = avg_valid_loss
def test(self):
    batch_size = 4
    num_unroll_steps = 3
    char_vocab_size = 51
    max_word_length = 11
    char_embed_size = 3

    _, _, word_data, char_data, _ = load_data('data/', max_word_length)
    dataset = char_data['train.txt']
    self.assertEqual(dataset.shape, (929589, max_word_length))

    reader = DataReader(word_data['train.txt'], char_data['train.txt'],
                        batch_size=batch_size, num_unroll_steps=num_unroll_steps)
    for x, y in reader.iter():
        assert x.shape == (batch_size, num_unroll_steps, max_word_length)
        break

    self.assertAllClose(X, x)

    with self.test_session() as session:
        input_ = tf.placeholder(tf.int32,
                                shape=[batch_size, num_unroll_steps, max_word_length],
                                name="input")

        ''' First, embed characters '''
        with tf.variable_scope('Embedding'):
            char_embedding = tf.get_variable('char_embedding',
                                             [char_vocab_size, char_embed_size])
            # [batch_size x max_word_length, num_unroll_steps, char_embed_size]
            input_embedded = tf.nn.embedding_lookup(char_embedding, input_)
            input_embedded = tf.reshape(input_embedded, [-1, max_word_length, char_embed_size])

        session.run(tf.assign(char_embedding, EMBEDDING))
        ie = session.run(input_embedded, {input_: x})

        output = tdnn(input_embedded, [2], [2], scope='TDNN')
        out = session.run(output, {
            input_embedded: ie,
            'TDNN/kernel_2/w:0': np.reshape(np.transpose(KERNEL_2_W), [1, 2, num_unroll_steps, 2]),
            'TDNN/kernel_2/b:0': KERNEL_2_B
        })
        out = out.reshape([batch_size, num_unroll_steps, 2])
        out = out.transpose([1, 0, 2])  # torch uses time-major order

        self.assertAllClose(out, np.array(
            [[[-0.04201929, 0.02275813], [-0.04060676, 0.02283999],
              [-0.04333816, 0.02333505], [-0.04131923, 0.02480407]],
             [[-0.04124087, 0.02429205], [-0.04117644, 0.02419558],
              [-0.04282973, 0.02318067], [-0.04131923, 0.02480407]],
             [[-0.03877186, 0.0243939], [-0.04173752, 0.02552123],
              [-0.04168687, 0.02385954], [-0.04201929, 0.02454825]]]))

        print(out.shape)
        print(out)
        assert False
def main(_):
    ''' Loads trained model and evaluates it on test split '''
    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1
    if not os.path.exists(FLAGS.load_model):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = load_data(
        FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                             FLAGS.batch_size, FLAGS.num_unroll_steps)
    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build inference graph '''
        with tf.variable_scope("Model"):
            m = model.inference_graph(
                char_vocab_size=char_vocab.size, word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size, batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers, num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size, max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels), kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps, dropout=0)
            m.update(model.loss_graph(m.logits, FLAGS.batch_size, FLAGS.num_unroll_steps))

            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model)
        print('Loaded model from', FLAGS.load_model, 'saved at global step', global_step.eval())

        ''' training starts here '''
        rnn_state = session.run(m.initial_rnn_state)
        count = 0
        avg_loss = 0
        start_time = time.time()
        for x, y in test_reader.iter():
            count += 1
            loss, rnn_state = session.run([m.loss, m.final_rnn_state], {
                m.input: x,
                m.targets: y,
                m.initial_rnn_state: rnn_state
            })
            avg_loss += loss

        avg_loss /= count
        time_elapsed = time.time() - start_time

        print("test loss = %6.8f, perplexity = %6.8f" % (avg_loss, np.exp(avg_loss)))
        print("test samples:", count * FLAGS.batch_size,
              "time elapsed:", time_elapsed,
              "time per one batch:", time_elapsed / count)
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
args = parser.parse_args()
print("dc", args.doc_len)
max_sen_length = args.sen_len
max_doc_length = args.doc_len

logging.info('generate config')

word_vocab, word_tensors, max_doc_length, label_tensors = \
    dr.load_data(args.train_file, max_doc_length, max_sen_length)

batch_size = 1
time1 = time.time()
test_reader = dr.DataReader(word_tensors['test'], label_tensors['test'], batch_size)

graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        '''
        with tf.variable_scope("Model"):
def main(_):
    ''' Trains model from data '''
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory', FLAGS.train_dir)

    word_vocab, word_tensors, max_doc_length, label_tensors = \
        load_data(FLAGS.data_dir, FLAGS.max_doc_length, FLAGS.max_sen_length)

    train_reader = DataReader(word_tensors['train'], label_tensors['train'], FLAGS.batch_size)
    valid_reader = DataReader(word_tensors['valid'], label_tensors['valid'], FLAGS.batch_size)
    test_reader = DataReader(word_tensors['test'], label_tensors['test'], FLAGS.batch_size)
    print('initialized all dataset readers')

    with tf.Graph().as_default(), tf.Session() as session:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build training graph '''
        initializer = tf.random_uniform_initializer(-FLAGS.param_init, FLAGS.param_init)
        with tf.variable_scope("Model", initializer=initializer):
            train_model = build_model(word_vocab, max_doc_length, train=True)

        # create saver before creating more graph nodes, so that we do not save any vars defined below
        saver = tf.train.Saver(max_to_keep=50)

        ''' build graph for validation and testing (shares parameters with the training graph!) '''
        with tf.variable_scope("Model", reuse=True):
            valid_model = build_model(word_vocab, max_doc_length, train=False)

        if FLAGS.load_model:
            saver.restore(session, FLAGS.load_model)
            print('Loaded model from', FLAGS.load_model,
                  'saved at global step', train_model.global_step.eval())
        else:
            tf.global_variables_initializer().run()
            session.run(train_model.clear_word_embedding_padding)
            print('Created and initialized fresh model. Size:', model.model_size())

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=session.graph)

        ''' take learning rate from CLI, not from saved graph '''
        session.run(tf.assign(train_model.learning_rate, FLAGS.learning_rate), )

        ''' training starts here '''
        best_valid_loss = None
        #rnn_state = session.run(train_model.initial_rnn_state)
        for epoch in range(FLAGS.max_epochs):
            epoch_start_time = time.time()
            avg_train_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()

                loss, _, gradient_norm, step, _ = session.run([
                    train_model.loss,
                    train_model.train_op,
                    train_model.global_norm,
                    train_model.global_step,
                    train_model.clear_word_embedding_padding
                ], {
                    train_model.input: x,
                    train_model.targets: y,
                })

                avg_train_loss += 0.05 * (loss - avg_train_loss)
                time_elapsed = time.time() - start_time

                if count % FLAGS.print_every == 0:
                    print('%6d: %d [%5d/%5d], train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs, grad.norm=%6.8f' %
                          (step, epoch, count, train_reader.length, loss, np.exp(loss), time_elapsed, gradient_norm))

            print('Epoch training time:', time.time() - epoch_start_time)

            # epoch done: time to evaluate
            avg_valid_loss = 0.0
            count = 0
            #rnn_state = session.run(valid_model.initial_rnn_state)
            for x, y in valid_reader.iter():
                count += 1
                start_time = time.time()

                loss = session.run(valid_model.loss, {
                    valid_model.input: x,
                    valid_model.targets: y,
                })

                if count % FLAGS.print_every == 0:
                    print("\t> validation loss = %6.8f, perplexity = %6.8f" % (loss, np.exp(loss)))
                avg_valid_loss += loss / valid_reader.length

            print("at the end of epoch:", epoch)
            print("train loss = %6.8f, perplexity = %6.8f" % (avg_train_loss, np.exp(avg_train_loss)))
            print("validation loss = %6.8f, perplexity = %6.8f" % (avg_valid_loss, np.exp(avg_valid_loss)))

            save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_valid_loss)
            saver.save(session, save_as)
            print('Saved model', save_as)

            ''' write out summary events '''
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="train_loss", simple_value=avg_train_loss),
                tf.Summary.Value(tag="valid_loss", simple_value=avg_valid_loss)
            ])
            summary_writer.add_summary(summary, step)

            ''' decide if need to decay learning rate '''
            if best_valid_loss is not None and \
                    np.exp(avg_valid_loss) > np.exp(best_valid_loss) - FLAGS.decay_when:
                print('validation perplexity did not improve enough, decay learning rate')
                current_learning_rate = session.run(train_model.learning_rate)
                print('learning rate was:', current_learning_rate)
                current_learning_rate *= FLAGS.learning_rate_decay
                if current_learning_rate < 1.e-5:
                    print('learning rate too small - stopping now')
                    break
                session.run(train_model.learning_rate.assign(current_learning_rate))
                print('new learning rate is:', current_learning_rate)
            else:
                best_valid_loss = avg_valid_loss
def main(_):
    ''' Loads trained model and evaluates it on test split '''
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", help="Model to load")
    args = parser.parse_args()

    if args.model is None:
        print('Please specify checkpoint file to load model from')
        return -1
    if not os.path.exists(args.model + '.meta'):
        print('Checkpoint file not found', args.model)
        return -1
    model_path = args.model

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, FLAGS.EOS)

    test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                             FLAGS.batch_size, FLAGS.num_unroll_steps, char_vocab)
    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build inference graph '''
        with tf.variable_scope("Model"):
            m = Model(FLAGS, char_vocab, word_vocab, max_word_length, ModelUsage.TEST)
            # we need global step only because we want to read it from the model
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, model_path)
        print('Loaded model from', tf.train.latest_checkpoint(model_path),
              'saved at global step', global_step.eval())

        ''' test starts here '''
        rnn_state = session.run(m.initial_rnn_state)
        count = 0
        avg_loss = 0
        start_time = time.time()
        for x, y in test_reader.iter():
            count += 1
            loss, rnn_state = session.run([m.loss, m.final_rnn_state], {
                m.input: x,
                m.targets: y,
                m.initial_rnn_state: rnn_state
            })
            avg_loss += loss

        avg_loss /= count
        time_elapsed = time.time() - start_time

        print("test loss = %6.8f, perplexity = %6.8f" % (avg_loss, np.exp(avg_loss)))
        print("test samples:", count * FLAGS.batch_size,
              "time elapsed:", time_elapsed,
              "time per one batch:", time_elapsed / count)
def main(_):
    ''' Loads trained model and evaluates it on test split '''
    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1
    if not os.path.exists(FLAGS.load_model):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)
    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build inference graph '''
        with tf.variable_scope("Model"):
            m = model.inference_graph(
                char_vocab_size=char_vocab.size, word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size, batch_size=1,
                num_highway_layers=FLAGS.highway_layers, num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size, max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels), kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=1, dropout=0)

            # we need global step only because we want to read it from the model
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model)
        print('Loaded model from', FLAGS.load_model, 'saved at global step', global_step.eval())

        ''' training starts here '''
        rnn_state = session.run(m.initial_rnn_state)
        logits = np.ones((word_vocab.size,))
        rnn_state = session.run(m.initial_rnn_state)
        for i in range(FLAGS.num_samples):
            logits = logits / FLAGS.temperature
            prob = np.exp(logits)
            prob /= np.sum(prob)
            prob = prob.ravel()
            ix = np.random.choice(range(len(prob)), p=prob)

            word = word_vocab.token(ix)
            if word == '|':  # EOS
                print('<unk>', end=' ')
            elif word == '+':
                print('\n')
            else:
                print(word, end=' ')

            char_input = np.zeros((1, 1, max_word_length))
            for i, c in enumerate('{' + word + '}'):
                char_input[0, 0, i] = char_vocab[c]

            logits, rnn_state = session.run([m.logits, m.final_rnn_state],
                                            {m.input: char_input,
                                             m.initial_rnn_state: rnn_state})
            logits = np.array(logits)
def train(w2v_model):
    # Training
    # ==================================================
    max_sen_length = 40
    max_doc_length = 90

    word_vocab, word_tensors, max_doc_length, label_tensors = \
        dr.load_data(FLAGS.train_data_file, max_doc_length, max_sen_length)

    train_reader = dr.DataReader(word_tensors['train'], label_tensors['train'], 1)
    valid_reader = dr.DataReader(word_tensors['valid'], label_tensors['valid'], 1)
    test_reader = dr.DataReader(word_tensors['test'], label_tensors['test'], 1)

    pretrained_embedding = dr.get_embed(word_vocab)
    print("ppp", pretrained_embedding.shape)
    #x_train, x_dev, y_train, y_dev, vocab_siz, pretrained_embedding = load_data(w2v_model)
    embedding_size = 150

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # needs modification
            '''
            cnn = TextCNN(
                w2v_model,
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=vocab_size,
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)
            '''
            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            print(word_vocab.size)

            Summa = SummaRuNNer(word_vocab.size, embedding_size, pretrained_embedding)
            #loss_sum = tf.Variable(initial_value=0, dtype=tf.float32)
            global_step = tf.Variable(0, name="global_step", trainable=False)
            '''
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(Summa.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
            '''
            train_params = tf.trainable_variables()
            train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(
                Summa.loss, var_list=train_params)

            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")

            batches = train_reader
            valid_batches = valid_reader
            sess.run(tf.global_variables_initializer())

            #step = 0
            min_eval_loss = float('Inf')
            #fetch_list = [Summa.xks, Summa.data_c, Summa.result]
            for epoch in range(FLAGS.num_epochs):
                step = 0
                loss_sum = 0
                #loss_sum.assign(value=0)
                #value_sum = 0
                for x_batch, y_batch in batches.iter():
                    step += 1
                    feed_dict = {
                        Summa.x: x_batch[0],
                        Summa.y: y_batch[0],
                    }
                    '''
                    lucky_boy, lucky_girl, data_cc = sess.run(fetch_list, feed_dict)
                    print("lucky_boy, ", lucky_boy)
                    print("lucky_girl, ", lucky_girl)
                    print("data_cc:", data_cc)
                    '''
                    sess.run(train_op, feed_dict)
                    loss = sess.run([Summa.loss], feed_dict)
                    predict = sess.run([Summa.y_], feed_dict)
                    loss_sum += loss[0]
                    #print predict
                    #grads_and_vars = optimizer.compute_gradients(Summa.loss)
                    #train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
                    #print step
                    #print len(y_batch[0])
                    #print len(predict[0])
                    #print step % 128
                    #print step
                    if step % 128 == 0 and step != 0:
                        #print("here")
                        #logging.info('Epoch ' + str(epoch) + ' Loss: ' + str(loss_sum / 128.0))
                        print('Epoch ' + str(epoch) + ' Loss: ' + str(loss_sum / 128.0))
                        loss_sum = 0
                    if step % 512 == 0 and step != 0:
                        eval_loss = 0
                        for x_batch, y_batch in valid_batches.iter():
                            feed_dict = {
                                Summa.x: x_batch[0],
                                Summa.y: y_batch[0],
                            }
                            loss = sess.run([Summa.loss], feed_dict)
                            eval_loss += loss[0]
                        print('epoch ' + str(epoch) + ' Loss in validation: ' +
                              str(eval_loss * 1.0 / valid_reader.length))
                        if eval_loss < min_eval_loss:
                            min_eval_loss = eval_loss
                            path = saver.save(sess, checkpoint_prefix, global_step=step)
                            print("Saved model checkpoint to {}\n".format(path))
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from keras.models import Model
from keras.layers import Input, Dense, LSTM, Masking, Dropout, Conv1D, Conv2D, Reshape, \
    AveragePooling1D, GlobalAveragePooling1D, BatchNormalization, Multiply
from keras.optimizers import Adam

from data_reader import load_data
from preprocessing import fill_missing_numerical_data, get_normalizer_from_data, \
    normalize_numerical_data, handle_categorical_data, numerical_to_categorical
from callbacks import HospitalisationMetrics, CSVLogger, ModelCheckpoint

# Import data. X has 25 columns, Y has 5.
ancien_data_x, ancien_data_y = load_data("C:/Users/Fabien/Documents/Covid", "ancien_data.csv")
nouveau_data_x, nouveau_data_y = load_data("C:/Users/Fabien/Documents/Covid", "nouveau_data.csv")
prospective_data_x, prospective_data_y = load_data(
    "C:/Users/Fabien/Documents/Covid", "passages_2020-01-17.csv")
prospective_data2_x, prospective_data2_y = load_data(
    "C:/Users/Fabien/Documents/Covid", "passages_2020-03-17 - Cleaned.csv")

# PREPROCESS DATA
# HANDLING OF NUMERICAL VARIABLES
ancien_data_x = fill_missing_numerical_data(ancien_data_x)
nouveau_data_x = fill_missing_numerical_data(nouveau_data_x)
prospective_data_x = fill_missing_numerical_data(prospective_data_x)
prospective_data2_x = fill_missing_numerical_data(prospective_data2_x)
# drop_numerical=False
from data_reader import load_data
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.externals import joblib

if __name__ == "__main__":
    (X, Y) = load_data("x_input", "y_input")
    regressor = joblib.load("dt.pkl")
    y_pred = regressor.predict(X)

    rows = y_pred.shape[0]
    cols = y_pred.shape[1]

    f = open("output.txt", "w")
    f.write(str(rows) + " " + str(cols) + "\n")
    for i in range(rows):
        for j in range(cols):
            f.write(str(y_pred[i][j]) + " ")
        f.write("\n")
    f.close()
def main(print):
    ''' Trains model from data '''
    # NOTE: the parameter shadows the built-in print, so the caller must supply a
    # print-compatible logging callable.
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory ' + FLAGS.train_dir)

    # CSV initialize
    pd.DataFrame(FLAGS.flag_values_dict(),
                 index=range(1)).to_csv(FLAGS.train_dir + '/train_parameters.csv')
    epochs_results = initialize_epoch_data_dict()

    fasttext_model_path = None
    if FLAGS.fasttext_model_path:
        fasttext_model_path = FLAGS.fasttext_model_path

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length, words_list = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    fasttext_model = None
    if 'fasttext' in FLAGS.embedding:
        fasttext_model = FasttextModel(fasttext_path=fasttext_model_path).get_fasttext_model()

    train_ft_reader = DataReaderFastText(words_list=words_list,
                                         batch_size=FLAGS.batch_size,
                                         num_unroll_steps=FLAGS.num_unroll_steps,
                                         model=fasttext_model,
                                         data='train')
    valid_ft_reader = DataReaderFastText(words_list=words_list,
                                         batch_size=FLAGS.batch_size,
                                         num_unroll_steps=FLAGS.num_unroll_steps,
                                         model=fasttext_model,
                                         data='valid')

    train_reader = DataReader(word_tensors['train'], char_tensors['train'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)
    valid_reader = DataReader(word_tensors['valid'], char_tensors['valid'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)
    test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                             FLAGS.batch_size, FLAGS.num_unroll_steps)
    print('initialized all dataset readers')

    with tf.Graph().as_default(), tf.Session() as session:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build training graph '''
        initializer = tf.random_uniform_initializer(-FLAGS.param_init, FLAGS.param_init)
        with tf.variable_scope("Model", initializer=initializer):
            train_model = model.inference_graph(
                char_vocab_size=char_vocab.size,
                word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps,
                dropout=FLAGS.dropout,
                embedding=FLAGS.embedding,
                fasttext_word_dim=300,
                acoustic_features_dim=4)
            train_model.update(model.loss_graph(train_model.logits, FLAGS.batch_size,
                                                FLAGS.num_unroll_steps))
            train_model.update(model.training_graph(train_model.loss * FLAGS.num_unroll_steps,
                                                    FLAGS.learning_rate, FLAGS.max_grad_norm))

        # create saver before creating more graph nodes, so that we do not save any vars defined below
        saver = tf.train.Saver(max_to_keep=50)

        ''' build graph for validation and testing (shares parameters with the training graph!) '''
        with tf.variable_scope("Model", reuse=True):
            valid_model = model.inference_graph(
                char_vocab_size=char_vocab.size,
                word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps,
                dropout=0.0,
                embedding=FLAGS.embedding,
                fasttext_word_dim=300,
                acoustic_features_dim=4)
            valid_model.update(model.loss_graph(valid_model.logits, FLAGS.batch_size,
                                                FLAGS.num_unroll_steps))

        if FLAGS.load_model_for_training:
            saver.restore(session, FLAGS.load_model_for_training)
            print('Loaded model from ' + str(FLAGS.load_model_for_training) +
                  ' saved at global step ' + str(train_model.global_step.eval()))
        else:
            tf.global_variables_initializer().run()
            session.run(train_model.clear_char_embedding_padding)
            print('Created and initialized fresh model. Size: ' + str(model.model_size()))

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=session.graph)

        ''' take learning rate from CLI, not from saved graph '''
        session.run(tf.assign(train_model.learning_rate, FLAGS.learning_rate))

        ''' training starts here '''
        best_valid_loss = None
        rnn_state = session.run(train_model.initial_rnn_state)
        for epoch in range(FLAGS.max_epochs):
            epoch_start_time = time.time()
            avg_train_loss = 0.0
            count = 0
            if fasttext_model:
                iter_over = zip(train_reader.iter(), train_ft_reader.iter())
            else:
                iter_over = train_reader.iter()
            for batch_kim, batch_ft in iter_over:
                if fasttext_model:
                    x, y = batch_kim
                else:
                    x, y = batch_kim, batch_ft
                count += 1
                start_time = time.time()
                if fasttext_model:
                    # computed but not fed below
                    ft_vectors = fasttext_model.wv[words_list['train'][count]].reshape(
                        fasttext_model.wv.vector_size, 1)
                    loss, _, rnn_state, gradient_norm, step, _ = session.run(
                        [train_model.loss, train_model.train_op, train_model.final_rnn_state,
                         train_model.global_norm, train_model.global_step,
                         train_model.clear_char_embedding_padding],
                        {train_model.input2: batch_ft,
                         train_model.input: x,
                         train_model.targets: y,
                         train_model.initial_rnn_state: rnn_state})
                else:
                    loss, _, rnn_state, gradient_norm, step, _ = session.run(
                        [train_model.loss, train_model.train_op, train_model.final_rnn_state,
                         train_model.global_norm, train_model.global_step,
                         train_model.clear_char_embedding_padding],
                        {train_model.input: x,
                         train_model.targets: y,
                         train_model.initial_rnn_state: rnn_state})
                avg_train_loss += 0.05 * (loss - avg_train_loss)
                time_elapsed = time.time() - start_time
                if count % FLAGS.print_every == 0:
                    print('%6d: %d [%5d/%5d], train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs, grad.norm=%6.8f' %
                          (step, epoch, count, train_reader.length, loss, np.exp(loss),
                           time_elapsed, gradient_norm))

            print('Epoch training time: ' + str(time.time() - epoch_start_time))
            epochs_results['epoch_training_time'].append(str(time.time() - epoch_start_time))

            # epoch done: time to evaluate
            avg_valid_loss = 0.0
            count = 0
            rnn_state = session.run(valid_model.initial_rnn_state)
            for batch_kim, batch_ft in zip(valid_reader.iter(), valid_ft_reader.iter()):
                x, y = batch_kim
                count += 1
                start_time = time.time()
                loss, rnn_state = session.run(
                    [valid_model.loss, valid_model.final_rnn_state],
                    {valid_model.input2: batch_ft,
                     valid_model.input: x,
                     valid_model.targets: y,
                     valid_model.initial_rnn_state: rnn_state})
                if count % FLAGS.print_every == 0:
                    print("\t> validation loss = %6.8f, perplexity = %6.8f" % (loss, np.exp(loss)))
                avg_valid_loss += loss / valid_reader.length

            print("at the end of epoch: " + str(epoch))
            epochs_results['epoch_number'].append(str(epoch))
            print("train loss = %6.8f, perplexity = %6.8f" % (avg_train_loss, np.exp(avg_train_loss)))
            epochs_results['train_loss'].append(avg_train_loss)
            epochs_results['train_perplexity'].append(np.exp(avg_train_loss))
            print("validation loss = %6.8f, perplexity = %6.8f" % (avg_valid_loss, np.exp(avg_valid_loss)))
            epochs_results['validation_loss'].append(avg_valid_loss)
            epochs_results['valid_perplexity'].append(np.exp(avg_valid_loss))

            save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_valid_loss)
            saver.save(session, save_as)
            print('Saved model ' + str(save_as))
            epochs_results['model_name'].append(str(save_as))
            epochs_results['learning_rate'].append(str(session.run(train_model.learning_rate)))

            ''' write out summary events '''
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="train_loss", simple_value=avg_train_loss),
                tf.Summary.Value(tag="train_perplexity", simple_value=np.exp(avg_train_loss)),
                tf.Summary.Value(tag="valid_loss", simple_value=avg_valid_loss),
                tf.Summary.Value(tag="valid_perplexity", simple_value=np.exp(avg_valid_loss)),
            ])
            summary_writer.add_summary(summary, step)

            ''' decide if need to decay learning rate '''
            if best_valid_loss is not None and np.exp(avg_valid_loss) > np.exp(best_valid_loss) - FLAGS.decay_when:
                print('validation perplexity did not improve enough, decay learning rate')
                current_learning_rate = session.run(train_model.learning_rate)
                print('learning rate was: ' + str(current_learning_rate))
                current_learning_rate *= FLAGS.learning_rate_decay
                if current_learning_rate < 1.e-3:
                    print('learning rate too small - stopping now')
                    break
                session.run(train_model.learning_rate.assign(current_learning_rate))
                print('new learning rate is: ' + str(current_learning_rate))
            else:
                best_valid_loss = avg_valid_loss

    # Save model performance data
    pd.DataFrame(epochs_results).to_csv(FLAGS.train_dir + '/train_results.csv')
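# Optional follow-up sketch (an addition, not part of the original script): the
# per-epoch metrics written to train_results.csv above can be inspected with pandas;
# the column names follow the epochs_results keys used in main(), and the path below
# assumes the CSV was copied or read from FLAGS.train_dir.
import pandas as pd
import matplotlib.pyplot as plt

results = pd.read_csv('train_results.csv')
results[['train_perplexity', 'valid_perplexity']].plot(marker='o')
plt.xlabel('epoch')
plt.ylabel('perplexity')
plt.show()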
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from utils import plot_series, model_forecast
from data_reader import load_data

model = tf.keras.models.load_model('saved_models/conv_lstm.h5', custom_objects={'tf': tf})
print("\n M O D E L   S U M M A R Y \n")
print(model.summary())

series, time = load_data()
split_time = 2000
time_train = time[:split_time]
X_train = series[:split_time]
time_valid = time[split_time:]
X_valid = series[split_time:]

window_size = 20
batch_size = 32
shuffle_buffer_size = 1000

# plot_series(time, series, title="Original Data")
# plt.show()

print("\n Please be patient! _()_ This might take some time. \n")

# forecast = []
# for time in range(len(series) - window_size):
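# Hedged sketch only: the commented-out loop above is left unfinished in the source,
# so this shows one generic way a sliding-window forecast could look. It is not the
# author's implementation (which presumably lives in utils.model_forecast), and the
# (1, window_size, 1) input shape is an assumption about the conv_lstm model.
forecast = []
for t in range(len(series) - window_size):
    window = series[t:t + window_size][np.newaxis, ..., np.newaxis]  # assumed shape (1, window_size, 1)
    forecast.append(model.predict(window)[0, 0])
forecast = np.array(forecast)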
def main(_):
    ''' Trains model from data '''
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory', FLAGS.train_dir)

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    train_reader = DataReader(word_tensors['train'], char_tensors['train'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)
    valid_reader = DataReader(word_tensors['valid'], char_tensors['valid'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)
    test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                             FLAGS.batch_size, FLAGS.num_unroll_steps)
    print('initialized all dataset readers')

    minimum_valid_ppl = 1000000
    minimum_vl_epoch = 0
    text_file = open("train_log.txt", "w")
    # text_file.write("Purchase Amount: %s" % TotalAmount)

    with tf.Graph().as_default(), tf.Session() as session:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build training graph '''
        initializer = tf.random_uniform_initializer(-FLAGS.param_init, FLAGS.param_init)
        with tf.variable_scope("Model", initializer=initializer):
            train_model = model.inference_graph(
                char_vocab_size=char_vocab.size,
                word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps,
                dropout=FLAGS.dropout)
            train_model.update(model.loss_graph(train_model.logits, FLAGS.batch_size,
                                                FLAGS.num_unroll_steps))
            # scaling loss by FLAGS.num_unroll_steps effectively scales gradients by the same factor.
            # we need it to reproduce how the original Torch code optimizes. Without this, our gradients
            # would be much smaller (i.e. 35 times smaller) and to get the system to learn we'd have to
            # scale the learning rate and max_grad_norm appropriately. Thus, we scale gradients so that
            # this trainer is exactly compatible with the original.
            train_model.update(model.training_graph(train_model.loss * FLAGS.num_unroll_steps,
                                                    FLAGS.learning_rate, FLAGS.max_grad_norm))

        # create saver before creating more graph nodes, so that we do not save any vars defined below
        saver = tf.train.Saver(max_to_keep=10)

        ''' build graph for validation and testing (shares parameters with the training graph!) '''
        with tf.variable_scope("Model", reuse=True):
            valid_model = model.inference_graph(
                char_vocab_size=char_vocab.size,
                word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps,
                dropout=0.0)
            valid_model.update(model.loss_graph(valid_model.logits, FLAGS.batch_size,
                                                FLAGS.num_unroll_steps))

        if FLAGS.load_model:
            saver.restore(session, FLAGS.load_model)
            print('Loaded model from', FLAGS.load_model,
                  'saved at global step', train_model.global_step.eval())
        else:
            tf.global_variables_initializer().run()
            session.run(train_model.clear_char_embedding_padding)
            print('Created and initialized fresh model. Size:', model.model_size())

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=session.graph)

        ''' take learning rate from CLI, not from saved graph '''
        session.run(tf.assign(train_model.learning_rate, FLAGS.learning_rate))

        ''' training starts here '''
        best_valid_loss = None
        rnn_state = session.run(train_model.initial_rnn_state)
        for epoch in range(FLAGS.max_epochs):
            epoch_start_time = time.time()
            avg_train_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()
                loss, _, rnn_state, gradient_norm, step, _ = session.run(
                    [train_model.loss, train_model.train_op, train_model.final_rnn_state,
                     train_model.global_norm, train_model.global_step,
                     train_model.clear_char_embedding_padding],
                    {train_model.input: x,
                     train_model.targets: y,
                     train_model.initial_rnn_state: rnn_state})
                avg_train_loss += 0.05 * (loss - avg_train_loss)
                time_elapsed = time.time() - start_time
                if count % FLAGS.print_every == 0:
                    print('%6d: %d [%5d/%5d], train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs, grad.norm=%6.8f' %
                          (step, epoch, count, train_reader.length,
                           loss, np.exp(loss), time_elapsed, gradient_norm))
                    text_file.write('%6d: %d [%5d/%5d], train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs, grad.norm=%6.8f \n' %
                                    (step, epoch, count, train_reader.length,
                                     loss, np.exp(loss), time_elapsed, gradient_norm))

            print('Epoch training time:', time.time() - epoch_start_time)
            # text_file.write('Epoch training time:' + str(time.time() - epoch_start_time))

            # epoch done: time to evaluate
            avg_valid_loss = 0.0
            count = 0
            rnn_state = session.run(valid_model.initial_rnn_state)
            for x, y in valid_reader.iter():
                count += 1
                start_time = time.time()
                loss, rnn_state = session.run(
                    [valid_model.loss, valid_model.final_rnn_state],
                    {valid_model.input: x,
                     valid_model.targets: y,
                     valid_model.initial_rnn_state: rnn_state})
                if count % FLAGS.print_every == 0:
                    print("\t> validation loss = %6.8f, perplexity = %6.8f" % (loss, np.exp(loss)))
                avg_valid_loss += loss / valid_reader.length

            print("at the end of epoch:", epoch)
            print("train loss = %6.8f, perplexity = %6.8f" % (avg_train_loss, np.exp(avg_train_loss)))
            print("validation loss = %6.8f, perplexity = %6.8f" % (avg_valid_loss, np.exp(avg_valid_loss)))
            text_file.write("at the end of epoch:" + str(epoch) + '\n')
            text_file.write("train loss = %6.8f, perplexity = %6.8f \n" % (avg_train_loss, np.exp(avg_train_loss)))
            text_file.write("validation loss = %6.8f, perplexity = %6.8f \n" % (avg_valid_loss, np.exp(avg_valid_loss)))

            if np.exp(avg_valid_loss) < minimum_valid_ppl:
                minimum_valid_ppl = np.exp(avg_valid_loss)
                minimum_vl_epoch = epoch
                save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_valid_loss)
                saver.save(session, save_as)
                print('Saved model', save_as)
            elif epoch % 4 == 0:
                save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_valid_loss)
                saver.save(session, save_as)
                print('Saved model', save_as)

            ''' write out summary events '''
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="train_loss", simple_value=avg_train_loss),
                tf.Summary.Value(tag="valid_loss", simple_value=avg_valid_loss)
            ])
            summary_writer.add_summary(summary, step)

            ''' decide if need to decay learning rate '''
            if best_valid_loss is not None and np.exp(avg_valid_loss) > np.exp(best_valid_loss) - FLAGS.decay_when:
                print('validation perplexity did not improve enough, decay learning rate')
                current_learning_rate = session.run(train_model.learning_rate)
                print('learning rate was:', current_learning_rate)
                current_learning_rate *= FLAGS.learning_rate_decay
                if current_learning_rate < 1.e-5:
                    print('learning rate too small - stopping now')
                    break
                session.run(train_model.learning_rate.assign(current_learning_rate))
                print('new learning rate is:', current_learning_rate)
            else:
                best_valid_loss = avg_valid_loss
                save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_valid_loss)
                saver.save(session, save_as)
                print('Saved model', save_as)

    print("----------------------------------------------")
    print("Minimum Valid PPL is attained in epoch:%d and Validation PPL is %6.8f"
          % (minimum_vl_epoch, minimum_valid_ppl))
    text_file.close()
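# Tiny illustration (an addition, not from the original code) of the comment above
# about scaling the loss: multiplying a loss by a constant multiplies every gradient
# by the same constant, which is why the trainer scales the loss by num_unroll_steps
# to stay compatible with the original Torch optimization.
import tensorflow as tf

w = tf.Variable(3.0)
loss = tf.square(w)            # d(loss)/dw = 2*w = 6
scaled = loss * 35.0           # d(scaled)/dw = 35 * 6 = 210
g1 = tf.gradients(loss, w)[0]
g2 = tf.gradients(scaled, w)[0]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([g1, g2]))  # [6.0, 210.0]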
def test(self):
    batch_size = 4
    num_unroll_steps = 3
    char_vocab_size = 51
    max_word_length = 11
    char_embed_size = 3

    _, _, word_data, char_data, _ = load_data('data/', max_word_length)
    dataset = char_data['train']
    self.assertEqual(dataset.shape, (929589, max_word_length))

    reader = DataReader(word_data['train'], char_data['train'],
                        batch_size=batch_size, num_unroll_steps=num_unroll_steps)
    for x, y in reader.iter():
        assert x.shape == (batch_size, num_unroll_steps, max_word_length)
        break
    self.assertAllClose(X, x)

    with self.test_session() as session:
        input_ = tf.placeholder(tf.int32,
                                shape=[batch_size, num_unroll_steps, max_word_length],
                                name="input")

        ''' First, embed characters '''
        with tf.variable_scope('Embedding'):
            char_embedding = tf.get_variable('char_embedding',
                                             [char_vocab_size, char_embed_size])
            # [batch_size x max_word_length, num_unroll_steps, char_embed_size]
            input_embedded = tf.nn.embedding_lookup(char_embedding, input_)
            input_embedded = tf.reshape(input_embedded, [-1, max_word_length, char_embed_size])

        session.run(tf.assign(char_embedding, EMBEDDING))
        ie = session.run(input_embedded, {input_: x})

        output = tdnn(input_embedded, [2], [2], scope='TDNN')
        out = session.run(output, {
            input_embedded: ie,
            'TDNN/kernel_2/w:0': np.reshape(np.transpose(KERNEL_2_W), [1, 2, num_unroll_steps, 2]),
            'TDNN/kernel_2/b:0': KERNEL_2_B
        })

        out = out.reshape([batch_size, num_unroll_steps, 2])
        out = out.transpose([1, 0, 2])  # torch uses time-major order
        self.assertAllClose(out, np.array([
            [[-0.04201929, 0.02275813], [-0.04060676, 0.02283999],
             [-0.04333816, 0.02333505], [-0.04131923, 0.02480407]],
            [[-0.04124087, 0.02429205], [-0.04117644, 0.02419558],
             [-0.04282973, 0.02318067], [-0.04131923, 0.02480407]],
            [[-0.03877186, 0.0243939], [-0.04173752, 0.02552123],
             [-0.04168687, 0.02385954], [-0.04201929, 0.02454825]]]))
        print(out.shape)
        print(out)
        assert False