def train_fn():
    """Train the binary-classification RNN and persist all artifacts.

    Side effects: under ``config.MODEL_PATH`` writes the trained model
    (my_model.h5), the fit history (my_history.npy) and the held-out test
    split (test_padded.pkl / test_labels.pkl) so evaluation can run without
    re-tokenizing.
    """
    rnn_model = model.rnn()
    # FIX: `lr` is a deprecated alias of `learning_rate` in tf.keras
    # optimizers (removed in recent TF releases); use the supported keyword.
    rnn_model.compile(optimizer=Adam(learning_rate=config.LEARNING_RATE),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
    train_padded, train_labels, test_padded, test_labels = \
        data_preprocess.tokenizer_sequences()
    # Stop once validation loss fails to improve for 2 consecutive epochs.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)
    ]
    history = rnn_model.fit(train_padded,
                            train_labels,
                            validation_data=(test_padded, test_labels),
                            epochs=config.NUM_EPOCHS,
                            verbose=2,
                            callbacks=callbacks)
    rnn_model.save(f"{config.MODEL_PATH}my_model.h5")
    np.save(f'{config.MODEL_PATH}my_history.npy', history.history)
    joblib.dump(test_padded, f"{config.MODEL_PATH}test_padded.pkl")
    joblib.dump(test_labels, f"{config.MODEL_PATH}test_labels.pkl")
def train_fn():
    """Train the multi-class RNN and persist the model plus its fit history.

    Side effects: under ``config.MODEL_PATH`` writes my_model.h5 and
    my_history.npy.
    """
    train_padded, train_label_seq, valid_padded, valid_label_seq = \
        data_preprocess.tokenizer_sequences()
    rnn_model = model.rnn()
    # FIX: `lr` is a deprecated alias of `learning_rate` in Keras optimizers;
    # use the supported keyword. Sparse loss => labels are integer class ids.
    rnn_model.compile(optimizer=Adam(learning_rate=config.LEARNING_RATE),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
    callbacks = [
        # Shrink the learning rate when validation loss plateaus, and stop
        # entirely once validation accuracy stops improving meaningfully.
        ReduceLROnPlateau(monitor='val_loss', patience=5, cooldown=0),
        EarlyStopping(monitor='val_accuracy', min_delta=1e-4, patience=5)
    ]
    history = rnn_model.fit(train_padded,
                            train_label_seq,
                            validation_data=(valid_padded, valid_label_seq),
                            epochs=config.NUM_EPOCHS,
                            batch_size=config.BATCH_SIZE,
                            verbose=2,
                            callbacks=callbacks)
    rnn_model.save(f"{config.MODEL_PATH}my_model.h5")
    np.save(f'{config.MODEL_PATH}my_history.npy', history.history)
# Parallax distributed-training launcher: flag definitions, single-GPU graph
# construction, and the parallel_run() hand-off.
tf.app.flags.DEFINE_string(
    'resource_info_file',
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), '.', 'resource_info')),
    'Filename containing cluster information')
tf.app.flags.DEFINE_integer('max_steps', 1000000,
                            """Number of iterations to run for each workers.""")
tf.app.flags.DEFINE_integer('log_frequency', 50,
                            """How many steps between two runop logs.""")
tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size""")
tf.app.flags.DEFINE_boolean('sync', True, '')
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# Build single-GPU rnn model
single_gpu_graph = tf.Graph()
with single_gpu_graph.as_default():
    # rnn() returns a dict of graph endpoints; unpack the ones we feed/fetch.
    ops = rnn()
    train_op = ops['train_op']
    loss = ops['loss']
    acc = ops['acc']
    x = ops['images']
    y = ops['labels']
    is_training = ops['is_training']
parallax_config = parallax.Config()
# Checkpoint every step into parallax_ckpt/.
ckpt_config = parallax.CheckPointConfig(ckpt_dir='parallax_ckpt',
                                        save_ckpt_steps=1)
parallax_config.ckpt_config = ckpt_config
# NOTE(review): this chunk is truncated here — the parallel_run() call's
# remaining arguments and the rest of the script are outside this view.
sess, num_workers, worker_id, num_replicas_per_worker = parallax.parallel_run(
    single_gpu_graph, FLAGS.resource_info_file,
def main(_):
    """Build a TF1 character-level LSTM graph, train it, plot the learning
    curves, and finally sample 500 characters from the trained model.

    Relies on module-level placeholders/constants defined outside this view
    (X, Y, labels, rnn_inputs, batch_size, keep_prob, num_hidden, num_steps,
    num_class, k, epochs, learning_rate, train_encode, valid_encode,
    vocab_to_int, int_to_vocab) — TODO confirm against the rest of the file.
    """
    with tf.Session() as sess:
        cells = get_lstm_cells(num_hidden, keep_prob)
        init_states = cells.zero_state(batch_size, tf.float32)
        outputs, final_states = rnn(rnn_inputs, cells, num_hidden[-1],
                                    num_steps, num_class, init_states)
        predicts = tf.argmax(outputs, -1, name='predict_op')
        softmax_out = tf.nn.softmax(outputs, name='softmax_op')
        # Top-k candidates are reused below for sampling from the model.
        top_k = tf.nn.top_k(softmax_out, k=k, sorted=False, name='top_k_op')
        with tf.variable_scope('train'):
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=labels, logits=outputs), name='loss_op')
            global_step = tf.Variable(0, name='global_step', trainable=False,
                                      collections=[
                                          tf.GraphKeys.GLOBAL_VARIABLES,
                                          tf.GraphKeys.GLOBAL_STEP
                                      ])
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=learning_rate, momentum=0.9)
            train_op = optimizer.minimize(loss, global_step=global_step,
                                          name='train_op')
            arg_labels = tf.argmax(labels, -1)
            acc = tf.reduce_mean(tf.cast(tf.equal(predicts, arg_labels),
                                         tf.float32), name='acc_op')
        sess.run(tf.global_variables_initializer())
        # Re-fetch the ops by name from the graph (equivalent handles to the
        # Python variables above).
        global_step_tensor = sess.graph.get_tensor_by_name(
            'train/global_step:0')
        train_op = sess.graph.get_operation_by_name('train/train_op')
        acc_op = sess.graph.get_tensor_by_name('train/acc_op:0')
        loss_tensor = sess.graph.get_tensor_by_name('train/loss_op:0')
        print('Start training ...')
        loss_history = []
        acc_history = []
        batch_num = 30
        a = datetime.now().replace(microsecond=0)
        for i in range(epochs):
            total_loss = 0
            total_acc = 0
            count = 0
            # Reset the recurrent state at the start of every epoch; the
            # state is then threaded through consecutive batches.
            current_states = sess.run(init_states,
                                      feed_dict={batch_size: batch_num})
            for x, y in get_batches(train_encode, batch_num, num_steps):
                _, loss_value, acc_value, current_states = sess.run(
                    [train_op, loss_tensor, acc_op, final_states],
                    feed_dict={
                        X: x,
                        Y: y,
                        init_states: current_states,
                        keep_prob: 1
                    })
                total_loss += loss_value
                total_acc += acc_value
                count += 1
            total_loss /= count
            total_acc /= count
            # Validation pass: accuracy only, no parameter updates.
            valid_acc = 0
            count = 0
            current_states = sess.run(init_states,
                                      feed_dict={batch_size: batch_num})
            for x, y in get_batches(valid_encode, batch_num, num_steps):
                acc_value, current_states = sess.run(
                    [acc_op, final_states],
                    feed_dict={
                        X: x,
                        Y: y,
                        init_states: current_states
                    })
                valid_acc += acc_value
                count += 1
            valid_acc /= count
            print("Epochs: {}, loss: {:.4f}, acc: {:.4f}, val_acc: {:.4f}".
                  format(i + 1, total_loss, total_acc, valid_acc))
            loss_history.append(total_loss)
            acc_history.append([total_acc, valid_acc])
        # Plot training loss and train/validation error curves to PNG files.
        plt.plot(loss_history)
        plt.xlabel("epochs")
        plt.ylabel("BPC")
        plt.title("Training curve")
        plt.savefig("Training curve.png", dpi=100)
        plt.gcf().clear()
        acc_history = np.array(acc_history).T
        err_history = 1 - acc_history
        plt.plot(err_history[0], label='training error')
        plt.plot(err_history[1], label='validation error')
        plt.xlabel("epochs")
        plt.ylabel("Error rate")
        plt.title("Training error")
        plt.legend()
        plt.savefig("Training error.png", dpi=100)
        # predict 500 words
        seed = 'Asuka'
        seed_encode = np.array([vocab_to_int[c] for c in list(seed)])
        # Pad the 5-char seed out to a full window of num_steps positions.
        seed_encode = np.concatenate((seed_encode, np.zeros(num_steps - 5)))
        current_states = sess.run(init_states, feed_dict={batch_size: 1})
        index = 4
        for i in range(500):
            if index == num_steps - 1:
                # Window full: sample, then start a fresh window with the
                # sampled character (state carries the history).
                candidates, current_states = sess.run(
                    [top_k, final_states],
                    feed_dict={
                        X: seed_encode[None, :],
                        init_states: current_states
                    })
                p = candidates.values[0, index]
                # Renormalize the top-k probabilities before sampling.
                p /= p.sum()
                rand_idx = np.random.choice(k, p=p)
                seed_encode = np.append(
                    candidates.indices[0, index, rand_idx],
                    np.zeros(num_steps - 1))
            else:
                candidates = sess.run(top_k,
                                      feed_dict={
                                          X: seed_encode[None, :],
                                          init_states: current_states
                                      })
                p = candidates.values[0, index]
                p /= p.sum()
                rand_idx = np.random.choice(k, p=p)
                seed_encode[index + 1] = candidates.indices[0, index, rand_idx]
            seed += int_to_vocab[candidates.indices[0, index, rand_idx]]
            index = (index + 1) % num_steps
        print(seed)
        b = datetime.now().replace(microsecond=0)
        print("Time cost:", b - a)
def cv_train(modelname,
             config,
             kfold,
             lstep,
             n_samples,
             feature_weight,
             pkl_filename="data/adni.pkl",
             json_filename="conf/dataConfig.json"):
    """ Cross validation training process.

    Description:
        In each fold, we split data into train and validation parts.
        If validation loss has no improvement for continuous 10 epoches,
        the current fold is stop and we go to the next epoch.
        In each fold, the parameters would inherit from the saved
        `best-model` of the past fold. Thus, the first fold would take
        longer time, and the following folds would be faster.
        After cross validation is done, we load the best model, and apply
        it to test data.

    Arguments
    ---------
    modelname: string
        the name of chosen model class
    config: dictionary
        the configuration of training
    kfold: int
        the number of folds in cross validation
    lstep: int
        the value of step in forward prediction
    n_samples: int
        the number of samples to draw in testing stage
    pkl_filename: string
        the filename of pkl file storing "demo","dync","max_len"
    json_filename: string
        the filename of json file storing data configuration

    Returns
    -------
    model parameters saved in `save` folder
    """
    """load data and data config

    The data file *pkl contains three parts: {'demo','dync','max_len'}
    'demo' : a list of dataframes storing patients' demographics information
    'dync' : a list of dataframes storing patients' dynamic information,
        including continous features and diganosis
    'max_len' : int, the maximum lengths of patient sequence
    """
    adni = _pickle.load(open(pkl_filename, "rb"))
    dataConfig = json.load(open(json_filename))
    max_len = adni["max_len"]
    """ record val_loss change / trends; the smallest loss value for each
    fold is given by the best model """
    loss_curve = {}
    # build model graph — one constructor per supported model family, all
    # sharing the same argument list.
    if modelname == "rnn":
        model = rnn(len(dataConfig["demo_vars"]),
                    len(dataConfig["input_x_vars"]),
                    len(dataConfig["input_y_vars"]), max_len,
                    config["batch_size"], config["n_h"], config["n_z"],
                    lstep, feature_weight)
    elif modelname == "stocast":
        model = stocast(len(dataConfig["demo_vars"]),
                        len(dataConfig["input_x_vars"]),
                        len(dataConfig["input_y_vars"]), max_len,
                        config["batch_size"], config["n_h"], config["n_z"],
                        lstep, feature_weight)
    elif modelname == "storn":
        model = storn(len(dataConfig["demo_vars"]),
                      len(dataConfig["input_x_vars"]),
                      len(dataConfig["input_y_vars"]), max_len,
                      config["batch_size"], config["n_h"], config["n_z"],
                      lstep, feature_weight)
    elif modelname == "retain":
        model = retain(len(dataConfig["demo_vars"]),
                       len(dataConfig["input_x_vars"]),
                       len(dataConfig["input_y_vars"]), max_len,
                       config["batch_size"], config["n_h"], config["n_z"],
                       lstep, feature_weight)
    elif modelname == "tlstm":
        model = tlstm(len(dataConfig["demo_vars"]),
                      len(dataConfig["input_x_vars"]),
                      len(dataConfig["input_y_vars"]), max_len,
                      config["batch_size"], config["n_h"], config["n_z"],
                      lstep, feature_weight)
    # saving ... (checkpoint directory is timestamped per run)
    dirname = "save/{} {}".format(
        modelname,
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(
            'logs/' + datetime.now().isoformat().replace(':', '-'),
            sess.graph)
        merged = tf.summary.merge_all()
        # Keep only the single best checkpoint on disk.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
        start = time.time()
        totaltime = 0
        for k in range(kfold):
            """ if k = 0, random initialization params """
            """ else, inherit previous best model's params """
            if k == 0:
                tf.global_variables_initializer().run()
            else:
                ckpt = tf.train.get_checkpoint_state(dirname)
                saver.restore(sess, ckpt.model_checkpoint_path)
            # split into trainining, validation, testing data
            train_ds, valid_ds = split_data(k, kfold, adni, dataConfig,
                                            max_len)
            # train
            minVlloss = 1e10
            loss_curve[k] = []
            n_batchs = int(train_ds.num_examples /
                           config["batch_size"])  # each epoch
            no_improvement = 0  # number of no improvements
            e = 0
            while e < config["num_epochs"]:
                # Exponentially decayed learning rate per epoch.
                sess.run(
                    tf.assign(
                        model.lr,
                        config["learning_rate"] * (config["decay_rate"]**e)))
                for b in range(n_batchs):
                    wrap = train_ds.next_batch(config["batch_size"])
                    feed = {
                        model.input_demo: wrap['input_demo'],
                        model.input_x: wrap['input_x'],
                        model.input_y: wrap['input_y'],
                        model.input_dt: wrap['input_dt'],
                        model.seqlens: wrap['seqlens'],
                        model.mask: wrap['mask']
                    }
                    _, loss, summary = sess.run(
                        [model.train_op, model.loss, merged], feed)
                    summary_writer.add_summary(summary, e * n_batchs + b)
                # validation
                vloss = val_loss(sess, model, valid_ds, config["batch_size"])
                loss_curve[k].append(vloss)
                print(" |- FOLD:%d, EPOCH:%d, VLOSS:%.4f" % (k, e, vloss))
                if minVlloss > vloss:
                    # New best: checkpoint and reset the patience counter.
                    minVlloss = vloss
                    checkpoint_path = os.path.join(
                        dirname, "best_model_k={}_e={}.ckpt".format(k, e))
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * n_batchs +
                               k * config["num_epochs"] * n_batchs)
                    print(
                        " |- Best model saved to {}".format(checkpoint_path))
                    no_improvement = 0
                else:
                    no_improvement += 1
                # if the number of improvement reaches 10, stop running
                if no_improvement < 10:
                    e += 1
                    continue
                else:
                    break
            end = time.time()
            print("|- %2d fold costs %.4f seconds.\n" % (k, end - start))
            totaltime += end - start
            start = time.time()
        print("Total train time is %.4f seconds." % totaltime)
        # testing — reload the best checkpoint and evaluate on held-out data.
        print("Starting testing")
        ckpt = tf.train.get_checkpoint_state(dirname)
        if ckpt:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Loading model: ", ckpt.model_checkpoint_path)
        test_ds = DataSet(dataConfig, adni["demo"], adni["dync"], max_len)
        test_res = test(sess,
                        model,
                        modelname,
                        test_ds,
                        config["batch_size"],
                        max_len,
                        dataConfig["input_y_vars"],
                        lstep,
                        n_samples=n_samples)
        print("Saving test results...")
        """The results are saved in the following format.

        res = pickle.load(open(filename,'rb'))
        res : a list of dicts, with each dict() stores the prediction
            results corresponding to a specific patient
        res[i] : the dict for patient i,
            {'curr_labels','target_labels','pred_pi','target_features','pred_mu'}
            'curr_labels' : a list of labels
            'target_labels' : a list of target labels
            'pred_pi' : a list of predictions, the length is timesteps
                - pred_pi[t] is a 1d array for deterministic methods, or a
                  2d array for stocast with size (n_samples, 3)
            'target_features' : list, the length is timesteps
                - target_features[t] : a 1d array
            'pred_mu' : list, the length is timesteps
                - pred_mu[t] : a 1d array for deterministic methods, or a
                  2d array for stocast
        """
        dirname = "result_fw={}/{}".format(feature_weight, modelname)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        _pickle.dump(
            test_res,
            open(
                os.path.join(
                    dirname,
                    "lstep{}_nsamples{}_result.pkl".format(lstep, n_samples)),
                "wb"))
        _pickle.dump(
            loss_curve,
            open(
                os.path.join(
                    dirname,
                    "lstep{}_nsamples{}_losses.pkl".format(lstep, n_samples)),
                "wb"))
# Horovod data-parallel training setup for the MNIST RNN model: flags, data,
# loss/accuracy graph, and the distributed optimizer wrapper.
from model import rnn
from tensorflow.examples.tutorials.mnist import input_data

# Horovod must be initialized before any of its collective ops are used.
hvd.init()
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer(
    'max_steps', 1000000,
    """Number of iterations to run for each workers.""")
tf.app.flags.DEFINE_integer('log_frequency', 50,
                            """How many steps between two runop logs.""")
tf.app.flags.DEFINE_integer('batch_size', 32, """Batch size""")
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# rnn() returns a dict of graph endpoints; only the logits are built here and
# the loss/metrics are assembled below.
ops = rnn(only_logits=True)
logits = ops['logits']
x = ops['images']
y = ops['labels']
is_training = ops['is_training']
global_step = ops['global_step']
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
# Add L2 weight decay collected from the model's regularizers.
loss += model.weight_decay * tf.losses.get_regularization_loss()
acc = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1)),
            tf.float32))
optimizer = tf.train.AdamOptimizer(learning_rate=model.learning_rate)
# Wrap the optimizer so gradients are averaged across Horovod workers.
optimizer = hvd.DistributedOptimizer(optimizer)
# NOTE(review): this chunk starts mid-statement — the keywords below belong to
# a DataLoader(...) call in an `if` branch whose opening lines are outside
# this view.
num_workers=args.numworkers, pin_memory=True)
else:
    trainloader = DataLoader(data,
                             batch_size=args.mbsize,
                             shuffle=True,
                             num_workers=args.numworkers)
vocabSize = data.vocabularySize()
embeddingSize = 300
hiddenSize = 100
momentum = 0.9
# NOTE(review): `args.type in 'rnn'` tests substring containment, not
# equality — e.g. args.type == 'r' or '' also matches. Presumably `==` was
# intended; verify before changing.
if args.type in 'rnn':
    print('RNN model')
    model = rnn(vocabSize, embeddingSize, hiddenSize).to(device)
elif args.type in 'gru':
    print('GRU model')
    model = gru(vocabSize, embeddingSize, hiddenSize).to(device)
elif args.type in 'lstm':
    print('LSTM model')
    model = lstm(vocabSize, embeddingSize, hiddenSize).to(device)
else:
    print('Invalid entry for model type. Should be one of rnn, lstm or gru')
    assert False
# Binary classification on raw logits; SGD with Nesterov momentum.
criterion = nn.BCEWithLogitsLoss().to(device)
optimizer = optim.SGD(model.parameters(),
                      lr=args.lr,
                      momentum=momentum,
                      nesterov=True)
# MXNet language-model training setup: corpus iterators, truncated-BPTT RNN
# graph, and a stateful module bound to GPU.
logging.info(args)
ctx = mx.gpu()
batch_size = args.batch_size
bptt = args.bptt
mx.random.seed(args.seed)
# data
corpus = Corpus(args.data)
ntokens = len(corpus.dictionary)
train_data = CorpusIter(corpus.train, batch_size, bptt)
valid_data = CorpusIter(corpus.valid, batch_size, bptt)
test_data = CorpusIter(corpus.test, batch_size, bptt)
# model
pred, states, state_names = rnn(bptt, ntokens, args.emsize, args.nhid,
                                args.nlayers, args.dropout, batch_size,
                                args.tied)
loss = softmax_ce_loss(pred)
# module — carries the RNN hidden state across batches via state_names.
module = CustomStatefulModule(loss,
                              states,
                              state_names=state_names,
                              context=ctx)
module.bind(data_shapes=train_data.provide_data,
            label_shapes=train_data.provide_label)
module.init_params(initializer=mx.init.Xavier())
# rescale_grad averages the summed gradient over the batch.
optimizer = mx.optimizer.create('sgd',
                                learning_rate=args.lr,
                                rescale_grad=1.0 / batch_size)
module.init_optimizer(optimizer=optimizer)
# Text-generation driver: hyper-parameters, sequence preparation, training,
# and sampling. Depends on `text` and `layer` defined outside this view.
drop = 0.3
epochs = 100
batch = 128
optimizer = 'rmsprop'
seq = 5
new_words = 1000
temperature = 0.5
# NOTE(review): `file` shadows the Python 2 builtin of the same name.
file = inp(text, seq)
file.text_seq()
x, y = file.rnn_input()
# NOTE(review): rebinding `rnn` to the instance shadows the imported rnn
# class/factory, so it cannot be constructed again later in this module.
rnn = rnn(text,
          x,
          y,
          layer1=layer,
          dropout=drop,
          epochs=epochs,
          batch=batch,
          optimizer=optimizer)
rnn.define()
# rnn.load()
rnn.train()
new = output(file.get_content(), seq=seq, words=new_words, temp=temperature)
vocab, dict1, dict2 = file.get_vocab()
new_text = new.generate(rnn.get_model(), vocab, dict1, dict2)
# print new_text
# IMDB sentiment classification: load, pad, train the RNN, then report
# figures and performance via the project utils.
import numpy as np

# FIX: the original called np.random.RandomState(0), which constructs a new,
# independent generator and immediately discards it — it does NOT seed the
# global np.random state. np.random.seed(0) is what makes the run reproducible.
np.random.seed(0)

from model import rnn
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras import optimizers
from keras.callbacks import EarlyStopping
from utils import output_performance, generate_figures, get_args

args = get_args()
# Cap vocabulary size and review length so every sequence fits a fixed shape.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    path="imdb.npz", num_words=args.vocab_size, maxlen=args.maxLen)
x_train = sequence.pad_sequences(x_train, maxlen=args.maxLen)
x_test = sequence.pad_sequences(x_test, maxlen=args.maxLen)
model = rnn(vocab_size=args.vocab_size,
            maxLen=args.maxLen,
            embedding_dim=args.embed,
            hidden_dim=args.hidden,
            output_dim=args.output,
            batch_size=args.batch,
            keep_prob=args.keep)
model.compile(optimizer=optimizers.Adam(lr=args.lr),
              loss='binary_crossentropy',
              metrics=['accuracy'])
print(model.summary())
# Stop as soon as validation loss stops improving.
history = model.fit(x_train,
                    y_train,
                    validation_split=args.val_split,
                    batch_size=args.batch,
                    epochs=args.epochs,
                    callbacks=[EarlyStopping(monitor='val_loss')])
y_pred = model.predict(x_test)
generate_figures(history=history,
                 model_name=args.model_name,
                 output_dir="figures")
output_performance(model=model, y_test=y_test, y_pred=y_pred)
# MXNet language-model training script (argparse variant of the setup above):
# corpus iterators, truncated-BPTT RNN, stateful module, and the train loop
# header.
args = parser.parse_args()
logging.info(args)
ctx = mx.gpu()
batch_size = args.batch_size
bptt = args.bptt
mx.random.seed(args.seed)
# data
corpus = Corpus(args.data)
ntokens = len(corpus.dictionary)
train_data = CorpusIter(corpus.train, batch_size, bptt)
valid_data = CorpusIter(corpus.valid, batch_size, bptt)
test_data = CorpusIter(corpus.test, batch_size, bptt)
# model
pred, states, state_names = rnn(bptt, ntokens, args.emsize, args.nhid,
                                args.nlayers, args.dropout, batch_size,
                                args.tied)
loss = softmax_ce_loss(pred)
# module — carries the RNN hidden state across batches via state_names.
module = CustomStatefulModule(loss,
                              states,
                              state_names=state_names,
                              context=ctx)
module.bind(data_shapes=train_data.provide_data,
            label_shapes=train_data.provide_label)
module.init_params(initializer=mx.init.Xavier())
# rescale_grad averages the summed gradient over the batch.
optimizer = mx.optimizer.create('sgd',
                                learning_rate=args.lr,
                                rescale_grad=1.0/batch_size)
module.init_optimizer(optimizer=optimizer)
# metric
speedometer = mx.callback.Speedometer(batch_size, args.log_interval)
# train
logging.info("Training started ... ")
# NOTE(review): chunk is truncated here — the epoch loop body is outside
# this view.
for epoch in range(args.epochs):