def test_model_fn(self):
    # Load hyperparameters and train the RNN model for a single step.
    with open("data/config.json") as f:
        params = json.load(f)
    train_estimator = tf.estimator.Estimator(
        model_fn=models.RNN_model_fn,
        model_dir="model",
        params=params)
    train_estimator.train(lambda: models.input_fn(True, params), steps=1)

    # Rebuild the estimator with a test-sized batch, warm-started from the
    # checkpoint written above.
    params["batch_size"] = params["n_test_samples"]
    test_estimator = tf.estimator.Estimator(
        model_fn=models.RNN_model_fn,
        model_dir="model",
        params=params,
        warm_start_from="model")

    # Predict with the warm-started test estimator and count the predictions.
    # (The original called train_estimator.predict here, which never used the
    # test estimator built above.)
    predictions = test_estimator.predict(lambda: models.input_fn(False, params))
    cnt = 0
    for pred in predictions:
        cnt += 1
    print(cnt)
def run_inference(id, partition, td=False):
    features, labels = input_fn(partition, cf.eparams, id)
    prob = []
    feat = []
    exp_sens = []
    con_sens = []
    while True:
        try:
            # This breaks when input_fn can't make a full 16 x 5 min input.
            # TODO: use a smaller batch, or zero-pad the missing part of the batch.
            x, y = sess.run([features, labels])
            predictions = predict_fn({"input": x})
            prob.append(np.transpose(predictions['probabilities'][:, 1]))
            feat.append(np.transpose(predictions['features']))
            if td:
                exp_sens.append(predictions['experimental_sensitivity'])
                con_sens.append(predictions['control_sensitivity'])
        except Exception:
            # The input pipeline for this recording is exhausted.
            # print('{}: done processing {}'.format(partition, id))
            break
    features = np.reshape(
        np.transpose(np.asarray(feat), [0, 2, 1]), [len(feat) * 4, -1])
    if not td:
        return np.argmax(y[0, :]), np.reshape(np.asarray(prob), [-1]), features
    else:
        td_exp = np.reshape(
            np.transpose(np.asarray(exp_sens), [2, 4, 5, 0, 1, 3]), [2, 625, -1])
        td_con = np.reshape(
            np.transpose(np.asarray(con_sens), [2, 4, 5, 0, 1, 3]), [2, 625, -1])
        taylor_decomp = np.stack([td_exp, td_con], axis=3)
        return np.argmax(y[0, :]), np.reshape(
            np.asarray(prob), [-1]), features, taylor_decomp
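# Hypothetical usage sketch (not in the original): run_inference relies on a
# restored `sess`, the exported `predict_fn`, and the module-level `input_fn`;
# the recording id and partition name below are made-up placeholders.
# label, probs, feats = run_inference('subject_01', 'test')
# label, probs, feats, taylor_decomp = run_inference('subject_01', 'test', td=True)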
import tensorflow as tf
import pandas as pd

import models

print('Reading evaluation data...')
eval_df = pd.read_csv('data/merged_eval_2016_total.csv',
                      parse_dates=['transactiondate'])
models.fillna_df(eval_df)
eval_df = models.add_outlier_column(eval_df)
eval_df = models.add_sign_column(eval_df)

model = models.logsign_classifier
results = model.evaluate(input_fn=lambda: models.input_fn(eval_df, 'logsign'),
                         steps=1)
# results = model.evaluate(input_fn=lambda: models.input_fn(eval_df_outl), steps=1)
# results2 = model.evaluate(input_fn=lambda: models.input_fn(eval_df), steps=1)

print('Results:')
print(results)
print(model.get_variable_names())
# models.print_dnn(model)
# print('Logits:')
# for weight in model.get_variable_value('dnn/logits/weights').flatten():
#     print('  {: .3f}x + {: .3f}'.format(weight, model.get_variable_value('dnn/logits/biases')[0]))
# print(model.get_variable_value('dnn/hiddenlayer_0/weights'))
# print(model.get_variable_value('dnn/hiddenlayer_0/biases'))

input_samples = eval_df.sample(n=20)
import numpy as np
import pandas as pd
import tensorflow as tf

import models

tf.logging.set_verbosity(tf.logging.ERROR)

print('Reading training data...')
train_df = pd.read_csv('data/merged_train_2016_total.csv',
                       parse_dates=['transactiondate'])
models.fillna_df(train_df)

# Split the data at one standard deviation from the mean log error.
err_std = train_df['logerror'].std()
err_mean = train_df['logerror'].mean()
query_outl = ('(logerror >= ' + str(err_std + err_mean) +
              ') or (logerror <= ' + str(err_mean - err_std) + ')')
# Note: the original used "or" here, which matches every row; "and" keeps only
# the rows within one standard deviation of the mean.
query_norm = ('(logerror < ' + str(err_std + err_mean) +
              ') and (logerror > ' + str(err_mean - err_std) + ')')
train_df_outl = train_df.query(query_outl)
train_df_norm = train_df.query(query_norm)

# feature_columns = [
#     tf.contrib.layers.real_valued_column('taxamount', dtype=tf.float64),
#     tf.contrib.layers.real_valued_column('yearbuilt', dtype=tf.float64)
# ]

model = models.dnn_regressor

print('Training...')
for i in range(1):
    print('Iteration: %d' % (i + 1))
    model.fit(input_fn=lambda: models.input_fn(train_df_outl), steps=50000)
print('Done.')
import tensorflow as tf
import pandas as pd
import numpy as np

import models

print('Reading training data...')
train_df = pd.read_csv('data/merged_train_2016_total.csv',
                       parse_dates=['transactiondate'])
models.fillna_df(train_df)
train_df = models.add_outlier_column(train_df)
train_df = models.add_sign_column(train_df)

# err_std = train_df['logerror'].std()
# err_mean = train_df['logerror'].mean()
# query_outl = '(logerror >= ' + str(err_std + err_mean) + ') or (logerror <= ' + str(err_mean - err_std) + ')'
# query_norm = '(logerror < ' + str(err_std + err_mean) + ') and (logerror > ' + str(err_mean - err_std) + ')'
# train_df_outl = train_df.query(query_outl)
# train_df_norm = train_df.query(query_norm)

# feature_columns = [
#     tf.contrib.layers.real_valued_column('taxamount', dtype=tf.float64),
#     tf.contrib.layers.real_valued_column('yearbuilt', dtype=tf.float64)
# ]

model = models.logsign_classifier

print('Training...')
for i in range(1):
    print('Iteration: %d' % (i + 1))
    model.fit(input_fn=lambda: models.input_fn(train_df, 'logsign'), steps=1000)
print('Done.')
smoothed_loss_values = np.ones([2])
test_losses = []
tolerance = 1e-3
early_stop_criterion = False
iteration = 0
max_steps = 2e4  # was 1e4
train_steps = 100  # was 100
eval_steps = 500
min_iterations = 5
max_iterations = max_steps // train_steps
patience = 3

while True:
    print('Doing iteration {} of train and eval steps.'.format(iteration))
    print('Training:')
    classifier.train(input_fn=lambda: input_fn('train', cf.eparams),
                     steps=train_steps)
    print('Evaluation:')
    eval_results = classifier.evaluate(
        input_fn=lambda: input_fn('val', cf.eparams), steps=eval_steps)
    print('Evaluation loss: {}.'.format(eval_results['loss']))
    if iteration == 0:
        # Seed the smoothing buffer with the first evaluation loss
        # (loss_buffer_size is assumed to be defined earlier in the script).
        l = deque(eval_results['loss'] * np.ones([loss_buffer_size]))
    test_losses.append(eval_results['loss'])
    smoothed_loss_values[0] = np.mean(l)
    l.popleft()
    l.append(eval_results['loss'])
    smoothed_loss_values[1] = np.mean(l)
    iteration += 1
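    # Hypothetical stopping rule (the original snippet never breaks out of the
    # loop): stop once the smoothed validation loss has not improved by
    # `tolerance` for `patience` consecutive iterations, after at least
    # `min_iterations`, or when `max_iterations` is reached.
    # if iteration >= min_iterations:
    #     if smoothed_loss_values[1] > smoothed_loss_values[0] - tolerance:
    #         patience -= 1
    #     else:
    #         patience = 3  # reset on improvement
    #     if patience == 0 or iteration >= max_iterations:
    #         break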
import tensorflow as tf
import pandas as pd

import models

# print('Reading evaluation data...')
eval_df = pd.read_csv('data/merged_eval_2016_total.csv',
                      parse_dates=['transactiondate'])
models.fillna_df(eval_df)
eval_df = models.add_outlier_column(eval_df)
eval_df = models.add_sign_column(eval_df)

model = models.logsign_classifier
results = model.evaluate(input_fn=lambda: models.input_fn(eval_df, 'logsign'),
                         steps=1)
# results = model.evaluate(input_fn=lambda: models.input_fn(eval_df_outl), steps=1)
# results2 = model.evaluate(input_fn=lambda: models.input_fn(eval_df), steps=1)

print('Results:')
print(results)
print(model.get_variable_names())
# models.print_dnn(model)
# print('Logits:')
# for weight in model.get_variable_value('dnn/logits/weights').flatten():
#     print('  {: .3f}x + {: .3f}'.format(weight, model.get_variable_value('dnn/logits/biases')[0]))
# print(model.get_variable_value('dnn/hiddenlayer_0/weights'))
# print(model.get_variable_value('dnn/hiddenlayer_0/biases'))

input_samples = eval_df.sample(n=20)
output_samples = list(model.predict(
    input_fn=lambda: models.input_fn(input_samples, 'logsign'), outputs=None))
import pandas as pd

import models

print('Reading evaluation data...')
eval_df = pd.read_csv('data/merged_eval_2016_total.csv',
                      parse_dates=['transactiondate'])
models.fillna_df(eval_df)

# Split the data at one standard deviation from the mean log error.
err_std = eval_df['logerror'].std()
err_mean = eval_df['logerror'].mean()
query_outl = ('(logerror >= ' + str(err_std + err_mean) +
              ') or (logerror <= ' + str(err_mean - err_std) + ')')
# Note: the original used "or" here, which matches every row; "and" keeps only
# the rows within one standard deviation of the mean.
query_norm = ('(logerror < ' + str(err_std + err_mean) +
              ') and (logerror > ' + str(err_mean - err_std) + ')')
eval_df_outl = eval_df.query(query_outl)
eval_df_norm = eval_df.query(query_norm)

model = models.dnn_regressor
results = model.evaluate(input_fn=lambda: models.input_fn(eval_df_outl), steps=1)
results2 = model.evaluate(input_fn=lambda: models.input_fn(eval_df), steps=1)

print('Results:')
print(results)
print(results2)
print(model.get_variable_names())

print('Logits Layer:')
for weight in model.get_variable_value('dnn/logits/weights').flatten():
    print('  %fx + %f' % (weight, model.get_variable_value('dnn/logits/biases')[0]))
print(model.get_variable_value('dnn/hiddenlayer_0/weights'))
print(model.get_variable_value('dnn/hiddenlayer_0/biases'))

input_samples = eval_df_outl.sample(n=20)
def main():
    # preprocessing
    logging.info("preprocessing data.")
    data_utils.preprocessing()

    # read config
    logging.info("loading config.")
    with open("data/config.json") as f:
        params = json.load(f)

    # define run config
    run_config = tf.estimator.RunConfig(
        model_dir="model",
        save_summary_steps=1,
        save_checkpoints_steps=1
    )

    # build RNN-Search model
    logging.info("building train estimator.")
    train_estimator = tf.estimator.Estimator(
        model_fn=models.MultiRNN_model_fn,
        model_dir="model",
        params=params,
        config=run_config
    )

    # define TrainSpec
    logging.info("defining train spec.")
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: models.input_fn(True, params),
        max_steps=1000
    )

    # define EarlyStoppingHook
    logging.info("defining early stopping hook.")
    early_stopping_hook = hooks.EarlyStoppingHook()

    # define EvalSpec
    logging.info("defining eval spec.")
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: models.input_fn(False, params),
        hooks=[early_stopping_hook],
        throttle_secs=1
    )

    # train and evaluate the RNN-Search model
    logging.info("training and evaluating.")
    try:
        tf.estimator.train_and_evaluate(train_estimator, train_spec, eval_spec)
    except ValueError:
        logging.info("training stopped.")

    # refresh params and rebuild the model for testing
    logging.info("refreshing params.")
    refresh_params(params)
    logging.info("rebuilding test estimator.")
    test_estimator = tf.estimator.Estimator(
        model_fn=models.MultiRNN_model_fn,
        model_dir="model",
        params=params,
        warm_start_from="model")

    # use the RNN-Search model to predict
    logging.info("predicting.")
    predictions = test_estimator.predict(lambda: models.input_fn(False, params))

    # evaluate the BLEU score of the predictions
    logging.info("evaluating BLEU score of predictions.")
    with open(params["tgt_test_path"]) as f:
        references = json.load(f)
    with open(params["eval_belu_path"], "w") as f:
        f.write("BLEU Score: {}".format(evals.BELU(references, predictions)))
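# Script entry point (an assumption; the original snippet only defines main()).
if __name__ == "__main__":
    main()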
def train_prototypes(
        n_iter=5000,
        lr=1e1,
        lmdb=1e-3,
        lr_decay=5e-4,  # if zero, removes decay entirely -> constant lr
        lmdb_decay=0,   # if zero, removes decay entirely -> constant lmdb
        tht=1e-3,       # was 1e-4
        yps=1e-3,
):
    # Restore the trained graph and pull the prototype tensors from the
    # 'prototype' collection registered by the model.
    tf.reset_default_graph()
    ckpt = tf.train.get_checkpoint_state(ckptdir)
    saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
    sess = tf.Session()
    features, labels = input_fn('train', params)
    saver.restore(sess, ckpt.model_checkpoint_path)

    prototype = tf.get_collection('prototype')
    X_p = prototype[0]
    y_p = prototype[1]
    logits_p = prototype[2]
    cost_p = prototype[3]
    opt_p = prototype[4]
    lr_p = prototype[5]
    lambda_p = prototype[6]
    theta_p = prototype[7]
    X_mean = prototype[8]
    Spectra = prototype[9]
    ypsilon_p = prototype[10]

    f, l = sess.run([features, labels])
    l = np.argmax(l, axis=1)

    # Per-class mean signal and mean power spectrum that constrain the prototypes.
    mean = np.zeros([
        params['n_classes'], params['n_channels'], params['time_steps'], 1,
        params['n_epoch_samples'] // params['time_steps']
    ])
    spec = np.zeros([
        params['n_classes'],
        params['n_epoch_samples'] // params['time_steps'] // 2 + 1,
        params['n_channels']
    ])
    for cls in range(params['n_classes']):
        mean[cls, :, :] = np.reshape(
            np.transpose(np.mean(f[cls == l, :, :], axis=0), [1, 0]),
            [2, 1, 1, -1])
        spec[cls, :, :] = np.mean(
            np.abs(np.fft.rfft(f[cls == l, :, :], axis=1))**2, axis=0)
    spec = np.transpose(spec, [0, 2, 1])

    # Gradient-descent loop on the prototype inputs with exponentially decayed
    # learning rate and lambda.
    for iter in range(n_iter):
        decayed_lr = lr * np.exp(-iter * lr_decay) if lr_decay != 0 else lr
        decayed_lambda_p = lmdb * (
            np.exp(-iter * lmdb_decay) -
            np.exp(-iter * 10 * lmdb_decay)) if lmdb_decay != 0 else lmdb
        _, c = sess.run(
            [opt_p, cost_p],
            feed_dict={
                lr_p: decayed_lr,
                lambda_p: decayed_lambda_p,
                X_mean: mean,
                theta_p: tht,
                Spectra: spec,
                ypsilon_p: yps
            })
        if iter % 100 == 0:
            print('Iteration: {:05d} Cost = {:.9f} Learning Rate = {:.9f} '
                  'Lambda = {:.9f}'.format(iter, c, decayed_lr, decayed_lambda_p))

    prototypes, c = sess.run([X_p, cost_p],
                             feed_dict={X_mean: mean, Spectra: spec})
    print(c)
    out = sess.run(logits_p)
    print(out)
    print(np.argmax(out, axis=2))

    # Plot the optimized prototype for each class and channel.
    fig, ax = plt.subplots(params['n_classes'], 2)
    lim = np.max(prototypes[:])
    for i in range(params['n_classes']):
        for chan in range(params['n_channels']):
            ax[i, chan].plot(prototypes[i, chan, 0, 0, :])
            if i == 0:
                ax[i, chan].set_title('Control')
            if i == 1:
                ax[i, chan].set_title('Stroke')
            ax[i, chan].set_ylabel('EEG' + str(chan + 1))
            ax[i, chan].set_xlabel('Samples')
            ax[i, chan].set_ylim([-lim, lim])
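# Hypothetical invocation (not part of the original snippet): run the prototype
# optimization with its default hyper-parameters and show the resulting figure;
# assumes `ckptdir`, `params`, `input_fn`, and matplotlib's `plt` are defined
# in the surrounding module.
# train_prototypes()
# plt.show()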
# Persist the data partitions so later runs evaluate on the same split.
with open('partitions.pkl', 'wb') as f:
    pickle.dump(DataHandler.get_partitions(), f)

config = tf.estimator.RunConfig(
    save_checkpoints_steps=params['save_checkpoint_steps'],
    save_summary_steps=params['save_summary_steps'])

model = Model('CRNN', params)
classifier = tf.estimator.Estimator(
    model_fn=lambda features, labels, mode: model(features, labels, mode),
    model_dir=ckptdir,
    config=config)

if train_model:
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn('train', params),
        max_steps=params['train_steps'])
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn('val', params),
        steps=params['eval_steps'],
        throttle_secs=params['throttle_secs'])
    tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)

if evaluate_model:
    # Evaluate on the test set and assemble the 2x2 confusion matrix.
    eval_results = classifier.evaluate(
        input_fn=lambda: input_fn('test_sequence', params))
    print(eval_results)
    cm = np.asarray([[eval_results["tn"], eval_results["fp"]],
                     [eval_results["fn"], eval_results["tp"]]])
    print(cm)
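    # Optional sketch (not in the original script): summary metrics derived from
    # the confusion matrix above; tn/fp/fn/tp come straight from eval_results.
    tn, fp = cm[0]
    fn, tp = cm[1]
    accuracy = (tp + tn) / cm.sum()
    sensitivity = tp / (tp + fn)   # recall on the positive class
    specificity = tn / (tn + fp)
    print('accuracy={:.3f}  sensitivity={:.3f}  specificity={:.3f}'.format(
        accuracy, sensitivity, specificity))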