Example #1
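 # Fragment of a test class; assumes `json`, `tensorflow as tf`, and the
 # project's `models` module are imported at module level, and that
 # "data/config.json" holds the hyperparameters (including "n_test_samples").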
 def test_model_fn(self):
     params = json.load(open("data/config.json"))
     train_estimator = tf.estimator.Estimator(
         model_fn = models.RNN_model_fn,
         model_dir = "model",
         params = params)
     train_estimator.train(
         lambda: models.input_fn(True, params),
         steps=1
     )
     params["batch_size"] = params["n_test_samples"]
     test_estimator = tf.estimator.Estimator(
         model_fn = models.RNN_model_fn,
         model_dir = "model",
         params = params,
         warm_start_from = "model")
     predictions = test_estimator.predict(
         lambda: models.input_fn(False, params)
     )
     cnt = 0
     for pred in predictions:
         cnt += 1
         print(cnt)
Example #2
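    # Fragment of a larger inference script; assumes an open tf.Session `sess`,
    # a loaded `predict_fn`, plus `input_fn`, `cf.eparams`, and `numpy as np`
    # in the enclosing scope.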
    def run_inference(id, partition, td=False):
        features, labels = input_fn(partition, cf.eparams, id)
        prob = []
        feat = []
        exp_sens = []
        con_sens = []
        while True:
            try:
                # this will break when input_fn can't make a full 16 times 5 min input
                # todo: use smaller batch, or zero pad for missing in batch
                x, y = sess.run([features, labels])
                predictions = predict_fn({"input": x})
                prob.append(np.transpose(predictions['probabilities'][:, 1]))
                feat.append(np.transpose(predictions['features']))
                if td:
                    exp_sens.append(predictions['experimental_sensitivity'])
                    con_sens.append(predictions['control_sensitivity'])
            except Exception:
                #print('{}: done processing {}'.format(partition, id))
                break

        features = np.reshape(np.transpose(np.asarray(feat), [0, 2, 1]),
                              [len(feat) * 4, -1])
        if not td:
            return np.argmax(y[0, :]), np.reshape(np.asarray(prob),
                                                  [-1]), features
        else:
            td_exp = np.reshape(
                np.transpose(np.asarray(exp_sens), [2, 4, 5, 0, 1, 3]),
                [2, 625, -1])
            td_con = np.reshape(
                np.transpose(np.asarray(con_sens), [2, 4, 5, 0, 1, 3]),
                [2, 625, -1])
            taylor_decomp = np.stack([td_exp, td_con], axis=3)
            return np.argmax(y[0, :]), np.reshape(
                np.asarray(prob), [-1]), features, taylor_decomp
Example #3
import tensorflow as tf
import pandas as pd

import models

print('Reading evaluation data...')
eval_df = pd.read_csv('data/merged_eval_2016_total.csv',
                      parse_dates=['transactiondate'])
models.fillna_df(eval_df)

eval_df = models.add_outlier_column(eval_df)
eval_df = models.add_sign_column(eval_df)

model = models.logsign_classifier
results = model.evaluate(input_fn=lambda: models.input_fn(eval_df, 'logsign'),
                         steps=1)
# results = model.evaluate(input_fn=lambda: models.input_fn(eval_df_outl), steps=1)
# results2 = model.evaluate(input_fn=lambda: models.input_fn(eval_df), steps=1)

print('Results:')
print(results)
print(model.get_variable_names())
# models.print_dnn(model)
# print 'Logits:'
# for weight in model.get_variable_value('dnn/logits/weights').flatten():
#     print '  {: .3f}x + {: .3f}'.format(weight, model.get_variable_value('dnn/logits/biases')[0])
# print model.get_variable_value('dnn/hiddenlayer_0/weights')
# print model.get_variable_value('dnn/hiddenlayer_0/biases')

input_samples = eval_df.sample(n=20)
Example #4
import numpy as np
import pandas as pd
import tensorflow as tf

import models

tf.logging.set_verbosity(tf.logging.ERROR)
print('Reading training data...')

train_df = pd.read_csv('data/merged_train_2016_total.csv',
                       parse_dates=['transactiondate'])
models.fillna_df(train_df)
err_std = train_df['logerror'].std()
err_mean = train_df['logerror'].mean()
query_outl = '(logerror >= ' + str(
    err_std + err_mean) + ') or (logerror <= ' + str(err_mean - err_std) + ')'
query_norm = '(logerror < ' + str(
    err_std + err_mean) + ') and (logerror > ' + str(err_mean - err_std) + ')'
train_df_outl = train_df.query(query_outl)
train_df_norm = train_df.query(query_norm)

#feature_columns = [
#    tf.contrib.layers.real_valued_column('taxamount', dtype=tf.float64),
#    tf.contrib.layers.real_valued_column('yearbuilt', dtype=tf.float64)
#]
model = models.dnn_regressor

print('Training...')
for _ in range(1):
    print('Iteration: %d' % (_ + 1))
    model.fit(input_fn=lambda: models.input_fn(train_df_outl), steps=50000)

print('Done.')
Example #5
import tensorflow as tf
import pandas as pd
import numpy as np

import models
print('Reading training data...')

train_df = pd.read_csv('data/merged_train_2016_total.csv', parse_dates=['transactiondate'])
models.fillna_df(train_df)
train_df = models.add_outlier_column(train_df)
train_df = models.add_sign_column(train_df)
# err_std = train_df['logerror'].std()
# err_mean = train_df['logerror'].mean()
# query_outl = '(logerror >= ' + str(err_std + err_mean) + ') or (logerror <= ' + str(err_mean - err_std)+ ')'
# query_norm = '(logerror < ' + str(err_std + err_mean) + ') and (logerror > ' + str(err_mean - err_std) + ')'
# train_df_outl = train_df.query(query_outl)
# train_df_norm = train_df.query(query_norm)

#feature_columns = [
#    tf.contrib.layers.real_valued_column('taxamount', dtype=tf.float64),
#    tf.contrib.layers.real_valued_column('yearbuilt', dtype=tf.float64)
#]
model = models.logsign_classifier

print('Training...')
for _ in range(1):
    print('Iteration: %d' % (_ + 1))
    model.fit(input_fn=lambda: models.input_fn(train_df, 'logsign'), steps=1000)

print('Done.')
Example #6
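        # Fragment of a train/eval loop; assumes `classifier`, `input_fn`,
        # `cf.eparams`, `loss_buffer_size`, `numpy as np`, and
        # `collections.deque` exist in the enclosing scope. The early-stopping
        # check built from `smoothed_loss_values`, `tolerance`, and `patience`
        # presumably follows after this excerpt.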
        smoothed_loss_values = np.ones([2])
        test_losses = []
        tolerance = 1e-3
        early_stop_criterion = False
        iteration = 0
        max_steps = 2e4 #was 1e4
        train_steps = 100 #was 100
        eval_steps = 500
        min_iterations = 5
        max_iterations = max_steps // train_steps
        patience = 3
        while True:
            print('Doing iteration {} of train and eval steps.'.format(iteration))

            print('Training:')
            classifier.train(input_fn=lambda: input_fn('train', cf.eparams), steps=train_steps)

            print('Evaluation:')
            eval_results = classifier.evaluate(input_fn=lambda: input_fn('val', cf.eparams), steps=eval_steps)
            print('Evaluational loss result: {}.'.format(eval_results['loss']))

            if iteration == 0:
                l = deque(eval_results['loss']*np.ones([loss_buffer_size]))

            test_losses.append(eval_results['loss'])
            smoothed_loss_values[0] = np.mean(l)
            l.popleft()
            l.append(eval_results['loss'])
            smoothed_loss_values[1] = np.mean(l)
            iteration += 1
Example #7
import tensorflow as tf
import pandas as pd

import models

#print 'Reading evaluation data...'
eval_df = pd.read_csv('data/merged_eval_2016_total.csv', parse_dates=['transactiondate'])
models.fillna_df(eval_df)

eval_df = models.add_outlier_column(eval_df)
eval_df = models.add_sign_column(eval_df)

model = models.logsign_classifier
results = model.evaluate(input_fn=lambda: models.input_fn(eval_df, 'logsign'), steps=1)
# results = model.evaluate(input_fn=lambda: models.input_fn(eval_df_outl), steps=1)
# results2 = model.evaluate(input_fn=lambda: models.input_fn(eval_df), steps=1)

print('Results:')
print(results)
print(model.get_variable_names())
# models.print_dnn(model)
# print 'Logits:'
# for weight in model.get_variable_value('dnn/logits/weights').flatten():
#     print '  {: .3f}x + {: .3f}'.format(weight, model.get_variable_value('dnn/logits/biases')[0])
# print model.get_variable_value('dnn/hiddenlayer_0/weights')
# print model.get_variable_value('dnn/hiddenlayer_0/biases')

input_samples = eval_df.sample(n=20)

output_samples = list(model.predict(input_fn=lambda: models.input_fn(input_samples, 'logsign'),
                                    outputs=None))
Example #8
import pandas as pd

import models

print('Reading evaluation data...')
eval_df = pd.read_csv('data/merged_eval_2016_total.csv',
                      parse_dates=['transactiondate'])
models.fillna_df(eval_df)
err_std = eval_df['logerror'].std()
err_mean = eval_df['logerror'].mean()
query_outl = '(logerror >= ' + str(
    err_std + err_mean) + ') or (logerror <= ' + str(err_mean - err_std) + ')'
query_norm = '(logerror < ' + str(
    err_std + err_mean) + ') and (logerror > ' + str(err_mean - err_std) + ')'
eval_df_outl = eval_df.query(query_outl)
eval_df_norm = eval_df.query(query_norm)

model = models.dnn_regressor
results = model.evaluate(input_fn=lambda: models.input_fn(eval_df_outl),
                         steps=1)
results2 = model.evaluate(input_fn=lambda: models.input_fn(eval_df), steps=1)

print('Results:')
print(results)
print(results2)
print(model.get_variable_names())
print('Logits Layer:')
for weight in model.get_variable_value('dnn/logits/weights').flatten():
    print('  %fx + %f' % (weight,
                          model.get_variable_value('dnn/logits/biases')[0]))
print(model.get_variable_value('dnn/hiddenlayer_0/weights'))
print(model.get_variable_value('dnn/hiddenlayer_0/biases'))

input_samples = eval_df_outl.sample(n=20)
Example #9
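# Assumes module-level imports of `json`, `logging`, `tensorflow as tf`, and the
# project modules `data_utils`, `models`, `hooks`, and `evals`, plus a
# `refresh_params` helper defined elsewhere in the same file.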
def main():
    # preprocessing
    logging.info("preprocessing data.")
    data_utils.preprocessing()
    # read config
    logging.info("loading config.")
    with open("data/config.json") as f:
        params = json.load(f)
    # define run config
    run_config = tf.estimator.RunConfig(
        model_dir = "model",
        save_summary_steps = 1,
        save_checkpoints_steps = 1
    )
    # build RNN-Search model
    logging.info("building train estimator.")
    train_estimator = tf.estimator.Estimator(
        model_fn = models.MultiRNN_model_fn,
        model_dir = "model",
        params = params,
        config = run_config
    )
    # define TrainSpec
    logging.info("defining train spec.")
    train_spec = tf.estimator.TrainSpec(
        input_fn = lambda: models.input_fn(True, params),
        max_steps = 1000
    )
    # define EarlyStoppingHook
    logging.info("defining early stopping hook")
    early_stopping_hook = hooks.EarlyStoppingHook()
    # define EvalSpec
    logging.info("defining eval spec.")
    eval_spec = tf.estimator.EvalSpec(
        input_fn = lambda: models.input_fn(False, params),
        hooks = [early_stopping_hook],
        throttle_secs = 1
    )
    # train and evaluate RNN-Search model
    logging.info("training and evaluating.")
    try:
        tf.estimator.train_and_evaluate(
            train_estimator, train_spec, eval_spec
        )
    except ValueError as e:
        logging.info("training stopped.")
    # refresh params and rebuild model
    logging.info("refreshing params.")
    refresh_params(params)
    logging.info("rebuilding test estimator.")
    test_estimator = tf.estimator.Estimator(
        model_fn = models.MultiRNN_model_fn,
        model_dir = "model",
        params = params,
        warm_start_from = "model")
    # use RNN-Search model to predict
    logging.info("predicting.")
    predictions = test_estimator.predict(
        lambda: models.input_fn(False, params)
    )
    # evaluate BELU score of the predictions
    logging.info("evaluating BELU score of predictions.")
    with open(params["tgt_test_path"]) as f:
        references = json.load(f)
    with open(params["eval_belu_path"], "w") as f:
        f.write("BELU Score: {}".format(evals.BELU(references, predictions)))
Example #10
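    # Fragment using TF1 graph/session APIs; assumes `ckptdir`, `params`,
    # `input_fn`, `numpy as np`, `tensorflow as tf`, and
    # `matplotlib.pyplot as plt` are available in the enclosing scope.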
    def train_prototypes(
        n_iter=5000,
        lr=1e1,
        lmdb=1e-3,
        lr_decay=5e-4,  # if zero, removes decay entirely -> constant lr
        lmdb_decay=0,  # if zero, removes decay entirely -> constant lmdb
        tht=1e-3,  # was 1e-4
        yps=1e-3,
    ):
        tf.reset_default_graph()
        ckpt = tf.train.get_checkpoint_state(ckptdir)
        saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path +
                                           '.meta')
        sess = tf.Session()

        features, labels = input_fn('train', params)

        saver.restore(sess, ckpt.model_checkpoint_path)
        prototype = tf.get_collection('prototype')

        X_p = prototype[0]
        y_p = prototype[1]
        logits_p = prototype[2]
        cost_p = prototype[3]
        opt_p = prototype[4]
        lr_p = prototype[5]
        lambda_p = prototype[6]
        theta_p = prototype[7]
        X_mean = prototype[8]
        Spectra = prototype[9]
        ypsilon_p = prototype[10]

        f, l = sess.run([features, labels])
        l = np.argmax(l, axis=1)
        mean = np.zeros([
            params['n_classes'], params['n_channels'], params['time_steps'], 1,
            params['n_epoch_samples'] // params['time_steps']
        ])
        spec = np.zeros([
            params['n_classes'],
            params['n_epoch_samples'] // params['time_steps'] // 2 + 1,
            params['n_channels']
        ])

        for cls in range(params['n_classes']):
            mean[cls, :, :] = np.reshape(
                np.transpose(np.mean(f[cls == l, :, :], axis=0), [1, 0]),
                [2, 1, 1, -1])
            spec[cls, :, :] = np.mean(np.abs(
                np.fft.rfft(f[cls == l, :, :], axis=1))**2,
                                      axis=0)
        spec = np.transpose(spec, [0, 2, 1])

        for iter in range(n_iter):
            decayed_lr = lr * np.exp(-iter * lr_decay) if lr_decay != 0 else lr
            decayed_lambda_p = lmdb * (np.exp(-iter * lmdb_decay) - np.exp(
                -iter * 10 * lmdb_decay)) if lmdb_decay != 0 else lmdb
            _, c = sess.run(
                [opt_p, cost_p],
                feed_dict={
                    lr_p: decayed_lr,
                    lambda_p: decayed_lambda_p,
                    X_mean: mean,
                    theta_p: tht,
                    Spectra: spec,
                    ypsilon_p: yps
                })
            if iter % 100 == 0:
                print(
                    'Iteration: {:05d} Cost = {:.9f} Learning Rate = {:.9f} Lambda = {:.9f}'
                    .format(iter, c, decayed_lr, decayed_lambda_p))
        prototypes, c = sess.run([X_p, cost_p],
                                 feed_dict={
                                     X_mean: mean,
                                     Spectra: spec
                                 })
        print(c)
        out = sess.run(logits_p)
        print(out)
        print(np.argmax(out, axis=2))

        f, ax = plt.subplots(params['n_classes'], 2)
        lim = np.max(prototypes[:])
        for i in range(params['n_classes']):
            for chan in range(params['n_channels']):
                ax[i, chan].plot(prototypes[i, chan, 0, 0, :])
                if i == 0: ax[i, chan].set_title('Control')
                if i == 1: ax[i, chan].set_title('Stroke')
                ax[i, chan].set_ylabel('EEG' + str(chan + 1))
                ax[i, chan].set_xlabel('Samples')
                ax[i, chan].set_ylim([-lim, lim])
Example #11
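    # Fragment: the indented lines below close an earlier `if` block. Assumes
    # `pickle`, `numpy as np`, `tensorflow as tf`, `DataHandler`, `Model`,
    # `input_fn`, `params`, `ckptdir`, `train_model`, and `evaluate_model`
    # are defined earlier in the script.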
    with open('partitions.pkl', 'wb') as f:
        pickle.dump(DataHandler.get_partitions(), f)

config = tf.estimator.RunConfig(
    save_checkpoints_steps=params['save_checkpoint_steps'],
    save_summary_steps=params['save_summary_steps'])
model = Model('CRNN', params)

classifier = tf.estimator.Estimator(
    model_fn=lambda features, labels, mode: model(features, labels, mode),
    model_dir=ckptdir,
    config=config)

if train_model:
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn('train', params),
        max_steps=params['train_steps'])
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn('val', params),
                                      steps=params['eval_steps'],
                                      throttle_secs=params['throttle_secs'])
    tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)

if evaluate_model:
    # Get predictions for test set
    eval_results = classifier.evaluate(
        input_fn=lambda: input_fn('test_sequence', params))
    print(eval_results)
    cm = np.asarray([[eval_results["tn"], eval_results["fp"]],
                     [eval_results["fn"], eval_results["tp"]]])
    print(cm)