def test_time_series(self):
    """Smoke-test the observation/time-series generators and batching.

    Builds a two-sequence ``Observations`` fixture, concatenates two
    observation generators along axis 2, reads the time series stored in
    ``test_data/synthetic_control.txt`` and draws one batch of size 2 from
    a time-series generator and from an observation generator.

    Results are only printed; the test makes no assertions.
    """
    import numpy as np

    ob = rnnmed.data.observations.Observations()
    data = [[["A"], ["B"]], [["C"], ["D"]]]
    for item in data:
        ob.add(item, 1)

    o_gen = observations.time_observation_generator(ob, n_visits=2)
    a_gen = observations.time_observation_generator(ob, n_visits=2)
    b_gen = observations.time_observation_generator(ob, n_visits=2)

    # Close the file deterministically instead of leaking the handle.
    # NOTE(review): assumes read_time_series consumes the file before
    # returning (its result is indexed below) -- confirm.
    with open("test_data/synthetic_control.txt") as series_file:
        timeseries = rnnmed.data.io.read_time_series(series_file)

    c_gen = rnnmed.data.concatenate_generator(
        [a_gen, b_gen], concat=lambda x: np.concatenate(x, axis=2))
    for x, y in c_gen:
        print(x)
        print(y)

    print(timeseries[0])
    generator = ts.timeseries_generator(timeseries)
    x_a, _ = rnnmed.data.generate_time_batch(generator, batch_size=2)
    x_b, _ = rnnmed.data.generate_time_batch(o_gen, batch_size=2)
    print(x_a)
def test_time_series_observation(self):
    """Run ``visit2visit`` on the L270 raw-measurement observations.

    Reads the observations with ``min_sparsity=0.1``, shuffles them with a
    fixed ``random`` seed, wraps them in a 10-visit observation generator
    and trains ``visit2visit`` with 128 hidden units for 1000 iterations.

    NOTE(review): another method with this exact name is defined later in
    this file. If both live in the same class, the later definition
    replaces this one and this test never runs -- consider renaming one.
    """
    import random

    # Close the input file once it has been read instead of leaking it.
    with open("/mnt/veracrypt1/EHR_DATA/L270-90-raw-measurements.csv") as raw:
        ob = rnnmed.data.io.read_time_series_observation(
            raw, min_sparsity=0.1)

    # Fixed seed so the shuffled order is the same on every run.
    random.seed(10)
    random.shuffle(ob)

    n_visits = 10
    generator = observations.time_observation_generator(ob, n_visits=n_visits)
    print(len(ob), ob.n_features)

    from rnnmed.visit2visit import visit2visit
    visit2visit(generator,
                n_features=ob.n_features,
                n_labels=ob.n_labels,
                n_timesteps=n_visits,
                n_hidden=128,
                max_iter=1000)
def test_time_series_observation(self):
    """Cross-validate ``HistoryPredictor`` on the D611 raw measurements.

    Reads the observations with ``min_sparsity=0.4``, shuffles them with a
    fixed ``random`` seed and converts a 10-visit generator into input and
    output arrays. For each of 10 stratified folds a fresh graph is built,
    the model is trained for 1000 epochs in mini-batches of 32, and the
    ROC AUC on the held-out fold is printed. The mean AUC over all folds
    is printed at the end; the test makes no assertions.
    """
    import random

    # Close the input file once it has been read instead of leaking it.
    with open("/home/isak/D611-90-raw-measurements.csv") as raw:
        ob = rnnmed.data.io.read_time_series_observation(
            raw, min_sparsity=0.4)

    # Fixed seed so the shuffled order is the same on every run.
    random.seed(10)
    random.shuffle(ob)

    n_labels = ob.n_labels
    n_features = ob.n_features
    n_timesteps = 10
    generator = observations.time_observation_generator(
        ob, n_visits=n_timesteps)

    x_data, y_data = rnnmed.data.make_time_input_output_arrays(generator)
    print(x_data.shape)
    print(y_data.shape)

    aucs = []
    skf = StratifiedKFold(n_splits=10, shuffle=True)
    # x_data is indexed with samples on axis 1, y_data with samples on
    # axis 0; the zeros array only tells split() how many samples exist.
    for fold, (train, test) in enumerate(
            skf.split(np.zeros(x_data.shape[1]), y_data)):
        x_train = x_data[:, train, :]
        y_train = y_data[train, :]
        x_test = x_data[:, test, :]
        y_test = y_data[test, :]

        # A separate graph per fold, so each fold gets its own variables.
        graph = tf.Graph()
        with graph.as_default():
            X = tf.placeholder(tf.float32,
                               shape=[n_timesteps, None, n_features])
            y = tf.placeholder(tf.int32, shape=[None])
            # Defaults to 1.0 when not fed (i.e. at prediction time).
            drop_prob = tf.placeholder_with_default(1.0, shape=())
            hp = HistoryPredictor(X, tf.one_hot(y, depth=n_labels),
                                  drop_prob)
            init = tf.global_variables_initializer()

        with tf.Session(graph=graph) as sess:
            sess.run(init)
            for epoch in range(1000):
                for idx in batch_generator(x_train.shape[1], 32):
                    _, _loss = sess.run(
                        [hp.optimize, hp.loss],
                        feed_dict={
                            X: x_train[:, idx, :],
                            y: y_train[idx, :].reshape(-1),
                            drop_prob: 0.4
                        })
                # Report the loss of the last mini-batch every 25 epochs.
                if epoch % 25 == 0:
                    print("Fold {}, epoch {} loss: {}".format(
                        fold, epoch, _loss))

            y_pred = sess.run(hp.prediction, feed_dict={X: x_test})
            # Column 1 of the prediction is used as the score for the AUC.
            auc = roc_auc_score(y_test.reshape(-1), y_pred[:, 1])
            print(auc)
            aucs.append(auc)

    print("mean auc:", np.mean(aucs))
def test_med_2_vec_predict(self):
    """Run ``visit2visit`` on the labeled sequences in the MIMIC demo file.

    Reads ``test_data/mimic_demo.seq``, wraps the result in a 15-visit
    observation generator and passes it to ``visit2visit.visit2visit``
    with ``transform=None``. The test makes no assertions.
    """
    n_visits = 15

    labeled = rnnmed.data.io.read_labeled_observations(
        "test_data/mimic_demo.seq")
    generator = ob.time_observation_generator(labeled, n_visits=n_visits)

    # Print floats in fixed-point rather than scientific notation.
    np.set_printoptions(suppress=True)

    model_args = {
        "n_labels": labeled.n_labels,
        "n_features": labeled.n_features,
        "n_timesteps": n_visits,
        "transform": None,
    }
    visit2visit.visit2visit(generator, **model_args)