def main():
    # input_data, model, and PARAMS are assumed to be imported/defined at
    # module level in the surrounding script.

    # Import and preprocess data
    weather_data = input_data.import_weather_data()
    train, validation = input_data.preprocess_data(weather_data)
    input_data.write_to_csv(train, validation, PARAMS)

    # Instantiate estimator
    estimator = tf.estimator.Estimator(model_fn=model.model_fn, params=PARAMS)

    # Define train and eval input functions
    train_input_fn = lambda: input_data.input_fn(
        PARAMS['train_file_pattern'], PARAMS, is_train=True)
    eval_input_fn = lambda: input_data.input_fn(
        PARAMS['validation_file_pattern'], PARAMS, is_train=False)

    # Create train_spec and eval_spec
    train_spec = tf.estimator.TrainSpec(train_input_fn)
    eval_spec = tf.estimator.EvalSpec(eval_input_fn)

    # Run training and evaluation
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    # Generate inferences from estimator
    train_preds = model.get_inferences(estimator, train_input_fn)
    validation_preds = model.get_inferences(estimator, eval_input_fn)

    # Compare the single-layer RNN to a baseline using MAE and R squared
    model.print_baseline_metrics(PARAMS)
    model.print_model_metrics(PARAMS, train_preds, validation_preds)
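# The helpers referenced above (input_data.input_fn in particular) are not
# shown in this file. Below is a minimal, hypothetical sketch of a
# tf.data-based input_fn compatible with the Estimator call; the CSV layout,
# the feature name 'temperature', and the PARAMS['batch_size'] key are
# assumptions, not the actual input_data implementation.
import tensorflow as tf


def input_fn(file_pattern, params, is_train):
    # Assumed CSV layout: one numeric feature column and one numeric label.
    def parse_line(line):
        feature, label = tf.decode_csv(line, record_defaults=[[0.0], [0.0]])
        return {'temperature': feature}, label

    files = tf.data.Dataset.list_files(file_pattern)
    dataset = files.flat_map(tf.data.TextLineDataset).map(parse_line)
    if is_train:
        # Shuffle and repeat indefinitely for training; evaluation runs
        # a single pass over the data.
        dataset = dataset.shuffle(buffer_size=10000).repeat()
    return dataset.batch(params['batch_size'])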
# Assumed module-level imports for the T-GCN training script below.
import os
import math
from time import time

import numpy as np
import numpy.linalg as la
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error


def main():
    # Hyperparameters (data_name, model_name, lr, batch_size, gru_units,
    # seq_len, pre_len, train_rate, training_epoch) and the helpers
    # (load_sz_data, load_los_data, preprocess_data, tgcnCell) are assumed
    # to be defined elsewhere in the script, e.g. via tf.app.flags.

    ###### load data ######
    if data_name == 'sz':
        data, adj = load_sz_data('sz')
    if data_name == 'los':
        data, adj = load_los_data('los')

    time_len = data.shape[0]
    num_nodes = data.shape[1]
    data1 = np.mat(data, dtype=np.float32)

    #### normalization
    max_value = np.max(data1)
    data1 = data1 / max_value
    trainX, trainY, testX, testY = preprocess_data(data1, time_len, train_rate,
                                                   seq_len, pre_len)

    totalbatch = int(trainX.shape[0] / batch_size)
    training_data_count = len(trainX)

    def TGCN(_X, _weights, _biases):
        ### Stack one T-GCN cell and unroll it over the seq_len time steps.
        cell_1 = tgcnCell(gru_units, adj, num_nodes=num_nodes)
        cell = tf.nn.rnn_cell.MultiRNNCell([cell_1], state_is_tuple=True)
        _X = tf.unstack(_X, axis=1)
        outputs, states = tf.nn.static_rnn(cell, _X, dtype=tf.float32)
        m = []
        for i in outputs:
            o = tf.reshape(i, shape=[-1, num_nodes, gru_units])
            o = tf.reshape(o, shape=[-1, gru_units])
            m.append(o)
        # Project the last hidden state to pre_len outputs per node.
        last_output = m[-1]
        output = tf.matmul(last_output, _weights['out']) + _biases['out']
        output = tf.reshape(output, shape=[-1, num_nodes, pre_len])
        output = tf.transpose(output, perm=[0, 2, 1])
        output = tf.reshape(output, shape=[-1, num_nodes])
        return output, m, states

    ###### placeholders ######
    inputs = tf.placeholder(tf.float32, shape=[None, seq_len, num_nodes])
    labels = tf.placeholder(tf.float32, shape=[None, pre_len, num_nodes])

    # Graph weights
    weights = {
        'out': tf.Variable(tf.random_normal([gru_units, pre_len], mean=1.0),
                           name='weight_o')
    }
    biases = {'out': tf.Variable(tf.random_normal([pre_len]), name='bias_o')}

    if model_name == 'tgcn':
        pred, ttts, ttto = TGCN(inputs, weights, biases)

    y_pred = pred

    ###### optimizer ######
    lambda_loss = 0.0015
    # L2 weight decay over all trainable variables.
    Lreg = lambda_loss * sum(
        tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
    label = tf.reshape(labels, [-1, num_nodes])
    ## loss
    loss = tf.reduce_mean(tf.nn.l2_loss(y_pred - label) + Lreg)
    ## rmse
    error = tf.sqrt(tf.reduce_mean(tf.square(y_pred - label)))
    optimizer = tf.train.AdamOptimizer(lr).minimize(loss)

    ###### Initialize session ######
    variables = tf.global_variables()
    saver = tf.train.Saver(tf.global_variables())
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    sess.run(tf.global_variables_initializer())

    out = 'out/%s' % (model_name)
    path1 = '%s_%s_lr%r_batch%r_unit%r_seq%r_pre%r_epoch%r' % (
        model_name, data_name, lr, batch_size, gru_units, seq_len, pre_len,
        training_epoch)
    path = os.path.join(out, path1)
    if not os.path.exists(path):
        os.makedirs(path)

    ###### evaluation ######
    def evaluation(a, b):
        rmse = math.sqrt(mean_squared_error(a, b))
        mae = mean_absolute_error(a, b)
        F_norm = la.norm(a - b, 'fro') / la.norm(a, 'fro')
        r2 = 1 - ((a - b)**2).sum() / ((a - a.mean())**2).sum()
        var = 1 - (np.var(a - b)) / np.var(a)
        return rmse, mae, 1 - F_norm, r2, var

    x_axe, batch_loss, batch_rmse, batch_pred = [], [], [], []
    test_loss, test_rmse, test_mae, test_acc, test_r2, test_var, test_pred = \
        [], [], [], [], [], [], []

    time_start = time()  # reference point for the elapsed-time report below
    for epoch in range(training_epoch):
        for m in range(totalbatch):
            mini_batch = trainX[m * batch_size:(m + 1) * batch_size]
            mini_label = trainY[m * batch_size:(m + 1) * batch_size]
            _, loss1, rmse1, train_output = sess.run(
                [optimizer, loss, error, y_pred],
                feed_dict={inputs: mini_batch, labels: mini_label})
            batch_loss.append(loss1)
            batch_rmse.append(rmse1 * max_value)

        # Test completely at every epoch
        loss2, rmse2, test_output = sess.run(
            [loss, error, y_pred],
            feed_dict={inputs: testX, labels: testY})
        test_label = np.reshape(testY, [-1, num_nodes])
        rmse, mae, acc, r2_score, var_score = evaluation(test_label,
                                                         test_output)
        test_label1 = test_label * max_value
        test_output1 = test_output * max_value
        test_loss.append(loss2)
        test_rmse.append(rmse * max_value)
        test_mae.append(mae * max_value)
        test_acc.append(acc)
        test_r2.append(r2_score)
        test_var.append(var_score)
        test_pred.append(test_output1)

        print('Iter:{}'.format(epoch),
              'train_rmse:{:.4}'.format(batch_rmse[-1]),
              'test_loss:{:.4}'.format(loss2),
              'test_rmse:{:.4}'.format(rmse),
              'test_acc:{:.4}'.format(acc))

        if epoch % 500 == 0:
            saver.save(sess, path + '/model_100/TGCN_pre_%r' % epoch,
                       global_step=epoch)

    time_end = time()
    print(time_end - time_start, 's')

    ############## visualization ###############
    # Average the per-batch curves into per-epoch curves.
    b = int(len(batch_rmse) / totalbatch)
    batch_rmse1 = [i for i in batch_rmse]
    train_rmse = [
        (sum(batch_rmse1[i * totalbatch:(i + 1) * totalbatch]) / totalbatch)
        for i in range(b)
    ]
    batch_loss1 = [i for i in batch_loss]
    train_loss = [
        (sum(batch_loss1[i * totalbatch:(i + 1) * totalbatch]) / totalbatch)
        for i in range(b)
    ]

    # Keep the predictions from the epoch with the lowest test RMSE.
    index = test_rmse.index(np.min(test_rmse))
    test_result = test_pred[index]
    var = pd.DataFrame(test_result)
    var.to_csv(path + '/test_result.csv', index=False, header=False)
    #plot_result(test_result, test_label1, path)
    #plot_error(train_rmse, train_loss, test_rmse, test_acc, test_mae, path)

    print('min_rmse:%r' % np.min(test_rmse),
          'min_mae:%r' % test_mae[index],
          'max_acc:%r' % test_acc[index],
          'r2:%r' % test_r2[index],
          'var:%r' % test_var[index])
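# preprocess_data is called above but not defined in this snippet. The sketch
# below shows the sliding-window train/test split it is assumed to perform
# (seq_len input steps paired with pre_len target steps per sample); the real
# helper may differ in details such as window stride or boundary handling.
import numpy as np


def preprocess_data(data, time_len, train_rate, seq_len, pre_len):
    # Chronological split: first train_rate of the series is training data.
    train_size = int(time_len * train_rate)
    train_data, test_data = data[:train_size], data[train_size:]

    def make_windows(series):
        X, Y = [], []
        for i in range(len(series) - seq_len - pre_len):
            window = series[i:i + seq_len + pre_len]
            X.append(window[:seq_len])   # seq_len input steps
            Y.append(window[seq_len:])   # pre_len target steps
        return np.array(X), np.array(Y)

    trainX, trainY = make_windows(train_data)
    testX, testY = make_windows(test_data)
    return trainX, trainY, testX, testY


# Toy usage: 100 time steps, 5 nodes.
toy = np.random.rand(100, 5).astype(np.float32)
trX, trY, teX, teY = preprocess_data(toy, 100, 0.8, seq_len=12, pre_len=3)
print(trX.shape, trY.shape)  # -> (65, 12, 5) (65, 3, 5)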
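# To make the metric definitions concrete, here is the same evaluation helper
# exercised in isolation on toy arrays: acc is 1 minus the relative Frobenius
# error, r2 is the coefficient of determination, and var is explained
# variance. The printed numbers are synthetic, not model results.
import math

import numpy as np
import numpy.linalg as la
from sklearn.metrics import mean_squared_error, mean_absolute_error


def evaluation(a, b):
    rmse = math.sqrt(mean_squared_error(a, b))
    mae = mean_absolute_error(a, b)
    F_norm = la.norm(a - b, 'fro') / la.norm(a, 'fro')
    r2 = 1 - ((a - b) ** 2).sum() / ((a - a.mean()) ** 2).sum()
    var = 1 - np.var(a - b) / np.var(a)
    return rmse, mae, 1 - F_norm, r2, var


truth = np.random.rand(4, 3)
pred = truth + 0.01 * np.random.randn(4, 3)
rmse, mae, acc, r2, var = evaluation(truth, pred)
print('rmse=%.4f mae=%.4f acc=%.4f r2=%.4f var=%.4f'
      % (rmse, mae, acc, r2, var))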
# Variant of the data-loading section used for the perturbation analysis
# (the noise injection is left commented out, as in the original).
data, adj = load_los_data('los')
time_len = data.shape[0]
num_nodes = data.shape[1]
data1 = np.mat(data, dtype=np.float32)

### Perturbation Analysis
#noise = np.random.normal(0, 0.2, size=data.shape)
#noise = np.random.poisson(16, size=data.shape)
#scaler = MinMaxScaler()
#scaler.fit(noise)
#noise = scaler.transform(noise)
#data1 = data1 + noise

#### normalization
max_value = np.max(data1)
data1 = data1 / max_value
trainX, trainY, testX, testY = preprocess_data(data1, time_len, train_rate,
                                               seq_len, pre_len)
#trainX, trainY, testX, testY = preprocess_average_data(data1, time_len, train_rate, seq_len, pre_len)
totalbatch = int(trainX.shape[0] / batch_size)
training_data_count = len(trainX)


def TGCN(_X, weights, biases):
    ### One T-GCN cell unrolled over the seq_len input steps.
    cell_1 = tgcnCell(gru_units, adj, num_nodes=num_nodes)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell_1], state_is_tuple=True)
    _X = tf.unstack(_X, axis=1)
    outputs, states = tf.nn.static_rnn(cell, _X, dtype=tf.float32)
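# A runnable sketch of the perturbation step that is commented out above:
# draw Gaussian (or Poisson) noise, squash it into [0, 1] with MinMaxScaler,
# and add it to the signal before normalization. The toy matrix stands in for
# the real speed data; the noise parameters mirror the comments above.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

data = np.random.rand(100, 5).astype(np.float32)  # stand-in speed matrix

noise = np.random.normal(0, 0.2, size=data.shape)
# Alternative from the comments: noise = np.random.poisson(16, size=data.shape)
noise = MinMaxScaler().fit_transform(noise)  # rescale noise per column to [0, 1]
perturbed = data + noise

# Normalization then proceeds exactly as in the unperturbed pipeline.
max_value = np.max(perturbed)
perturbed = perturbed / max_value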