def gain(data_x, gain_parameters):
    '''Impute missing values in data_x using the GAIN algorithm.

    Args:
      - data_x: original data with missing values (NaN marks a missing entry)
      - gain_parameters: GAIN network parameters:
        - batch_size: Batch size
        - hint_rate: Hint rate
        - alpha: Hyperparameter weighting the reconstruction (MSE) loss
        - iterations: Training iterations

    Returns:
      - imputed_data: imputed data (observed entries preserved, missing
        entries filled by the trained generator)
    '''
    # Define mask matrix: 1 where observed, 0 where missing
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    # BUG FIX: the second positional argument of np.nan_to_num is `copy`,
    # not the NaN replacement value. The original `np.nan_to_num(norm_data, 0)`
    # set copy=False (mutating norm_data in place) and only replaced NaN with 0
    # by virtue of the default. Pass nan=0.0 explicitly.
    norm_data_x = np.nan_to_num(norm_data, nan=0.0)

    ## GAIN architecture
    # Run in TF1 graph/session mode
    tf.disable_v2_behavior()
    # Input placeholders
    # Data vector
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    def generator(x, m):
        """Generator: maps (data, mask) to imputation proposals in [0, 1]."""
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    def discriminator(x, h):
        """Discriminator: maps (data, hint) to per-component observed probability."""
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)
    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)
    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
                                  + (1 - M) * tf.log(1. - D_prob + 1e-8))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))
    # Reconstruction loss computed on observed components only
    MSE_loss = tf.reduce_mean((M * X - M * G_sample) ** 2) / tf.reduce_mean(M)
    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    # FIX: use a context manager so the session is always closed (the original
    # leaked it).
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Start Iterations
        for it in tqdm(range(iterations)):
            # Sample batch
            batch_idx = sample_batch_index(no, batch_size)
            X_mb = norm_data_x[batch_idx, :]
            M_mb = data_m[batch_idx, :]
            # Sample random vectors
            Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
            # Sample hint vectors
            H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
            H_mb = M_mb * H_mb_temp
            # Combine random vectors with observed vectors
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

            _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                      feed_dict={M: M_mb, X: X_mb, H: H_mb})
            _, G_loss_curr, MSE_loss_curr = sess.run(
                [G_solver, G_loss_temp, MSE_loss],
                feed_dict={X: X_mb, M: M_mb, H: H_mb})

        ## Return imputed data
        Z_mb = uniform_sampler(0, 0.01, no, dim)
        M_mb = data_m
        X_mb = norm_data_x
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]

    # Keep observed values; use the generator output only for missing entries
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
# Script preamble: builds a TF1-style graph configured from command-line
# arguments. Expected usage:
#   script.py <model_name> <input_count> <value_output_count>
#             <action_output_count> <path_to_store> <seed>
import os
import sys

import tensorflow._api.v2.compat.v1 as tf

# Run TensorFlow 2.x in TF1 compatibility (graph/session) mode.
tf.disable_v2_behavior()

# Positional command-line arguments (no validation: a missing argument raises
# IndexError, a non-numeric one raises ValueError).
model_name = sys.argv[1]
input_count = int(sys.argv[2])
value_output_count = int(sys.argv[3])
action_output_count = int(sys.argv[4])
path_to_store = sys.argv[5]
seed = int(sys.argv[6])

# NOTE(review): message lacks a space before the seed value, so it prints
# e.g. "...SEED42"; left as-is in this comment-only pass.
print("INITIALIZING TF MODEL WITH SEED" + str(seed))

# Network size constants
hidden_count_1 = 128  # units in the first hidden layer
Q_output_count = value_output_count
# Total outputs = value (Q) outputs followed by action outputs.
output_count = Q_output_count + action_output_count

# Fresh default graph seeded for reproducible weight initialization.
tf.reset_default_graph()
tf.random.set_random_seed(seed)

# Short aliases, presumably used when assembling layers later in the file.
Relu = tf.nn.relu
Tanh = tf.nn.tanh
BatchNormalization = tf.layers.batch_normalization
def main(trainModel=True, buildConfusionMatrix=True, restore=False, buildClassifiedMatrix=True):
    """Build, optionally train, and evaluate the convolutional classifier.

    Args:
      trainModel: run the training loop via the module-level `train` helper.
      buildConfusionMatrix: evaluate on the test set and draw a confusion matrix.
      restore: restore previously saved weights from SAVE_PATH before anything else.
      buildClassifiedMatrix: for every true class, record the test image with the
        highest predicted probability for each class and draw the result.
    """
    tf.disable_v2_behavior()

    # Placeholders: raw 28x28 images and one-hot ground-truth labels.
    input_images = tf.placeholder(tf.float32, [None, 28, 28], name="Input")
    real = tf.placeholder(tf.float32, [None, CLASSES], name="real_classes")

    # Two convolutional stages, then flatten to a feature vector.
    layer1 = create_conv_layer(tf.reshape(input_images, [-1, 28, 28, 1]), 1, 28,
                               [5, 5], [2, 2], name="conv_no_pool")
    layer2 = create_conv_layer(layer1, 28, 56, [5, 5], [2, 2], name='conv_with_pool')
    conv_result = tf.reshape(layer2, [-1, 7 * 7 * 56])

    # Fully connected ReLU layer with dropout.
    relu_layer_weight = tf.Variable(
        tf.truncated_normal([7 * 7 * 56, 1000], stddev=STDDEV * 2),
        name='relu_layer_weight')
    # NOTE: the TF variable name string keeps the original 'rely_layer_bias'
    # typo on purpose, so checkpoints saved under SAVE_PATH still restore;
    # only the Python identifier is fixed.
    relu_layer_bias = tf.Variable(
        tf.truncated_normal([1000], stddev=STDDEV / 2),
        name='rely_layer_bias')
    relu_layer = tf.matmul(conv_result, relu_layer_weight) + relu_layer_bias
    relu_layer = tf.nn.relu(relu_layer)
    relu_layer = tf.nn.dropout(relu_layer, DROPOUT)

    # Final affine layer -> softmax over CLASSES.
    final_layer_weight = tf.Variable(
        tf.truncated_normal([1000, CLASSES], stddev=STDDEV * 2),
        name='final_layer_weight')
    final_layer_bias = tf.Variable(
        tf.truncated_normal([CLASSES], stddev=STDDEV / 2),
        name='final_layer_bias')
    final_layer = tf.matmul(relu_layer, final_layer_weight) + final_layer_bias
    predicts = tf.nn.softmax(final_layer)

    # Clip probabilities away from 0/1 so the logs in the loss stay finite.
    predicts_for_log = tf.clip_by_value(predicts, 1e-9, 0.999999999)

    # Element-wise binary cross-entropy summed over classes, averaged over batch.
    loss = -tf.reduce_mean(
        tf.reduce_sum(real * tf.log(predicts_for_log)
                      + (1 - real) * tf.log(1 - predicts_for_log), axis=1),
        axis=0)

    optimiser = tf.train.GradientDescentOptimizer(
        learning_rate=LEARNING_RATE).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(real, axis=1), tf.argmax(predicts, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    confusion_matrix = tf.confusion_matrix(labels=tf.argmax(real, axis=1),
                                           predictions=tf.argmax(predicts, axis=1),
                                           num_classes=CLASSES)

    saver = tf.train.Saver()

    # dataset = get_mnist_dataset()
    dataset = get_fashion_dataset()

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        if restore:
            saver.restore(session, SAVE_PATH)

        if trainModel:
            train(input_images, real, session, optimiser, loss, accuracy, saver, dataset)

        if buildConfusionMatrix:
            test_cm = session.run(confusion_matrix,
                                  feed_dict={
                                      input_images: dataset.test_x,
                                      real: dataset.test_y
                                  })
            draw_confusion_matrix(test_cm)

        if buildClassifiedMatrix:
            all_probs = session.run(predicts,
                                    feed_dict={
                                        input_images: dataset.test_x,
                                        real: dataset.test_y
                                    })
            # max_failure_picture_index[true][j] holds (test image index, prob)
            # for the highest probability of class j among images of true class.
            max_failure_picture_index = [[(-1, -1.0)] * CLASSES for _ in range(CLASSES)]
            for i in range(len(all_probs)):
                # BUG FIX: the original rebound `real` here, shadowing the
                # label placeholder defined above; use a distinct local name.
                true_class = np.argmax(dataset.test_y[i])
                for j in range(CLASSES):
                    if max_failure_picture_index[true_class][j][1] < all_probs[i][j]:
                        max_failure_picture_index[true_class][j] = (i, all_probs[i][j])
            draw_max_failure_pictures(dataset.test_x, max_failure_picture_index)