def gain_test(data_test, sess, G_sample, X, M):
    data_m_test = 1 - np.isnan(data_test)
    no_test, dim_test = data_test.shape
    norm_data_t, norm_parameters_test = normalization(data_test)
    norm_data_test = np.nan_to_num(norm_data_t, 0)

    # Prepare data format
    Z_mb_test = uniform_sampler(0, 0.01, no_test, dim_test)
    M_mb_test = data_m_test
    X_mb_test = norm_data_test
    X_mb_test = M_mb_test * X_mb_test + (1 - M_mb_test) * Z_mb_test

    # Impute test data
    imputed_data_test = sess.run([G_sample],
                                 feed_dict={X: X_mb_test, M: M_mb_test})[0]
    imputed_data_test = data_m_test * norm_data_test + (
        1 - data_m_test) * imputed_data_test

    # Renormalization
    imputed_data_test = renormalization(imputed_data_test, norm_parameters_test)

    # Rounding
    imputed_data_test = rounding(imputed_data_test, data_test)

    return imputed_data_test
def test(data_m, data_x, dim, generator, no, norm_data_x, norm_parameters,
         ori_data_x, test_index):
    # Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = generator(torch.Tensor(X_mb),
                             torch.Tensor(M_mb)).detach().numpy()
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    rmse, rmse_mean = rmse_loss(ori_data_x[test_index], imputed_data[test_index],
                                data_m[test_index])
    rmse_full, rmse_full_mean = rmse_loss(ori_data_x, imputed_data, data_m)

    print(f'RMSE Performance (mean): {rmse_mean:.4f} (test), '
          f'{rmse_full_mean:.4f} (full).')
    # print(f'RMSE Performance: {rmse:.4f} (test), {rmse_full:.4f} (full).')

    return rmse
def gain(data_x, feature_name, onehotencoder, ori_data_dim, gain_parameters):
    '''Impute missing values in data_x

    Args:
      - data_x: original data with missing values
      - feature_name: feature name list of the original data
      - onehotencoder: one-hot encoder fitted on this data
      - ori_data_dim: dimensions of the original data
      - gain_parameters: GAIN network parameters:
        - data_name: the file name of the dataset
        - batch_size: Batch size
        - hint_rate: Hint rate
        - alpha: Hyperparameter
        - iterations: Iterations
        - onehot: the number of features for the one-hot encoder (starting from the first feature)
        - predict: option for prediction mode

    Returns:
      - imputed_data: imputed data
    '''
    # Define mask matrix
    data_m = 1 - np.isnan(data_x)

    # System parameters
    data_name = gain_parameters['data_name']
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']
    onehot = gain_parameters['onehot']
    predict = gain_parameters['predict']

    # Model path
    model_path = 'model/' + data_name

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    # Input placeholders
    # Data vector
    X = tf.placeholder(tf.float32, shape=[None, dim], name='X')
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim], name='M')
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim], name='H')

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]), name='D_W1')  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]), name='D_b1')
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]), name='D_W2')
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]), name='D_b2')
    D_W3 = tf.Variable(xavier_init([h_dim, dim]), name='D_W3')
    D_b3 = tf.Variable(tf.zeros(shape=[dim]), name='D_b3')  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]), name='G_W1')
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]), name='G_b1')
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]), name='G_W2')
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]), name='G_b2')
    G_W3 = tf.Variable(xavier_init([h_dim, dim]), name='G_W3')
    G_b3 = tf.Variable(tf.zeros(shape=[dim]), name='G_b3')
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)

    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)

    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
                                  + (1 - M) * tf.log(1. - D_prob + 1e-8))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))
    MSE_loss = \
        tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)

    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    sess = tf.Session()
    saver = tf.train.Saver()

    if predict is True and os.path.exists(model_path + '.ckpt.meta'):
        print("Model Restore")
        saver.restore(sess, model_path + '.ckpt')
    else:
        sess.run(tf.global_variables_initializer())

        # Start Iterations
        for it in tqdm(range(iterations)):
            # Sample batch
            batch_idx = sample_batch_index(no, batch_size)
            X_mb = norm_data_x[batch_idx, :]
            M_mb = data_m[batch_idx, :]
            # Sample random vectors
            Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
            # Sample hint vectors
            H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
            H_mb = M_mb * H_mb_temp
            # Combine random vectors with observed vectors
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

            _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                      feed_dict={M: M_mb, X: X_mb, H: H_mb})
            _, G_loss_curr, MSE_loss_curr = \
                sess.run([G_solver, G_loss_temp, MSE_loss],
                         feed_dict={X: X_mb, M: M_mb, H: H_mb})

        if predict is False:
            save_path = saver.save(sess, model_path + '.ckpt')

    ## Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    # Reverse encoding
    if onehot > 0:
        imputed_data = reverse_encoding(imputed_data, feature_name, onehotencoder,
                                        onehot, ori_data_dim)

    return imputed_data
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
      - data_x: original data with missing values
      - gain_parameters: GAIN network parameters:
        - batch_size: Batch size
        - hint_rate: Hint rate
        - alpha: Hyperparameter
        - iterations: Iterations

    Returns:
      - imputed_data: imputed data
    '''
    # Define mask matrix
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    # Input placeholders
    # Data vector
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)

    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)

    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
                                  + (1 - M) * tf.log(1. - D_prob + 1e-8))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))
    MSE_loss = \
        tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)

    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Start Iterations
    for it in tqdm(range(iterations)):
        # Sample batch
        batch_idx = sample_batch_index(no, batch_size)
        X_mb = norm_data_x[batch_idx, :]
        M_mb = data_m[batch_idx, :]
        # Sample random vectors
        Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
        # Sample hint vectors
        H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
        H_mb = M_mb * H_mb_temp
        # Combine random vectors with observed vectors
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                  feed_dict={M: M_mb, X: X_mb, H: H_mb})
        _, G_loss_curr, MSE_loss_curr = \
            sess.run([G_solver, G_loss_temp, MSE_loss],
                     feed_dict={X: X_mb, M: M_mb, H: H_mb})

    ## Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
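# --- Hedged usage sketch for the TF1 GAIN function above ---
# A minimal, illustrative driver, not part of the original code. The CSV path,
# the 20% missingness rate, and the hyperparameter values are assumptions;
# binary_sampler is taken to be the same helper used above for hint sampling,
# with 1 meaning "observed".
#
# data = np.loadtxt('data/spam.csv', delimiter=',', skiprows=1)  # assumed path
# miss_mask = binary_sampler(0.8, data.shape[0], data.shape[1])  # keep ~80% observed
# data_x = data.copy()
# data_x[miss_mask == 0] = np.nan
#
# gain_parameters = {'batch_size': 128,
#                    'hint_rate': 0.9,
#                    'alpha': 100,
#                    'iterations': 10000}
# imputed = gain(data_x, gain_parameters)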
def PC_GAIN(incomplete_data_x, gain_parameters, data_m):
    '''Impute missing values in incomplete_data_x

    Args:
      - incomplete_data_x: original data with missing values
      - gain_parameters: PC_GAIN network parameters:
        - batch_size: Batch size, 64
        - hint_rate: Hint rate, 0.9
        - alpha: Hyperparameter, 200
        - beta: Hyperparameter, 20
        - lambda_: Hyperparameter, 0.2
        - k: Hyperparameter, 4
        - iterations: Iterations, 10000
        - cluster_species: clustering method ('KM', 'SC', 'AC', or 'KMPP')

    Returns:
      - imputed_data: imputed data
    '''
    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    beta = gain_parameters['beta']
    lambda_ = gain_parameters['lambda_']
    k = gain_parameters['k']
    iterations = gain_parameters['iterations']
    cluster_species = gain_parameters['cluster_species']

    # Other parameters
    no, dim = incomplete_data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(incomplete_data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## PC_GAIN architecture
    X = tf.placeholder(tf.float32, shape=[None, dim])
    M = tf.placeholder(tf.float32, shape=[None, dim])
    H = tf.placeholder(tf.float32, shape=[None, dim])
    Z = tf.placeholder(tf.float32, shape=[None, dim])
    Y = tf.placeholder(tf.float32, shape=[None, k])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    # Classifier variables
    C_W1 = tf.Variable(xavier_init([dim, h_dim]))
    C_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    C_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    C_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    C_W3 = tf.Variable(xavier_init([h_dim, k]))
    C_b3 = tf.Variable(tf.zeros(shape=[k]))
    theta_C = [C_W1, C_b1, C_W2, C_b2, C_W3, C_b3]

    ## PC_GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob, D_logit

    # Classifier (the neural network classifier mentioned in the paper)
    def classer(feature):
        C_h1 = tf.nn.relu(tf.matmul(feature, C_W1) + C_b1)
        C_h2 = tf.nn.relu(tf.matmul(C_h1, C_W2) + C_b2)
        C_h3 = tf.matmul(C_h2, C_W3) + C_b3
        C_prob = tf.nn.softmax(C_h3)
        return C_prob

    ## PC_GAIN structure
    # Generator
    G_sample = generator(X, M)

    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)

    # Discriminator
    D_prob, D_logit = discriminator(Hat_X, H)

    ## PC_GAIN loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
                                  + (1 - M) * tf.log(1. - D_prob + 1e-8))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))
    G_loss_with_C = -tf.reduce_mean(Y * tf.log(Y + 1e-8))
    MSE_loss = tf.reduce_mean((M * X - M * G_sample) *
                              (M * X - M * G_sample)) / tf.reduce_mean(M)

    D_loss = D_loss_temp
    G_loss_pre = G_loss_temp + alpha * MSE_loss
    G_loss = G_loss_temp + alpha * MSE_loss + beta * G_loss_with_C

    ## PC_GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver_pre = tf.train.AdamOptimizer().minimize(G_loss_pre, var_list=theta_G)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        ## Select pre-training data (samples with the lowest missing rates)
        loss_rate = []
        for i in range(no):
            index = 0
            for j in range(dim):
                if data_m[i, j] == 0:
                    index = index + 1
            loss_rate.append([index, i])
        loss_rate = sorted(loss_rate, key=(lambda x: x[0]))
        no_x_L = int(no * lambda_)
        index_x_L = []
        for i in range(no_x_L):
            index_x_L.append(loss_rate[i][1])
        norm_data_x_L = norm_data_x[index_x_L, :]
        data_m_L = data_m[index_x_L, :]

        ## Pre-training
        print('...Pre-training')
        for it in tqdm(range(int(iterations * 0.7))):
            batch_idx = sample_batch_index(no_x_L, batch_size)
            X_mb = norm_data_x_L[batch_idx, :]
            M_mb = data_m_L[batch_idx, :]
            Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
            H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
            H_mb = M_mb * H_mb_temp
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
            _, D_loss_curr, D_logit_curr, D_prob_curr = sess.run(
                [D_solver, D_loss_temp, D_logit, D_prob],
                feed_dict={M: M_mb, X: X_mb, H: H_mb})
            _, G_loss_curr, MSE_loss_curr = sess.run(
                [G_solver_pre, G_loss_temp, MSE_loss],
                feed_dict={X: X_mb, M: M_mb, H: H_mb})

        Z_mb = uniform_sampler(0, 0.01, no_x_L, dim)
        M_mb = data_m_L
        X_mb = norm_data_x_L
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
        imputed_data_L = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
        imputed_data_L = data_m_L * norm_data_x_L + (1 - data_m_L) * imputed_data_L

        ## Select the clustering method
        if cluster_species == 'KM':
            data_c, data_class = KM(imputed_data_L, k)
        elif cluster_species == 'SC':
            data_c, data_class = SC(imputed_data_L, k)
        elif cluster_species == 'AC':
            data_c, data_class = AC(imputed_data_L, k)
        elif cluster_species == 'KMPP':
            data_c, data_class = KMPP(imputed_data_L, k)
        else:
            exit('unknown clustering method')

        ## Train a multi-class SVM on the pseudo labels.
        ## You can also choose other classifiers,
        ## such as the neural network classifier mentioned in the paper.
        coder = preprocessing.OneHotEncoder()
        model = svm.SVC(kernel="linear", decision_function_shape="ovo")
        coder.fit(data_class.reshape(-1, 1))
        model.fit(imputed_data_L, data_class)

        ## Update the generator G and the discriminator D.
        ## To avoid the effects of pre-training,
        ## you can also choose to reinitialize the generator parameters.
        for it in tqdm(range(iterations)):
            batch_idx = sample_batch_index(no, batch_size)
            X_mb = norm_data_x[batch_idx, :]
            M_mb = data_m[batch_idx, :]
            Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
            H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
            H_mb = M_mb * H_mb_temp
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
            _, D_loss_curr, D_logit_curr, D_prob_curr = sess.run(
                [D_solver, D_loss_temp, D_logit, D_prob],
                feed_dict={M: M_mb, X: X_mb, H: H_mb})
            ## Introduce pseudo-label supervision
            Hat_X_curr = sess.run(Hat_X, feed_dict={X: X_mb, M: M_mb, H: H_mb})
            y_pred = model.predict(Hat_X_curr)
            sample_prob = coder.transform(y_pred.reshape(-1, 1)).toarray()
            _, G_loss_curr, MSE_loss_curr, G_loss_with_C_curr = sess.run(
                [G_solver, G_loss_temp, MSE_loss, G_loss_with_C],
                feed_dict={X: X_mb, M: M_mb, H: H_mb, Y: sample_prob})

        ## Return imputed data
        Z_mb = uniform_sampler(0, 0.01, no, dim)
        M_mb = data_m
        X_mb = norm_data_x
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
        imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
        imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data
        imputed_data = renormalization(imputed_data, norm_parameters)
        return imputed_data
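# --- Hedged usage sketch for PC_GAIN ---
# A call sketch only, not part of the original code. The hyperparameter values
# mirror those listed in the docstring above; incomplete_data_x is assumed to
# be a matrix with np.nan at the missing entries, prepared upstream.
#
# data_m = 1 - np.isnan(incomplete_data_x)        # mask: 1 = observed, 0 = missing
# pc_gain_parameters = {'batch_size': 64,
#                       'hint_rate': 0.9,
#                       'alpha': 200,
#                       'beta': 20,
#                       'lambda_': 0.2,
#                       'k': 4,
#                       'iterations': 10000,
#                       'cluster_species': 'KM'}   # or 'SC', 'AC', 'KMPP'
# imputed_data = PC_GAIN(incomplete_data_x, pc_gain_parameters, data_m)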
def egain(miss_data_x, gain_parameters):  # def Egain(miss_data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
      - miss_data_x: missing data
      - gain_parameters: GAIN network parameters:
        - batch_size: Batch size
        - alpha: Hyperparameter
        - iterations: Iterations

    Returns:
      - imputed_data: imputed data
    '''
    # Define mask matrix
    m = 1 - np.isnan(miss_data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    # hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    loss_type = ['trickLogD', 'minimax', 'ls']
    nloss = 3
    beta = 1.0
    ncandi = 1  # 1 # 3
    nbest = 1  # 1 # 3
    nD = 1  # number of discriminator updates per generator update

    # Other parameters
    no, dim = miss_data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(miss_data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    # tf.reset_default_graph()
    tf.compat.v1.get_default_graph()

    # Input placeholders
    # Data vector
    X = tf1.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf1.placeholder(tf.float32, shape=[None, dim])
    # B vector
    B = tf1.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Hint vector
    H = B * M + 0.5 * (1 - B)  # 0.5 => 0.1

    # Generator
    G_sample = generator(X, M)
    D_prob_g = discriminator(X * M + G_sample * (1 - M), H)

    # Combine with observed data
    fake_X = tf1.placeholder(tf.float32, shape=[None, dim])
    Hat_X = X * M + fake_X * (1 - M)

    # D loss
    D_prob = discriminator(Hat_X, H)
    D_loss_temp = -tf.reduce_mean(
        (M * tf1.log(D_prob + 1e-8) + (1 - M) * tf1.log(1. - D_prob + 1e-8)))
    D_loss = D_loss_temp
    # Updated parameters
    D_solver = tf1.train.AdamOptimizer(learning_rate=0.002, beta1=0.5,
                                       beta2=0.99).minimize(D_loss,
                                                            var_list=theta_D)

    # G loss
    # Update loss functions
    G_loss_logD = -tf.reduce_mean((1 - M) * 1 / 2 * tf1.log(D_prob_g + 1e-8))
    G_loss_minimax = tf.reduce_mean(
        (1 - M) * 1 / 2 * tf1.log(1. - D_prob_g + 1e-8))
    G_loss_ls = tf1.reduce_mean((1 - M) * tf1.square(D_prob_g - 1))
    MSE_loss = tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)

    G_loss_logD_all = G_loss_logD + alpha * MSE_loss
    G_loss_minimax_all = G_loss_minimax + alpha * MSE_loss
    G_loss_ls_all = G_loss_ls + alpha * MSE_loss

    # Update parameters
    G_solver_logD = tf1.train.AdamOptimizer(
        learning_rate=0.002, beta1=0.5,
        beta2=0.99).minimize(G_loss_logD_all, var_list=theta_G)
    G_solver_minimax = tf1.train.AdamOptimizer(
        learning_rate=0.002, beta1=0.5,
        beta2=0.99).minimize(G_loss_minimax_all, var_list=theta_G)
    G_solver_ls = tf1.train.AdamOptimizer(
        learning_rate=0.002, beta1=0.5,
        beta2=0.99).minimize(G_loss_ls_all, var_list=theta_G)

    # Fitness function
    Fq_score = tf.reduce_mean((1 - M) * D_prob)
    Fd_score = -tf1.log(
        tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[0]))) +
        tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[1]))) +
        tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[2]))) +
        tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[3]))) +
        tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[4]))) +
        tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[5]))))

    ## Iterations
    sess = tf1.Session()

    # Start Iterations
    gen_new_params = []
    fitness_best = np.zeros(nbest)
    fitness_candi = np.zeros(ncandi)

    for it in tqdm(range(iterations)):
        # Train candidate generators
        if it == 0:
            for can_i in range(0, ncandi):
                sess.run(tf1.global_variables_initializer())
                batch_idx = sample_batch_index(no, batch_size)
                X_mb = norm_data_x[batch_idx, :]
                M_mb = m[batch_idx, :]
                Z_mb = uniform_sampler(0.0, 0.01, batch_size, dim)
                X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
                B_mb = sample_batch_binary(dim, batch_size)

                gen_samples = sess.run([G_sample],
                                       feed_dict={X: X_mb, M: M_mb})[0]
                fq_score, fd_score = sess.run([Fq_score, Fd_score],
                                              feed_dict={
                                                  X: X_mb,
                                                  M: M_mb,
                                                  fake_X: gen_samples,
                                                  B: B_mb
                                              })
                fitness = fq_score + beta * fd_score
                fitness_best[can_i] = fitness

                params = []
                for param in theta_G:
                    params.append(sess.run(param))
                gen_new_params.append(params)
            gen_best_params = copy.deepcopy(gen_new_params)
        else:
            # Generate new candidates
            gen_old_params = copy.deepcopy(gen_new_params)
            # print(gen_old_params[0][-1])
            # print(it)
            for can_i in range(ncandi):
                for type_i in range(nloss):
                    batch_idx = sample_batch_index(no, batch_size)
                    X_mb = norm_data_x[batch_idx, :]
                    M_mb = m[batch_idx, :]
                    Z_mb = uniform_sampler(0.0, 0.01, batch_size, dim)  # update 1.0 ==> 0.01
                    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
                    B_mb = sample_batch_binary(dim, batch_size)

                    # Load and update weights
                    for i in range(len(theta_G)):
                        theta_G[i].load(gen_old_params[can_i][i], sess)
                    loss = loss_type[type_i]
                    if loss == 'trickLogD':
                        sess.run([G_solver_minimax],
                                 feed_dict={X: X_mb, M: M_mb, B: B_mb})
                    elif loss == 'minimax':
                        sess.run([G_solver_logD],
                                 feed_dict={X: X_mb, M: M_mb, B: B_mb})
                    elif loss == 'ls':
                        sess.run([G_solver_ls],
                                 feed_dict={X: X_mb, M: M_mb, B: B_mb})

                    # Calculate fitness score
                    gen_samples = sess.run([G_sample],
                                           feed_dict={X: X_mb, M: M_mb})[0]
                    fq_score, fd_score = sess.run([Fq_score, Fd_score],
                                                  feed_dict={
                                                      X: X_mb,
                                                      M: M_mb,
                                                      fake_X: gen_samples,
                                                      B: B_mb
                                                  })
                    fitness = fq_score + beta * fd_score
                    # print(fitness)

                    gap = fitness_best - fitness
                    if min(gap) < 0:
                        idx_replace = np.argmin(gap)
                        params = []
                        for param in theta_G:
                            params.append(sess.run(param))
                        gen_best_params[idx_replace] = params
                        fitness_best[idx_replace] = fitness

                    if can_i * nloss + type_i < ncandi:
                        idx = can_i * nloss + type_i
                        params = []
                        for param in theta_G:
                            params.append(sess.run(param))
                        gen_new_params[idx] = params
                        fitness_candi[idx] = fitness
                    else:
                        gap = fitness_candi - fitness
                        if min(gap) < 0:
                            idx_replace = np.argmin(gap)
                            params = []
                            for param in theta_G:
                                params.append(sess.run(param))
                            gen_new_params[idx_replace] = params
                            fitness_candi[idx_replace] = fitness

        # Train D
        for i in range(nD):
            batch_idx = sample_batch_index(no, batch_size)
            X_mb = norm_data_x[batch_idx, :]
            M_mb = m[batch_idx, :]
            Z_mb = uniform_sampler(0.0, 0.01, batch_size, dim)  # 1.0 ==> 0.01
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
            B_mb = sample_batch_binary(dim, batch_size)

            # Impute data for each candidate
            for can_i in range(ncandi):
                for w in range(len(theta_G)):
                    theta_G[w].load(gen_new_params[can_i][w], sess)
                if can_i == ncandi - 1:
                    gen_samples_cani = sess.run(
                        [G_sample],
                        feed_dict={
                            X: X_mb[can_i * batch_size // ncandi:],
                            M: M_mb[can_i * batch_size // ncandi:]
                        })[0]
                else:
                    gen_samples_cani = sess.run(
                        [G_sample],
                        feed_dict={
                            X: X_mb[can_i * batch_size // ncandi:(can_i + 1) *
                                    batch_size // ncandi],
                            M: M_mb[can_i * batch_size // ncandi:(can_i + 1) *
                                    batch_size // ncandi]
                        })[0]
                # print(gen_samples_cani.shape)
                if can_i == 0:
                    gen_samples = gen_samples_cani
                else:
                    gen_samples = np.append(gen_samples, gen_samples_cani, axis=0)

            sess.run([D_solver],
                     feed_dict={X: X_mb, M: M_mb, fake_X: gen_samples, B: B_mb})

    ## Return imputed data
    idx = np.argmax(fitness_best)
    # print(idx)
    for i in range(len(theta_G)):
        theta_G[i].load(gen_best_params[idx][i], sess)

    Z_mb = uniform_sampler(0.0, 0.01, no, dim)
    M_mb = m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
    sess.close()
    imputed_data = m * norm_data_x + (1 - m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, miss_data_x)

    return imputed_data
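# --- Hedged usage sketch for the evolutionary egain variant above ---
# egain cycles the generator through three adversarial losses
# ('trickLogD', 'minimax', 'ls') and keeps the fittest candidate, so only the
# keys read at the top of the function are required. The values below are
# illustrative assumptions; miss_data_x is assumed to hold np.nan at missing
# positions.
#
# egain_parameters = {'batch_size': 128,
#                     'alpha': 100,
#                     'iterations': 10000}
# imputed_data = egain(miss_data_x, egain_parameters)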
def train(train_directories, epoch):
    print('start')

    # Logging information
    if not os.path.exists(os.path.join(project_dir, 'logs')):
        os.makedirs(os.path.join(project_dir, 'logs'))
    summary_writer = tensorboardX.SummaryWriter('./logs/')

    # Dataloader
    dataset = Dataset(train_directories, also_valid=True)
    loaded_training_data = DataLoader(dataset=dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=train_conf['num_workers'])
    valid_dataset = dataset.clone_for_validation()
    loaded_valid_data = DataLoader(dataset=valid_dataset, batch_size=1)

    # Loading SuperFAN model
    vgg_feature = nn.Sequential(*list(vgg.features)[:-1]).cuda()
    FAN = load_FAN(train_conf['num_FAN_modules']).cuda()
    preprocess_for_FAN = upsample().cuda()
    generator = Generator().cuda()
    discriminator = Discriminator().cuda()
    MSE_loss = nn.MSELoss().cuda()

    if os.path.exists(save_path_G):
        generator.load_state_dict(torch.load(save_path_G))
        print('reading generator checkpoints...')
    if os.path.exists(save_path_D):
        discriminator.load_state_dict(torch.load(save_path_D))
        print('reading discriminator checkpoints...')
    if not os.path.exists(os.path.join(project_dir, 'validation')):
        os.makedirs(os.path.join(project_dir, 'validation'))

    # Setting learning rate decay
    D_start = train_conf['start_decay']
    learning_rate = train_conf['start_lr']
    final_lr = train_conf['final_lr']
    decay = (learning_rate - final_lr) / D_start

    print('train with MSE and perceptual loss')
    for epoch in range(epoch):
        G_optimizer = optim.RMSprop(generator.parameters(), lr=learning_rate)
        for i, data in enumerate(loaded_training_data):
            lr, gt, _ = data
            gt = gt.float()
            lr = lr.cuda()
            gt = gt.cuda()

            # Forwarding
            sr = generator(lr)

            # Initialization
            G_optimizer.zero_grad()

            # MSE and VGG (perceptual) losses
            mse_loss = MSE_loss(sr, gt)
            sr_vgg = vgg_feature(sr)
            gt_vgg = vgg_feature(gt)
            vgg_loss = MSE_loss(sr_vgg, gt_vgg) * train_conf['lambda_vgg']

            # FAN (face-alignment heatmap) loss
            sr_FAN = FAN(preprocess_for_FAN(sr))[-1]
            gt_FAN = FAN(preprocess_for_FAN(gt))[-1].detach()
            FAN_loss = MSE_loss(sr_FAN, gt_FAN)

            g_loss = mse_loss + vgg_loss + FAN_loss
            if epoch >= D_start:
                fake_logit = discriminator(sr).mean()
                G_adv_loss = -fake_logit
                g_loss += G_adv_loss * train_conf['lambda_adv']
            g_loss.backward()
            G_optimizer.step()

            if i % 10 == 0:
                print("loss at %d : %d ==>\tmse: %.6f vgg: %.6f FAN: %.6f" %
                      (epoch, i, mse_loss, vgg_loss, FAN_loss))
                summary_writer.add_scalar('mse_loss',
                                          mse_loss.data.cpu().numpy(),
                                          epoch * len(loaded_training_data) + i)
                summary_writer.add_scalar('vgg_loss',
                                          vgg_loss.data.cpu().numpy(),
                                          epoch * len(loaded_training_data) + i)
                summary_writer.add_scalar('FAN_loss',
                                          FAN_loss.data.cpu().numpy(),
                                          epoch * len(loaded_training_data) + i)
                if epoch >= D_start:
                    # NOTE: d_loss is assumed to come from a discriminator
                    # update step that is not shown in this excerpt.
                    print("\t\t\t\tD_loss: %.6f G_loss: %.6f" %
                          (d_loss.data.cpu(), G_adv_loss.data.cpu()))
                    summary_writer.add_scalar('D_loss',
                                              d_loss.data.cpu().numpy(),
                                              epoch * len(loaded_training_data) + i)
                    summary_writer.add_scalar('G_loss',
                                              G_adv_loss.data.cpu().numpy(),
                                              epoch * len(loaded_training_data) + i)

        if epoch % 1 == 0:
            validation = os.path.join(project_dir, 'validation', str(epoch))
            if not os.path.isdir(validation):
                os.makedirs(validation)
            total_mse = 0
            total_ssim = 0
            total_psnr = 0
            for _, val_data in enumerate(loaded_valid_data):
                lr, gt, img_name = val_data
                sr = generator(lr)

                # Evaluate images
                mse, ssim, psnr = eval(gt.data.cpu().numpy(),
                                       sr.data.cpu().numpy())
                total_mse += mse / len(loaded_valid_data)
                total_ssim += ssim / len(loaded_valid_data)
                total_psnr += psnr / len(loaded_valid_data)

                # Save images
                sr = sr[0]
                sr = renormalization(sr)
                sr = sr.cpu().detach().numpy()
                sr = sr.transpose(1, 2, 0)
                img_name = img_name[0]
                filename = os.path.join(validation, img_name + '.png')
                cv2.imwrite(filename=filename, img=sr)

            # Save logs
            summary_writer.add_scalar('valid/mse', total_mse, epoch)
            summary_writer.add_scalar('valid/ssim', total_ssim, epoch)
            summary_writer.add_scalar('valid/psnr', total_psnr, epoch)

            # Save checkpoints
            torch.save(generator.state_dict(), save_path_G)

        # Decay learning rate after one epoch
        if epoch >= D_start:
            learning_rate = train_conf['start_lr']
        else:
            learning_rate -= decay

    # Save models
    torch.save(generator.state_dict(), save_path_G)
    torch.save(discriminator.state_dict(), save_path_D)
    print('training finished.')
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
      - data_x: original data with missing values
      - gain_parameters: GAIN network parameters:
        - batch_size: Batch size
        - hint_rate: Hint rate
        - alpha: Hyperparameter
        - iterations: Iterations

    Returns:
      - imputed_data: imputed data
    '''
    # Define mask matrix
    data_m = (1 - np.isnan(data_x)).astype(float)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    # Parameter initialization
    X = tf.convert_to_tensor(norm_data_x)
    X = tf.dtypes.cast(X, tf.float32)
    M = tf.convert_to_tensor(data_m)
    M = tf.dtypes.cast(M, tf.float32)
    X_input = tf.concat(values=[X, M], axis=1)

    ## GAIN architecture
    # Generator
    class Generator(tf.keras.Model):
        def __init__(self):
            super().__init__()
            self.flatten = layers.Flatten(input_shape=[dim * 2])
            self.dense1 = layers.Dense(h_dim, activation='relu')
            self.dense2 = layers.Dense(h_dim, activation='relu')
            self.dense_output = layers.Dense(dim, activation='sigmoid')
            return

        def call(self, inputs, training=None):
            x = self.flatten(inputs)
            x = self.dense1(x)
            x = self.dense2(x)
            x = self.dense_output(x)
            return x

    # Discriminator
    class Discriminator(tf.keras.Model):
        def __init__(self):
            super().__init__()
            self.flatten = layers.Flatten(input_shape=[dim * 2])
            self.dense1 = layers.Dense(h_dim, activation='relu')
            self.dense2 = layers.Dense(h_dim, activation='relu')
            self.dense_output = layers.Dense(dim, activation='sigmoid')
            return

        def call(self, inputs, training=None):
            x = self.flatten(inputs)
            x = self.dense1(x)
            x = self.dense2(x)
            x = self.dense_output(x)
            return x

    ## GAIN loss
    # Generator
    def generator_loss(generator, discriminator, x, m):
        generator.trainable = True
        discriminator.trainable = False
        G_input = tf.concat(values=[x, m], axis=1)
        G_sample = generator(G_input)
        MSE_loss = tf.reduce_mean(
            (m * x - m * G_sample)**2) / tf.reduce_mean(m)
        D_input = tf.concat(values=[G_sample, m], axis=1)
        D_prob = discriminator(D_input)
        G_loss_tmp = -tf.reduce_mean((1 - m) * tf.math.log(D_prob + 1e-8))
        return G_loss_tmp + alpha * MSE_loss

    # Discriminator
    def discriminator_loss(generator, discriminator, x, m, h):
        generator.trainable = False
        discriminator.trainable = True
        G_input = tf.concat(values=[x, m], axis=1)
        G_sample = generator(G_input)
        x_hat = x * m + G_sample * (1 - m)
        D_input = tf.concat(values=[x_hat, h], axis=1)
        D_prob = discriminator(D_input)
        return -tf.reduce_mean(m * tf.math.log(D_prob + 1e-8)
                               + (1 - m) * tf.math.log(1. - D_prob + 1e-8))

    # Build
    generator = Generator()
    generator.build(input_shape=(None, 2 * dim))
    g_optimizer = tf.keras.optimizers.Adam()

    discriminator = Discriminator()
    discriminator.build(input_shape=(None, 2 * dim))
    d_optimizer = tf.keras.optimizers.Adam()

    # Training
    one_tensor = tf.constant(1., shape=(batch_size, dim), dtype=float)
    for _ in tqdm(range(iterations)):
        # Sample batch
        batch_idx = sample_batch_index(no, batch_size)
        X_mb = tf.gather(X, batch_idx)
        M_mb = tf.gather(M, batch_idx)
        Z_mb = tf.convert_to_tensor(uniform_sampler(0, 0.01, batch_size, dim),
                                    dtype=float)
        H_mb_tmp = tf.convert_to_tensor(binary_sampler(hint_rate, batch_size, dim),
                                        dtype=float)
        H_mb = tf.math.multiply(M_mb, H_mb_tmp)
        # Combine random vectors with observed vectors
        # X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
        X_mb = tf.math.add(tf.math.multiply(M_mb, X_mb),
                           tf.math.multiply(tf.math.subtract(one_tensor, M_mb), Z_mb))

        # Training the discriminator
        with tf.GradientTape() as tape:
            d_loss = discriminator_loss(generator, discriminator, X_mb, M_mb, H_mb)
        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        d_optimizer.apply_gradients(
            zip(grads, discriminator.trainable_variables))

        # Training the generator
        with tf.GradientTape() as tape:
            g_loss = generator_loss(generator, discriminator, X_mb, M_mb)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

    ## Return imputed data
    imputed_data = np.array([]).reshape(0, dim)
    train_data = tf.data.Dataset.from_tensor_slices(X_input).batch(batch_size)
    train_data_iter = iter(train_data)
    while True:
        try:
            batch = next(train_data_iter)
        except StopIteration:
            break
        X_tmp = generator(batch).numpy()
        imputed_data = np.vstack([imputed_data, X_tmp])

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Recovery
    imputed_data = data_m * np.nan_to_num(data_x) + (1 - data_m) * imputed_data

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
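# --- Hedged evaluation sketch ---
# When the complete ground-truth matrix is available, the imputation returned by
# the TF2 implementation above can be scored with the same rmse_loss helper used
# by test() earlier in this file. Variable names here are assumptions; ori_data_x
# is the complete data and data_x the version with np.nan holes.
#
# data_m = 1 - np.isnan(data_x)
# imputed_data = gain(data_x, gain_parameters)
# rmse, rmse_mean = rmse_loss(ori_data_x, imputed_data, data_m)
# print(f'Imputation RMSE: {rmse:.4f}')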
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
      - data_x: original data with missing values
      - gain_parameters: GAIN network parameters:
        - batch_size: Batch size
        - hint_rate: Hint rate
        - alpha: Hyperparameter
        - iterations: Iterations

    Returns:
      - imputed_data: imputed data
    '''
    # Define mask matrix
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']
    checkpoint_dir = gain_parameters['checkpoint_dir']
    data_name = gain_parameters['data_name']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    # Input placeholders
    # Data vector
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]), name='G_W1')
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]), name='G_b1')
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]), name='G_W2')
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]), name='G_b2')
    G_W3 = tf.Variable(xavier_init([h_dim, dim]), name='G_W3')
    G_b3 = tf.Variable(tf.zeros(shape=[dim]), name='G_b3')
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    # Save models
    def save_model(sess, checkpoint_dir):
        model_name = "gain_model"
        model_dir = "%s" % (data_name)
        checkpoint_dir = os.path.join(checkpoint_dir, model_dir)
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver.save(sess, os.path.join(checkpoint_dir, model_name))

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)

    # # Combine with observed data
    # Hat_X = X * M + G_sample * (1-M)
    # # Discriminator
    # D_prob = discriminator(Hat_X, H)
    # ## GAIN loss
    # D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8) \
    #                               + (1-M) * tf.log(1. - D_prob + 1e-8))
    # G_loss_temp = -tf.reduce_mean((1-M) * tf.log(D_prob + 1e-8))
    # MSE_loss = \
    #     tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)
    # D_loss = D_loss_temp
    # G_loss = G_loss_temp + alpha * MSE_loss
    # ## GAIN solver
    # D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    # G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    saver = tf.train.Saver(max_to_keep=1)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Start Iterations (training loop disabled in testing mode)
    # for it in tqdm(range(iterations)):
    #     # Sample batch
    #     batch_idx = sample_batch_index(no, batch_size)
    #     X_mb = norm_data_x[batch_idx, :]
    #     M_mb = data_m[batch_idx, :]
    #     # Sample random vectors
    #     Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
    #     # Sample hint vectors
    #     H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
    #     H_mb = M_mb * H_mb_temp
    #     # Combine random vectors with observed vectors
    #     X_mb = M_mb * X_mb + (1-M_mb) * Z_mb
    #     _, D_loss_curr = sess.run([D_solver, D_loss_temp],
    #                               feed_dict={M: M_mb, X: X_mb, H: H_mb})
    #     _, G_loss_curr, MSE_loss_curr = \
    #         sess.run([G_solver, G_loss_temp, MSE_loss],
    #                  feed_dict={X: X_mb, M: M_mb, H: H_mb})
    # save_model(sess, checkpoint_dir)

    print('testing mode')
    # Restore the model
    # G_sample = load(sess, checkpoint_dir)
    print(" [*] Reading checkpoint...")
    # model_dir = "%s" % (data_name)
    # checkpoint_dir = os.path.join(checkpoint_dir, model_dir)
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print('The model loaded successfully')
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        saver.restore(sess, os.path.join(checkpoint_dir, ckpt_name))
        # print(sess.run(G_b1))
        G_W1 = sess.run(G_W1)
        G_b1 = sess.run(G_b1)
        G_W2 = sess.run(G_W2)
        G_b2 = sess.run(G_b2)
        G_W3 = sess.run(G_W3)
        G_b3 = sess.run(G_b3)
    else:
        print('failed to load the model, check model path')

    ## Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
def gain(miss_data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
      - miss_data_x: missing data
      - gain_parameters: GAIN network parameters:
        - batch_size: Batch size
        - alpha: Hyperparameter
        - iterations: Iterations

    Returns:
      - imputed_data: imputed data
    '''
    # Define mask matrix
    m = 1 - np.isnan(miss_data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    # hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = miss_data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(miss_data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    tf1.reset_default_graph()

    # Input placeholders
    # Data vector
    X = tf1.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf1.placeholder(tf.float32, shape=[None, dim])
    # # Hint vector
    # H = tf.placeholder(tf.float32, shape=[None, dim])
    # B vector
    B = tf1.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf1.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf1.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf1.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf1.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf1.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf1.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)
    H = B * M + 0.5 * (1 - B)
    D_prob_g = discriminator(X * M + G_sample * (1 - M), H)

    fake_X = tf1.placeholder(tf.float32, shape=[None, dim])
    # Combine with observed data
    Hat_X = X * M + fake_X * (1 - M)

    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss
    # D_loss_temp = -tf.reduce_mean((1-B)*(M * tf.log(D_prob + 1e-8) \
    #                               + (1-M) * tf.log(1. - D_prob + 1e-8))) \
    #               / tf.reduce_mean(1-B)
    # G_loss_temp = -tf.reduce_mean((1-B)*(1-M) * tf.log(D_prob + 1e-8)) / tf.reduce_mean(1-B)
    D_loss_temp = -tf.reduce_mean((M * tf1.log(D_prob + 1e-8)
                                   + (1 - M) * tf1.log(1. - D_prob + 1e-8)))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf1.log(D_prob_g + 1e-8))
    MSE_loss = tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)

    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf1.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf1.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    sess = tf1.Session()
    sess.run(tf1.global_variables_initializer())

    gen_new_params = []
    params = []
    for param in theta_G:
        params.append(sess.run(param))
    gen_new_params.append(params)

    for it in range(iterations):
        # for it in tqdm(range(iterations)):
        # print(sess.run(theta_G[-1]))
        gen_old_params = copy.deepcopy(gen_new_params)

        # Sample batch for the discriminator update
        batch_idx = sample_batch_index(no, batch_size)
        X_mb = norm_data_x[batch_idx, :]
        M_mb = m[batch_idx, :]
        # Sample random vectors
        Z_mb = uniform_sampler(0.0, 0.01, batch_size, dim)
        # Sample hint vectors
        # H_mb_temp = binary_sampler(0.9, batch_size, dim)
        # H_mb = M_mb * H_mb_temp
        # H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
        B_mb = sample_batch_binary(dim, batch_size)
        # H_mb = B_mb*M_mb + 0.5*(1-B_mb)
        # Combine random vectors with observed vectors
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        f_mb = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
        # print(f_mb)
        for w in range(len(theta_G)):
            theta_G[w].load(gen_new_params[0][w], sess)
        _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                  feed_dict={
                                      X: X_mb,
                                      M: M_mb,
                                      fake_X: f_mb,
                                      B: B_mb
                                  })

        # Sample batch for the generator update
        batch_idx = sample_batch_index(no, batch_size)
        X_mb = norm_data_x[batch_idx, :]
        M_mb = m[batch_idx, :]
        # Sample random vectors
        Z_mb = uniform_sampler(0.0, 0.01, batch_size, dim)
        # Sample hint vectors
        # H_mb_temp = binary_sampler(0.9, batch_size, dim)
        # H_mb = M_mb * H_mb_temp
        # H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
        B_mb = sample_batch_binary(dim, batch_size)
        # H_mb = B_mb*M_mb + 0.5*(1-B_mb)
        # Combine random vectors with observed vectors
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        for w in range(len(theta_G)):
            theta_G[w].load(gen_old_params[0][w], sess)
        _, G_loss_curr, MSE_loss_curr = \
            sess.run([G_solver, G_loss_temp, MSE_loss],
                     feed_dict={X: X_mb, M: M_mb, B: B_mb})

        params = []
        for param in theta_G:
            params.append(sess.run(param))
        gen_new_params[0] = params

    ## Return imputed data
    Z_mb = uniform_sampler(0.0, 0.01, no, dim)
    M_mb = m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    for w in range(len(theta_G)):
        theta_G[w].load(gen_new_params[0][w], sess)
    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
    sess.close()
    imputed_data = m * norm_data_x + (1 - m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, miss_data_x)

    return imputed_data