Example #1
def gain(data_x, gain_parameters):
  # Define mask matrix
  data_m = 1-np.isnan(data_x)
  
  # System parameters
  batch_size = gain_parameters['batch_size']
  hint_rate = gain_parameters['hint_rate']
  iterations = gain_parameters['iterations']

  # Hyperparameters
  alpha, beta, delta, gamma = 10, 0.01, 0.1, 0.5
  
  # Other parameters
  no, dim = data_x.shape
  
  # Hidden state dimensions
  h_dim = int(dim)
  
  # Normalization
  norm_data, norm_parameters = normalization(data_x)
  norm_data_x = np.nan_to_num(norm_data, nan=0)  # replace NaNs with zeros
  
  ## GAIN architecture   
  # Input placeholders
  X_dim = dim  # decoder output width must match the data width (was hard-coded to 99)
  z_dim = 60
  noise_factor = 0.25
  
  # Train-time toggles. A Python `if` on a placeholder cannot switch dropout
  # or noise after the graph is built, so feedable scalar placeholders
  # (defaulting to 0.0, i.e. off, for inference) are used instead.
  dropout_rate = tf.placeholder_with_default(0.0, shape=())
  noise_std = tf.placeholder_with_default(0.0, shape=())
  
  # Data vector
  X = tf.placeholder(tf.float32, shape = [None, dim])
  
  # (X_e, the encoded data vector, is produced by the decoder below)
  
  # Mask vector 
  M = tf.placeholder(tf.float32, shape = [None, dim])
  # Hint vector
  H = tf.placeholder(tf.float32, shape = [None, dim])
  
  """ Q(X|z) """
  Q_W1 = tf.Variable(xavier_init([dim, h_dim]))
  Q_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

  Q_W2_mu = tf.Variable(xavier_init([h_dim, z_dim]))
  Q_b2_mu = tf.Variable(tf.zeros(shape=[z_dim]))

  Q_W2_sigma = tf.Variable(xavier_init([h_dim, z_dim]))
  Q_b2_sigma = tf.Variable(tf.zeros(shape=[z_dim]))
  
  """ P(X|z) """
  P_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
  P_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

  P_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
  P_b2 = tf.Variable(tf.zeros(shape=[X_dim]))
  
  theta_E = [Q_W1, Q_b1, Q_W2_mu, Q_b2_mu, Q_W2_sigma, Q_b2_sigma, P_W1, P_b1, P_W2, P_b2]
  
  # Discriminator variables
  D_W1 = tf.Variable(xavier_init([dim*2, h_dim])) # Data + Hint as inputs
  D_b1 = tf.Variable(tf.zeros(shape = [h_dim]))
  
  D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
  D_b2 = tf.Variable(tf.zeros(shape = [h_dim]))
  
  D_W3 = tf.Variable(xavier_init([h_dim, dim]))
  D_b3 = tf.Variable(tf.zeros(shape = [dim]))  # Multi-variate outputs
  
  theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]
  
  #Generator variables
  # Data + Mask as inputs (Random noise is in missing components)
  G_W1 = tf.Variable(xavier_init([dim*2, h_dim]))  
  G_b1 = tf.Variable(tf.zeros(shape = [h_dim]))
  
  G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
  G_b2 = tf.Variable(tf.zeros(shape = [h_dim]))
  
  G_W3 = tf.Variable(xavier_init([h_dim, dim]))
  G_b3 = tf.Variable(tf.zeros(shape = [dim]))
  
  theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]
  
  ## VAE functions
  def encoder(X):
    h = tf.nn.relu(tf.matmul(X, Q_W1) + Q_b1)
    z_mu = tf.matmul(h, Q_W2_mu) + Q_b2_mu
    z_logvar = tf.matmul(h, Q_W2_sigma) + Q_b2_sigma
    return z_mu, z_logvar

  def decoder(z):
    h = tf.nn.relu(tf.matmul(z, P_W1) + P_b1)
    logits = tf.matmul(h, P_W2) + P_b2
    prob = tf.nn.sigmoid(logits)
    return prob, logits

  def sample_z(mu, log_var):
    # Reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, I)
    eps = tf.random_normal(shape=tf.shape(mu))
    return mu + tf.exp(log_var / 2) * eps

  ## GAIN functions
  
  # Generator
  def generator(x, m):
    # Concatenate Mask and Data
    inputs = tf.concat(values = [x, m], axis = 1)
    G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
    G_h1 = tf.nn.dropout(G_h1, rate=dropout_rate)
    G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
    G_h2 = tf.nn.dropout(G_h2, rate=dropout_rate)
    G_h3 = tf.matmul(G_h2, G_W3) + G_b3
    G_prob = tf.nn.sigmoid(G_h3)
    return G_prob
  
  # Discriminator
  def discriminator(x, h):
    # Concatenate Data and Hint
    inputs = tf.concat(values = [x, h], axis = 1)
    D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
    D_h1 = tf.nn.dropout(D_h1, rate=dropout_rate)
    D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
    D_h2 = tf.nn.dropout(D_h2, rate=dropout_rate)
    # Raw scores (logits); callers apply a sigmoid where probabilities are needed
    D_h3 = tf.matmul(D_h2, D_W3) + D_b3
    return D_h3


  # Encoder
  X_noise = X + noise_std * tf.random_normal(tf.shape(X))
  X_noise = tf.clip_by_value(X_noise, 0., 1.)

  z_mu, z_logvar = encoder(X_noise)
  z_sample = sample_z(z_mu, z_logvar)
  X_e, logits = decoder(z_sample)

  # E[log P(X|z)]
  recon_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X), 1)
  # D_KL(Q(z|X_noise) || N(0, I)); closed form since both distributions are
  # Gaussian. The usual 1/2 factor is absorbed into gamma = 0.5.
  kl_loss = gamma * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar, 1)
  # VAE loss: the reconstruction term enters E_loss_temp below
  # (note kl_loss is computed but never added to a training objective here)

  # Generator
  G_sample = generator(X, M)
  G_sample_reg = generator(X_e, M)
  
  # Combine with observed data
  Hat_X = X * M + G_sample * (1-M)
  Hat_X_reg = X * M + G_sample_reg * (1-M)
  
  # Discriminator
  D_prob = discriminator(Hat_X, H)  # raw scores
  D_prob_reg = tf.nn.sigmoid(discriminator(Hat_X_reg, H))

  ## GAIN loss
  E_loss_temp = tf.reduce_mean(recon_loss) * beta + \
                tf.reduce_mean(tf.math.log(D_prob_reg + 1e-8)) 

  # Note: these terms use the discriminator's raw scores directly
  # (no sigmoid/log), unlike the cross-entropy loss of the original GAIN.
  D_loss_temp = -tf.reduce_mean(M * D_prob + (1-M) * (1-D_prob))
  G_loss_temp = -tf.reduce_mean((1-M) * D_prob)
  
  X_true = M * X
  X_pred = M * G_sample
  
  MSE_loss = tf.reduce_mean(tf.math.abs(M * X - M * G_sample)) / tf.reduce_mean(M)  # masked mean absolute error, despite the name
  Hu_loss = tf.reduce_mean(tf.keras.losses.Huber()(X_true, X_pred))
  KL_loss = tf.reduce_mean(tf.keras.losses.kullback_leibler_divergence(X_true, X_pred))
  
  D_loss = D_loss_temp # + 0.001 * G_loss_temp
  G_loss = G_loss_temp + E_loss_temp + alpha * MSE_loss #+ delta * tf.math.abs(KL_loss)
  E_loss = E_loss_temp #+ 0.001 * G_loss_temp
  
  ## GAIN solver
  E_solver = tf.train.AdamOptimizer(learning_rate=0.00005, beta1=0.5).minimize(E_loss, var_list=theta_E)
  D_solver = tf.train.RMSPropOptimizer(learning_rate=0.000001).minimize(D_loss, var_list=theta_D)
  G_solver = tf.train.RMSPropOptimizer(learning_rate=0.00002).minimize(G_loss, var_list=theta_G)
  
  ## Iterations
  sess = tf.Session()
  sess.run(tf.global_variables_initializer())
  
  losses = {'E': [], 'D': [], 'G': [], 'K-L': [], 'MSE': [], 'Hu': []}
    
  # Start Iterations
  for it in tqdm(range(iterations)):    

    # Get batch coordinates
    batch_idx = sample_batch_index(no, batch_size)

    # Get (normalized) data at coordinates
    X_mb = norm_data_x[batch_idx, :]  

    # Get auxiliary (missingness) matrix
    M_mb = data_m[batch_idx, :]  

    # Sample random noise, uniform on [0, 0.01] (batch_size x dim)
    Z_mb = uniform_sampler(0, 0.01, batch_size, dim, True)

    # Sample hint vectors
    H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
    H_mb = M_mb * H_mb_temp
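    # The hint reveals the true mask only where the Bernoulli(hint_rate)
    # draw is 1; elsewhere the discriminator must decide real vs. imputed
    # on its own.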
      
    # Mask * Data + (1- Mask) * Random
    X_mb = M_mb * X_mb + (1-M_mb) * Z_mb 
    
    _, E_loss_curr = sess.run([E_solver, E_loss_temp],
        feed_dict={M: M_mb, X: X_mb, H: H_mb,
                   dropout_rate: 0.5, noise_std: noise_factor})

    _, D_loss_curr = sess.run([D_solver, D_loss_temp],
        feed_dict={M: M_mb, X: X_mb, H: H_mb,
                   dropout_rate: 0.5, noise_std: noise_factor})

    _, G_loss_curr, MSE_loss_curr, KL_loss_curr, Hu_loss_curr = \
       sess.run([G_solver, G_loss_temp, MSE_loss, KL_loss, Hu_loss],
                feed_dict={X: X_mb, M: M_mb, H: H_mb,
                           dropout_rate: 0.5, noise_std: noise_factor})

    if it % 20 == 0:
      losses['E'].append(E_loss_curr)
      losses['D'].append(D_loss_curr)
      losses['G'].append(G_loss_curr * 5)  # scaled for plot readability
      losses['MSE'].append(MSE_loss_curr * alpha)
    
      print('Iteration: %d, encoder: %.3f, discriminator: %.3f, generator: %.3f, MSE: %.3f' % 
        (it, E_loss_curr, D_loss_curr, G_loss_curr, MSE_loss_curr))
    
    if MSE_loss_curr < 0.019:
      break
    
  ## Return imputed data      
  Z_mb = uniform_sampler(0, 0.01, no, dim, False) 
  M_mb = data_m
  X_mb = norm_data_x          
  X_mb = M_mb * X_mb + (1-M_mb) * Z_mb 
    
  # Inference pass: dropout_rate and noise_std fall back to their 0.0 defaults
  imputed_data = sess.run([G_sample], feed_dict = {X: X_mb, M: M_mb })[0]
  
  imputed_data = data_m * norm_data_x + (1-data_m) * imputed_data
  
  # Renormalization
  imputed_data = renormalization(imputed_data, norm_parameters)  
  
  # Rounding
  imputed_data = rounding(imputed_data, data_x)  
  
  import matplotlib.pyplot as plt
  plt.title('Encoder, generator, and discriminator losses over time')
  plt.plot(losses['E'], label='Encoder', lw=2, alpha=0.5)
  plt.plot(losses['G'], label='Generator', lw=2, alpha=0.5)
  plt.plot(losses['D'], label='Discriminator', lw=2, alpha=0.5)
  #plt.plot(losses['K-L'], label='K-L', lw=1)
  plt.plot(losses['MSE'], label='MSE', lw=2, alpha=0.5)
  #plt.plot(losses['Hu'], label='Huber', lw=1)
  plt.xlabel('Training iteration (sampled every 20)', fontsize=6)
  plt.legend()
  plt.show()
  
  return imputed_data
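
A minimal usage sketch for the example above (hypothetical data; assumes TensorFlow 1.x graph mode and that the GAIN helpers used here -- normalization, the samplers, xavier_init -- are importable):

import numpy as np

# Toy matrix with roughly 30% of entries missing (hypothetical data)
data_x = np.random.rand(1000, 20)
data_x[np.random.rand(1000, 20) < 0.3] = np.nan

gain_parameters = {'batch_size': 128, 'hint_rate': 0.5, 'iterations': 900}
imputed = gain(data_x, gain_parameters)
assert imputed.shape == data_x.shape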
Example #2
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x
  
  Args:
    - data_x: original data with missing values
    - gain_parameters: GAIN network parameters:
      - batch_size: Batch size
      - hint_rate: Hint rate
      - alpha: Hyperparameter
      - iterations: Iterations
      
  Returns:
    - imputed_data: imputed data
  '''
    # Define mask matrix
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data, nan=0)  # replace NaNs with zeros

    ## GAIN architecture
    # Input placeholders
    # Data vector
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs

    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    #Generator variables
    # Data + Mask as inputs (Random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))

    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        #D_h2 = tf.nn.dropout(D_h2, rate=0.3)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)

    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)

    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8) \
                                  + (1-M) * tf.log(1. - D_prob + 1e-8))

    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))

    X_true = M * X
    X_pred = M * G_sample

    MSE_loss = tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)
    Hu_loss = tf.reduce_mean(tf.keras.losses.Huber()(X_true, X_pred))
    KL_loss = tf.reduce_mean(
        tf.keras.losses.kullback_leibler_divergence(X_true, X_pred))

    D_loss = D_loss_temp
    alpha, beta, delta = 5, 0.05, 10  # overrides alpha from gain_parameters; candidates for extraction
    G_loss = G_loss_temp + alpha * MSE_loss + beta * KL_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer(learning_rate=0.0002,
                                      beta1=0.1).minimize(D_loss,
                                                          var_list=theta_D)
    G_solver = tf.train.AdamOptimizer(learning_rate=0.0001,
                                      beta1=0.1).minimize(G_loss,
                                                          var_list=theta_G)

    ## Iterations
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    losses = {'D': [], 'G': [], 'K-L': [], 'MSE': [], 'Hu': []}

    # Start Iterations
    for it in tqdm(range(iterations)):

        # Get batch coordinates
        batch_idx = sample_batch_index(no, batch_size)

        # Get (normalized) data at coordinates
        X_mb = norm_data_x[batch_idx, :]

        # Get auxiliary (missingness) matrix
        M_mb = data_m[batch_idx, :]

        # Sample random noise, uniform on [0, 0.01] (batch_size x dim)
        Z_mb = uniform_sampler(0, 0.01, batch_size, dim)

        # Sample hint vectors
        H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
        H_mb = M_mb * H_mb_temp

        # Mask * Data + (1- Mask) * Random
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                  feed_dict={
                                      M: M_mb,
                                      X: X_mb,
                                      H: H_mb
                                  })
        _, G_loss_curr, MSE_loss_curr, KL_loss_curr, Hu_loss_curr = \
        sess.run([G_solver, G_loss_temp, MSE_loss, KL_loss, Hu_loss],
                 feed_dict = {X: X_mb, M: M_mb, H: H_mb})

        #if int(MSE_loss_curr * 1000) % 10 == 0:
        losses['D'].append(D_loss_curr)
        losses['G'].append(G_loss_curr)
        losses['K-L'].append(KL_loss_curr * beta)
        losses['MSE'].append(MSE_loss_curr * alpha)
        losses['Hu'].append(Hu_loss_curr * delta)
        print(it, G_loss_curr - MSE_loss_curr * alpha - KL_loss_curr * beta,
              MSE_loss_curr * alpha, KL_loss_curr * beta, G_loss_curr,
              MSE_loss_curr)

        if MSE_loss_curr < 0.01:
            break

    import matplotlib.pyplot as plt
    plt.plot(losses['D'], label='discriminator', lw=1)
    plt.plot(losses['G'], label='generator', lw=1)
    plt.plot(losses['K-L'], label='K-L', lw=1)
    plt.plot(losses['MSE'], label='MSE', lw=1)
    plt.plot(losses['Hu'], label='Huber', lw=1)
    plt.legend()
    plt.show()

    ## Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]

    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
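
Both examples above lean on the same helper module. The reference GAIN implementation defines these helpers roughly as below (a sketch; the extra boolean flag that Example #1 passes to uniform_sampler is a local modification not covered here):

import numpy as np
import tensorflow as tf

def binary_sampler(p, rows, cols):
  # Bernoulli(p) 0/1 matrix, used for masks and hints
  return (np.random.uniform(0., 1., size=(rows, cols)) < p).astype(float)

def uniform_sampler(low, high, rows, cols):
  # Uniform noise used to pre-fill missing entries
  return np.random.uniform(low, high, size=(rows, cols))

def sample_batch_index(total, batch_size):
  # Random minibatch of row indices, without replacement
  return np.random.permutation(total)[:batch_size]

def xavier_init(size):
  # Xavier-scaled random normal initializer for a [fan_in, fan_out] weight
  xavier_stddev = 1. / tf.sqrt(size[0] / 2.)
  return tf.random_normal(shape=size, stddev=xavier_stddev)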
Example #3
def main(alpha=1000, batch_size=128, hint_rate=0.5,
  iterations=900, miss_rate=0.3):
  
  gain_parameters = {'batch_size': batch_size,
                     'hint_rate': hint_rate,
                     'alpha': alpha,
                     'iterations': iterations}
  
  # Load data and introduce missingness
  #file_name = 'data/spam.csv'
  #data_x = np.loadtxt(file_name, delimiter=",", skiprows=1)
  
  enable_transform = False
  remove_outliers = False
  n_time_points = 3
  
  data_x = pickle.load(open('./missing_data.sav', 'rb'))
  data_x = data_x.transpose().astype(float)  # np.float is deprecated in newer NumPy
  print(data_x.shape)
  # if remove_outliers:
  #  data_x = pickle.load(open('./missing_data.sav', 'rb'))
  #  data_x = data_x.transpose().astype(np.float)
  # else:
  #  data_x = pickle.load(open('./denoised_missing_data.sav', 'rb')) 

  signed_variables = ['base_excess']
  no, dim = data_x.shape
  
  data_x_encoded = np.copy(data_x)
  miss_data_x = np.copy(data_x)
  miss_data_x_enc = np.copy(data_x)
  
  scalers = []
  
  for i in range(0, dim):
      variable, var_x = variables[i], np.copy(data_x[:,i])
      encoder_model = encoders[i]
      # Exclude outliers based on error
      nn_indices = ~np.isnan(data_x_encoded[:,i])
      nn_values = data_x[:,i][nn_indices]

      scaler = MinMaxScaler()
      var_x_scaled = scaler.fit_transform(var_x.reshape((-1,1)))
    
      enc_x_scaled = encoder_model.predict(var_x_scaled)
      enc_x_unscaled = scaler.inverse_transform(enc_x_scaled)
      data_x_encoded[:,i] = enc_x_unscaled.flatten()
      
      scalers.append(scaler)
      
      if remove_outliers:
        print('Excluding outliers...')
        mse = np.mean(np.power(var_x.reshape((-1,1)) - enc_x_unscaled, 2),axis=1)
      
        x = np.ma.array(mse, mask=np.isnan(mse))
        y = np.ma.array(var_x, mask=np.isnan(var_x))
        outlier_indices = (x / np.max(y)) > 2
        
        outlier_values = var_x[outlier_indices]
        
        print('... %d outlier(s) excluded' % \
          len(outlier_values), outlier_values)
        
        miss_data_x[outlier_indices, i] = np.nan
        miss_data_x_enc[outlier_indices, i] = np.nan
      
      #print(var_x, '----', enc_x_scaled, '----', enc_x_unscaled.flatten())
      print('Loaded model for %s...' % variable)
  
  no_total = no * dim
  no_nan = np.count_nonzero(np.isnan(data_x))
  no_not_nan = no_total - no_nan
  print('Input shape', no, 'x', dim)
  print('NAN values:', no_nan, '/', no_total, \
    '%.2f%%' % (no_nan / no_total * 100))

  n_patients = int(no/n_time_points)

  if len(variables) != dim:
    print(len(variables), dim)
    print('Incompatible dimensions.')
    exit()
  
  if enable_transform:  
    print('Applying transformation...')
    transformer = MinMaxScaler()
    transformer.fit(data_x)
  
    #data_x = transformer.transform(data_x)
    #miss_data_x = transformer.transform(miss_data_x)
    miss_data_x_enc = transformer.transform(data_x_encoded)
  
  # Introduce missing data
  data_m = binary_sampler(1-miss_rate, no, dim)

  miss_data_x[data_m == 0] = np.nan
  miss_data_x_enc[data_m == 0] = np.nan

  no_nan = np.count_nonzero(np.isnan(miss_data_x))
  no_not_nan = no_total - no_nan

  print('After removal, NAN values:', no_nan, '/', no_total, \
    '%.2f%%' % (no_nan / no_total * 100))
  
  real_miss_rate = (no_nan / no_total * 100)
  
  imputed_data_x_gan = gain(
    miss_data_x_enc, gain_parameters)
  
  # n_gans = 3
  # idxg_combined = []
  # 
  # for  n_gan in range(0, n_gans):
  #   np.random.seed(n_gan + 1)
  #   idxg_combined.append(gain(miss_data_x_enc, gain_parameters))
  # 
  # idxg_combined = np.concatenate(idxg_combined)
  #   
  # idxg_combined_final = gain(
  #   miss_data_x_enc, gain_parameters)
  # 
  # for j in range(0, dim):
  #   idxg_combined_tmp = np.copy(idxg_combined)
  #   
  #   for i in range(0, n_patients * n_time_points):
  #     if np.isnan(miss_data_x[i,j]) and data_m[i,j] != 0:
  #       idxg_combined_tmp[i,j] = np.nan
  # 
  #   imputer = IterativeImputer() # KNNImputer(n_neighbors=5)
  #   idxg_knn = imputer.fit_transform(idxg_combined_tmp)
  #   idxg_combined_final[:,j] = idxg_knn[0:n_patients*n_time_points,j]
  #   print('Done KNN imputation #%d' % j)
  # 
  # imputed_data_x_gan = idxg_combined_final

  imputer = KNNImputer(n_neighbors=5)
  imputed_data_x_knn = imputer.fit_transform(miss_data_x)
  
  imputer = IterativeImputer()
  imputed_data_x_mice = imputer.fit_transform(miss_data_x)
  
  if enable_transform:
    #data_x = transformer.inverse_transform(data_x)
    #miss_data_x = transformer.inverse_transform(miss_data_x)
    imputed_data_x_gan = transformer.inverse_transform(imputed_data_x_gan)
    #imputed_data_x_knn = transformer.inverse_transform(imputed_data_x_knn)
    #imputed_data_x_mice = transformer.inverse_transform(imputed_data_x_mice)
  
  # Save imputed data to disk
  pickle.dump(imputed_data_x_gan,open('./filled_data.sav', 'wb'))
  
  # Get residuals for computation of stats
  distances_gan = np.zeros((dim, n_time_points*n_patients))
  distances_knn = np.zeros((dim, n_time_points*n_patients))
  distances_mice = np.zeros((dim, n_time_points*n_patients))

  for i in range(0, n_patients):
    for j in range(0, dim):
      variable_name = variables[j]
      i_start = int(i*n_time_points)
      i_stop = int(i*n_time_points+n_time_points)
      
      original_tuple = data_x[i_start:i_stop,j]
      corrupted_tuple = miss_data_x[i_start:i_stop,j]
      imputed_tuple_gan = imputed_data_x_gan[i_start:i_stop,j]
      imputed_tuple_knn = imputed_data_x_knn[i_start:i_stop,j]
      imputed_tuple_mice = imputed_data_x_mice[i_start:i_stop,j]
      
      if i == 1 or i == 2:
        print(original_tuple, corrupted_tuple, imputed_tuple_gan, imputed_tuple_knn)
      for k in range(0, n_time_points):
        a, b, c, d = original_tuple[k], imputed_tuple_gan[k], imputed_tuple_knn[k], imputed_tuple_mice[k]
        if np.isnan(a) or data_m[i_start+k,j] != 0: continue
        if i % 10 == 0: print(variable_name, a,b,c,d, b-a)
        # Index residuals by the flattened (patient, time) position;
        # the original i*k index collided for k == 0
        distances_gan[j, i*n_time_points + k] = b - a
        distances_knn[j, i*n_time_points + k] = c - a
        distances_mice[j, i*n_time_points + k] = d - a
  
  # Compute distance statistics
  rrmses_gan, mean_biases, median_biases, bias_cis = [], [], [], []
  rrmses_knn, mean_biases_knn, median_biases_knn, bias_cis_knn = [], [], [], []
  rrmses_mice = []

  for j in range(0, dim):
    
    # Stats for original data
    dim_mean = np.mean([x for x in data_x[:,j] if not np.isnan(x)])
    dim_max = np.max([x for x in data_x[:,j] if not np.isnan(x)])

    dists_gan = distances_gan[j]
    dists_knn = distances_knn[j]
    dists_mice = distances_mice[j]
    
    #dists_gan /= dim_max
    #dists_knn /= dim_max
    #dists_mice /= dim_max
    
    # Stats for GAN
    mean_bias = np.round(np.mean(dists_gan), 4)
    median_bias = np.round(np.median(dists_gan), 4)
    mean_ci_95 = mean_confidence_interval(dists_gan)
    rmse = np.sqrt(np.mean(dists_gan**2))
    rrmse = np.round(rmse / dim_mean * 100, 2)
    
    bias_cis.append([mean_ci_95[1], mean_ci_95[2]])
    mean_biases.append(mean_bias)
    median_biases.append(median_bias)
    rrmses_gan.append(rrmse)
    
    # Stats for KNN
    rmse_knn = np.sqrt(np.mean(dists_knn**2))
    rrmses_knn.append(np.round(rmse_knn / dim_mean * 100, 2))
    
    # Stats for MICE
    rmse_mice = np.sqrt(np.mean(dists_mice**2))
    rrmses_mice.append(np.round(rmse_mice / dim_mean * 100, 2))
    
    print(variables[j], ' - rrmse: ', rrmse, 'median bias: %.2f' % median_bias,
      '%%, bias: %.2f (95%% CI, %.2f to %.2f)' % mean_ci_95)

  n_fig_rows = 6
  n_fig_cols = 6

  n_fig_total = n_fig_rows * n_fig_cols

  if dim > n_fig_total:
    print('Warning: not all variables plotted')

  fig, axes = plt.subplots(\
    n_fig_rows, n_fig_cols, figsize=(15,15))
  fig2, axes2 = plt.subplots(\
    n_fig_rows, n_fig_cols, figsize=(15,15))

  for j in range(0, dim):
    
    ax_title = variables[j]
    ax = axes[int(j/n_fig_cols), j % n_fig_cols]
    ax2 = axes2[int(j/n_fig_cols), j % n_fig_cols]
    ax.set_title(ax_title,fontdict={'fontsize':6})

    input_arrays = [data_x, imputed_data_x_gan, imputed_data_x_knn, imputed_data_x_mice]
    
    output_arrays = [
      np.asarray([input_arr[ii,j] for ii in range(0, no) if \
        (not np.isnan(data_x[ii,j]) and \
        data_m[ii,j] == 0)]) for input_arr in input_arrays
    ]
    
    deleted_values, imputed_values_gan, imputed_values_knn, imputed_values_mice = output_arrays
    
    # Make KDE
    low_ci, high_ci = bias_cis[j]
    xlabel = 'mean bias = %.2f (95%% CI, %.2f to %.2f)' % \
      (mean_biases[j], low_ci, high_ci)
      
    ax.set_xlabel(xlabel, fontsize=6)
    ax.set_ylabel('$p(x)$',fontsize=6)
    
    range_arrays = np.concatenate([deleted_values, imputed_values_gan])
    
    x_range = (np.min(range_arrays), 
      np.min([
        np.mean(range_arrays) + 3 * np.std(range_arrays), 
        np.max(range_arrays)
      ])
    )
    
    kde_kws = { 'shade': False, 'bw':'scott', 'clip': x_range }
    
    sns.distplot(imputed_values_gan, hist=False,
      kde_kws={**{ 'color': 'r'}, **kde_kws}, ax=ax)
    
    sns.distplot(imputed_values_knn, hist=False,
      kde_kws={**{ 'color': 'b', 'alpha': 0.5 }, **kde_kws},ax=ax)

    sns.distplot(imputed_values_mice, hist=False,
      kde_kws={**{ 'color': 'g', 'alpha': 0.5 }, **kde_kws},ax=ax)

    sns.distplot(deleted_values, hist=False,
      kde_kws={**{ 'color': '#000000'}, **kde_kws},ax=ax)

    # Make QQ plot
    qqplot(deleted_values, imputed_values_gan, ax=ax2, color='r')
    qqplot(deleted_values, imputed_values_knn, ax=ax2, color='b')
    qqplot(deleted_values, imputed_values_mice, ax=ax2, color='g')
    
  top_title = 'KDE plot of original data (black) and data imputed using GAN (red), KNN (blue), and MICE (green)'
  fig.suptitle(top_title, fontsize=8)
  fig.legend(labels=['GAN', 'KNN', 'MICE', 'Observed'])

  fig.tight_layout(rect=[0, 0.03, 1, 0.95])  # rect is (left, bottom, right, top); right must exceed left
  fig.subplots_adjust(hspace=1, wspace=0.35)

  top_title = 'Q-Q plot of observed vs. predicted values'
  fig2.suptitle(top_title, fontsize=8)

  fig2.tight_layout(rect=[0, 0.03, 1, 0.95])
  fig2.subplots_adjust(hspace=1, wspace=0.35)
  
  plt.show()

  print()
  mrrmse_gan = np.round(np.asarray(rrmses_gan).mean(), 2)
  print('Average RRMSE (GAN): ', mrrmse_gan, '%')

  print()
  mrrmse_knn = np.round(np.asarray(rrmses_knn).mean(), 2)
  print('Average RRMSE (KNN): ', mrrmse_knn, '%')

  print()
  mrrmse_mice = np.round(np.asarray(rrmses_mice).mean(), 2)
  print('Average RRMSE (MICE): ', mrrmse_mice, '%')
  
  return real_miss_rate, mrrmse_gan, mrrmse_knn, mrrmse_mice
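
Example #3 unpacks mean_confidence_interval as a (mean, lower, upper) 3-tuple. One common implementation consistent with that usage (an assumption -- the helper itself is not shown here):

import numpy as np
import scipy.stats

def mean_confidence_interval(data, confidence=0.95):
  # Student-t confidence interval for the mean: (mean, lower, upper)
  a = np.asarray(data, dtype=float)
  n = len(a)
  m, se = np.mean(a), scipy.stats.sem(a)
  h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
  return m, m - h, m + h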
Example #4
def main(iterations=NUM_ITERATIONS,
         batch_size=128,
         hint_rate=0.5,
         miss_rate=0.3):

    gain_parameters = {
        'batch_size': batch_size,
        'hint_rate': hint_rate,
        'iterations': iterations
    }

    enable_transform = False
    remove_outliers = False
    n_time_points = 3

    data_x = pickle.load(open('./missing_data.sav', 'rb'))
    data_x = data_x.transpose().astype(float)  # np.float is deprecated in newer NumPy

    # Remove variables listed in remove_variables
    no, dim = data_x.shape
    removed = 0
    for d in range(0, dim):
        if variables[d - removed] in remove_variables:
            variables.remove(variables[d - removed])
            data_x = np.delete(data_x, d - removed, axis=1)
            removed += 1

    no, dim = data_x.shape

    if len(variables) != dim:
        print(len(variables), dim)
        print('Incompatible dimensions.')
        exit()

    no_total = no * dim
    no_nan = np.count_nonzero(np.isnan(data_x))
    no_not_nan = no_total - no_nan
    n_patients = int(no / n_time_points)

    miss_data_x = np.copy(data_x)

    print('Input shape', no, 'x', dim)
    print('NAN values:', no_nan, '/', no_total, \
      '%.2f%%' % (no_nan / no_total * 100))

    # Introduce missing data
    data_m = binary_sampler(1 - miss_rate, no, dim)
    miss_data_x[data_m == 0] = np.nan

    transformer = RobustScaler()
    miss_data_x = transformer.fit_transform(miss_data_x)

    no_nan = np.count_nonzero(np.isnan(miss_data_x))
    no_not_nan = no_total - no_nan

    print('After removal, NAN values:', no_nan, '/', no_total, \
      '%.2f%%' % (no_nan / no_total * 100))

    real_miss_rate = (no_nan / no_total * 100)

    miss_data_x_gan_tmp = np.zeros((n_patients, dim * n_time_points))

    # Swap (one row per time point) to (one column per time point)
    for i in range(0, n_patients):
        for j in range(0, dim):
            for n in range(0, n_time_points):
                miss_data_x_gan_tmp[i, n * dim +
                                    j] = miss_data_x[i * n_time_points + n, j]
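    # (Since rows are stored patient-major and time-minor, the loop above is
    #  equivalent to the row-major reshape
    #  miss_data_x.reshape(n_patients, n_time_points * dim), and the inverse
    #  swap below is the reshape back to (no, dim).)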

    imputed_data_x_gan_tmp = gain(miss_data_x_gan_tmp, gain_parameters)

    imputed_data_x_gan = np.copy(miss_data_x)

    ## Swap (one column per time point) to (one row per time point)
    for i in range(0, n_patients):
        for j in range(0, dim):
            for n in range(0, n_time_points):
                imputed_data_x_gan[i * n_time_points + n,
                                   j] = imputed_data_x_gan_tmp[i, n * dim + j]

    imputer = KNNImputer(n_neighbors=5)
    imputed_data_x_knn = imputer.fit_transform(miss_data_x)

    imputer = IterativeImputer(verbose=True)
    imputed_data_x_mice = imputer.fit_transform(miss_data_x)

    imputed_data_x_gan = transformer.inverse_transform(imputed_data_x_gan)
    imputed_data_x_knn = transformer.inverse_transform(imputed_data_x_knn)
    imputed_data_x_mice = transformer.inverse_transform(imputed_data_x_mice)

    # Save imputed data to disk
    pickle.dump(imputed_data_x_gan, open('./filled_data.sav', 'wb'))

    # Get residuals for computation of stats
    distances_gan = np.zeros((dim, n_time_points * n_patients))
    distances_knn = np.zeros((dim, n_time_points * n_patients))
    distances_mice = np.zeros((dim, n_time_points * n_patients))
    distributions = {'deleted': [], 'gan': [], 'knn': [], 'mice': []}

    from scipy.stats import iqr

    for j in range(0, dim):


        for i in range(0, n_patients):
            variable_name = variables[j]
            i_start = int(i * n_time_points)
            i_stop = int(i * n_time_points + n_time_points)

            original_tuple = data_x[i_start:i_stop, j]
            corrupted_tuple = miss_data_x[i_start:i_stop, j]
            imputed_tuple_gan = imputed_data_x_gan[i_start:i_stop, j]
            imputed_tuple_knn = imputed_data_x_knn[i_start:i_stop, j]
            imputed_tuple_mice = imputed_data_x_mice[i_start:i_stop, j]

            #if i == 1 or i == 2:
            #  print(original_tuple, corrupted_tuple, imputed_tuple_gan, imputed_tuple_knn)

            for k in range(0, n_time_points):
                a, b, c, d = original_tuple[k], imputed_tuple_gan[k], \
                             imputed_tuple_knn[k], imputed_tuple_mice[k]
                if np.isnan(a) or data_m[i_start + k, j] != 0: continue
                #if i % 10 == 0: print(variable_name, a,b,c,d, b-a)
                # Index by flattened (patient, time); the original i*k collided for k == 0
                distances_gan[j, i * n_time_points + k] = (b - a)
                distances_knn[j, i * n_time_points + k] = (c - a)
                distances_mice[j, i * n_time_points + k] = (d - a)

    # Compute distance statistics
    all_stats = {}

    for j in range(0, dim):

        print('%d. Imputed variable: %s' % (j, variables[j]))

        current_stats = {'gan': {}, 'knn': {}, 'mice': {}}  # fresh stats dict per variable

        # Stats for original data
        dim_mean = np.mean([x for x in data_x[:, j] if not np.isnan(x)])
        dim_max = np.max([x for x in data_x[:, j] if not np.isnan(x)])
        dim_iqr = iqr([x for x in data_x[:, j] if not np.isnan(x)])

        # Indices for removed data
        ind = (data_m[:, j]
               == 0).flatten() & (~np.isnan(data_x[:, j])).flatten()

        # Stats for GAN
        current_stats['gan']['bias'] = np.mean(distances_gan[j])
        current_stats['gan']['rmse'] = np.sqrt(np.mean(distances_gan[j]**2))
        current_stats['gan']['nrmse'] = current_stats['gan']['rmse'] / dim_iqr
        current_stats['gan']['mape'] = np.mean(np.abs(distances_gan[j]))  # mean absolute error (keyed 'mape')
        current_stats['gan']['wd'] = wasserstein_distance(
            data_x[ind, j].flatten(), imputed_data_x_gan[ind, j].flatten())

        # Stats for KNN
        current_stats['knn']['bias'] = np.mean(distances_knn[j])
        current_stats['knn']['rmse'] = np.sqrt(np.mean(distances_knn[j]**2))
        current_stats['knn']['nrmse'] = current_stats['knn']['rmse'] / dim_iqr
        current_stats['knn']['mape'] = np.mean(np.abs(distances_knn[j]))
        current_stats['knn']['wd'] = wasserstein_distance(
            data_x[ind, j].flatten(), imputed_data_x_knn[ind, j].flatten())

        # Stats for MICE
        current_stats['mice']['bias'] = np.mean(distances_mice[j])
        current_stats['mice']['rmse'] = np.sqrt(np.mean(distances_mice[j]**2))
        current_stats['mice'][
            'nrmse'] = current_stats['mice']['rmse'] / dim_iqr
        current_stats['mice']['mape'] = np.mean(np.abs(distances_mice[j]))
        current_stats['mice']['wd'] = wasserstein_distance(
            data_x[ind, j].flatten(), imputed_data_x_mice[ind, j].flatten())

        for model_name in current_stats:
            model = current_stats[model_name]
            print('... %s - bias: %.3f, RMSE: %.3f, ME: %.3f, WD: %.3f' % \
              (model_name, model['bias'], model['rmse'], model['mape'], model['wd']))

        all_stats[variables[j]] = dict(current_stats)

        print()

    n_fig_rows, n_fig_cols = 6, 6
    n_fig_total = n_fig_rows * n_fig_cols

    if dim > n_fig_total: print('Warning: not all variables plotted')

    all_fig_axes = [
        plt.subplots(n_fig_rows, n_fig_cols, figsize=(15, 15))
        for _ in range(0, 3)
    ]

    for j in range(0, dim):

        dim_not_nan = np.count_nonzero(~np.isnan(data_x[:, j]))
        deleted_no = np.count_nonzero(
            np.isnan(miss_data_x[:, j]) & ~np.isnan(data_x[:, j]))
        ax_title = variables[j] + (' (%d of %d observed values erased)' %
                                   (deleted_no, dim_not_nan))

        dim_axes = [
            fig_axes[1][int(j / n_fig_cols), j % n_fig_cols]
            for fig_axes in all_fig_axes
        ]

        for ax in dim_axes:
            ax.set_title(ax_title,
                         fontdict={
                             'fontsize': 7,
                             'fontweight': 'bold'
                         })

        input_arrays = [
            data_x, imputed_data_x_gan, imputed_data_x_knn, imputed_data_x_mice
        ]

        output_arrays = [
          np.asarray([input_arr[ii,j] for ii in range(0, no) if \
            (not np.isnan(data_x[ii,j]) and \
            data_m[ii,j] == 0)]) for input_arr in input_arrays
        ]

        deleted_values, imputed_values_gan, imputed_values_knn, imputed_values_mice = output_arrays

        plot_distribution_densities(output_arrays, all_stats, variables[j],
                                    dim_axes[0])
        plot_distribution_residuals(output_arrays, dim_axes[1])
        plot_distribution_summaries(output_arrays, dim_axes[2])

        # Make QQ plot of original and deleted values vs. normal distribution
        #dist_max = np.max(np.concatenate([imputed_values_gan, deleted_values]))
        #qqplot_1sample((data_x[~np.isnan(data_x[:,j]),j] - dist_min) / dist_max, ax=ax3, color='b')
        #qqplot_1sample((data_x[data_m[:,j] == 0,j] - dist_min) / dist_max, ax=ax3, color='r',draw_line=False)

    # Figure 1
    fig1 = all_fig_axes[0][0]
    top_title = 'Kernel density estimation for erased and predicted values, for each imputation method'
    fig1.suptitle(top_title, fontsize=8)

    fig1.tight_layout(rect=[0, 0.03, 1, 0.95])  # rect is (left, bottom, right, top); right must exceed left
    fig1.subplots_adjust(hspace=1, wspace=0.35)

    # Figure 2
    fig2 = all_fig_axes[1][0]
    top_title = 'Q-Q plot of erased vs. imputed values, for each imputation method'
    fig2.suptitle(top_title, fontsize=8)

    fig2.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig2.subplots_adjust(hspace=1, wspace=0.35)

    # Figure 3
    fig3 = all_fig_axes[2][0]
    top_title = 'Bayesian confidence intervals for the mean and standard deviation, for erased values and imputed values'
    fig3.suptitle(top_title, fontsize=8)

    fig3.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig3.subplots_adjust(hspace=1, wspace=0.35)

    # Figure 4
    fig5, ax5 = plt.subplots(1, 1)
    top_title = 'Distribution of normalized RMSEs for each imputation method'
    fig5.suptitle(top_title, fontsize=8)
    plot_error_distributions(all_stats, fig5, ax5)
    ax5.set_ylabel('Probability density', fontsize=6)
    ax5.set_xlabel('NRMSE (normalized to IQR)', fontsize=6)
    ax5.legend(fontsize=6)
    fig5.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig5.subplots_adjust(hspace=1, wspace=0.35)

    plt.show()

    for model_name in ['gan', 'knn', 'mice']:
        wds = [
            all_stats[variable_name][model_name]['wd']
            for variable_name in all_stats
        ]
        nrmses = [
            all_stats[variable_name][model_name]['nrmse']
            for variable_name in all_stats
        ]
        mwd = np.round(np.asarray(wds).mean(), 2)
        mnrmse = np.round(np.asarray(nrmses).mean(), 2)
        print('Model: %s - average WD = %.2f, average NRMSE = %.2f ' %
              (model_name, mwd, mnrmse))

    return all_stats
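
A minimal driver sketch for the function above (hypothetical values; NUM_ITERATIONS, the global variables list, and the pickled input file are assumed to be defined elsewhere in the script):

if __name__ == '__main__':
    all_stats = main(iterations=900, batch_size=128,
                     hint_rate=0.5, miss_rate=0.3)
    # e.g. compare the normalized RMSE of each imputer per variable
    for var, s in all_stats.items():
        print(var, s['gan']['nrmse'], s['knn']['nrmse'], s['mice']['nrmse'])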
Example #5
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x
  
  Args:
    - data_x: original data with missing values
    - gain_parameters: GAIN network parameters:
      - batch_size: Batch size
      - hint_rate: Hint rate
      - alpha: Hyperparameter
      - iterations: Iterations
      
  Returns:
    - imputed_data: imputed data
  '''
    # Define mask matrix
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = 10  # gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data, nan=0)  # replace NaNs with zeros

    ## GAIN architecture
    # Input placeholders
    # Data vector
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs

    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    #Generator variables
    # Data + Mask as inputs (Random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))

    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)

    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)

    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8) \
                                  + (1-M) * tf.log(1. - D_prob + 1e-8))

    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))

    MSE_loss = \
    tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)

    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Start Iterations
    for it in tqdm(range(iterations)):

        # Sample batch
        batch_idx = sample_batch_index(no, batch_size)
        X_mb = norm_data_x[batch_idx, :]
        M_mb = data_m[batch_idx, :]
        # Sample random vectors
        Z_mb = uniform_sampler(0, 0.01, batch_size, dim, True)
        # Sample hint vectors
        H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
        H_mb = M_mb * H_mb_temp

        # Combine random vectors with observed vectors
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                  feed_dict={
                                      M: M_mb,
                                      X: X_mb,
                                      H: H_mb
                                  })
        _, G_loss_curr, MSE_loss_curr = \
        sess.run([G_solver, G_loss_temp, MSE_loss],
                 feed_dict = {X: X_mb, M: M_mb, H: H_mb})

    ## Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim, True)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]

    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
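
All of the examples share the same normalization round-trip. The reference GAIN utilities implement it approximately as below (a sketch for context, not the verbatim helpers used in these scripts):

import numpy as np

def normalization(data):
  # Column-wise min-max scaling to [0, 1], ignoring NaNs
  min_val = np.nanmin(data, axis=0)
  max_val = np.nanmax(data, axis=0)
  norm_data = (data - min_val) / (max_val - min_val + 1e-6)
  return norm_data, {'min_val': min_val, 'max_val': max_val}

def renormalization(norm_data, norm_parameters):
  # Inverse of normalization()
  min_val = norm_parameters['min_val']
  max_val = norm_parameters['max_val']
  return norm_data * (max_val - min_val + 1e-6) + min_val

def rounding(imputed_data, data_x):
  # Round columns that look categorical (few distinct observed values)
  rounded = imputed_data.copy()
  for i in range(data_x.shape[1]):
    observed = data_x[~np.isnan(data_x[:, i]), i]
    if len(np.unique(observed)) < 20:
      rounded[:, i] = np.round(rounded[:, i])
  return rounded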