def main_NeuMF(hyper_params, gpu_id=None):
    import time

    import torch

    from pytorch_models.NeuMF import GMF, MLP, NeuMF
    from data import load_data
    from eval import evaluate, eval_ranking
    from utils import load_user_item_counts, is_cuda_available
    from utils import xavier_init, log_end_epoch
    from loss import MSELoss

    user_count, item_count = load_user_item_counts(hyper_params)
    train_reader, test_reader, val_reader, hyper_params = load_data(
        hyper_params)
    start_time = time.time()

    initial_path = hyper_params['model_path']

    # Pre-train the GMF model
    hyper_params['model_path'] = initial_path + "_gmf"
    gmf_model = GMF(hyper_params)
    if is_cuda_available:
        gmf_model = gmf_model.cuda()
    xavier_init(gmf_model)
    gmf_model = train_complete(hyper_params, GMF, train_reader, val_reader,
                               user_count, item_count, gmf_model)

    # Pre-train the MLP model
    hyper_params['model_path'] = initial_path + "_mlp"
    mlp_model = MLP(hyper_params)
    if is_cuda_available:
        mlp_model = mlp_model.cuda()
    xavier_init(mlp_model)
    mlp_model = train_complete(hyper_params, MLP, train_reader, val_reader,
                               user_count, item_count, mlp_model)

    # Train the final NeuMF model, warm-started from the pre-trained GMF and MLP
    hyper_params['model_path'] = initial_path
    model = NeuMF(hyper_params)
    if is_cuda_available:
        model = model.cuda()
    model.init(gmf_model, mlp_model)
    model = train_complete(hyper_params, NeuMF, train_reader, val_reader,
                           user_count, item_count, model)

    # Evaluate the final model for MSE on the test set
    criterion = MSELoss(hyper_params)
    metrics, user_count_mse_map, item_count_mse_map = evaluate(
        model, criterion, test_reader, hyper_params,
        user_count, item_count, review=False)

    # Evaluate the final model for HR@1 on the test set
    metrics.update(eval_ranking(model, test_reader, hyper_params,
                                review=False))

    log_end_epoch(hyper_params, metrics, 'final',
                  (time.time() - start_time), metrics_on='(TEST)')
    return metrics, user_count_mse_map, item_count_mse_map
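# Hypothetical invocation sketch for main_NeuMF. Only 'model_path' is read
# directly by the function above; every other key below is an assumption
# about what GMF/MLP/NeuMF and load_data() expect in this codebase.
hyper_params = {
    'model_path': 'saved_models/ml-100k_neumf',  # hypothetical path
    'latent_size': 32,                           # assumed embedding size
    'lr': 0.002,                                 # assumed learning rate
    'batch_size': 128,                           # assumed batch size
}
metrics, user_mse_map, item_mse_map = main_NeuMF(hyper_params)
print(metrics)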
def _create_variables(self):
    G_W1 = tf.Variable(
        xavier_init(self.noise_dim + self.label_dim, self.G_hidden_layer))
    G_b1 = tf.Variable(
        tf.zeros(shape=[self.G_hidden_layer], dtype=tf.float64))
    G_W2 = tf.Variable(xavier_init(self.G_hidden_layer, self.input_dim))
    G_b2 = tf.Variable(tf.zeros(shape=[self.input_dim], dtype=tf.float64))
    theta_G = {'G_W1': G_W1, 'G_b1': G_b1, 'G_W2': G_W2, 'G_b2': G_b2}

    D_W1 = tf.Variable(xavier_init(self.input_dim, self.D_hidden_layer))
    D_b1 = tf.Variable(
        tf.zeros(shape=[self.D_hidden_layer], dtype=tf.float64))
    D_W2_gan = tf.Variable(xavier_init(self.D_hidden_layer, 1))
    D_b2_gan = tf.Variable(tf.zeros(shape=[1], dtype=tf.float64))
    D_W2_aux = tf.Variable(xavier_init(self.D_hidden_layer, self.label_dim))
    D_b2_aux = tf.Variable(
        tf.zeros(shape=[self.label_dim], dtype=tf.float64))
    theta_D = {
        'D_W1': D_W1, 'D_b1': D_b1,
        'D_W2_gan': D_W2_gan, 'D_b2_gan': D_b2_gan,
        'D_W2_aux': D_W2_aux, 'D_b2_aux': D_b2_aux,
    }
    return theta_G, theta_D
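# These snippets call several different xavier_init helpers from their
# respective repositories: a (fan_in, fan_out[, const]) form, a shape-list
# form (used by the GAIN-family functions below), and a tf.layers
# kernel-initializer form. As a reference point, here is a minimal sketch of
# the first two forms; the exact definitions in each repository may differ.
import numpy as np
import tensorflow as tf

def xavier_init(fan_in, fan_out, const=1.0):
    """Glorot/Xavier uniform tensor: U(-limit, limit) with
    limit = const * sqrt(6 / (fan_in + fan_out)). float64 matches the
    float64 biases used above; other snippets use float32."""
    limit = const * np.sqrt(6.0 / (fan_in + fan_out))
    return tf.random_uniform((fan_in, fan_out),
                             minval=-limit, maxval=limit, dtype=tf.float64)

def xavier_init_from_shape(size):
    """Shape-list variant, matching the published GAIN reference code to
    the best of my reading: a normal draw with stddev = 1/sqrt(size[0]/2)."""
    in_dim = size[0]
    xavier_stddev = 1. / tf.sqrt(in_dim / 2.)
    return tf.random_normal(shape=size, stddev=xavier_stddev)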
def generator(hparams, z, scope_name, reuse):
    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()

        w1 = tf.get_variable('w1', initializer=utils.xavier_init(
            hparams.n_z, hparams.n_hidden_gener_1))
        b1 = tf.get_variable('b1', initializer=tf.zeros(
            [hparams.n_hidden_gener_1], dtype=tf.float32))
        hidden1 = hparams.transfer_fct(tf.matmul(z, w1) + b1)

        w2 = tf.get_variable('w2', initializer=utils.xavier_init(
            hparams.n_hidden_gener_1, hparams.n_hidden_gener_2))
        b2 = tf.get_variable('b2', initializer=tf.zeros(
            [hparams.n_hidden_gener_2], dtype=tf.float32))
        hidden2 = hparams.transfer_fct(tf.matmul(hidden1, w2) + b2)

        w3 = tf.get_variable('w3', initializer=utils.xavier_init(
            hparams.n_hidden_gener_2, hparams.n_input))
        b3 = tf.get_variable('b3', initializer=tf.zeros(
            [hparams.n_input], dtype=tf.float32))
        logits = tf.matmul(hidden2, w3) + b3
        x_reconstr_mean = tf.nn.sigmoid(logits)

    return logits, x_reconstr_mean, b3
def _create_variables(self, n_features):
    '''Create the TensorFlow variables for the model.

    Parameters
    ----------
    n_features : int
        Number of features.

    Returns
    -------
    tuple
        (weights (shape (n_features, n_components)),
         hidden bias (shape (n_components,)),
         visible bias (shape (n_features,)))
    '''
    if self.W_:
        W_ = tf.Variable(self.W_, name='enc-w')
    else:
        W_ = tf.Variable(utils.xavier_init(n_features, self.n_components,
                                           self.xavier_init),
                         name='enc-w')
        # Alternative: W_ = tf.Variable(tf.truncated_normal(
        #     shape=[n_features, self.n_components], stddev=0.1), name='enc-w')

    if self.bh_:
        bh_ = tf.Variable(self.bh_, name='hidden-bias')
    else:
        bh_ = tf.Variable(tf.constant(0.1, shape=[self.n_components]),
                          name='hidden-bias')

    if self.bv_:
        bv_ = tf.Variable(self.bv_, name='visible-bias')
    else:
        bv_ = tf.Variable(tf.constant(0.1, shape=[n_features]),
                          name='visible-bias')

    return W_, bh_, bv_
def discriminator(x, activation_fn, reuse=None, scope=None):
    """Model function of the 1D GAN discriminator."""
    # Convolutional layers
    conv = tf.layers.conv1d(inputs=x, filters=2 * CAPACITY, kernel_size=4,
                            strides=2, activation=activation_fn,
                            kernel_initializer=utils.xavier_init('relu'),
                            padding='valid', name='conv_1', reuse=reuse)
    conv = tf.layers.conv1d(inputs=conv, filters=4 * CAPACITY, kernel_size=4,
                            strides=2, activation=activation_fn,
                            kernel_initializer=utils.xavier_init('relu'),
                            padding='valid', name='conv_2', reuse=reuse)
    conv = tf.layers.conv1d(inputs=conv, filters=8 * CAPACITY, kernel_size=4,
                            strides=2, activation=activation_fn,
                            kernel_initializer=utils.xavier_init('relu'),
                            padding='valid', name='conv_3', reuse=reuse)
    conv = tf.reshape(
        conv, shape=[-1, np.prod([dim.value for dim in conv.shape[1:]])])

    # Dense layers
    dense = tf.layers.dense(inputs=conv, units=1024,
                            activation=activation_fn, name='dense_1',
                            kernel_initializer=utils.xavier_init(),
                            reuse=reuse)
    return tf.layers.dense(inputs=dense, units=1, activation=tf.nn.sigmoid,
                           name='dense_2', reuse=reuse,
                           kernel_initializer=utils.xavier_init())
def _create_variables(self):
    fsize = self.feature_size
    weights = dict()
    for (i, hsize) in enumerate(self.layer_sizes):
        weights['w%d' % i] = tf.Variable(xavier_init(fsize, hsize, 4))
        weights['b%d' % i] = tf.Variable(
            tf.zeros(shape=[hsize], dtype=tf.float64))
        fsize = hsize
    return weights
def generator_i(hparams, z, scope_name, reuse, i, relative=True):
    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()
        try:
            W1 = tf.get_variable('W1', initializer=utils.xavier_init(
                hparams.grid[-1], hparams.n_hidden_gener_1))
        except ValueError:
            scope.reuse_variables()
            W1 = tf.get_variable('W1', initializer=utils.xavier_init(
                hparams.grid[-1], hparams.n_hidden_gener_1))
        w1 = slicer_dec(hparams, i, W1, None, relative)
        # hparams.track.append(w1)
        b1 = tf.get_variable('b1', initializer=tf.zeros(
            [hparams.n_hidden_gener_1], dtype=tf.float32))
        hidden1 = hparams.transfer_fct(tf.matmul(z, w1) + b1)

        w2 = tf.get_variable('w2', initializer=utils.xavier_init(
            hparams.n_hidden_gener_1, hparams.n_hidden_gener_2))
        b2 = tf.get_variable('b2', initializer=tf.zeros(
            [hparams.n_hidden_gener_2], dtype=tf.float32))
        hidden2 = hparams.transfer_fct(tf.matmul(hidden1, w2) + b2)

        w3 = tf.get_variable('w3', initializer=utils.xavier_init(
            hparams.n_hidden_gener_2, hparams.n_input))
        b3 = tf.get_variable('b3', initializer=tf.zeros(
            [hparams.n_input], dtype=tf.float32))
        logits = tf.matmul(hidden2, w3) + b3
        x_reconstr_mean = tf.nn.sigmoid(logits)

    return logits, x_reconstr_mean, b3
def _create_variables(self):
    G_W1 = tf.Variable(xavier_init(self.noise_dim, self.G_hidden_layer[0]))
    G_b1 = tf.Variable(tf.zeros(shape=[self.G_hidden_layer[0]],
                                dtype=tf.float64))
    G_W2 = tf.Variable(xavier_init(self.G_hidden_layer[0],
                                   self.G_hidden_layer[1]))
    G_b2 = tf.Variable(tf.zeros(shape=[self.G_hidden_layer[1]],
                                dtype=tf.float64))
    G_W3 = tf.Variable(xavier_init(self.G_hidden_layer[1], self.input_dim))
    G_b3 = tf.Variable(tf.zeros(shape=[self.input_dim], dtype=tf.float64))
    theta_G = {'G_W1': G_W1, 'G_b1': G_b1, 'G_W2': G_W2, 'G_b2': G_b2,
               'G_W3': G_W3, 'G_b3': G_b3}

    D_W1 = tf.Variable(xavier_init(self.input_dim, self.D_hidden_layer[0]))
    D_b1 = tf.Variable(tf.zeros(shape=[self.D_hidden_layer[0]],
                                dtype=tf.float64))
    D_W2 = tf.Variable(xavier_init(self.D_hidden_layer[0],
                                   self.D_hidden_layer[1]))
    D_b2 = tf.Variable(tf.zeros(shape=[self.D_hidden_layer[1]],
                                dtype=tf.float64))
    D_W3 = tf.Variable(xavier_init(self.D_hidden_layer[1], 1))
    D_b3 = tf.Variable(tf.zeros(shape=[1], dtype=tf.float64))
    theta_D = {'D_W1': D_W1, 'D_b1': D_b1, 'D_W2': D_W2, 'D_b2': D_b2,
               'D_W3': D_W3, 'D_b3': D_b3}

    return theta_G, theta_D
def encoder(hparams, x_ph, scope_name, reuse):
    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()

        w1 = tf.get_variable('w1', initializer=utils.xavier_init(
            hparams.n_input, hparams.n_hidden_recog_1))
        b1 = tf.get_variable('b1', initializer=tf.zeros(
            [hparams.n_hidden_recog_1], dtype=tf.float32))
        hidden1 = hparams.transfer_fct(tf.matmul(x_ph, w1) + b1)

        w2 = tf.get_variable('w2', initializer=utils.xavier_init(
            hparams.n_hidden_recog_1, hparams.n_hidden_recog_2))
        b2 = tf.get_variable('b2', initializer=tf.zeros(
            [hparams.n_hidden_recog_2], dtype=tf.float32))
        hidden2 = hparams.transfer_fct(tf.matmul(hidden1, w2) + b2)

        w3 = tf.get_variable('w3', initializer=utils.xavier_init(
            hparams.n_hidden_recog_2, hparams.n_z))
        b3 = tf.get_variable('b3', initializer=tf.zeros(
            [hparams.n_z], dtype=tf.float32))
        z_mean = tf.matmul(hidden2, w3) + b3

        w4 = tf.get_variable('w4', initializer=utils.xavier_init(
            hparams.n_hidden_recog_2, hparams.n_z))
        b4 = tf.get_variable('b4', initializer=tf.zeros(
            [hparams.n_z], dtype=tf.float32))
        z_log_sigma_sq = tf.matmul(hidden2, w4) + b4

    return z_mean, z_log_sigma_sq
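# Hypothetical glue between encoder() above and the generator() defined
# earlier: sample z with the reparameterization trick so gradients flow
# through z_mean and z_log_sigma_sq. The sampling step is not shown in these
# snippets and is an assumption about the surrounding VAE code; hparams and
# x_ph are assumed to be defined as in the snippets above.
z_mean, z_log_sigma_sq = encoder(hparams, x_ph, 'enc', reuse=False)
eps = tf.random_normal(tf.shape(z_mean), dtype=tf.float32)
z = z_mean + tf.exp(0.5 * z_log_sigma_sq) * eps
logits, x_reconstr_mean, _ = generator(hparams, z, 'gen', reuse=False)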
def _encoder_(hparams, x_ph, scope_name, reuse):
    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()

        w1 = tf.get_variable('w1', initializer=utils.xavier_init(
            hparams.n_input, hparams.n_hidden_recog_1))
        b1 = tf.get_variable('b1', initializer=tf.zeros(
            [hparams.n_hidden_recog_1], dtype=tf.float32))
        hidden1 = hparams.transfer_fct(tf.matmul(x_ph, w1) + b1)

        w2 = tf.get_variable('w2', initializer=utils.xavier_init(
            hparams.n_hidden_recog_1, hparams.n_hidden_recog_2))
        b2 = tf.get_variable('b2', initializer=tf.zeros(
            [hparams.n_hidden_recog_2], dtype=tf.float32))
        hidden2 = hparams.transfer_fct(tf.matmul(hidden1, w2) + b2)

        W3 = tf.get_variable('W3', initializer=utils.xavier_init(
            hparams.n_hidden_recog_2, hparams.grid[-1]))
        B3 = tf.get_variable('B3', initializer=tf.zeros(
            [hparams.grid[-1]], dtype=tf.float32))
        z_mean = tf.matmul(hidden2, W3) + B3

        W4 = tf.get_variable('W4', initializer=utils.xavier_init(
            hparams.n_hidden_recog_2, hparams.grid[-1]))
        B4 = tf.get_variable('B4', initializer=tf.zeros(
            [hparams.grid[-1]], dtype=tf.float32))
        z_log_sigma_sq = tf.matmul(hidden2, W4) + B4

    return z_mean, z_log_sigma_sq
def _create_variables(self, n_features):
    """Create the TensorFlow variables for the model.

    :return: tuple(weights (shape (n_features, n_components)),
                   hidden bias (shape (n_components,)),
                   visible bias (shape (n_features,)))
    """
    W_ = tf.Variable(utils.xavier_init(n_features, self.n_components,
                                       self.xavier_init),
                     name='enc-w')
    bh_ = tf.Variable(tf.zeros([self.n_components]), name='hidden-bias')
    bv_ = tf.Variable(tf.zeros([n_features]), name='visible-bias')
    return W_, bh_, bv_
def PC_GAIN(incomplete_data_x, gain_parameters, data_m):
    '''Impute missing values in incomplete_data_x

    Args:
        - incomplete_data_x: original data with missing values
        - gain_parameters: PC_GAIN network parameters:
            - batch_size: Batch size, 64
            - hint_rate: Hint rate, 0.9
            - alpha: Hyperparameter, 200
            - beta: Hyperparameter, 20
            - lambda_: Hyperparameter, 0.2
            - k: Hyperparameter, 4
            - iterations: Iterations, 10000
            - cluster_species: clustering method ('KM', 'SC', 'AC', 'KMPP')
        - data_m: mask matrix (1 = observed, 0 = missing)

    Returns:
        - imputed_data: imputed data
    '''
    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    beta = gain_parameters['beta']
    lambda_ = gain_parameters['lambda_']
    k = gain_parameters['k']
    iterations = gain_parameters['iterations']
    cluster_species = gain_parameters['cluster_species']

    # Other parameters
    no, dim = incomplete_data_x.shape
    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(incomplete_data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## PC_GAIN architecture
    X = tf.placeholder(tf.float32, shape=[None, dim])
    M = tf.placeholder(tf.float32, shape=[None, dim])
    H = tf.placeholder(tf.float32, shape=[None, dim])
    Z = tf.placeholder(tf.float32, shape=[None, dim])
    Y = tf.placeholder(tf.float32, shape=[None, k])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in the missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    # Classifier variables
    C_W1 = tf.Variable(xavier_init([dim, h_dim]))
    C_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    C_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    C_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    C_W3 = tf.Variable(xavier_init([h_dim, k]))
    C_b3 = tf.Variable(tf.zeros(shape=[k]))
    theta_C = [C_W1, C_b1, C_W2, C_b2, C_W3, C_b3]

    ## PC_GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob, D_logit

    # Classifier (the neural-network classifier mentioned in the paper)
    def classer(feature):
        C_h1 = tf.nn.relu(tf.matmul(feature, C_W1) + C_b1)
        C_h2 = tf.nn.relu(tf.matmul(C_h1, C_W2) + C_b2)
        C_h3 = tf.matmul(C_h2, C_W3) + C_b3
        C_prob = tf.nn.softmax(C_h3)
        return C_prob

    ## PC_GAIN structure
    # Generator
    G_sample = generator(X, M)
    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)
    # Discriminator
    D_prob, D_logit = discriminator(Hat_X, H)

    ## PC_GAIN loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
                                  + (1 - M) * tf.log(1. - D_prob + 1e-8))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))
    G_loss_with_C = -tf.reduce_mean(Y * tf.log(Y + 1e-8))
    MSE_loss = tf.reduce_mean(
        (M * X - M * G_sample) * (M * X - M * G_sample)) / tf.reduce_mean(M)
    D_loss = D_loss_temp
    G_loss_pre = G_loss_temp + alpha * MSE_loss
    G_loss = G_loss_temp + alpha * MSE_loss + beta * G_loss_with_C

    ## PC_GAIN solvers
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver_pre = tf.train.AdamOptimizer().minimize(G_loss_pre,
                                                     var_list=theta_G)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        ## Select pre-training data: rank samples by missing-value count
        loss_rate = []
        for i in range(no):
            index = 0
            for j in range(dim):
                if data_m[i, j] == 0:
                    index = index + 1
            loss_rate.append([index, i])
        loss_rate = sorted(loss_rate, key=(lambda x: x[0]))
        no_x_L = int(no * lambda_)
        index_x_L = []
        for i in range(no_x_L):
            index_x_L.append(loss_rate[i][1])
        norm_data_x_L = norm_data_x[index_x_L, :]
        data_m_L = data_m[index_x_L, :]

        ## Pre-training
        print('...Pre-training')
        for it in tqdm(range(int(iterations * 0.7))):
            batch_idx = sample_batch_index(no_x_L, batch_size)
            X_mb = norm_data_x_L[batch_idx, :]
            M_mb = data_m_L[batch_idx, :]
            Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
            H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
            H_mb = M_mb * H_mb_temp
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
            _, D_loss_curr, D_logit_curr, D_prob_curr = sess.run(
                [D_solver, D_loss_temp, D_logit, D_prob],
                feed_dict={M: M_mb, X: X_mb, H: H_mb})
            _, G_loss_curr, MSE_loss_curr = sess.run(
                [G_solver_pre, G_loss_temp, MSE_loss],
                feed_dict={X: X_mb, M: M_mb, H: H_mb})

        # Impute the pre-training subset with the pre-trained generator
        Z_mb = uniform_sampler(0, 0.01, no_x_L, dim)
        M_mb = data_m_L
        X_mb = norm_data_x_L
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
        imputed_data_L = sess.run([G_sample],
                                  feed_dict={X: X_mb, M: M_mb})[0]
        imputed_data_L = data_m_L * norm_data_x_L \
            + (1 - data_m_L) * imputed_data_L

        ## Select among the different clustering methods
        if cluster_species == 'KM':
            data_c, data_class = KM(imputed_data_L, k)
        elif cluster_species == 'SC':
            data_c, data_class = SC(imputed_data_L, k)
        elif cluster_species == 'AC':
            data_c, data_class = AC(imputed_data_L, k)
        elif cluster_species == 'KMPP':
            data_c, data_class = KMPP(imputed_data_L, k)
        else:
            exit('Unknown clustering method: ' + str(cluster_species))

        ## Train a multi-class SVM on the pseudo-labels.
        ## You can also choose other classifiers,
        ## such as the neural-network classifier mentioned in the paper.
        coder = preprocessing.OneHotEncoder()
        model = svm.SVC(kernel="linear", decision_function_shape="ovo")
        coder.fit(data_class.reshape(-1, 1))
        model.fit(imputed_data_L, data_class)

        ## Update the generator G and the discriminator D.
        ## To avoid the effects of pre-training,
        ## you can also choose to reinitialize the generator parameters.
        for it in tqdm(range(iterations)):
            batch_idx = sample_batch_index(no, batch_size)
            X_mb = norm_data_x[batch_idx, :]
            M_mb = data_m[batch_idx, :]
            Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
            H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
            H_mb = M_mb * H_mb_temp
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
            _, D_loss_curr, D_logit_curr, D_prob_curr = sess.run(
                [D_solver, D_loss_temp, D_logit, D_prob],
                feed_dict={M: M_mb, X: X_mb, H: H_mb})
            ## Introduce pseudo-label supervision
            Hat_X_curr = sess.run(Hat_X,
                                  feed_dict={X: X_mb, M: M_mb, H: H_mb})
            y_pred = model.predict(Hat_X_curr)
            sample_prob = coder.transform(y_pred.reshape(-1, 1)).toarray()
            _, G_loss_curr, MSE_loss_curr, G_loss_with_C_curr = sess.run(
                [G_solver, G_loss_temp, MSE_loss, G_loss_with_C],
                feed_dict={X: X_mb, M: M_mb, H: H_mb, Y: sample_prob})

        ## Return imputed data
        Z_mb = uniform_sampler(0, 0.01, no, dim)
        M_mb = data_m
        X_mb = norm_data_x
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
        imputed_data = sess.run([G_sample],
                                feed_dict={X: X_mb, M: M_mb})[0]
        imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data
        imputed_data = renormalization(imputed_data, norm_parameters)
        return imputed_data
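# Hypothetical usage sketch for PC_GAIN. The hyperparameter values mirror
# the defaults listed in the docstring; the random data below is made up
# purely for illustration.
demo_x = np.random.uniform(size=(1000, 10))
demo_x[np.random.uniform(size=demo_x.shape) < 0.2] = np.nan  # 20% missing
demo_m = (1 - np.isnan(demo_x)).astype(float)
demo_params = {'batch_size': 64, 'hint_rate': 0.9, 'alpha': 200, 'beta': 20,
               'lambda_': 0.2, 'k': 4, 'iterations': 10000,
               'cluster_species': 'KM'}
demo_imputed = PC_GAIN(demo_x, demo_params, demo_m)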
def generator(z, window, num_channels, training=False, reuse=None):
    """Model function of the 1D GAN generator."""
    # Find the dense feature-vector size according to the generated window
    # size and convolution strides (note that if you change the convolution
    # padding or the number of convolution layers, you will have to change
    # this value too).
    stride = 2
    kernel_size = 4
    activation_fn = tf.nn.leaky_relu

    # Dimension of the output after 4 transposed convolutions
    # (VALID padding) on a 1D window
    def get_upconv_output_dim(in_dim):
        return (in_dim - kernel_size) // stride + 1

    dense_window_size = get_upconv_output_dim(
        get_upconv_output_dim(
            get_upconv_output_dim(get_upconv_output_dim(window))))
    reuse_batchnorm = reuse

    # Fully connected layers
    dense = tf.layers.dense(inputs=z, units=1024, name='dense1',
                            kernel_initializer=utils.xavier_init('relu'),
                            activation=activation_fn, reuse=reuse)
    dense = tf.layers.dense(inputs=dense,
                            units=dense_window_size * 8 * CAPACITY,
                            name='dense2',
                            kernel_initializer=utils.xavier_init('relu'),
                            reuse=reuse)
    dense = activation_fn(
        tf.layers.batch_normalization(dense, name='dense2_bn',
                                      training=training,
                                      reuse=reuse_batchnorm))
    dense = tf.reshape(dense,
                       shape=[-1, dense_window_size, 1, 8 * CAPACITY])

    # Deconvolution layers (we use tf.layers.conv2d_transpose as there is no
    # conv1d_transpose implementation in TensorFlow for now)
    upconv = tf.layers.conv2d_transpose(
        inputs=dense, filters=8 * CAPACITY, kernel_size=(kernel_size, 1),
        strides=(stride, 1), padding='valid', name='upconv0',
        kernel_initializer=utils.xavier_init('relu'), reuse=reuse)
    upconv = activation_fn(
        tf.layers.batch_normalization(upconv, name='upconv0_bn',
                                      training=training,
                                      reuse=reuse_batchnorm))
    upconv = tf.layers.conv2d_transpose(
        inputs=upconv, filters=4 * CAPACITY, kernel_size=(kernel_size, 1),
        strides=(stride, 1), padding='valid', name='upconv1',
        kernel_initializer=utils.xavier_init('relu'), reuse=reuse)
    upconv = activation_fn(
        tf.layers.batch_normalization(upconv, name='upconv1_bn',
                                      training=training,
                                      reuse=reuse_batchnorm))
    upconv = tf.layers.conv2d_transpose(
        inputs=upconv, filters=2 * CAPACITY, kernel_size=(kernel_size, 1),
        strides=(stride, 1), padding='valid', name='upconv2',
        kernel_initializer=utils.xavier_init('relu'), reuse=reuse)
    upconv = activation_fn(
        tf.layers.batch_normalization(upconv, name='upconv2_bn',
                                      training=training,
                                      reuse=reuse_batchnorm))
    upconv = tf.layers.conv2d_transpose(
        inputs=upconv, filters=num_channels, kernel_size=(kernel_size, 1),
        strides=(stride, 1), padding='valid', name='upconv3',
        kernel_initializer=utils.xavier_init(''), reuse=reuse)
    upconv = tf.layers.batch_normalization(upconv, name='upconv3_bn',
                                           training=training,
                                           reuse=reuse_batchnorm)
    return tf.squeeze(upconv, axis=2, name='output')
def gain(miss_data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
        - miss_data_x: missing data
        - gain_parameters: GAIN network parameters:
            - batch_size: Batch size
            - alpha: Hyperparameter
            - iterations: Iterations

    Returns:
        - imputed_data: imputed data
    '''
    # Define mask matrix
    m = 1 - np.isnan(miss_data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    # hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = miss_data_x.shape
    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(miss_data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    tf1.reset_default_graph()
    # Input placeholders
    X = tf1.placeholder(tf.float32, shape=[None, dim])  # Data vector
    M = tf1.placeholder(tf.float32, shape=[None, dim])  # Mask vector
    # H = tf.placeholder(tf.float32, shape=[None, dim])  # Hint vector
    B = tf1.placeholder(tf.float32, shape=[None, dim])  # B vector

    # Discriminator variables
    D_W1 = tf1.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf1.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf1.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf1.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf1.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf1.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in the missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)
    # Hint vector derived from B and the mask
    H = B * M + 0.5 * (1 - B)
    D_prob_g = discriminator(X * M + G_sample * (1 - M), H)
    fake_X = tf1.placeholder(tf.float32, shape=[None, dim])
    # Combine with observed data
    Hat_X = X * M + fake_X * (1 - M)
    # Discriminator
    D_prob = discriminator(Hat_X, H)

    # GAIN loss
    # B-weighted alternative:
    # D_loss_temp = -tf.reduce_mean((1-B)*(M * tf.log(D_prob + 1e-8) \
    #     + (1-M) * tf.log(1. - D_prob + 1e-8))) / tf.reduce_mean(1-B)
    # G_loss_temp = -tf.reduce_mean((1-B)*(1-M) * tf.log(D_prob + 1e-8)) \
    #     / tf.reduce_mean(1-B)
    D_loss_temp = -tf.reduce_mean(M * tf1.log(D_prob + 1e-8)
                                  + (1 - M) * tf1.log(1. - D_prob + 1e-8))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf1.log(D_prob_g + 1e-8))
    MSE_loss = tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)
    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solvers
    D_solver = tf1.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf1.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    sess = tf1.Session()
    sess.run(tf1.global_variables_initializer())

    gen_new_params = []
    params = []
    for param in theta_G:
        params.append(sess.run(param))
    gen_new_params.append(params)

    for it in range(iterations):
        gen_old_params = copy.deepcopy(gen_new_params)

        # Sample batch
        batch_idx = sample_batch_index(no, batch_size)
        X_mb = norm_data_x[batch_idx, :]
        M_mb = m[batch_idx, :]
        # Sample random vectors
        Z_mb = uniform_sampler(0.0, 0.01, batch_size, dim)
        # Sample the B vectors used to build hints
        B_mb = sample_batch_binary(dim, batch_size)
        # Combine random vectors with observed vectors
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
        f_mb = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
        # Discriminator step uses the *new* generator parameters
        for w in range(len(theta_G)):
            theta_G[w].load(gen_new_params[0][w], sess)
        _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                  feed_dict={X: X_mb, M: M_mb,
                                             fake_X: f_mb, B: B_mb})

        batch_idx = sample_batch_index(no, batch_size)
        X_mb = norm_data_x[batch_idx, :]
        M_mb = m[batch_idx, :]
        Z_mb = uniform_sampler(0.0, 0.01, batch_size, dim)
        B_mb = sample_batch_binary(dim, batch_size)
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
        # Generator step restarts from the *old* generator parameters
        for w in range(len(theta_G)):
            theta_G[w].load(gen_old_params[0][w], sess)
        _, G_loss_curr, MSE_loss_curr = \
            sess.run([G_solver, G_loss_temp, MSE_loss],
                     feed_dict={X: X_mb, M: M_mb, B: B_mb})
        params = []
        for param in theta_G:
            params.append(sess.run(param))
        gen_new_params[0] = params

    ## Return imputed data
    Z_mb = uniform_sampler(0.0, 0.01, no, dim)
    M_mb = m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
    for w in range(len(theta_G)):
        theta_G[w].load(gen_new_params[0][w], sess)
    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
    sess.close()
    imputed_data = m * norm_data_x + (1 - m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)
    # Rounding
    imputed_data = rounding(imputed_data, miss_data_x)
    return imputed_data
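# Toy illustration of the hint construction H = B*M + 0.5*(1-B) used above.
# Where B is 1, the discriminator is told the true mask bit; where B is 0,
# it sees the non-committal value 0.5. The numbers are made up for the demo.
import numpy as np
M_demo = np.array([[1., 0., 1., 1.]])   # 1 = observed, 0 = imputed
B_demo = np.array([[1., 1., 0., 0.]])   # which mask bits are revealed
H_demo = B_demo * M_demo + 0.5 * (1 - B_demo)
print(H_demo)  # [[1.  0.  0.5 0.5]]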
def egain(miss_data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
        - miss_data_x: missing data
        - gain_parameters: GAIN network parameters:
            - batch_size: Batch size
            - alpha: Hyperparameter
            - iterations: Iterations

    Returns:
        - imputed_data: imputed data
    '''
    # Define mask matrix
    m = 1 - np.isnan(miss_data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    # hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']
    loss_type = ['trickLogD', 'minimax', 'ls']
    nloss = 3
    beta = 1.0
    ncandi = 1  # number of candidate generators
    nbest = 1   # number of retained best generators
    nD = 1      # number of discriminator updates per generator update

    # Other parameters
    no, dim = miss_data_x.shape
    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(miss_data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    # tf.reset_default_graph()
    tf.compat.v1.get_default_graph()
    # Input placeholders
    X = tf1.placeholder(tf.float32, shape=[None, dim])  # Data vector
    M = tf1.placeholder(tf.float32, shape=[None, dim])  # Mask vector
    B = tf1.placeholder(tf.float32, shape=[None, dim])  # B vector

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in the missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Hint vector
    H = B * M + 0.5 * (1 - B)
    # Generator
    G_sample = generator(X, M)
    D_prob_g = discriminator(X * M + G_sample * (1 - M), H)
    # Combine with observed data
    fake_X = tf1.placeholder(tf.float32, shape=[None, dim])
    Hat_X = X * M + fake_X * (1 - M)

    # D loss
    D_prob = discriminator(Hat_X, H)
    D_loss_temp = -tf.reduce_mean(
        M * tf1.log(D_prob + 1e-8)
        + (1 - M) * tf1.log(1. - D_prob + 1e-8))
    D_loss = D_loss_temp
    D_solver = tf1.train.AdamOptimizer(
        learning_rate=0.002, beta1=0.5,
        beta2=0.99).minimize(D_loss, var_list=theta_D)

    # G losses: one variant per mutation type
    G_loss_logD = -tf.reduce_mean((1 - M) * 1 / 2 * tf1.log(D_prob_g + 1e-8))
    G_loss_minimax = tf.reduce_mean(
        (1 - M) * 1 / 2 * tf1.log(1. - D_prob_g + 1e-8))
    G_loss_ls = tf1.reduce_mean((1 - M) * tf1.square(D_prob_g - 1))
    MSE_loss = tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)
    G_loss_logD_all = G_loss_logD + alpha * MSE_loss
    G_loss_minimax_all = G_loss_minimax + alpha * MSE_loss
    G_loss_ls_all = G_loss_ls + alpha * MSE_loss

    # One solver per generator loss variant
    G_solver_logD = tf1.train.AdamOptimizer(
        learning_rate=0.002, beta1=0.5,
        beta2=0.99).minimize(G_loss_logD_all, var_list=theta_G)
    G_solver_minimax = tf1.train.AdamOptimizer(
        learning_rate=0.002, beta1=0.5,
        beta2=0.99).minimize(G_loss_minimax_all, var_list=theta_G)
    G_solver_ls = tf1.train.AdamOptimizer(
        learning_rate=0.002, beta1=0.5,
        beta2=0.99).minimize(G_loss_ls_all, var_list=theta_G)

    # Fitness function: quality score plus diversity score
    Fq_score = tf.reduce_mean((1 - M) * D_prob)
    Fd_score = -tf1.log(
        tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[0])))
        + tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[1])))
        + tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[2])))
        + tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[3])))
        + tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[4])))
        + tf.reduce_sum(tf.square(tf.gradients(D_loss_temp, theta_D[5]))))

    ## Iterations
    sess = tf1.Session()
    gen_new_params = []
    fitness_best = np.zeros(nbest)
    fitness_candi = np.zeros(ncandi)

    for it in tqdm(range(iterations)):
        if it == 0:
            # Initialize the candidate generators
            for can_i in range(0, ncandi):
                sess.run(tf1.global_variables_initializer())
                batch_idx = sample_batch_index(no, batch_size)
                X_mb = norm_data_x[batch_idx, :]
                M_mb = m[batch_idx, :]
                Z_mb = uniform_sampler(0.0, 0.01, batch_size, dim)
                X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
                B_mb = sample_batch_binary(dim, batch_size)
                gen_samples = sess.run([G_sample],
                                       feed_dict={X: X_mb, M: M_mb})[0]
                fq_score, fd_score = sess.run(
                    [Fq_score, Fd_score],
                    feed_dict={X: X_mb, M: M_mb,
                               fake_X: gen_samples, B: B_mb})
                fitness = fq_score + beta * fd_score
                fitness_best[can_i] = fitness
                params = []
                for param in theta_G:
                    params.append(sess.run(param))
                gen_new_params.append(params)
            gen_best_params = copy.deepcopy(gen_new_params)
        else:
            # Generate new candidates: mutate each candidate with each loss
            gen_old_params = copy.deepcopy(gen_new_params)
            for can_i in range(ncandi):
                for type_i in range(nloss):
                    batch_idx = sample_batch_index(no, batch_size)
                    X_mb = norm_data_x[batch_idx, :]
                    M_mb = m[batch_idx, :]
                    Z_mb = uniform_sampler(0.0, 0.01, batch_size, dim)
                    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
                    B_mb = sample_batch_binary(dim, batch_size)
                    # Load the candidate's weights and apply one update
                    for i in range(len(theta_G)):
                        theta_G[i].load(gen_old_params[can_i][i], sess)
                    loss = loss_type[type_i]
                    if loss == 'trickLogD':
                        sess.run([G_solver_minimax],
                                 feed_dict={X: X_mb, M: M_mb, B: B_mb})
                    elif loss == 'minimax':
                        sess.run([G_solver_logD],
                                 feed_dict={X: X_mb, M: M_mb, B: B_mb})
                    elif loss == 'ls':
                        sess.run([G_solver_ls],
                                 feed_dict={X: X_mb, M: M_mb, B: B_mb})
                    # Calculate the fitness score of the mutated candidate
                    gen_samples = sess.run([G_sample],
                                           feed_dict={X: X_mb, M: M_mb})[0]
                    fq_score, fd_score = sess.run(
                        [Fq_score, Fd_score],
                        feed_dict={X: X_mb, M: M_mb,
                                   fake_X: gen_samples, B: B_mb})
                    fitness = fq_score + beta * fd_score
                    # Keep track of the best generators seen so far
                    gap = fitness_best - fitness
                    if min(gap) < 0:
                        idx_replace = np.argmin(gap)
                        params = []
                        for param in theta_G:
                            params.append(sess.run(param))
                        gen_best_params[idx_replace] = params
                        fitness_best[idx_replace] = fitness
                    # Fill or update the candidate pool
                    if can_i * nloss + type_i < ncandi:
                        idx = can_i * nloss + type_i
                        params = []
                        for param in theta_G:
                            params.append(sess.run(param))
                        gen_new_params[idx] = params
                        fitness_candi[idx] = fitness
                    else:
                        gap = fitness_candi - fitness
                        if min(gap) < 0:
                            idx_replace = np.argmin(gap)
                            params = []
                            for param in theta_G:
                                params.append(sess.run(param))
                            gen_new_params[idx_replace] = params
                            fitness_candi[idx_replace] = fitness

        # Train D
        for i in range(nD):
            batch_idx = sample_batch_index(no, batch_size)
            X_mb = norm_data_x[batch_idx, :]
            M_mb = m[batch_idx, :]
            Z_mb = uniform_sampler(0.0, 0.01, batch_size, dim)
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
            B_mb = sample_batch_binary(dim, batch_size)
            # Impute a slice of the batch with each candidate generator
            for can_i in range(ncandi):
                for w in range(len(theta_G)):
                    theta_G[w].load(gen_new_params[can_i][w], sess)
                if can_i == ncandi - 1:
                    gen_samples_cani = sess.run(
                        [G_sample],
                        feed_dict={
                            X: X_mb[can_i * batch_size // ncandi:],
                            M: M_mb[can_i * batch_size // ncandi:]
                        })[0]
                else:
                    gen_samples_cani = sess.run(
                        [G_sample],
                        feed_dict={
                            X: X_mb[can_i * batch_size // ncandi:
                                    (can_i + 1) * batch_size // ncandi],
                            M: M_mb[can_i * batch_size // ncandi:
                                    (can_i + 1) * batch_size // ncandi]
                        })[0]
                if can_i == 0:
                    gen_samples = gen_samples_cani
                else:
                    gen_samples = np.append(gen_samples, gen_samples_cani,
                                            axis=0)
            sess.run([D_solver],
                     feed_dict={X: X_mb, M: M_mb,
                                fake_X: gen_samples, B: B_mb})

    ## Return imputed data using the fittest generator
    idx = np.argmax(fitness_best)
    for i in range(len(theta_G)):
        theta_G[i].load(gen_best_params[idx][i], sess)
    Z_mb = uniform_sampler(0.0, 0.01, no, dim)
    M_mb = m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
    sess.close()
    imputed_data = m * norm_data_x + (1 - m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)
    # Rounding
    imputed_data = rounding(imputed_data, miss_data_x)
    return imputed_data
def _create_graph(self, n_features):
    """Creates the computational graph.

    :type n_features: int
    :param n_features: Number of features.
    :return: self
    """
    # Symbolic variables
    self.x = tf.placeholder('float', [None, n_features], name='x-input')
    self.x_corr = tf.placeholder('float', [None, n_features],
                                 name='x-corr-input')
    self.keep_prob = tf.placeholder('float')

    # Biases
    self.bh_ = tf.Variable(tf.zeros([self.n_components]),
                           name='hidden-bias')
    self.bv_ = tf.Variable(tf.zeros([n_features]), name='visible-bias')

    # Weights
    self.Wf_ = tf.Variable(utils.xavier_init(n_features,
                                             self.n_components,
                                             self.xavier_init),
                           name='enc-w')
    if self.tied_weights:
        self.Wg_ = tf.transpose(self.Wf_)
    else:
        self.Wg_ = tf.Variable(utils.xavier_init(n_features,
                                                 self.n_components,
                                                 self.xavier_init),
                               name='dec-w')

    # Encoding
    with tf.name_scope("Wf_x_bh"):
        if self.enc_act_func == 'sigmoid':
            self.y = tf.nn.dropout(
                tf.nn.sigmoid(tf.matmul(self.x_corr, self.Wf_) + self.bh_),
                self.keep_prob)
        elif self.enc_act_func == 'tanh':
            self.y = tf.nn.dropout(
                tf.nn.tanh(tf.matmul(self.x_corr, self.Wf_) + self.bh_),
                self.keep_prob)
        else:  # cannot be reached, just for completeness
            self.y = None

    # Decoding
    with tf.name_scope("Wg_y_bv"):
        if self.dec_act_func == 'sigmoid':
            self.z = tf.nn.sigmoid(tf.matmul(self.y, self.Wg_) + self.bv_)
        elif self.dec_act_func == 'tanh':
            self.z = tf.nn.tanh(tf.matmul(self.y, self.Wg_) + self.bv_)
        elif self.dec_act_func == 'none':
            self.z = tf.matmul(self.y, self.Wg_) + self.bv_
        else:  # cannot be reached, just for completeness
            self.z = None

    # Summary ops to collect data
    _ = tf.histogram_summary("enc_weights", self.Wf_)
    _ = tf.histogram_summary("hid_biases", self.bh_)
    _ = tf.histogram_summary("vis_biases", self.bv_)
    _ = tf.histogram_summary("y", self.y)
    _ = tf.histogram_summary("z", self.z)
    if not self.tied_weights:
        _ = tf.histogram_summary("dec_weights", self.Wg_)

    # Cost
    with tf.name_scope("cost"):
        if self.loss_func == 'cross_entropy':
            self.cost = -tf.reduce_sum(self.x * tf.log(self.z))
            _ = tf.scalar_summary("cross_entropy", self.cost)
        elif self.loss_func == 'mean_squared':
            self.cost = tf.sqrt(tf.reduce_mean(tf.square(self.x - self.z)))
            _ = tf.scalar_summary("mean_squared", self.cost)
        else:  # cannot be reached, just for completeness
            self.cost = None

    with tf.name_scope("train"):
        if self.opt == 'gradient_descent':
            self.train_step = tf.train.GradientDescentOptimizer(
                self.learning_rate).minimize(self.cost)
        elif self.opt == 'ada_grad':
            self.train_step = tf.train.AdagradOptimizer(
                self.learning_rate).minimize(self.cost)
        elif self.opt == 'momentum':
            self.train_step = tf.train.MomentumOptimizer(
                self.learning_rate, self.momentum).minimize(self.cost)
        else:  # cannot be reached, just for completeness
            self.train_step = None
def ganite(train_x, train_t, train_y, test_x, parameters):
    """GANITE module.

    Args:
        - train_x: features in training data
        - train_t: treatments in training data
        - train_y: observed outcomes in training data
        - test_x: features in testing data
        - parameters: GANITE network parameters
            - h_dim: hidden dimensions
            - batch_size: the number of samples in each batch
            - iterations: the number of iterations for training
            - alpha: hyper-parameter to adjust the loss importance

    Returns:
        - test_y_hat: estimated potential outcomes for the testing set
    """
    # Parameters
    h_dim = parameters['h_dim']
    batch_size = parameters['batch_size']
    iterations = parameters['iteration']
    alpha = parameters['alpha']
    no, dim = train_x.shape

    # Reset graph
    tf.reset_default_graph()

    ## 1. Placeholders
    # 1.1. Feature (X)
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # 1.2. Treatment (T)
    T = tf.placeholder(tf.float32, shape=[None, 1])
    # 1.3. Outcome (Y)
    Y = tf.placeholder(tf.float32, shape=[None, 1])

    ## 2. Variables
    # 2.1 Generator
    # Inputs: X + Treatment + Factual outcome
    G_W1 = tf.Variable(xavier_init([(dim + 2), h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    # Multi-task outputs for increasing the flexibility of the generator
    G_W31 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b31 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W32 = tf.Variable(xavier_init([h_dim, 1]))
    G_b32 = tf.Variable(tf.zeros(shape=[1]))  # Output: estimated outcome when t = 0
    G_W41 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b41 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W42 = tf.Variable(xavier_init([h_dim, 1]))
    G_b42 = tf.Variable(tf.zeros(shape=[1]))  # Output: estimated outcome when t = 1
    # Generator variables
    theta_G = [G_W1, G_W2, G_W31, G_W32, G_W41, G_W42,
               G_b1, G_b2, G_b31, G_b32, G_b41, G_b42]

    # 2.2 Discriminator
    # Inputs: X + Factual outcomes + Estimated counterfactual outcomes
    D_W1 = tf.Variable(xavier_init([(dim + 2), h_dim]))
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, 1]))
    D_b3 = tf.Variable(tf.zeros(shape=[1]))
    # Discriminator variables
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # 2.3 Inference network
    I_W1 = tf.Variable(xavier_init([(dim), h_dim]))  # Inputs: X
    I_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    I_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    I_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    # Multi-task outputs for increasing the flexibility of the inference network
    I_W31 = tf.Variable(xavier_init([h_dim, h_dim]))
    I_b31 = tf.Variable(tf.zeros(shape=[h_dim]))
    I_W32 = tf.Variable(xavier_init([h_dim, 1]))
    I_b32 = tf.Variable(tf.zeros(shape=[1]))  # Output: estimated outcome when t = 0
    I_W41 = tf.Variable(xavier_init([h_dim, h_dim]))
    I_b41 = tf.Variable(tf.zeros(shape=[h_dim]))
    I_W42 = tf.Variable(xavier_init([h_dim, 1]))
    I_b42 = tf.Variable(tf.zeros(shape=[1]))  # Output: estimated outcome when t = 1
    # Inference network variables
    theta_I = [I_W1, I_W2, I_W31, I_W32, I_W41, I_W42,
               I_b1, I_b2, I_b31, I_b32, I_b41, I_b42]

    ## 3. Definitions of generator, discriminator and inference networks
    # 3.1 Generator
    def generator(x, t, y):
        """Generator function.

        Args:
            - x: features
            - t: treatments
            - y: observed labels

        Returns:
            - G_logit: estimated potential outcomes
        """
        # Concatenate features, treatments, and observed labels as input
        inputs = tf.concat(axis=1, values=[x, t, y])
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # Estimated outcome if t = 0
        G_h31 = tf.nn.relu(tf.matmul(G_h2, G_W31) + G_b31)
        G_logit1 = tf.matmul(G_h31, G_W32) + G_b32
        # Estimated outcome if t = 1
        G_h41 = tf.nn.relu(tf.matmul(G_h2, G_W41) + G_b41)
        G_logit2 = tf.matmul(G_h41, G_W42) + G_b42
        G_logit = tf.concat(axis=1, values=[G_logit1, G_logit2])
        return G_logit

    # 3.2. Discriminator
    def discriminator(x, t, y, hat_y):
        """Discriminator function.

        Args:
            - x: features
            - t: treatments
            - y: observed labels
            - hat_y: estimated counterfactuals

        Returns:
            - D_logit: estimated potential outcomes
        """
        # Concatenate factual & counterfactual outcomes
        input0 = (1. - t) * y + t * tf.reshape(hat_y[:, 0], [-1, 1])  # if t = 0
        input1 = t * y + (1. - t) * tf.reshape(hat_y[:, 1], [-1, 1])  # if t = 1
        inputs = tf.concat(axis=1, values=[x, input0, input1])
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        return D_logit

    # 3.3. Inference network
    def inference(x):
        """Inference function.

        Args:
            - x: features

        Returns:
            - I_logit: estimated potential outcomes
        """
        I_h1 = tf.nn.relu(tf.matmul(x, I_W1) + I_b1)
        I_h2 = tf.nn.relu(tf.matmul(I_h1, I_W2) + I_b2)
        # Estimated outcome if t = 0
        I_h31 = tf.nn.relu(tf.matmul(I_h2, I_W31) + I_b31)
        I_logit1 = tf.matmul(I_h31, I_W32) + I_b32
        # Estimated outcome if t = 1
        I_h41 = tf.nn.relu(tf.matmul(I_h2, I_W41) + I_b41)
        I_logit2 = tf.matmul(I_h41, I_W42) + I_b42
        I_logit = tf.concat(axis=1, values=[I_logit1, I_logit2])
        return I_logit

    ## Structure
    # 1. Generator
    Y_tilde_logit = generator(X, T, Y)
    Y_tilde = tf.nn.sigmoid(Y_tilde_logit)
    # 2. Discriminator
    D_logit = discriminator(X, T, Y, Y_tilde)
    # 3. Inference network
    Y_hat_logit = inference(X)
    Y_hat = tf.nn.sigmoid(Y_hat_logit)

    ## Loss functions
    # 1. Discriminator loss
    D_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=T, logits=D_logit))
    # 2. Generator loss
    G_loss_GAN = -D_loss
    G_loss_Factual = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=Y,
            logits=(T * tf.reshape(Y_tilde_logit[:, 1], [-1, 1])
                    + (1. - T) * tf.reshape(Y_tilde_logit[:, 0], [-1, 1]))))
    G_loss = G_loss_Factual + alpha * G_loss_GAN
    # 3. Inference loss
    I_loss1 = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=(T) * Y + (1 - T) * tf.reshape(Y_tilde[:, 1], [-1, 1]),
            logits=tf.reshape(Y_hat_logit[:, 1], [-1, 1])))
    I_loss2 = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=(1 - T) * Y + (T) * tf.reshape(Y_tilde[:, 0], [-1, 1]),
            logits=tf.reshape(Y_hat_logit[:, 0], [-1, 1])))
    I_loss = I_loss1 + I_loss2

    ## Solvers
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    I_solver = tf.train.AdamOptimizer().minimize(I_loss, var_list=theta_I)

    ## GANITE training
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # 1. Train generator and discriminator
    print('Start training Generator and Discriminator')
    for it in range(iterations):
        for _ in range(2):
            # Discriminator training
            X_mb, T_mb, Y_mb = batch_generator(train_x, train_t, train_y,
                                               batch_size)
            _, D_loss_curr = sess.run([D_solver, D_loss],
                                      feed_dict={X: X_mb, T: T_mb, Y: Y_mb})
        # Generator training
        X_mb, T_mb, Y_mb = batch_generator(train_x, train_t, train_y,
                                           batch_size)
        _, G_loss_curr = sess.run([G_solver, G_loss],
                                  feed_dict={X: X_mb, T: T_mb, Y: Y_mb})
        # Checkpoint
        if it % 1000 == 0:
            print('Iteration: ' + str(it) + '/' + str(iterations)
                  + ', D loss: ' + str(np.round(D_loss_curr, 4))
                  + ', G loss: ' + str(np.round(G_loss_curr, 4)))

    # 2. Train inference network
    print('Start training Inference network')
    for it in range(iterations):
        X_mb, T_mb, Y_mb = batch_generator(train_x, train_t, train_y,
                                           batch_size)
        _, I_loss_curr = sess.run([I_solver, I_loss],
                                  feed_dict={X: X_mb, T: T_mb, Y: Y_mb})
        # Checkpoint
        if it % 1000 == 0:
            print('Iteration: ' + str(it) + '/' + str(iterations)
                  + ', I loss: ' + str(np.round(I_loss_curr, 4)))

    ## Generate the potential outcomes
    test_y_hat = sess.run(Y_hat, feed_dict={X: test_x})
    return test_y_hat
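# Hypothetical usage sketch for ganite(); the synthetic data and the
# parameter values are made up for illustration. Note the 'iteration' key
# (singular), which is what the function actually reads.
rng = np.random.default_rng(0)
demo_train_x = rng.normal(size=(1000, 30)).astype(np.float32)
demo_train_t = rng.integers(0, 2, size=(1000, 1)).astype(np.float32)
demo_train_y = rng.integers(0, 2, size=(1000, 1)).astype(np.float32)
demo_test_x = rng.normal(size=(200, 30)).astype(np.float32)
demo_params = {'h_dim': 30, 'batch_size': 128,
               'iteration': 10000, 'alpha': 1.0}
test_y_hat = ganite(demo_train_x, demo_train_t, demo_train_y,
                    demo_test_x, demo_params)
print(test_y_hat.shape)  # (200, 2): potential outcomes for t = 0 and t = 1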
def build_mode(self):
    """Creates the computational graph.

    :return: self
    """
    n_features = 3072

    self.input_data = tf.placeholder('float', [None, n_features],
                                     name='x-input')
    self.input_data_corr = tf.placeholder('float', [None, n_features],
                                          name='x-corr-input')

    self.W_ = tf.Variable(utils.xavier_init(n_features, self.n_components,
                                            self.xavier_init),
                          name='enc-w')
    self.bh_ = tf.Variable(tf.zeros([self.n_components]),
                           name='hidden-bias')
    self.bv_ = tf.Variable(tf.zeros([n_features]), name='visible-bias')

    # Encode
    with tf.name_scope("W_x_bh"):
        if self.enc_act_func == 'sigmoid':
            self.encode = tf.nn.sigmoid(
                tf.matmul(self.input_data_corr, self.W_) + self.bh_)
        elif self.enc_act_func == 'tanh':
            self.encode = tf.nn.tanh(
                tf.matmul(self.input_data_corr, self.W_) + self.bh_)
        elif self.enc_act_func == 'relu':
            self.encode = tf.nn.relu(
                tf.matmul(self.input_data_corr, self.W_) + self.bh_)
        else:
            self.encode = None

    # Decode
    with tf.name_scope("Wg_y_bv"):
        if self.dec_act_func == 'sigmoid':
            self.decode = tf.nn.sigmoid(
                tf.matmul(self.encode, tf.transpose(self.W_)) + self.bv_)
        elif self.dec_act_func == 'tanh':
            self.decode = tf.nn.tanh(
                tf.matmul(self.encode, tf.transpose(self.W_)) + self.bv_)
        elif self.dec_act_func == 'relu':
            self.decode = tf.nn.relu(
                tf.matmul(self.encode, tf.transpose(self.W_)) + self.bv_)
        elif self.dec_act_func == 'none':
            self.decode = tf.matmul(self.encode,
                                    tf.transpose(self.W_)) + self.bv_
        else:
            self.decode = None

    # Cost function
    with tf.name_scope("cost"):
        if self.loss_func == 'cross_entropy':
            self.cost = -tf.reduce_sum(self.input_data * tf.log(self.decode))
            _ = tf.summary.scalar("cross_entropy", self.cost)
        elif self.loss_func == 'mean_squared':
            self.cost = tf.sqrt(
                tf.reduce_mean(tf.square(self.input_data - self.decode)))
            _ = tf.summary.scalar("mean_squared", self.cost)
        else:
            self.cost = None

    # Train step
    with tf.name_scope("train"):
        if self.opt == 'gradient_descent':
            self.train_step = tf.train.GradientDescentOptimizer(
                self.learning_rate).minimize(self.cost)
        elif self.opt == 'ada_grad':
            self.train_step = tf.train.AdagradOptimizer(
                self.learning_rate).minimize(self.cost)
        elif self.opt == 'momentum':
            self.train_step = tf.train.MomentumOptimizer(
                self.learning_rate, self.momentum).minimize(self.cost)
        elif self.opt == 'adam':
            self.train_step = tf.train.AdamOptimizer(
                self.learning_rate).minimize(self.cost)
        else:
            self.train_step = None
def gain(data_x, feature_name, onehotencoder, ori_data_dim, gain_parameters):
    '''Impute missing values in data_x

    Args:
        - data_x: original data with missing values
        - feature_name: feature name list of the original data
        - onehotencoder: one-hot encoder for this data
        - ori_data_dim: dimensions of the original data
        - gain_parameters: GAIN network parameters:
            - data_name: the file name of the dataset
            - batch_size: Batch size
            - hint_rate: Hint rate
            - alpha: Hyperparameter
            - iterations: Iterations
            - onehot: the number of features for the one-hot encoder
              (starting from the first feature)
            - predict: option for prediction mode

    Returns:
        - imputed_data: imputed data
    '''
    # Define mask matrix
    data_m = 1 - np.isnan(data_x)

    # System parameters
    data_name = gain_parameters['data_name']
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']
    onehot = gain_parameters['onehot']
    predict = gain_parameters['predict']

    # Model path
    model_path = 'model/' + data_name

    # Other parameters
    no, dim = data_x.shape
    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    # Input placeholders
    X = tf.placeholder(tf.float32, shape=[None, dim], name='X')  # Data vector
    M = tf.placeholder(tf.float32, shape=[None, dim], name='M')  # Mask vector
    H = tf.placeholder(tf.float32, shape=[None, dim], name='H')  # Hint vector

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]),
                       name='D_W1')  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]), name='D_b1')
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]), name='D_W2')
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]), name='D_b2')
    D_W3 = tf.Variable(xavier_init([h_dim, dim]), name='D_W3')
    D_b3 = tf.Variable(tf.zeros(shape=[dim]),
                       name='D_b3')  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in the missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]), name='G_W1')
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]), name='G_b1')
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]), name='G_W2')
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]), name='G_b2')
    G_W3 = tf.Variable(xavier_init([h_dim, dim]), name='G_W3')
    G_b3 = tf.Variable(tf.zeros(shape=[dim]), name='G_b3')
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)
    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)
    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
                                  + (1 - M) * tf.log(1. - D_prob + 1e-8))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))
    MSE_loss = \
        tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)
    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solvers
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    sess = tf.Session()
    saver = tf.train.Saver()
    if predict is True and os.path.exists(model_path + '.ckpt.meta'):
        print("Model Restore")
        saver.restore(sess, model_path + '.ckpt')
    else:
        sess.run(tf.global_variables_initializer())
        # Start iterations
        for it in tqdm(range(iterations)):
            # Sample batch
            batch_idx = sample_batch_index(no, batch_size)
            X_mb = norm_data_x[batch_idx, :]
            M_mb = data_m[batch_idx, :]
            # Sample random vectors
            Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
            # Sample hint vectors
            H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
            H_mb = M_mb * H_mb_temp
            # Combine random vectors with observed vectors
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

            _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                      feed_dict={M: M_mb, X: X_mb, H: H_mb})
            _, G_loss_curr, MSE_loss_curr = \
                sess.run([G_solver, G_loss_temp, MSE_loss],
                         feed_dict={X: X_mb, M: M_mb, H: H_mb})
        if predict is False:
            save_path = saver.save(sess, model_path + '.ckpt')

    ## Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)
    # Rounding
    imputed_data = rounding(imputed_data, data_x)
    # Reverse encoding
    if onehot > 0:
        imputed_data = reverse_encoding(imputed_data, feature_name,
                                        onehotencoder, onehot, ori_data_dim)
    return imputed_data
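# Hypothetical usage sketch for the gain() variant above. With
# 'predict': False it trains and saves a checkpoint under model/<data_name>;
# with onehot == 0 the reverse-encoding step is skipped, so no feature names
# or encoder are required. The random data below is made up for illustration.
demo_x = np.random.uniform(size=(500, 10))
demo_x[np.random.uniform(size=demo_x.shape) < 0.2] = np.nan  # 20% missing
demo_params = {'data_name': 'demo', 'batch_size': 128, 'hint_rate': 0.9,
               'alpha': 100, 'iterations': 10000,
               'onehot': 0, 'predict': False}
demo_imputed = gain(demo_x, feature_name=None, onehotencoder=None,
                    ori_data_dim=demo_x.shape[1],
                    gain_parameters=demo_params)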
def main_pytorch(hyper_params, gpu_id=None):
    import time

    import torch

    from data import load_data
    from eval import evaluate, eval_ranking
    from utils import load_obj, is_cuda_available
    from utils import load_user_item_counts, xavier_init, log_end_epoch
    from loss import MSELoss

    if hyper_params['model_type'] in ['deepconn', 'deepconn++']:
        from pytorch_models.DeepCoNN import DeepCoNN as Model
    elif hyper_params['model_type'] in ['transnet', 'transnet++']:
        from pytorch_models.TransNet import TransNet as Model
    elif hyper_params['model_type'] in ['NARRE']:
        from pytorch_models.NARRE_modify import NARRE as Model
    elif hyper_params['model_type'] in ['bias_only', 'MF', 'MF_dot']:
        from pytorch_models.MF import MF as Model

    # Load the data readers
    user_count, item_count = load_user_item_counts(hyper_params)
    if hyper_params['model_type'] not in ['bias_only', 'MF', 'MF_dot',
                                          'NeuMF']:
        review_based_model = True
        try:
            from data_fast import load_data_fast
            train_reader, test_reader, val_reader, hyper_params = \
                load_data_fast(hyper_params)
            print("Loaded preprocessed epoch files. "
                  "Should be faster training...")
        except Exception as e:
            print("Tried loading preprocessed epoch files, but failed.")
            print("Please consider running `prep_all_data.sh` to make quick "
                  "data for DeepCoNN/TransNet/NARRE.")
            print("This will save large amounts of run time.")
            print("Loading standard (slower) data...")
            train_reader, test_reader, val_reader, hyper_params = \
                load_data(hyper_params)
    else:
        review_based_model = False
        train_reader, test_reader, val_reader, hyper_params = \
            load_data(hyper_params)

    # Initialize the model
    model = Model(hyper_params)
    if is_cuda_available:
        model = model.cuda()
    xavier_init(model)

    # Train the model
    start_time = time.time()
    model = train_complete(hyper_params, Model, train_reader, val_reader,
                           user_count, item_count, model,
                           review=review_based_model)

    # Calculate MSE on the test set
    print("Calculating MSE on test-set")
    criterion = MSELoss(hyper_params)
    metrics, user_count_mse_map, item_count_mse_map = evaluate(
        model, criterion, test_reader, hyper_params, user_count, item_count,
        review=review_based_model)

    # Calculate HR@1 on the test set
    print("Calculating HR@1 on test-set")
    _, test_reader2, _, _ = load_data(
        hyper_params)  # Needs the default (slow) reader
    metrics.update(eval_ranking(model, test_reader2, hyper_params,
                                review=review_based_model))

    log_end_epoch(hyper_params, metrics, 'final',
                  time.time() - start_time, metrics_on='(TEST)')
    return metrics, user_count_mse_map, item_count_mse_map
def __init__(self, args):
    """Create the model."""
    super(model_b, self).__init__()
    self.cudaenabled = args.cudaenabled

    # general parameters
    self.batchsize = args.batchsize        # todo: for now, 1. # todo: TUNE
    self.learningrate = args.learningrate  # todo: TUNE
    self.momentum = args.momentum          # todo: TUNE
    self.weight_decay = args.weight_decay  # todo: TUNE
    self.batchnorm = args.batchnorm        # todo: not implemented yet
    self.batchsampler = args.batchsampler  # weighted / adaptive; todo: not debugged yet:
                                           # shuffle / attention / nearestneighbor / ada (adaptive
                                           # reweighting of sampler prob based on validation err)
    self.earlystopimprovethresh = args.earlystopimprovethresh  # todo: not implemented yet
    self.maxiter = args.maxiter

    # glossaries to load
    self.datatype = args.datatype  # ['weighted', 'unnestedrels', 'unweightednestsinglecount']  # , 'unweightednest'
    self.entities_embeds = args.entities.todense()    # t.from_numpy(args.entities.todense())
    self.relations_embeds = args.relations.todense()  # t.from_numpy(args.relations.todense())
    self.edim = self.entities_embeds.shape[0]
    self.rdim = self.relations_embeds.shape[0]
    self.reldim = args.reldim
    self.entdim = args.entdim
    # todo: revise for better memory: tmp = vocab; del vocab
    self.entities_clusters_id = args.entities_clusters_id
    self.id2clustersid4entities = Vocab(self.entities_clusters_id).word2id
    self.relations_clusters_id = args.relations_clusters_id
    self.id2clustersid4relations = Vocab(self.relations_clusters_id).word2id
    self.N_c_e = len(self.id2clustersid4entities)
    self.N_c_r = len(self.id2clustersid4relations)
    self.tails_glossary_tensor = args.tails_glossary_tensor  # used below for sumover()  # todo
    self.negative_sampler_thresh = args.negative_sampler_thresh  # TODO: tune
    self.projElossfcnType = args.projElossfcnType  # TODO: tune
    self.cluster_update_regularity = args.cluster_update_regularity  # TODO: tune
    self.pretrained_entities_dimentionality_reduction = args.pretrained_entities_dimentionality_reduction    # TODO: tune
    self.pretrained_relations_dimentionality_reduction = args.pretrained_relations_dimentionality_reduction  # TODO: tune
    self.normalize_candidate_tails = args.normalize_candidate_tails  # True or False  # TODO: tune; reweight all tails DOWN if the corresponding head (~input) is dense in its number of tails
    self.normalize_candidate_heads = args.normalize_candidate_heads  # True or False  # TODO: tune; reweight each tail DOWN if it is connected more densely to more heads
    self.sumoverheads = self.tails_glossary_tensor.sumover('heads')
    self.sumovertails = self.tails_glossary_tensor.sumover('tails')
    self.regularize_within_clusters_relations = args.regularize_within_clusters_relations  # TODO: tune
    self.regularize_within_clusters_entities = args.regularize_within_clusters_entities    # TODO: tune
    self.regularization_rate_relations = args.regularization_rate_relations  # TODO: tune
    self.regularization_rate_entities = args.regularization_rate_entities    # TODO: tune

    # glossaries to learn
    self.E = xavier_init((self.edim, self.entdim))    # entity vecs to indirectly learn
    self.R = xavier_init((self.rdim, self.reldim))    # relation vecs to indirectly learn
    self.CR = xavier_init((self.N_c_r, self.reldim))  # parameters of all RELATIONS' clusters to indirectly learn
    self.CE = xavier_init((self.N_c_e, self.entdim))  # parameters of all ENTITIES' clusters to indirectly learn

    # Now set parameters:
    # t.nn.Parameter sets requires_grad=True and lists all gradable vars via model.parameters()
    self.e = self.tnnparameter(t.from_numpy(args.entities[0, :].todense()))   # entity_current_input; requires_grad=True by default
    self.r = self.tnnparameter(t.from_numpy(args.relations[0, :].todense()))  # relation_current_input; requires_grad=True by default
    self.cr = self.tnnparameter(t.from_numpy(self.CR[0, :]))  # current relations-cluster parameter; requires_grad=True by default
    self.ce = self.tnnparameter(t.from_numpy(self.CE[0, :]))  # current entities-cluster parameter; requires_grad=True by default

    # set parameters that don't need update_entities_relations_parameters
    self.bp = self.tnnparameter(t.from_numpy(xavier_init((1, self.entdim))))  # projection bias; requires_grad=True by default

    # dimensionality reduction, entities' feature glossaries
    if self.pretrained_entities_dimentionality_reduction is True:
        if args.entities_dimentionality_reduction is not None:
            self.dimred_e = self.fromnumpy(args.entities_dimentionality_reduction)
        else:
            self.dimred_e = self.fromnumpy(xavier_init((args.entities.shape[1], self.edim)))
    else:
        if args.entities_dimentionality_reduction is not None:
            self.dimred_e = self.tnnparameter(t.from_numpy(args.entities_dimentionality_reduction))
        else:
            self.dimred_e = self.tnnparameter(t.from_numpy(xavier_init((args.entities.shape[1], self.edim))))

    # dimensionality reduction, relations' feature glossaries
    if self.pretrained_relations_dimentionality_reduction is True:
        if args.relations_dimentionality_reduction is not None:
            self.dimred_r = self.fromnumpy(args.relations_dimentionality_reduction)
        else:
            self.dimred_r = self.fromnumpy(xavier_init((args.relations.shape[1], self.rdim)))
    else:
        if args.relations_dimentionality_reduction is not None:
            self.dimred_r = self.tnnparameter(t.from_numpy(args.relations_dimentionality_reduction))
        else:
            self.dimred_r = self.tnnparameter(t.from_numpy(xavier_init((args.relations.shape[1], self.rdim))))

    # current ongoing entity/rel/ParamRegardClus to learn
    self.entity_current_id = 0
    self.relation_current_id = 0
    self.entity_cluster_current_id = 0
    self.relation_cluster_current_id = 0

    # define loss function for model:
    if self.projElossfcnType == 'pointwise':
        self.projBfcntype = t.nn.functional.sigmoid
    elif self.projElossfcnType == 'listwise':
        self.projBfcntype = t.nn.functional.softmax
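# --- Illustrative sketch (not part of the original source) ---
# In model_b above, `xavier_init` is called with a shape tuple and its result is
# fed to `t.from_numpy`, so it must return a NumPy array. A plausible minimal
# version using the Glorot-uniform bound sqrt(6 / (fan_in + fan_out)):
import numpy as np

def xavier_init(shape):
    fan_in, fan_out = shape
    limit = np.sqrt(6.0 / (fan_in + fan_out))  # Glorot-uniform bound
    return np.random.uniform(-limit, limit, size=shape).astype(np.float32)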
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
        - data_x: original data with missing values
        - gain_parameters: GAIN network parameters:
            - batch_size: Batch size
            - hint_rate: Hint rate
            - alpha: Hyperparameter
            - iterations: Iterations

    Returns:
        - imputed_data: imputed data
    '''
    # Define mask matrix
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    # Input placeholders
    # Data vector
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax-normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)
    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)
    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
                                  + (1 - M) * tf.log(1. - D_prob + 1e-8))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))
    MSE_loss = tf.reduce_mean((M * X - M * G_sample) ** 2) / tf.reduce_mean(M)
    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Start iterations
    for it in tqdm(range(iterations)):
        # Sample batch
        batch_idx = sample_batch_index(no, batch_size)
        X_mb = norm_data_x[batch_idx, :]
        M_mb = data_m[batch_idx, :]
        # Sample random vectors
        Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
        # Sample hint vectors
        H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
        H_mb = M_mb * H_mb_temp
        # Combine random vectors with observed vectors
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                  feed_dict={M: M_mb, X: X_mb, H: H_mb})
        _, G_loss_curr, MSE_loss_curr = sess.run([G_solver, G_loss_temp, MSE_loss],
                                                 feed_dict={X: X_mb, M: M_mb, H: H_mb})

    ## Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
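# --- Illustrative sketch (not part of the original source) ---
# The TF1 code above calls `xavier_init([fan_in, fan_out])` to build initial
# weight tensors. A sketch matching the variant commonly shipped with GAIN-style
# code (normal noise with stddev 1/sqrt(fan_in / 2)); the actual helper in this
# codebase may differ:
def xavier_init(size):
    in_dim = size[0]
    xavier_stddev = 1. / tf.sqrt(in_dim / 2.)
    return tf.random_normal(shape=size, stddev=xavier_stddev)

# A hypothetical call to gain(); parameter values here are illustrative only.
# `data_x` is a NumPy matrix with np.nan marking the missing entries:
# gain_parameters = {'batch_size': 128, 'hint_rate': 0.9,
#                    'alpha': 100, 'iterations': 10000}
# imputed = gain(data_x, gain_parameters)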
# (continues a DataLoader(...) construction whose opening lines are not
# included in this excerpt)
    collate_fn=training.collate_pil
)

for i, (x, y) in enumerate(loader):
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(i + 1, len(loader)), end='')

# Remove mtcnn to reduce GPU memory usage
del mtcnn
print("Done MTCNN")

resnet = InceptionResnetV1(classify=True, pretrained=None, num_classes=5631)

print("Xavier Init...")
resnet = xavier_init(resnet)
print("Done...")

trans = transforms.Compose(
    [np.float32, transforms.ToTensor(), fixed_image_standardization])

print("Creating Dataset (Cropped Images)")
dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)
print("Done.")

resnet = nn.DataParallel(resnet.to(device))
weights = torch.load('./saved_models/lr_0.001/epoch_6.pt')
resnet.load_state_dict(weights)

optimizer = optim.AdamW(resnet.parameters(), lr=0.0001)
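# --- Illustrative note (not part of the original source) ---
# Load-order caveat for the snippet above: wrapping in nn.DataParallel prefixes
# every state-dict key with 'module.', so the checkpoint being loaded must have
# been saved from a DataParallel-wrapped model. If it had been saved from a bare
# model instead, its keys would need remapping first, e.g.:
#
# state = torch.load('./saved_models/lr_0.001/epoch_6.pt')
# state = {k if k.startswith('module.') else 'module.' + k: v
#          for k, v in state.items()}
# resnet.load_state_dict(state)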
def cph(data_x, cph_parameters, data_image):
    '''Impute missing values in data_x

    Args:
        - data_x: original data with missing values
        - cph_parameters: CPH network parameters:
            - batch_size: Batch size
            - hint_rate: Hint rate
            - alpha: Hyperparameter
            - iterations: Iterations
        - data_image: image tensor whose first channel mirrors data_x

    Returns:
        - imputed_data: imputed data
    '''
    seed = 25
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # Define mask matrix
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = cph_parameters['batch_size']
    hint_rate = cph_parameters['hint_rate']
    alpha = cph_parameters['alpha']
    iterations = cph_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    # norm_data_x = np.nan_to_num(norm_data, 0)
    norm_data_x = np.nan_to_num(data_x, 0)

    ## CPH architecture
    # Input placeholders
    # Data vector (as channel 0 of the image tensor)
    X_pre = tf.placeholder(tf.float32, shape=[1, 483, dim, 3])
    # X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    conv_filter_w1 = tf.Variable(tf.random_normal([1, 4, 3, 3]))
    conv_filter_b1 = tf.Variable(tf.random_normal([3]))
    conv_filter_w2 = tf.Variable(tf.random_normal([1, 4, 3, 1]))
    conv_filter_b2 = tf.Variable(tf.random_normal([1]))

    # Data + Mask as inputs (random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3,
               conv_filter_w1, conv_filter_b1, conv_filter_w2, conv_filter_b2]

    ## CPH functions
    # CNN + Generator
    def generator(x, m):
        relu_feature_maps1 = tf.nn.relu(
            tf.nn.conv2d(x, conv_filter_w1, strides=[1, 1, 1, 1], padding='SAME')
            + conv_filter_b1)
        max_pool1 = tf.nn.max_pool(relu_feature_maps1, ksize=[1, 1, 4, 1],
                                   strides=[1, 1, 1, 1], padding='SAME')
        relu_feature_maps2 = tf.nn.relu(
            tf.nn.conv2d(max_pool1, conv_filter_w2, strides=[1, 1, 1, 1], padding='SAME')
            + conv_filter_b2)
        max_pool2 = tf.nn.max_pool(relu_feature_maps2, ksize=[1, 1, 4, 1],
                                   strides=[1, 1, 1, 1], padding='SAME')
        x2 = tf.reshape(max_pool2, [483, dim])
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x2, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax-normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## CPH structure
    # Generator
    G_sample = generator(X_pre, M)
    X2 = X_pre[0, :, :, 0]
    # Combine with observed data
    Hat_X = X2 * M + G_sample * (1 - M)
    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## CPH loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
                                  + (1 - M) * tf.log(1. - D_prob + 1e-8))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))
    MSE_loss = tf.reduce_mean((M * X2 - M * G_sample) ** 2) / tf.reduce_mean(M)
    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## CPH solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Start iterations
    for it in tqdm(range(iterations)):
        # Sample batch
        batch_idx = sample_batch_index(no, batch_size)
        image_mb = data_image[:, batch_idx, :, :]
        X_mb = norm_data_x[batch_idx, :]
        M_mb = data_m[batch_idx, :]
        # Sample random vectors
        Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
        # Sample hint vectors
        H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
        H_mb = M_mb * H_mb_temp
        # Combine random vectors with observed vectors
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
        image_mb[0, :, :, 0] = X_mb

        _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                  feed_dict={M: M_mb, X_pre: image_mb, H: H_mb})
        _, G_loss_curr, MSE_loss_curr = sess.run([G_solver, G_loss_temp, MSE_loss],
                                                 feed_dict={X_pre: image_mb, M: M_mb, H: H_mb})

    ## Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
    image_mb = data_image
    image_mb[0, :, :, 0] = X_mb

    imputed_data = sess.run([G_sample], feed_dict={X_pre: image_mb, M: M_mb})[0]
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization (disabled: the data was not normalized above)
    # imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
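# --- Illustrative note (not part of the original source) ---
# Shape contract implied by cph() above: `data_image` is (1, 483, dim, 3) and its
# first channel mirrors the tabular data, which the training loop overwrites each
# step via image_mb[0, :, :, 0] = X_mb. The hard-coded reshape to [483, dim] in
# generator() means batch_size is effectively expected to be 483. A hypothetical
# call, with illustrative parameter values:
#
# cph_parameters = {'batch_size': 483, 'hint_rate': 0.9,
#                   'alpha': 100, 'iterations': 5000}
# imputed = cph(data_x, cph_parameters, data_image)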
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
        - data_x: original data with missing values
        - gain_parameters: GAIN network parameters:
            - batch_size: Batch size
            - hint_rate: Hint rate
            - alpha: Hyperparameter
            - iterations: Iterations

    Returns:
        - imputed_data: imputed data
    '''
    # Define mask matrix
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']
    checkpoint_dir = gain_parameters['checkpoint_dir']
    data_name = gain_parameters['data_name']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    # Input placeholders
    # Data vector
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables (named so the Saver can restore them by name)
    # Data + Mask as inputs (random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]), name='G_W1')
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]), name='G_b1')
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]), name='G_W2')
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]), name='G_b2')
    G_W3 = tf.Variable(xavier_init([h_dim, dim]), name='G_W3')
    G_b3 = tf.Variable(tf.zeros(shape=[dim]), name='G_b3')
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax-normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    # Save models
    def save_model(sess, checkpoint_dir):
        model_name = "gain_model"
        model_dir = "%s" % (data_name)
        checkpoint_dir = os.path.join(checkpoint_dir, model_dir)
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver.save(sess, os.path.join(checkpoint_dir, model_name))

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)

    # Training graph, disabled in testing mode:
    # # Combine with observed data
    # Hat_X = X * M + G_sample * (1 - M)
    # # Discriminator
    # D_prob = discriminator(Hat_X, H)
    # ## GAIN loss
    # D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
    #                               + (1 - M) * tf.log(1. - D_prob + 1e-8))
    # G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))
    # MSE_loss = tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)
    # D_loss = D_loss_temp
    # G_loss = G_loss_temp + alpha * MSE_loss
    # ## GAIN solver
    # D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    # G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    saver = tf.train.Saver(max_to_keep=1)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Training loop, disabled in testing mode:
    # for it in tqdm(range(iterations)):
    #     # Sample batch
    #     batch_idx = sample_batch_index(no, batch_size)
    #     X_mb = norm_data_x[batch_idx, :]
    #     M_mb = data_m[batch_idx, :]
    #     # Sample random vectors
    #     Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
    #     # Sample hint vectors
    #     H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
    #     H_mb = M_mb * H_mb_temp
    #     # Combine random vectors with observed vectors
    #     X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb
    #     _, D_loss_curr = sess.run([D_solver, D_loss_temp],
    #                               feed_dict={M: M_mb, X: X_mb, H: H_mb})
    #     _, G_loss_curr, MSE_loss_curr = sess.run([G_solver, G_loss_temp, MSE_loss],
    #                                              feed_dict={X: X_mb, M: M_mb, H: H_mb})
    # save_model(sess, checkpoint_dir)

    print('testing mode')

    # Restore the model
    # G_sample = load(sess, checkpoint_dir)
    print(" [*] Reading checkpoint...")
    # model_dir = "%s" % (data_name)
    # checkpoint_dir = os.path.join(checkpoint_dir, model_dir)
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print('The model loaded successfully')
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        saver.restore(sess, os.path.join(checkpoint_dir, ckpt_name))
        # print(sess.run(G_b1))
        G_W1 = sess.run(G_W1)
        G_b1 = sess.run(G_b1)
        G_W2 = sess.run(G_W2)
        G_b2 = sess.run(G_b2)
        G_W3 = sess.run(G_W3)
        G_b3 = sess.run(G_b3)
    else:
        print('failed to load the model, check model path')

    ## Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
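# --- Illustrative note (not part of the original source) ---
# This checkpoint-restoring variant additionally expects `checkpoint_dir` and
# `data_name` in the parameter dict. Note that save_model() joins checkpoint_dir
# with data_name, while the restore path above uses checkpoint_dir directly
# (the joining lines are commented out), so the two must be kept consistent.
# A hypothetical call; all values are illustrative:
#
# gain_parameters = {'batch_size': 128, 'hint_rate': 0.9, 'alpha': 100,
#                    'iterations': 10000, 'checkpoint_dir': './checkpoint/spam',
#                    'data_name': 'spam'}
# imputed_data = gain(data_x, gain_parameters)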