            # Evaluate results on the imputation with the mode, not on the samples!
            samples_list.append(samples_test)
            p_params_list.append(test_params)
            # p_params_list.append(p_params)
            q_params_list.append(q_params)
            log_p_x_total.append(log_p_x_test)
            log_p_x_missing_total.append(log_p_x_missing_test)

            # Compute average loss
            avg_loss += np.mean(loss)
            avg_KL_s += np.mean(KL_s)
            avg_KL_z += np.mean(KL_z)

        # Concatenate samples in arrays
        s_total, z_total, y_total, est_data = read_functions.samples_concatenation(samples_list)

        # Transform discrete variables back to the original values
        train_data_transformed = read_functions.discrete_variables_transformation(
            train_data_aux[:n_batches * args.batch_size, :], types_dict)
        est_data_transformed = read_functions.discrete_variables_transformation(est_data, types_dict)
        est_data_imputed = read_functions.mean_imputation(
            train_data_transformed, miss_mask_aux[:n_batches * args.batch_size, :], types_dict)
        # est_data_transformed[np.isinf(est_data_transformed)] = 1e20

        # Create global dictionary of the distribution parameters
        p_params_complete = read_functions.p_distribution_params_concatenation(
            p_params_list, types_dict, args.dim_latent_z, args.dim_latent_s)
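# A minimal, self-contained sketch (not part of the original code) of the idea flagged in
# the comment above: evaluate/impute with the *mode* of the output distribution p(x|z,s)
# rather than with drawn samples. The names below are illustrative; it assumes
# `data_observed`, `loglik_mode` and `miss_mask` are aligned (n_samples x n_features)
# arrays and that miss_mask == 1 marks observed entries, as in the HI-VAE read functions.
import numpy as np

def impute_with_mode(data_observed, loglik_mode, miss_mask):
    'keep observed entries and fill missing entries with the mode estimate'
    observed = np.asarray(miss_mask, dtype=bool)
    return np.where(observed, data_observed, loglik_mode)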
def dec_network(settings, zcodes, scodes, VP=False):
    'decode using set s and z values (if generated provide a generated miss_list) and return decoded data'
    argvals = settings.split()
    args = parser_arguments.getArgs(argvals)
    print(args)

    # Create a directory for the save file
    if not os.path.exists('./Saved_Networks/' + args.save_file):
        os.makedirs('./Saved_Networks/' + args.save_file)

    network_file_name = './Saved_Networks/' + args.save_file + '/' + args.save_file + '.ckpt'
    log_file_name = './Saved_Networks/' + args.save_file + '/log_file_' + args.save_file + '.txt'

    # Creating graph
    sess_HVAE = tf.Graph()
    with sess_HVAE.as_default():
        tf_nodes = graph_new.HVAE_graph(
            args.model_name, args.types_file, args.batch_size,
            learning_rate=args.learning_rate, z_dim=args.dim_latent_z,
            y_dim=args.dim_latent_y, s_dim=args.dim_latent_s,
            y_dim_partition=args.dim_latent_y_partition)

    train_data, types_dict, miss_mask, true_miss_mask, n_samples = read_functions.read_data(
        args.data_file, args.types_file, args.miss_file, args.true_miss_file)

    # Get an integer number of batches
    n_batches = int(np.floor(np.shape(train_data)[0] / args.batch_size))

    # Compute the real miss_mask
    miss_mask = np.multiply(miss_mask, true_miss_mask)

    with tf.Session(graph=sess_HVAE) as session:
        # Add ops to save and restore all the variables
        saver = tf.train.Saver()
        saver.restore(session, network_file_name)
        print("Model restored: " + network_file_name)

        print('::::::DECODING:::::::::')
        start_time = time.time()

        # Decoding cycle (the restored network is only evaluated, not trained)
        epoch = 0
        samples_list = []

        # Constant Gumbel-Softmax parameter (where we have finished the annealing)
        tau = 1e-3

        for i in range(n_batches):
            data_list, miss_list = read_functions.next_batch(
                train_data, types_dict, miss_mask, args.batch_size, index_batch=i)

            # Create inputs for the feed_dict: mask out unobserved entries
            data_list_observed = [
                data_list[i] * np.reshape(miss_list[:, i], [args.batch_size, 1])
                for i in range(len(data_list))
            ]

            # Create feed dictionary
            feedDict = {i: d for i, d in zip(tf_nodes['ground_batch'], data_list)}
            feedDict.update({
                i: d
                for i, d in zip(tf_nodes['ground_batch_observed'], data_list_observed)
            })
            feedDict[tf_nodes['miss_list']] = miss_list

            # Choose the miss_list used for the virtual-patient (VP) decoding
            if VP == True:
                vpfile = 'VP_misslist/' + re.sub('data_python/|.csv', '', args.data_file) + '_vpmiss.csv'
                print('::::::::::::' + vpfile)
                feedDict[tf_nodes['miss_list_VP']] = pd.read_csv(vpfile, header=None)
            elif VP == 'nomiss':
                print(':::::::::::: ones for miss list VP')
                feedDict[tf_nodes['miss_list_VP']] = np.ones(miss_list.shape)
            else:
                feedDict[tf_nodes['miss_list_VP']] = miss_list

            feedDict[tf_nodes['tau_GS']] = tau
            feedDict[tf_nodes['zcodes']] = np.array(zcodes).reshape((len(zcodes), 1))
            feedDict[tf_nodes['scodes']] = np.array(scodes).reshape((len(scodes), 1))

            # Get samples from the fixed decoder function
            samples_zgen, log_p_x_test, log_p_x_missing_test, test_params = session.run(
                [tf_nodes['samples_zgen'], tf_nodes['log_p_x_zgen'],
                 tf_nodes['log_p_x_missing_zgen'], tf_nodes['test_params_zgen']],
                feed_dict=feedDict)
            samples_list.append(samples_zgen)

        # Separate the samples from the batch list
        s_aux, z_aux, y_total, est_data = read_functions.samples_concatenation(samples_list)

        # Transform discrete variables to original values
        est_data_transformed = read_functions.discrete_variables_transformation(est_data, types_dict)

        return est_data_transformed
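# Hedged usage sketch (not part of the original code): when dec_network is called with
# VP=True, the branch above expects a 0/1 miss-list CSV without a header at
# 'VP_misslist/<data file name>_vpmiss.csv', with one row per sample in the batch and one
# column per variable. The data file name, matrix shape and the settings string mentioned
# below are illustrative assumptions; the real option names are whatever
# parser_arguments.getArgs accepts.
import os
import re
import numpy as np
import pandas as pd

data_file = 'data_python/ADNI.csv'            # hypothetical data file path
vp_misslist = np.ones((128, 6), dtype=int)    # assumed: 128 samples, 6 variables, all treated as observed
os.makedirs('VP_misslist', exist_ok=True)
pd.DataFrame(vp_misslist).to_csv(
    'VP_misslist/' + re.sub('data_python/|.csv', '', data_file) + '_vpmiss.csv',
    header=False, index=False)

# decoded = dec_network(settings_string, zcodes, scodes, VP=True)   # hypothetical call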
                feed_dict=feedDict)

            # Collect all samples, distribution parameters and log-likelihoods in lists
            samples_list.append(samples)
            p_params_list.append(p_params)
            q_params_list.append(q_params)
            log_p_x_total.append(log_p_x)
            log_p_x_missing_total.append(log_p_x_missing)

            # Compute average loss
            avg_loss += np.mean(loss)
            avg_KL_s += np.mean(KL_s)
            avg_KL_z += np.mean(KL_z)

        # Concatenate samples in arrays
        s_total, z_total, y_total, est_data = read_functions.samples_concatenation(samples_list)

        # Transform discrete variables back to the original values
        train_data_transformed = read_functions.discrete_variables_transformation(
            train_data_aux[:n_batches * args.batch_size, :], types_dict)
        est_data_transformed = read_functions.discrete_variables_transformation(est_data, types_dict)
        est_data_imputed = read_functions.mean_imputation(
            train_data_transformed, miss_mask_aux[:n_batches * args.batch_size, :], types_dict)

        # Create global dictionary of the distribution parameters
        p_params_complete = read_functions.p_distribution_params_concatenation(
            p_params_list, types_dict, args.dim_latent_z, args.dim_latent_s)
        q_params_complete = read_functions.q_distribution_params_concatenation(
            q_params_list, args.dim_latent_z, args.dim_latent_s)

        # Compute mean and mode of our log-likelihood models
        loglik_mean, loglik_mode = read_functions.statistics(p_params_complete['x'], types_dict)

        # Compute the errors using the mean estimates
        error_train_mean, error_test_mean = read_functions.error_computation(
            train_data_transformed, loglik_mean, types_dict,
            miss_mask_aux[:n_batches * args.batch_size, :])
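# Hedged, self-contained illustration (not the repo's read_functions.error_computation):
# one way to score imputation quality only on the entries that were actually missing,
# comparing an estimate (e.g. loglik_mode, per the comment at the top of this section)
# against the ground truth. It assumes aligned (n_samples x n_features) arrays, that
# miss_mask == 1 marks observed entries, and that each types_dict entry has a 'type'
# field as in the HI-VAE types file ('real', 'pos', 'count', 'cat', 'ordinal').
import numpy as np

def missing_entry_errors(true_data, estimates, miss_mask, types_dict):
    'per-feature error on missing entries: normalised RMSE for numeric, error rate otherwise'
    errors = []
    for j, feat in enumerate(types_dict):
        missing = np.asarray(miss_mask[:, j]) == 0
        if not missing.any():
            errors.append(np.nan)
            continue
        truth = true_data[missing, j]
        est = estimates[missing, j]
        if feat['type'] in ('real', 'pos', 'count'):
            rng = np.ptp(true_data[:, j])
            rng = rng if rng > 0 else 1.0
            errors.append(np.sqrt(np.mean((truth - est) ** 2)) / rng)
        else:
            errors.append(np.mean(truth != est))
    return errors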