Example #1
                # Evaluate results on the imputation with the mode, not on the samples!
                samples_list.append(samples_test)
                p_params_list.append(test_params)
                q_params_list.append(q_params)
                log_p_x_total.append(log_p_x_test)
                log_p_x_missing_total.append(log_p_x_missing_test)

                # Compute average loss
                avg_loss += np.mean(loss)
                avg_KL_s += np.mean(KL_s)
                avg_KL_z += np.mean(KL_z)

            # Concatenate samples in arrays
            s_total, z_total, y_total, est_data = read_functions.samples_concatenation(
                samples_list)

            # Transform discrete variables back to the original values
            train_data_transformed = read_functions.discrete_variables_transformation(
                train_data_aux[:n_batches * args.batch_size, :], types_dict)
            est_data_transformed = read_functions.discrete_variables_transformation(
                est_data, types_dict)
            est_data_imputed = read_functions.mean_imputation(
                train_data_transformed,
                miss_mask_aux[:n_batches * args.batch_size, :], types_dict)

            # Create global dictionary of the distribution parameters
            p_params_complete = read_functions.p_distribution_params_concatenation(
                p_params_list, types_dict, args.dim_latent_z, args.dim_latent_s)
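
The running sums above hold per-batch means; a minimal follow-up sketch (assuming n_batches from the enclosing loop, which this snippet truncates) of how the per-epoch averages would typically be reported:

            # Hedged sketch: normalize the accumulated per-batch means by the
            # number of batches (n_batches is assumed from the enclosing loop).
            print('avg loss: {:.4f} | avg KL_s: {:.4f} | avg KL_z: {:.4f}'.format(
                avg_loss / n_batches, avg_KL_s / n_batches, avg_KL_z / n_batches))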
Example #2
import os
import re

import numpy as np
import pandas as pd
import tensorflow as tf

import graph_new
import parser_arguments
import read_functions


def dec_network(settings, zcodes, scodes, VP=False):
    """Decode using fixed s and z codes (if they were generated, provide a matching generated miss_list) and return the decoded data."""
    argvals = settings.split()
    args = parser_arguments.getArgs(argvals)
    print(args)

    # Create a directory for the save file
    if not os.path.exists('./Saved_Networks/' + args.save_file):
        os.makedirs('./Saved_Networks/' + args.save_file)
    network_file_name = './Saved_Networks/' + args.save_file + '/' + args.save_file + '.ckpt'
    log_file_name = './Saved_Networks/' + args.save_file + '/log_file_' + args.save_file + '.txt'

    #Creating graph
    graph_HVAE = tf.Graph()
    with graph_HVAE.as_default():
        tf_nodes = graph_new.HVAE_graph(
            args.model_name,
            args.types_file,
            args.batch_size,
            learning_rate=args.learning_rate,
            z_dim=args.dim_latent_z,
            y_dim=args.dim_latent_y,
            s_dim=args.dim_latent_s,
            y_dim_partition=args.dim_latent_y_partition)

    train_data, types_dict, miss_mask, true_miss_mask, n_samples = read_functions.read_data(
        args.data_file, args.types_file, args.miss_file, args.true_miss_file)

    #Get an integer number of batches
    n_batches = int(np.floor(np.shape(train_data)[0] / args.batch_size))

    # Compute the effective miss_mask: an entry counts as observed only if both masks mark it observed
    miss_mask = np.multiply(miss_mask, true_miss_mask)

    with tf.Session(graph=graph_HVAE) as session:
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
        saver.restore(session, network_file_name)
        print("Model restored: " + network_file_name)

        print('::::::DECODING:::::::::')
        # Decoding cycle (no training here; the model is restored above)
        samples_list = []

        # Constant Gumbel-Softmax parameter (where we have finished the annealing)
        tau = 1e-3

        for i in range(n_batches):

            data_list, miss_list = read_functions.next_batch(
                train_data,
                types_dict,
                miss_mask,
                args.batch_size,
                index_batch=i)  # Create inputs for the feed_dict
            data_list_observed = [
                data_list[j] *
                np.reshape(miss_list[:, j], [args.batch_size, 1])
                for j in range(len(data_list))
            ]  # Mask out entries that are not observed

            # Create the feed dictionary
            feedDict = {
                node: d
                for node, d in zip(tf_nodes['ground_batch'], data_list)
            }
            feedDict.update({
                node: d
                for node, d in zip(tf_nodes['ground_batch_observed'],
                                   data_list_observed)
            })
            feedDict[tf_nodes['miss_list']] = miss_list
            if VP is True:
                vpfile = 'VP_misslist/' + re.sub(
                    r'data_python/|\.csv', '', args.data_file) + '_vpmiss.csv'
                print('::::::::::::' + vpfile)
                feedDict[tf_nodes['miss_list_VP']] = pd.read_csv(vpfile,
                                                                 header=None)
            elif VP == 'nomiss':
                print(':::::::::::: ones for miss list VP')
                feedDict[tf_nodes['miss_list_VP']] = np.ones(miss_list.shape)
            else:
                feedDict[tf_nodes['miss_list_VP']] = miss_list
            feedDict[tf_nodes['tau_GS']] = tau
            feedDict[tf_nodes['zcodes']] = np.array(zcodes).reshape(
                (len(zcodes), 1))
            feedDict[tf_nodes['scodes']] = np.array(scodes).reshape(
                (len(scodes), 1))

            #Get samples from the fixed decoder function
            samples_zgen, log_p_x_test, log_p_x_missing_test, test_params = session.run(
                [
                    tf_nodes['samples_zgen'], tf_nodes['log_p_x_zgen'],
                    tf_nodes['log_p_x_missing_zgen'],
                    tf_nodes['test_params_zgen']
                ],
                feed_dict=feedDict)
            samples_list.append(samples_zgen)

        # Concatenate the per-batch samples into arrays
        s_aux, z_aux, y_total, est_data = read_functions.samples_concatenation(
            samples_list)

        #Transform discrete variables to original values
        est_data_transformed = read_functions.discrete_variables_transformation(
            est_data, types_dict)

        return est_data_transformed
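
A minimal usage sketch for dec_network. Everything below is hypothetical: the flag names are assumed to mirror the args attributes used above and must match what parser_arguments.getArgs actually accepts, and the paths, model name, and code values are placeholders:

# Hypothetical usage sketch; flags, paths and code values are placeholders,
# not taken from the function above.
settings = ('--model_name model_HIVAE_inputDropout '
            '--types_file data_python/mydata_types.csv '
            '--data_file data_python/mydata.csv '
            '--miss_file data_python/mydata_miss.csv '
            '--true_miss_file data_python/mydata_true_miss.csv '
            '--batch_size 32 --save_file mydata_run '
            '--dim_latent_z 2 --dim_latent_y 5 --dim_latent_s 3 '
            '--learning_rate 1e-3')
zcodes = [0.0] * 32  # one fixed z value per sample; length assumed to equal batch_size
scodes = [0] * 32    # one fixed s component per sample
est_data = dec_network(settings, zcodes, scodes, VP='nomiss')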
Example #3
                                              feed_dict=feedDict)

                # Collect all samples, distribution parameters and log-likelihoods in lists
                samples_list.append(samples)
                p_params_list.append(p_params)
                q_params_list.append(q_params)
                log_p_x_total.append(log_p_x)
                log_p_x_missing_total.append(log_p_x_missing)

                # Compute average loss
                avg_loss += np.mean(loss)
                avg_KL_s += np.mean(KL_s)
                avg_KL_z += np.mean(KL_z)

            # Concatenate samples in arrays
            s_total, z_total, y_total, est_data = read_functions.samples_concatenation(
                samples_list)

            # Transform discrete variables back to the original values
            train_data_transformed = read_functions.discrete_variables_transformation(
                train_data_aux[:n_batches * args.batch_size, :], types_dict)
            est_data_transformed = read_functions.discrete_variables_transformation(
                est_data, types_dict)
            est_data_imputed = read_functions.mean_imputation(
                train_data_transformed,
                miss_mask_aux[:n_batches * args.batch_size, :], types_dict)

            # Create global dictionary of the distribution parameters
            p_params_complete = read_functions.p_distribution_params_concatenation(
                p_params_list, types_dict, args.dim_latent_z, args.dim_latent_s)
            q_params_complete = read_functions.q_distribution_params_concatenation(
                q_params_list, args.dim_latent_z, args.dim_latent_s)

            # Compute the mean and mode of our log-likelihood models
            loglik_mean, loglik_mode = read_functions.statistics(
                p_params_complete['x'], types_dict)

            # Compute imputation errors on observed and missing entries
            error_train_mean, error_test_mean = read_functions.error_computation(
                train_data_transformed, loglik_mean, types_dict,
                miss_mask_aux[:n_batches * args.batch_size, :])
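
A short hedged sketch of how these errors might be inspected. It assumes error_computation returns one value per variable, aligned with types_dict, and that each types_dict entry carries a 'type' field, as in the HI-VAE readers:

            # Hedged sketch: report per-variable errors on observed and missing entries.
            for v, (err_obs, err_mis) in enumerate(zip(error_train_mean, error_test_mean)):
                print('variable {} ({}): observed {:.4f} | missing {:.4f}'.format(
                    v, types_dict[v]['type'], err_obs, err_mis))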