##############################################################
# CSV files required to run the program
##############################################################
path_dataset_matrix = 'bl_data_MCI_v1.csv'
path_mask_X1 = 'mask_age_nolab.csv'
path_mask_X2 = 'mask_sex_nolab.csv'
path_mask_X3 = 'mask_agesex_nolab.csv'
path_mask_X4 = 'mask_nosignificance_nolab.csv'
path_dataset_affinity_matrix_age = 'affinity_mat_1.csv'
path_dataset_affinity_matrix_sex = 'affinity_mat_2.csv'
path_dataset_affinity_matrix_agesex = 'affinity_mat_3.csv'
labels_path = 'labels_comma.csv'

# Load the .csv files
M_str, nrRows, nrCols = read_tadpole.load_csv_no_header(path_dataset_matrix)
mask_age, _, _ = read_tadpole.load_csv_no_header(path_mask_X1)
mask_sex, _, _ = read_tadpole.load_csv_no_header(path_mask_X2)
mask_agesex, _, _ = read_tadpole.load_csv_no_header(path_mask_X3)
mask_nosignificance, _, _ = read_tadpole.load_csv_no_header(path_mask_X4)
A1, _, _ = read_tadpole.load_csv_no_header(path_dataset_affinity_matrix_age)
A2, _, _ = read_tadpole.load_csv_no_header(path_dataset_affinity_matrix_sex)
A3, _, _ = read_tadpole.load_csv_no_header(path_dataset_affinity_matrix_agesex)

# Convert the string entries to floats
A_age = preprocessing_dataset.str_to_float(A1)
A_sex = preprocessing_dataset.str_to_float(A2)
A_sexage = preprocessing_dataset.str_to_float(A3)  # age & sex affinity matrix

labels, _, _ = read_tadpole.load_csv_no_header(labels_path)
labels = preprocessing_dataset.str_to_float(labels)
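
# Hedged sketch, not the repository's code: read_tadpole.load_csv_no_header is
# assumed to read a header-less CSV into a 2-D string array and return it
# together with its row/column counts. The helper name below is hypothetical.
import csv
import numpy as np

def _load_csv_no_header_sketch(path):
    """Return (data, nrRows, nrCols) for a header-less CSV file."""
    with open(path) as f:
        data = np.array(list(csv.reader(f)))
    return data, data.shape[0], data.shape[1]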
# import rbfopt  # uncomment when doing hyperparameter optimization
import time

import numpy as np
import scipy.sparse as sp
import tensorflow as tf
from sklearn.metrics import roc_curve, auc

from preprocessing import sparse_to_tuple, load_data_monti_tadpole
from model import MG_GAE
from utils import construct_feed_dict
import read_tadpole
import preprocessing_dataset

VERBOSE = True

path_dataset_matrix = "synthetic_data_noteasy.csv"
labels_path = "labels_synthetic_data_noteasy.csv"

# Load the .csv files
M_str, nrRows, nrCols = read_tadpole.load_csv_no_header(path_dataset_matrix)
labels, _, _ = read_tadpole.load_csv_no_header(labels_path)

# Preprocessing steps that do not change between runs: append the labels as an
# extra column and normalize.
labels = preprocessing_dataset.str_to_float(labels)
M_init = np.concatenate((M_str, labels), axis=1)
M_support = np.concatenate((M_str, labels), axis=1)
M = preprocessing_dataset.normalization_gae(M_init)
M_sup = preprocessing_dataset.normalization_for_supportreal(M_support)
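
# Note (added assumption): the support/masking code further down hardcodes 563
# as the index of the appended label column and 564 as the width of one
# support block, i.e. it assumes the raw synthetic matrix has 563 feature
# columns. Making that explicit catches a mismatched data file early:
assert M_init.shape[1] == 564, "expected 563 feature columns plus 1 label column"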
def run(seed, gamma, beta, hidden, lr, NB_EPOCH=300):
    """
    Main function. Runs the architecture for the initialization defined by
    seed and by the hyperparameters gamma, beta, hidden, lr.

    Inputs:
        seed: seed defining the training/testing/validation split,
        gamma, beta, hidden, lr: hyperparameters of the architecture,
        NB_EPOCH: number of training epochs. Default: 300.
    Outputs:
        auc_test, auc_train, auc_val: AUC on the test, train and validation sets
    """
    tf.reset_default_graph()

    training_set_mask, testing_set_mask, idx_training, idx_testing = \
        preprocessing_dataset.split_train_test(0.8, M_str, seed, labels)

    # Create a training and a test mask on the data
    Otraining = preprocessing_dataset.load_mask(training_set_mask, M_str, nrRows, nrCols)
    Otest = preprocessing_dataset.load_mask(testing_set_mask, M_str, nrRows, nrCols)

    # Hide the test labels before building the validation split
    new_labels_train = np.copy(labels)
    new_labels_train[idx_testing] = -1

    # Split the train set into 4 parts to create a validation set
    training_set_mask, validation_set_mask, idx_training, idx_validation = \
        preprocessing_dataset.split_train_validation_4(3, M_str, seed, new_labels_train)

    Otraining = preprocessing_dataset.load_mask(training_set_mask, M_str, nrRows, nrCols)
    Ovalidation = preprocessing_dataset.load_mask(validation_set_mask, M_str, nrRows, nrCols)

    Otraining = np.concatenate((Otraining, training_set_mask), axis=1)
    Ocol = np.zeros((Otest.shape[0], 1))
    Otest_support = np.concatenate((Otest, Ocol), axis=1)
    Ovalidation_support = np.concatenate((Ovalidation, Ocol), axis=1)
    Osupport_t = Otraining + Otest_support + Ovalidation_support
    Ovalidation = np.concatenate((Ovalidation, validation_set_mask), axis=1)
    Otest = np.concatenate((Otest, testing_set_mask), axis=1)

    u_features, v_features, train_labels, train_u_indices, train_v_indices, \
        val_labels, val_u_indices, val_v_indices, \
        test_labels, test_u_indices, test_v_indices = \
        load_data_monti_tadpole(M, Otraining, Otest, Ovalidation)

    m, n = M.shape  # global normalization
    support = []
    support_t = []

    # One support matrix per meta-information group: all women, all men, and
    # women / men / everyone inside each 5-year age bracket (23 files in
    # total). Each support is masked by M_sup and stored as a sparse CSR
    # matrix, with its transpose collected in support_t.
    support_csv_files = ["women_synth_noteasy.csv", "men_synth_noteasy.csv"]
    for bracket in ["84_92", "79_84", "74_79", "69_74", "64_69", "59_64", "54_59"]:
        support_csv_files += ["age_%s_women_synth_noteasy.csv" % bracket,
                              "age_%s_men_synth_noteasy.csv" % bracket,
                              "age_%s_synth_noteasy.csv" % bracket]
    for path_support in support_csv_files:
        group_support, _, _ = read_tadpole.load_csv_no_header(path_support)
        group_support = preprocessing_dataset.str_to_float(group_support)
        group_support = group_support * M_sup
        group_support = sp.csr_matrix(group_support, dtype=np.float32)
        support.append(group_support)
        support_t.append(group_support.T)

    num_support = len(support)
    mask_support_t = []
    Osupport_t = sp.csr_matrix(Osupport_t, dtype=np.int32)  # np.int is deprecated
    for i in range(num_support):
        mask_support_t.append(Osupport_t.T)
    mask_support_t = sp.hstack(mask_support_t, format='csr')

    support = sp.hstack(support, format='csr')
    support_t = sp.hstack(support_t, format='csr')

    # Collect all user and item nodes for the test set
    test_u = list(set(test_u_indices))
    test_v = list(set(test_v_indices))
    test_u_dict = {n: i for i, n in enumerate(test_u)}
    test_v_dict = {n: i for i, n in enumerate(test_v)}
    test_u_indices = np.array([test_u_dict[o] for o in test_u_indices])
    test_v_indices = np.array([test_v_dict[o] for o in test_v_indices])
    test_support = support[np.array(test_u)]
    # Zero out the label column (index 563 of every 564-column support block)
    # so the test labels are never visible to the model.
    for i in range(test_support.shape[0]):
        for j in range(563, test_support.shape[1], 564):
            test_support[i, j] = 0.0
    test_support_t = sp.csr_matrix.multiply(support_t, mask_support_t)

    # Collect all user and item nodes for the validation set
    val_u = list(set(val_u_indices))
    val_v = list(set(val_v_indices))
    val_u_dict = {n: i for i, n in enumerate(val_u)}
    val_v_dict = {n: i for i, n in enumerate(val_v)}
    val_u_indices = np.array([val_u_dict[o] for o in val_u_indices])
    val_v_indices = np.array([val_v_dict[o] for o in val_v_indices])
    val_support = support[np.array(val_u)]
    for i in range(val_support.shape[0]):
        for j in range(563, val_support.shape[1], 564):
            val_support[i, j] = 0.0
    val_support_t = sp.csr_matrix.multiply(support_t, mask_support_t)

    # Collect all user and item nodes for the train set
    train_u = list(set(train_u_indices))
    train_v = list(set(train_v_indices))
    train_u_dict = {n: i for i, n in enumerate(train_u)}
    train_v_dict = {n: i for i, n in enumerate(train_v)}
    train_u_indices = np.array([train_u_dict[o] for o in train_u_indices])
    train_v_indices = np.array([train_v_dict[o] for o in train_v_indices])
    train_support = support[np.array(train_u)]
    train_support_t = sp.csr_matrix.multiply(support_t, mask_support_t)

    placeholders = {
        'u_features': tf.sparse_placeholder(tf.float32, shape=np.array(u_features.shape, dtype=np.int64)),
        'v_features': tf.sparse_placeholder(tf.float32, shape=np.array(v_features.shape, dtype=np.int64)),
        'u_features_nonzero': tf.placeholder(tf.int32, shape=()),
        'v_features_nonzero': tf.placeholder(tf.int32, shape=()),
        'labels': tf.placeholder(tf.float32, shape=(None,)),
        'indices_labels': tf.placeholder(tf.int32, shape=(None,)),
        'user_indices': tf.placeholder(tf.int32, shape=(None,)),
        'item_indices': tf.placeholder(tf.int32, shape=(None,)),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'weight_decay': tf.placeholder_with_default(0., shape=()),
        'support': tf.sparse_placeholder(tf.float32, shape=(None, None)),
        'support_t': tf.sparse_placeholder(tf.float32, shape=(None, None)),
    }

    # hidden[0] must be divisible by num_support so the stack layer can be
    # split evenly across the support matrices.
    div = hidden[0] // num_support
    if hidden[0] % num_support != 0:
        print("\nWARNING: HIDDEN[0] (=%d) of stack layer is adjusted to %d such that "
              "it can be evenly split in %d splits.\n" % (hidden[0], num_support * div, num_support))
        hidden[0] = num_support * div

    # Create the model
    model = MG_GAE(placeholders,
                   input_dim=u_features.shape[1],
                   num_support=num_support,
                   hidden=hidden,
                   num_users=m,
                   num_items=n,
                   learning_rate=lr,
                   gamma=gamma,
                   beta=beta,
                   logging=True)

    # Convert sparse matrices to tuples to construct the feed_dict
    test_support = sparse_to_tuple(test_support)
    test_support_t = sparse_to_tuple(test_support_t)
    val_support = sparse_to_tuple(val_support)
    val_support_t = sparse_to_tuple(val_support_t)
    train_support = sparse_to_tuple(train_support)
    train_support_t = sparse_to_tuple(train_support_t)
    u_features = sparse_to_tuple(u_features)
    v_features = sparse_to_tuple(v_features)
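    # Note (assumption based on the indexing used just below): sparse_to_tuple,
    # imported from preprocessing, is expected to follow the usual GCN-style
    # convention of returning (coords, values, shape) for a scipy sparse
    # matrix, so u_features[1] is the value array and u_features[2] the dense
    # shape — the source does not show its implementation.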
    assert u_features[2][1] == v_features[2][1], 'Number of features of users and items must be the same!'
    num_features = u_features[2][1]
    u_features_nonzero = u_features[1].shape[0]
    v_features_nonzero = v_features[1].shape[0]

    # Column 563 holds the appended label in every row
    indices_labels = [563] * train_labels.shape[0]
    indices_labels_val = [563] * val_labels.shape[0]
    indices_labels_test = [563] * test_labels.shape[0]

    # Feed_dicts for the validation and test sets stay constant over the
    # different update steps; no dropout for validation and test runs.
    train_feed_dict = construct_feed_dict(placeholders, u_features, v_features,
                                          u_features_nonzero, v_features_nonzero,
                                          train_support, train_support_t,
                                          train_labels, indices_labels,
                                          train_u_indices, train_v_indices, 0.)
    val_feed_dict = construct_feed_dict(placeholders, u_features, v_features,
                                        u_features_nonzero, v_features_nonzero,
                                        val_support, val_support_t,
                                        val_labels, indices_labels_val,
                                        val_u_indices, val_v_indices, 0.)
    test_feed_dict = construct_feed_dict(placeholders, u_features, v_features,
                                         u_features_nonzero, v_features_nonzero,
                                         test_support, test_support_t,
                                         test_labels, indices_labels_test,
                                         test_u_indices, test_v_indices, 0.)

    # Collect all variables to be logged into the summary
    merged_summary = tf.summary.merge_all()

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    auc_train = []
    auc_test = []
    auc_val = []
    test_pred = []
    for epoch in range(NB_EPOCH):
        t = time.time()

        # Run a single weight update
        outs = sess.run([model.training_op, model.loss, model.indices, model.labels,
                         model.outputs, model.labels_class, model.classification,
                         model.inputs, model.gcn_u, model.gcn_v, model.loss_frob,
                         model.binary_entropy, model.u_inputs, model.v_inputs,
                         model.weight, model.input_u, model.input_v,
                         model.u_indices, model.v_indices],
                        feed_dict=train_feed_dict)
        train_avg_loss = outs[1]
        label_train = outs[5]
        output_train = outs[6]

        fpr_train, tpr_train, thresholds_train = roc_curve(label_train, output_train,
                                                           pos_label=label_train.max())
        roc_auc_train = auc(fpr_train, tpr_train)
        auc_train.append(roc_auc_train)

        val_avg_loss, val_classification, val_labels_corres = sess.run(
            [model.loss, model.classification, model.labels_class],
            feed_dict=val_feed_dict)
        fpr_val, tpr_val, thresholds_val = roc_curve(val_labels_corres, val_classification,
                                                     pos_label=label_train.max())
        roc_auc_val = auc(fpr_val, tpr_val)
        auc_val.append(roc_auc_val)

        test_avg_loss, test_classification, test_labels_corres = sess.run(
            [model.loss, model.classification, model.labels_class],
            feed_dict=test_feed_dict)
        fpr_test, tpr_test, thresholds_test = roc_curve(test_labels_corres, test_classification,
                                                        pos_label=label_train.max())
        roc_auc_test = auc(fpr_test, tpr_test)
        auc_test.append(roc_auc_test)
        test_pred.append(test_classification)

        if VERBOSE:
            print("[*] Epoch:", '%04d' % (epoch + 1),
                  "train_loss=", "{:.5f}".format(train_avg_loss),
                  "train_auc=", "{:.5f}".format(roc_auc_train),
                  "val_loss=", "{:.5f}".format(val_avg_loss),
                  "val_auc=", "{:.5f}".format(roc_auc_val),
                  "\t\ttime=", "{:.5f}".format(time.time() - t))
            print('test auc = ', roc_auc_test)

    sess.close()
    return auc_test, auc_train, auc_val
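
# Hedged usage sketch: the hyperparameter values below are illustrative only;
# the source does not fix them. hidden[0] is chosen as a multiple of the 23
# support matrices so the stack layer splits evenly.
if __name__ == '__main__':
    auc_test, auc_train, auc_val = run(seed=0, gamma=1e-3, beta=1e-3,
                                       hidden=[46, 8], lr=0.001, NB_EPOCH=300)
    print('final test AUC:', auc_test[-1])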
# Juliette Valenchon 04/2019
import numpy as np

import read_tadpole
import preprocessing_dataset

path_dataset_matrix = 'stat_results_v1_notext.csv'  # matrix to complete
stat, nrRows, nrCols = read_tadpole.load_csv_no_header(path_dataset_matrix)
res_stat = stat.astype(np.float64)

path_dataset_matrix = 'bl_data_MCI_v1.csv'
M_str, nrRows, nrCols = read_tadpole.load_csv_no_header(path_dataset_matrix)
M_init = preprocessing_dataset.normalization(M_str)


def remove_column(column_remove, M):
    """
    Remove from M the columns indexed by the indices in column_remove.

    Inputs:
        column_remove: list of the indices of the columns to remove,
        M: matrix from which we want to remove those columns.
    Output:
        M: matrix with the columns removed.
    """
    column_remove.sort(reverse=True)
    for i in range(len(column_remove)):
        M = np.hstack((M[:, :column_remove[i]], M[:, column_remove[i] + 1:]))
    return M
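
# Note (added, not from the source): NumPy provides the same operation
# directly; np.delete drops all the listed columns in one call and does not
# require sorting the indices in reverse order. A minimal equivalent sketch:
def remove_column_np(column_remove, M):
    """Drop the columns of M listed in column_remove using np.delete."""
    return np.delete(M, column_remove, axis=1)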
                if attributes[k][l] == attributes[j][l]:
                    adj[k, j] += 1
                    adj[j, k] += 1
    else:
        print("we only use two attributes here to build the adjacency matrix")
    return adj


if __name__ == '__main__':
    age_file_path = 'age.csv'
    gender_file_path = 'gender.csv'
    age_column, nrRowsa, nrColsa = read_tadpole.load_csv_no_header(age_file_path)
    gender_column, nrRowsg, nrColsg = read_tadpole.load_csv_no_header(gender_file_path)

    """meta_data_graph = np.concatenate((age_column, gender_column), axis=1)
    a3 = adjacency_agesex(meta_data_graph)
    a2 = adjacency_sex(gender_column)
    a1 = adjacency_age(age_column)
    affinity_matrix = adjacency_age_sex_agesex(meta_data_graph)
    np.savetxt('affinity_mat_parisot_v1.csv', affinity_matrix, fmt='%f', delimiter=',')
    np.savetxt('affinity_mat_1.csv', a1, fmt='%f', delimiter=',')
    np.savetxt('affinity_mat_2.csv', a2, fmt='%f', delimiter=',')
    np.savetxt('affinity_mat_3.csv', a3, fmt='%f', delimiter=',')"""

    meta_data_graph = np.concatenate((age_column, gender_column), axis=1)
    adja = adjacency_age_sex_agesex(meta_data_graph)
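
# Hedged sketch (not the repository's exact code): the pattern above builds an
# affinity matrix by incrementing adj[k, j] whenever two subjects agree on a
# meta attribute. For a single categorical attribute such as sex it reduces to:
def adjacency_single_attribute(attribute):
    """Return an n x n matrix with 1 where two subjects share the attribute value."""
    n = len(attribute)
    adj = np.zeros((n, n))
    for k in range(n):
        for j in range(k + 1, n):
            if attribute[k] == attribute[j]:
                adj[k, j] += 1
                adj[j, k] += 1
    return adj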
import csv

import numpy as np

from preprocessing import sparse_to_tuple, load_data_monti_tadpole
from model import MG_GAE
from utils import construct_feed_dict
import read_tadpole
import preprocessing_dataset

VERBOSE = True

path_dataset_matrix = "synthetic_data_noteasy.csv"
labels_path = "labels_synthetic_data_noteasy.csv"

# Load the .csv files
M_str, nrRows, nrCols = read_tadpole.load_csv_no_header(path_dataset_matrix)
labels, _, _ = read_tadpole.load_csv_no_header(labels_path)

gender_file_path = 'sexs_synthetic_data_noteasy.csv'
sex, nrRowsg, nrColsg = read_tadpole.load_csv_no_header(gender_file_path)
gender_column = sex.reshape(-1)

age_file_path = 'ages_synthetic_data_noteasy.csv'
age_column, nrRowsa, nrColsa = preprocessing_dataset.load_csv_no_header_float(age_file_path)
age_column = age_column.reshape(-1)

labels = preprocessing_dataset.str_to_float(labels)
labels_save = labels.reshape(-1)
M_init = np.concatenate((M_str, labels), axis=1)
M_support = np.concatenate((M_str, labels), axis=1)
            attribute_mat[j][i] = 0
    for j in range(len(attribute)):
        attribute_mat[j][-1] = 1
    return attribute_mat


sex = []
SYNTH = False

if __name__ == '__main__':
    if SYNTH:
        path_feat_dep = 'feat_dependence_synthetic_data_noteasy.csv'
        feat_dep, _, _ = read_tadpole.load_csv_no_header(path_feat_dep)
        feat_dep = preprocessing_dataset.str_to_float(feat_dep)

        age_file_path = 'ages_synthetic_data_noteasy.csv'
        gender_file_path = 'sexs_synthetic_data_noteasy.csv'
        age_column, nrRowsa, nrColsa = read_tadpole.load_csv_no_header(age_file_path)
        gender_column, nrRowsg, nrColsg = read_tadpole.load_csv_no_header(gender_file_path)
        age_column = preprocessing_dataset.str_to_float(age_column)
        gender_column = preprocessing_dataset.str_to_float(gender_column)

        mask_age = mask_synth(feat_dep, age_column, 0)
        mask_sex = mask_synth(feat_dep, gender_column, 1)
        mask_agesex = mask_synth(feat_dep, age_column, 2)

        np.savetxt('mask_age_synthnoteasy.csv', mask_age, fmt='%f', delimiter=',')
        np.savetxt('mask_sex_synthnoteasy.csv', mask_sex, fmt='%f', delimiter=',')
font = {'family': 'Times New Roman', 'weight': 'medium', 'size': 20}

import matplotlib
matplotlib.rc('font', **font)
import numpy as np
# pylab's `hold` was removed in matplotlib 3.0 and is no longer importable
from pylab import plot, show, savefig, xlim, figure, \
    ylim, legend, boxplot, setp, axes
import pandas as pd
import seaborn

import read_tadpole
import preprocessing_dataset

path_dataset_matrix = 'bl_data_MCI_v1.csv'  # matrix to complete
M_str, nrRows, nrCols = read_tadpole.load_csv_no_header(path_dataset_matrix)

path_dataset_header = 'header.csv'  # labels of the columns of features of M_str
header_str_col, _, _ = read_tadpole.load_csv_no_header(path_dataset_header)

gender_file_path = 'gender.csv'
gender_column, nrRowsg, nrColsg = read_tadpole.load_csv_no_header(gender_file_path)

age_file_path = 'age.csv'
age_column, nrRowsa, nrColsa = read_tadpole.load_csv_no_header(age_file_path)

M_str = preprocessing_dataset.normalization_gae(M_str)

# Create a matrix X with the three covariates AGE, SEX, AGE&SEX
age = age_column.astype(np.float64)
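
# Hedged continuation sketch: the source cuts off after converting age. One
# plausible way to finish building the covariate matrix X announced in the
# comment above; the product term for AGE&SEX is an assumption, not confirmed
# by the source.
sex = gender_column.astype(np.float64)
X = np.concatenate((age.reshape(-1, 1),
                    sex.reshape(-1, 1),
                    (age * sex).reshape(-1, 1)),  # AGE&SEX interaction term
                   axis=1)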