Example #1
import sys

from utilities_pckg import utilities

APPROACHES = ['KNN']
TRAINING_SET_SIZE = [10000, 30000, 50000]
# TRAINING_SET_SIZE = [int(sys.argv[1])]
TEST_SET_SIZE = [50000]
# TEST_SET_SIZE = [int(sys.argv[3])]
VALIDATION_SET_SIZE = [1000, 3000, 5000]
# VALIDATION_SET_SIZE = [int(sys.argv[2])]
# TEST_ITERATIONS = 1  # 100
TEST_ITERATIONS_BEG = 0
TEST_ITERATIONS_END = 50
TRAIN_ITERATIONS = 3
ENHANCED = False

RESULT_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_PSW/RESULT_FOLDER_REMAPPING/"
utilities.createFolder(RESULT_FOLDER)

RESULT_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_PSW/RESULT_FOLDER_REMAPPING/KNN/"
utilities.createFolder(RESULT_FOLDER)

DATA_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_PSW/DATA_FOLDER_AFTER_OUR_PREPROCESSING/"

DATA_FOLDER_TEST = "/home/comete/mromanel/MILES_EXP/EXP_PSW/DATA_FOLDER/"

G_MATRIX_PATH = '/home/comete/mromanel/MILES_EXP/EXP_PSW/G_MAT_FOLDER/G_MAT'


def read_command_line_options():
    thismodule = sys.modules[__name__]
    for idx, key_val in enumerate(sys.argv, 0):
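        # hypothetical completion (the original snippet is truncated here):
        # interpret argv entries as "--NAME value" pairs and override the
        # module-level constants above through thismodule
        if key_val.startswith("--") and idx + 1 < len(sys.argv):
            setattr(thismodule, key_val.lstrip("-"), sys.argv[idx + 1])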
Example #2
def main_EXP_G_VULN_GEO_LOCATION_create_data():
    channel_matrix = pn.read_pickle(path=CHANNEL_MATRIX_FILE)
    secrets_occurr_dictionary = pn.read_pickle(
        path=ORIGINAL_SECRETS_OCCURRENCES_FILE)
    # print "secrets_occurr_dictionary ===> ", secrets_occurr_dictionary

    tot_occurr = 0
    maxx_occurr = 0
    for secret in secrets_occurr_dictionary:
        current_secret_occurr = secrets_occurr_dictionary[secret]
        if current_secret_occurr > maxx_occurr:
            maxx_occurr = current_secret_occurr
        tot_occurr += current_secret_occurr
        # print secret, " ===> ", current_secret_occurr
    # print "maxx_occurr ===> ", maxx_occurr

    secrets_prior_dictionary = {}
    maxx_freq = 0
    for secret in secrets_occurr_dictionary:
        secrets_prior_dictionary[
            secret] = secrets_occurr_dictionary[secret] / float(tot_occurr)
        if secrets_prior_dictionary[secret] > maxx_freq:
            maxx_freq = secrets_prior_dictionary[secret]
    # print "secrets_prior_dictionary ===> ", secrets_prior_dictionary
    # print "maxx_freq ===> ", maxx_freq

    for mult_card in MULTIPLICATIVE_FACTOR_FOR_SETS_CARDINALITY:

        reform_secrets_occurr_dictionary_tr_ts = {}
        reform_secrets_occurr_dictionary_val = {}
        for secret in secrets_occurr_dictionary:
            reform_secrets_occurr_dictionary_tr_ts[secret] = int(
                round(secrets_occurr_dictionary[secret] * float(mult_card), 0))
            reform_secrets_occurr_dictionary_val[secret] = int(
                round(
                    secrets_occurr_dictionary[secret] * float(mult_card) *
                    float(VALIDATION_CARD_AS_FRACTION_OF_TR_CARD), 0))
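            # e.g. (hypothetical values) with mult_card = 2 and
            # VALIDATION_CARD_AS_FRACTION_OF_TR_CARD = 0.1, a secret observed 7
            # times gets round(7 * 2) = 14 train/test samples and
            # round(7 * 2 * 0.1) = round(1.4) = 1 validation sample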

        training_set_size = 0
        for keys in reform_secrets_occurr_dictionary_tr_ts:
            training_set_size += reform_secrets_occurr_dictionary_tr_ts[keys]
        test_set_size = training_set_size

        validation_set_size = 0
        for keys in reform_secrets_occurr_dictionary_val:
            validation_set_size += reform_secrets_occurr_dictionary_val[keys]

        # print "reform_secrets_occurr_dictionary_tr_ts ===> ", reform_secrets_occurr_dictionary_tr_ts

        for train_iteration in tqdm(range(TRAINING_ITERATIONS)):
            training_and_validation_and_test_set_store_folder = DATA_FOLDER + str(
                training_set_size) + "_training_and_" + str(
                    validation_set_size) + "_validation_and_" + str(
                        test_set_size
                    ) + "_test_store_folder_train_iteration_" + str(
                        train_iteration) + "/"

            utilities.createFolder(
                training_and_validation_and_test_set_store_folder)

            training_set_mat = sample_from_channel(
                channel_matrix=channel_matrix,
                rndmstt=utilities.create_new_rndm_state(),
                samples_per_secret_dictionary=
                reform_secrets_occurr_dictionary_tr_ts)

            training_df = pn.DataFrame(data=training_set_mat,
                                       columns=["O_train", "S_train"])
            training_df.to_pickle(
                path=training_and_validation_and_test_set_store_folder +
                "/training_set.pkl")

            print(training_set_mat.shape)

            ################################################################################################################

            validation_set_mat = sample_from_channel(
                channel_matrix=channel_matrix,
                rndmstt=utilities.create_new_rndm_state(),
                samples_per_secret_dictionary=
                reform_secrets_occurr_dictionary_val)

            validation_df = pn.DataFrame(data=validation_set_mat,
                                         columns=["O_val", "S_val"])
            validation_df.to_pickle(
                path=training_and_validation_and_test_set_store_folder +
                "/validation_set.pkl")

            print(validation_set_mat.shape)

            ################################################################################################################

            for test_iteration in range(TEST_ITERATIONS):
                test_set_mat = sample_from_channel(
                    channel_matrix=channel_matrix,
                    rndmstt=utilities.create_new_rndm_state(),
                    samples_per_secret_dictionary=
                    reform_secrets_occurr_dictionary_tr_ts)

                test_set_store_folder = training_and_validation_and_test_set_store_folder + str(
                    test_set_size) + "_size_test_sets/"
                utilities.createFolder(path=test_set_store_folder)

                test_df = pn.DataFrame(data=test_set_mat,
                                       columns=["O_test", "S_test"])
                test_df.to_pickle(path=test_set_store_folder + "/test_set_" +
                                  str(test_iteration) + ".pkl")
Example #3
def main_EXP_G_VULN_PSW_train_single_ANN_remapping():
    read_command_line_options()

    thismodule = sys.modules[__name__]

    EXP_PSW_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_PSW/"
    utilities.createFolder(EXP_PSW_FOLDER)

    RESULT_FOLDER = EXP_PSW_FOLDER + "RESULT_FOLDER_REMAPPING/"
    utilities.createFolder(RESULT_FOLDER)

    result_folder = RESULT_FOLDER + MODEL_NAME + "/"
    utilities.createFolder(result_folder)

    result_folder = result_folder + str(
        TRAINING_SIZE) + "_training_size_and_" + str(
            VALIDATION_SIZE) + "_validation_size_iteration_" + str(
                TRAINING_ITERATION) + "/"
    utilities.createFolder(result_folder)

    DATA_FOLDER = EXP_PSW_FOLDER + "DATA_FOLDER_AFTER_OUR_PREPROCESSING/"

    ANN_data_folder = DATA_FOLDER + str(TRAINING_SIZE) + "_training_and_" + str(
        VALIDATION_SIZE) + "_validation_store_folder_train_iteration_" + str(
            TRAINING_ITERATION)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  load datasets  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    print("\n\n\nDATA ARE LOADED FROM " + ANN_data_folder + "\n\n\n")

    with open(result_folder + "/log_file.txt", "w") as log_file:
        log_file.write("\n\n\nDATA ARE LOADED FROM " + ANN_data_folder + "\n\n\n")

    training_set = pn.read_pickle(path=ANN_data_folder + "/training_set.pkl")
    O_train = training_set[:, 0]
    Z_train = training_set[:, 1]
    Z_train_enc = to_categorical(y=Z_train, num_classes=NUM_CLASSES)

    val_set = pn.read_pickle(path=ANN_data_folder + "/validation_set.pkl")
    O_val = val_set[:, 0]
    Z_val = val_set[:, 1]
    Z_val_enc = to_categorical(y=Z_val, num_classes=NUM_CLASSES)
    """O_train = preprocess.scaler_between_minus_one_and_one(column=O_train, min_column=MIN_OBSERVABLE,
                                                          max_column=MAX_OBSERVABLE)

    O_val = preprocess.scaler_between_minus_one_and_one(column=O_val, min_column=MIN_OBSERVABLE,
                                                        max_column=MAX_OBSERVABLE)"""

    min_max_scaler = preprocessing.MinMaxScaler()

    O_train = O_train.reshape(-1, 1)
    O_train = min_max_scaler.fit_transform(O_train)

    O_val = O_val.reshape(-1, 1)
    O_val = min_max_scaler.transform(O_val)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  ANN: instantiate, train, evaluate %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    if len(O_train.shape) == 1:
        input_x_dimension = 1
    else:
        input_x_dimension = O_train.shape[1]

    if thismodule.BATCH_SIZE is None:
        thismodule.BATCH_SIZE = O_train.shape[0]

    secrets_classifier_manager = secrets_classifier.ClassifierNetworkManager(
        number_of_classes=Z_train_enc.shape[1],
        learning_rate=LEARNING_RATE,
        hidden_layers_card=HIDDEN_LAYERS_CARD,
        hidden_neurons_card=HIDDEN_NEAURONS_CARD,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        id_gpu=ID_GPU,
        perc_gpu=PERC_GPU,
        input_x_dimension=input_x_dimension)

    secrets_classifier_manager.train_classifier_net(
        training_set=O_train,
        training_supervision=Z_train_enc,
        validation_set=O_val,
        validation_supervision=Z_val_enc,
        results_folder=result_folder)
Example #4
SMALL_SQUARE_SIZE = 5000

# BIG_SQUARE_SIZE = 6000

SMALL_SQUARE_CELL_SIDE_LENGTH = 250

CELLS_PER_SIDE = SMALL_SQUARE_SIZE // SMALL_SQUARE_CELL_SIDE_LENGTH
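# i.e. 5000 // 250 = 20 cells per grid side with the values above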

DATABASE_PATH = "/home/comete/mromanel/MILES_EXP/gowalla/loc-gowalla_totalCheckins.txt"
COLNAMES = [
    "user", "check_in_timestamp", "latitude", "longitude", "location_id"
]
KEEP_COLS = ["latitude", "longitude"]
STORE_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/"
utilities.createFolder(STORE_FOLDER)


def main_EXP_GEO_LOCATION_QIF_LIB_SETTING_retrieve_data_from_DB():
    ########################################################################################################################
    #############################################  SQUARES OF INTEREST  ####################################################
    ########################################################################################################################

    center = position_class.position(lat=CENTER_LAT, lon=CENTER_LON)

    small_square_min_lat, small_square_max_lat, small_square_min_lon, small_square_max_lon = \
        SOI_utilities.create_square_limit(
            central_position=center, side_length=SMALL_SQUARE_SIZE)

    SMALL_SQUARE = {
        "square_min_lat": small_square_min_lat,
        # remaining entries reconstructed from the unpacked limits above
        # (assumed key naming; the original snippet is truncated here)
        "square_max_lat": small_square_max_lat,
        "square_min_lon": small_square_min_lon,
        "square_max_lon": small_square_max_lon,
    }
Example #5
from tqdm import tqdm
from utilities_pckg import utilities
from qif import channel, measure, probab, metric, point, mechanism, lp
import math
from tabulate import tabulate

tqdm.monitor_interval = 0

TRAINING_SET_SIZE = [100, 1000, 10000, 30000, 50000]  # [90000, 270000, 450000]
# TEST_SET_SIZE = [90000]  # [90000, 270000, 450000]
VALIDATION_SET_SIZE = [10, 100, 1000, 3000, 5000]  # [9000, 27000, 45000]
TEST_ITERATIONS = 50
TRAIN_ITERATIONS = 5

BIS_EXP_GEO_LOCATION_FOLDER = "/home/comete/mromanel/MILES_EXP/BIS_EXP_GEO_LOCATION_QIF_LIB_SETTING/"
utilities.createFolder(BIS_EXP_GEO_LOCATION_FOLDER)

WIDTH = 20  # in cells
HEIGHT = 20  # in cells
CELL_SIZE = 250.  # in length units (meters)
EUCLID = euclid = metric.euclidean(point)  # Euclidean distance on qif.point
MAX_GAIN = 4
ALPHA = 0.95

DATA_FOLDER = BIS_EXP_GEO_LOCATION_FOLDER + "DATA_FOLDER/"
utilities.createFolder(DATA_FOLDER)
G_MAT_PATH = BIS_EXP_GEO_LOCATION_FOLDER + "G_OBJ/g_mat.pkl"

#   set solver
lp.defaults.solver = "GLOP"
Example #6
def main_BIS_EXP_GEO_LOCATION_create_channel_and_data():
    # grid

    # diagonal of the grid
    diag = euclid(point(0, 0), point(CELL_SIZE * WIDTH, CELL_SIZE * HEIGHT))

    # loss function, just euclidean distance
    loss = euclid_cell

    # some sanity checks
    sanity_checks(considered_cell=132)

    max_vuln = f(CELL_SIZE)  # maximum allowed posterior g-vulnerability
    hard_max_loss = 2 * CELL_SIZE  # loss(x,y) > hard_max_loss => C[x,y] = 0
    n_secrets = n_outputs = n_guesses = WIDTH * HEIGHT
    pi_dic = pn.read_pickle(
        path=
        "/home/comete/mromanel/MILES_EXP/BIS_EXP_GEO_LOCATION_QIF_LIB_SETTING/file_prior_distr.pkl"
    )
    # print("\n\n\npi dictionary ---> pi[cell]:cell_probability")
    print(pi_dic)

    pi_mat = np.zeros((WIDTH, HEIGHT))
    for i_ter in range(WIDTH):
        for j_ter in range(HEIGHT):
            cell_id = WIDTH * i_ter + j_ter
            pi_mat[i_ter, j_ter] = pi_dic[cell_id]
    print(
        "\n\n\n Table for pi where up is south, down is north, left is west, right is east."
    )
    headers = [
        "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
        "13", "14", "15", "16", "17", "18", "19"
    ]
    table_pi_mat = tabulate(pi_mat, headers, tablefmt="fancy_grid")
    print(table_pi_mat)

    G = pn.read_pickle(G_MAT_PATH)

    # pi_mat_map = np.flip(pi_mat, 0)
    # print("\n\n\n")
    # print("\n\n\n Table for pi where up is north, down is south, left is west, right is east.")
    # table_pi_mat_map = tabulate(pi_mat_map, headers, tablefmt="fancy_grid")
    # print(table_pi_mat_map)

    pi = pi_mat.flatten()  # probab.uniform(n_secrets)  # uniform prior
    # print("\n\n\npi ---> such that pi[i] = prob_cell[i]")
    print(pi)

    ############################

    list_of_cells_probs = []
    for id_cell_ind in range(len(pi_dic)):
        list_of_cells_probs.append(pi_dic[id_cell_ind])

    # sanity check

    for i in range(len(pi)):
        if pi[i] != list_of_cells_probs[i]:
            sys.exit("ERROR in prior")

    print(euclid_cell(13, 20))
    print(euclid_cell(20, 13))

    # solve
    C = mechanism.g_vuln.min_loss_given_max_vuln(pi, n_outputs, n_guesses,
                                                 max_vuln, gain, loss,
                                                 hard_max_loss)

    # get rho, R, a, b
    (rho, R, a, b) = measure.g_vuln.g_to_bayes(G, pi)
    print("a --->" + str(a))
    print("b --->" + str(b))

    # for any C we have Vg[pi, C] = a * V[rho, RC] + b

    print("    Vg[pi, C]:     ", measure.g_vuln.posterior(G, pi, C))
    print("a * V[rho, RC] + b:",
          a * measure.bayes_vuln.posterior(rho, R.dot(C)) + b)

    print(
        len(
            np.unique(
                create_single_dataset(size=10000, R=R, rho=rho, C=C)[:, 0])))

    # so we can estimate Vg in a black-box manner by generating samples according to rho and RC!
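
    # a minimal sketch of that black-box sampling step (an assumed helper
    # shape, not necessarily the repo's create_single_dataset):
    #
    #     def draw_pair(rho, RC):
    #         z = probab.draw(rho)        # remapped secret, distributed as rho
    #         y = probab.draw(RC[z, :])   # observable, distributed as row z of RC
    #         return y, z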

    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit("ERROR! Different size lists' lengths.")

    for size_list_iterator in range(len(TRAINING_SET_SIZE)):
        training_set_size = TRAINING_SET_SIZE[size_list_iterator]
        validation_set_size = VALIDATION_SET_SIZE[size_list_iterator]
        # test_set_size = TEST_SET_SIZE[size_list_iterator]

        for train_iteration in range(TRAIN_ITERATIONS):
            training_and_validation_and_test_set_store_folder = DATA_FOLDER + str(
                training_set_size) + "_training_and_" + str(
                    validation_set_size
                ) + "_validation_store_folder_train_iteration_" + str(
                    train_iteration) + "/"
            utilities.createFolder(
                path=training_and_validation_and_test_set_store_folder)

            tr = create_single_dataset(size=training_set_size,
                                       R=R,
                                       rho=rho,
                                       C=C)
            val = create_single_dataset(size=validation_set_size,
                                        R=R,
                                        rho=rho,
                                        C=C)

            pn.to_pickle(
                obj=tr,
                path=training_and_validation_and_test_set_store_folder +
                "training_set.pkl",
                protocol=2)
            pn.to_pickle(
                obj=val,
                path=training_and_validation_and_test_set_store_folder +
                "validation_set.pkl",
                protocol=2)

            print("\n\n\nSize " + str(TRAINING_SET_SIZE[size_list_iterator]) +
                  ", train iteration " + str(train_iteration))
Example #7
def main_EXP_G_VULN_MULTIPLE_GUESSES_create_data():
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  geometric distribution loading  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    print(
        "\n####################################################################################"
    )
    print(
        "#########################  geometric distribution loading  #########################"
    )
    print(
        "####################################################################################\n"
    )

    utilities.createFolder(DATA_FOLDER)

    channel_matrix_df = pn.read_pickle(path=CHANNEL_PATH)

    #   sanity check: column and row labels must be sorted in strictly
    #   increasing order
    import sys
    for i in range(len(channel_matrix_df.columns.values) - 1):
        if channel_matrix_df.columns.values[
                i + 1] <= channel_matrix_df.columns.values[i]:
            sys.exit("BAD CHANNEL FORMAT: cols")
    for i in range(len(channel_matrix_df.index.values) - 1):
        if channel_matrix_df.index.values[
                i + 1] <= channel_matrix_df.index.values[i]:
            sys.exit("BAD CHANNEL FORMAT: rows")

    channel_matrix = np.transpose(channel_matrix_df.values)
    print(channel_matrix.shape)

    print("Vg(pi, C)", measure.g_vuln.posterior(gain, pi, C=channel_matrix))

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  create training sets  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    #   X are the observables and y are the secrets (cols 0 and 1 respectively); stratify w.r.t. the secret
    #   split training and test data
    # mt = create_single_dataset(size=50000, C=channel_matrix)
    # print(mt)
    # print(len(np.unique(mt[:, 0])))

    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        err_hndl(str_="array_sizes_not_matching", add=inspect.stack()[0][3])

    for size_list_iterator in range(len(TRAINING_SET_SIZE)):
        training_set_size = TRAINING_SET_SIZE[size_list_iterator]
        validation_set_size = VALIDATION_SET_SIZE[size_list_iterator]

        for train_iteration in range(TRAIN_ITERATIONS):
            training_set_mat = create_single_dataset(size=training_set_size,
                                                     C=channel_matrix)

            training_and_validation_and_test_set_store_folder = DATA_FOLDER + str(
                training_set_size) + "_training_and_" + str(
                    validation_set_size
                ) + "_validation_store_folder_train_iteration_" + str(
                    train_iteration) + "/"

            utilities.createFolder(
                path=training_and_validation_and_test_set_store_folder)

            training_df = pn.DataFrame(data=training_set_mat,
                                       columns=["O_train", "S_train"])
            pn.to_pickle(
                obj=training_df.values,
                path=training_and_validation_and_test_set_store_folder +
                "/training_set.pkl",
                protocol=2)

            ################################################################################################################

            validation_set_mat = create_single_dataset(
                size=validation_set_size, C=channel_matrix)

            validation_df = pn.DataFrame(data=validation_set_mat,
                                         columns=["O_val", "S_val"])
            pn.to_pickle(
                obj=validation_df.values,
                path=training_and_validation_and_test_set_store_folder +
                "/validation_set.pkl",
                protocol=2)

            # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  create test sets  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    if CREATE_TEST_SET:
        list_unq = []
        print(
            "\n####################################################################################"
        )
        print(
            "#################################  create test sets  ################################"
        )
        print(
            "####################################################################################\n"
        )
        test_set_size = TEST_SET_SIZE[0]
        for test_iteration in range(TEST_ITERATIONS):
            test_set_mat = create_single_dataset(size=test_set_size,
                                                 C=channel_matrix)
            list_unq.append(len(np.unique(test_set_mat[:, 0])))

            test_set_store_folder = DATA_FOLDER + str(
                test_set_size) + "_size_test_sets/"
            utilities.createFolder(path=test_set_store_folder)

            test_df = pn.DataFrame(data=test_set_mat,
                                   columns=["O_test", "S_test"])
            pn.to_pickle(obj=test_df.values,
                         path=test_set_store_folder + "/test_set_" +
                         str(test_iteration) + ".pkl",
                         protocol=2)

        print(list_unq)
Example #8
from utilities_pckg import utilities
from qif import metric, point, lp

TRAINING_SET_SIZE = [100, 1000, 10000, 30000, 50000]
TEST_SET_SIZE = [50000]
VALIDATION_SET_SIZE = [10, 100, 1000, 3000, 5000]
TEST_ITERATIONS = 50
TRAIN_ITERATIONS = 5

WIDTH = 20  # in cells
HEIGHT = 20  # in cells
CELL_SIZE = 250.  # in length units (meters)
EUCLID = euclid = metric.euclidean(point)  # Euclidean distance on qif.point
MAX_GAIN = 4
ALPHA = 0.95

DATA_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/DATA_FOLDER/"
utilities.createFolder(path=DATA_FOLDER)

CHANNEL_PATH = "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/channel.pkl"

G_OBJ_PATH = "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/G_OBJ/"
utilities.createFolder(path=G_OBJ_PATH)
G_MAT_PATH = G_OBJ_PATH + "g_mat.pkl"
G_MAT_ROWS_PATH = G_OBJ_PATH + "g_mat_rows.pkl"
G_MAT_COLS_PATH = G_OBJ_PATH + "g_mat_cols.pkl"

#   set solver
lp.defaults.solver = "GLOP"


# euclidean distance on cell ids
def euclid_cell(a, b):
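    # plausible body (an assumption; the original snippet is truncated here):
    # map each cell id to its (row, col) grid position, scale by the cell side
    # length, and take the euclidean distance between the resulting points
    ai, aj = divmod(a, WIDTH)
    bi, bj = divmod(b, WIDTH)
    return euclid(point(CELL_SIZE * aj, CELL_SIZE * ai),
                  point(CELL_SIZE * bj, CELL_SIZE * bi))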
Example #9
def main_BIS_EXP_G_VULN_MULTIPLE_GUESSES_create_channel_and_data():
    #   pi distribution
    n = 10
    pi = probab.uniform(n)
    print(pi.shape)

    #   g matrix
    G = pn.read_pickle(path=G_MAT_FILE)
    print(G.shape)

    #   channel matrix
    C = pn.read_pickle(path=CHANNEL_FILE).values
    C = np.transpose(C)
    print(C.shape)

    # get rho, R, a, b
    (rho, R, a, b) = measure.g_vuln.g_to_bayes(G, pi)
    print("a --->" + str(a))
    print("b --->" + str(b))

    # for any C we have Vg[pi, C] = a * V[rho, RC] + b

    print("    Vg[pi, C]:     ", measure.g_vuln.posterior(G, pi, C))
    print("a * V[rho, RC] + b:",
          a * measure.bayes_vuln.posterior(rho, R.dot(C)) + b)

    print(
        len(
            np.unique(
                create_single_dataset(size=10000, R=R, rho=rho, C=C)[:, 0])))

    # so we can estimate Vg in a black-box manner by generating samples according to rho and RC!

    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit("ERROR! Different size lists' lengths.")

    for size_list_iterator in range(len(TRAINING_SET_SIZE)):
        training_set_size = TRAINING_SET_SIZE[size_list_iterator]
        validation_set_size = VALIDATION_SET_SIZE[size_list_iterator]
        # test_set_size = TEST_SET_SIZE[size_list_iterator]

        for train_iteration in range(TRAIN_ITERATIONS):
            training_and_validation_and_test_set_store_folder = DATA_FOLDER + str(
                training_set_size) + "_training_and_" + str(
                    validation_set_size
                ) + "_validation_store_folder_train_iteration_" + str(
                    train_iteration) + "/"
            utilities.createFolder(
                path=training_and_validation_and_test_set_store_folder)

            tr = create_single_dataset(size=training_set_size,
                                       R=R,
                                       rho=rho,
                                       C=C)
            val = create_single_dataset(size=validation_set_size,
                                        R=R,
                                        rho=rho,
                                        C=C)

            pn.to_pickle(
                obj=tr,
                path=training_and_validation_and_test_set_store_folder +
                "training_set.pkl",
                protocol=2)
            pn.to_pickle(
                obj=val,
                path=training_and_validation_and_test_set_store_folder +
                "validation_set.pkl",
                protocol=2)

            print("\n\n\nSize " + str(TRAINING_SET_SIZE[size_list_iterator]) +
                  ", train iteration " + str(train_iteration))
Example #10
def main_EXP_G_VULN_GEO_LOCATION_train_single_ANN_remapping():
    read_command_line_options()

    thismodule = sys.modules[__name__]

    EXP_G_VULN_GEO_LOCATION_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/"
    utilities.createFolder(EXP_G_VULN_GEO_LOCATION_FOLDER)

    RESULT_FOLDER = EXP_G_VULN_GEO_LOCATION_FOLDER + "RESULT_FOLDER_REMAPPING/"
    utilities.createFolder(RESULT_FOLDER)

    result_folder = RESULT_FOLDER + MODEL_NAME + "/"
    utilities.createFolder(result_folder)

    result_folder = result_folder + str(
        TRAINING_SIZE) + "_training_size_and_" + str(
            VALIDATION_SIZE) + "_validation_size_iteration_" + str(
                TRAINING_ITERATION) + "/"
    utilities.createFolder(result_folder)

    DATA_FOLDER = EXP_G_VULN_GEO_LOCATION_FOLDER + "DATA_FOLDER_AFTER_OUR_PREPROCESSING/"

    ANN_data_folder = DATA_FOLDER + str(TRAINING_SIZE) + "_training_and_" + str(
        VALIDATION_SIZE) + "_validation_store_folder_train_iteration_" + str(
            TRAINING_ITERATION)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  load datasets  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    print("\n\n\nDATA ARE LOADED FROM " + ANN_data_folder + "\n\n\n")

    with open(result_folder + "/log_file.txt", "w") as log_file:
        log_file.write("\n\n\nDATA ARE LOADED FROM " + ANN_data_folder + "\n\n\n")

    training_set = pn.read_pickle(path=ANN_data_folder + "/training_set.pkl")
    O_train = training_set.values[:, 0]
    S_train = training_set.values[:, 1]
    Z_train = training_set.values[:, 2]
    # Z_train_enc = preprocess.array_one_hot_encoder(supervision_=Z_train)
    """Z_train_list = []

    O_train_unq = np.unique(O_train)

    for otu in O_train_unq:
        idx = np.where(O_train == otu)[0]
        tmp = np.mean(Z_train_enc[idx, :], axis=0)
        idx_max = np.argmax(tmp)
        for r in range(len(tmp)):
            if r != idx_max:
                tmp[r] = 0.
            else:
                tmp[r] = 1.
        Z_train_list.append(tmp)

    Z_train_final_list = []

    for ot in O_train:
        idx = np.where(O_train_unq == ot)[0]
        for el in idx:
            Z_train_final_list.append(Z_train_list[el])

    # Z_train = np.array(Z_train_final_list).reshape((Z_train_enc.shape[0], Z_train_enc.shape[1]))
    Z_train = np.array(Z_train_list).reshape((len(O_train_unq), Z_train_enc.shape[1]))"""

    val_set = pn.read_pickle(path=ANN_data_folder + "/validation_set.pkl")
    O_val = val_set.values[:, 0]
    S_val = val_set.values[:, 1]
    Z_val = val_set.values[:, 2]
    # Z_val_enc = preprocess.array_one_hot_encoder(supervision_=Z_val)
    """Z_val_list = []

    O_val_unq = np.unique(O_val)

    for otu in O_val_unq:
        idx = np.where(O_val == otu)[0]
        tmp = np.mean(Z_val_enc[idx, :], axis=0)
        idx_max = np.argmax(tmp)
        for r in range(len(tmp)):
            if r != idx_max:
                tmp[r] = 0.
            else:
                tmp[r] = 1.
        Z_val_list.append(tmp)

    Z_val_final_list = []

    for ot in O_val:
        idx = np.where(O_val_unq == ot)[0]
        for el in idx:
            Z_val_final_list.append(Z_val_list[el])

    # Z_val = np.array(Z_val_final_list).reshape((Z_val_enc.shape[0], Z_val_enc.shape[1]))
    Z_val = np.array(Z_val_list).reshape((len(O_val_unq), Z_val_enc.shape[1]))"""
    """O_train = preprocess.scaler_between_minus_one_and_one(column=O_train, min_column=MIN_OBSERVABLE,
                                                          max_column=MAX_OBSERVABLE)

    O_val = preprocess.scaler_between_minus_one_and_one(column=O_val, min_column=MIN_OBSERVABLE,
                                                        max_column=MAX_OBSERVABLE)"""

    min_max_scaler = preprocessing.MinMaxScaler()

    O_train = O_train.reshape(-1, 1)
    O_train = min_max_scaler.fit_transform(O_train)

    O_val = O_val.reshape(-1, 1)
    O_val = min_max_scaler.transform(O_val)

    Z_train_enc = one_hot_enc(y=Z_train, num_classes=N_CLASSES)

    Z_val_enc = one_hot_enc(y=Z_val, num_classes=N_CLASSES)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  ANN: instantiate, train, evaluate %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    if len(O_train.shape) == 1:
        input_x_dimension = 1
    else:
        input_x_dimension = O_train.shape[1]

    if thismodule.BATCH_SIZE is None:
        thismodule.BATCH_SIZE = O_train.shape[0]

    secrets_classifier_manager = secrets_classifier.ClassifierNetworkManager(
        number_of_classes=Z_train_enc.shape[1],
        learning_rate=LEARNING_RATE,
        hidden_layers_card=HIDDEN_LAYERS_CARD,
        hidden_neurons_card=HIDDEN_NEAURONS_CARD,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        id_gpu=ID_GPU,
        perc_gpu=PERC_GPU,
        input_x_dimension=input_x_dimension)

    secrets_classifier_manager.train_classifier_net(
        training_set=O_train,
        training_supervision=Z_train_enc,
        validation_set=O_val,
        validation_supervision=Z_val_enc,
        results_folder=result_folder)
Example #11
import numpy as np
import pandas as pn
from tqdm import tqdm
from utilities_pckg import utilities
from qif import channel, measure, probab

tqdm.monitor_interval = 0

TRAINING_SET_SIZE = [10000, 30000, 50000]  # [90000, 270000, 450000]
TEST_SET_SIZE = [90000]  # [90000, 270000, 450000]
VALIDATION_SET_SIZE = [1000, 3000, 5000]  # [9000, 27000, 45000]
TEST_ITERATIONS = 50
TRAIN_ITERATIONS = 5

BIS_EXP_G_VULN_MULTIPLE_GUESSES_FOLDER = "/home/comete/mromanel/MILES_EXP/BIS_EXP_G_VULN_MULTIPLE_GUESSES/"
utilities.createFolder(BIS_EXP_G_VULN_MULTIPLE_GUESSES_FOLDER)

CHANNEL_FILE = BIS_EXP_G_VULN_MULTIPLE_GUESSES_FOLDER + "channel_df_norm.pkl"
G_MAT_FILE = BIS_EXP_G_VULN_MULTIPLE_GUESSES_FOLDER + "G_MAT_FOLDER/g_matrix_10_secrets_2_guesses.pkl"

DATA_FOLDER = BIS_EXP_G_VULN_MULTIPLE_GUESSES_FOLDER + "DATA_FOLDER/"
utilities.createFolder(DATA_FOLDER)


##### draw from rho/RC, black box
def execute_C(x, C):
    # we only have black-box access to C: this runs C under secret x and
    # returns an output y
    return probab.draw(C[x, :])
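

# a hedged companion sketch (an assumption, not part of the original snippet):
# draw one (observable, remapped secret) pair by sampling z according to rho
# and then running the pre-processed channel R.dot(C) as a black box on z
def draw_pair_from_rho_RC(rho, R, C):
    z = probab.draw(rho)           # remapped secret ~ rho
    y = execute_C(z, R.dot(C))     # observable ~ row z of R.dot(C)
    return y, z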
Example #12
def main_BIS_EXP_G_VULN_DP_train_single_ANN_remapping():
    read_command_line_options()

    thismodule = sys.modules[__name__]

    BIS_EXP_G_VULN_DP_FOLDER = "/home/comete/mromanel/MILES_EXP/BIS_EXP_G_VULN_DP_FOLDER/"
    utilities.createFolder(BIS_EXP_G_VULN_DP_FOLDER)

    RESULT_FOLDER = BIS_EXP_G_VULN_DP_FOLDER + "RESULT_FOLDER_REMAPPING/"
    utilities.createFolder(RESULT_FOLDER)

    result_folder = RESULT_FOLDER + MODEL_NAME + "/"
    utilities.createFolder(result_folder)

    result_folder = result_folder + str(
        TRAINING_SIZE) + "_training_size_and_" + str(
            VALIDATION_SIZE) + "_validation_size_iteration_" + str(
                TRAINING_ITERATION) + "/"
    utilities.createFolder(result_folder)

    DATA_FOLDER = BIS_EXP_G_VULN_DP_FOLDER + "DATA_FOLDER/"

    ANN_data_folder = DATA_FOLDER + str(TRAINING_SIZE) + "_training_and_" + str(
        VALIDATION_SIZE) + "_validation_store_folder_train_iteration_" + str(
            TRAINING_ITERATION)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  load datasets  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    print("\n\n\nDATA ARE LOADED FROM " + ANN_data_folder + "\n\n\n")

    with open(result_folder + "/log_file.txt", "w") as log_file:
        log_file.write("\n\n\nDATA ARE LOADED FROM " + ANN_data_folder + "\n\n\n")

    training_set = pn.read_pickle(path=ANN_data_folder + "/training_set.pkl")
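    # layout implied by the slicing below: observable features fill all but the
    # last two columns, the secret sits in column -2 and the remapped secret
    # (the classification target) in column -1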
    O_train = training_set[:, 0:training_set.shape[1] - 2]
    print(O_train.shape)
    S_train = training_set[:, -2]
    Z_train = training_set[:, -1]
    Z_train_enc = to_categorical(y=Z_train, num_classes=NUM_CLASSES)

    val_set = pn.read_pickle(path=ANN_data_folder + "/validation_set.pkl")
    O_val = val_set[:, 0:val_set.shape[1] - 2]
    S_val = val_set[:, -2]
    Z_val = val_set[:, -1]
    Z_val_enc = to_categorical(y=Z_val, num_classes=NUM_CLASSES)

    min_ = np.min(O_train)
    # print min_
    max_ = np.max(O_train)
    # print max_

    O_train = preprocess.scaler_zero_one_all_cols(data=O_train,
                                                  min_=min_,
                                                  max_=max_)

    O_val = preprocess.scaler_zero_one_all_cols(data=O_val,
                                                min_=min_,
                                                max_=max_)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  ANN: instantiate, train, evaluate %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    if len(O_train.shape) == 1:
        input_x_dimension = 1
    else:
        input_x_dimension = O_train.shape[1]

    if thismodule.BATCH_SIZE is None:
        thismodule.BATCH_SIZE = O_train.shape[0]

    secrets_classifier_manager = secrets_classifier.ClassifierNetworkManager(
        number_of_classes=Z_train_enc.shape[1],
        learning_rate=LEARNING_RATE,
        hidden_layers_card=HIDDEN_LAYERS_CARD,
        hidden_neurons_card=HIDDEN_NEAURONS_CARD,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        id_gpu=ID_GPU,
        perc_gpu=PERC_GPU,
        input_x_dimension=input_x_dimension)

    secrets_classifier_manager.train_classifier_net(
        training_set=O_train,
        training_supervision=Z_train_enc,
        validation_set=O_val,
        validation_supervision=Z_val_enc,
        results_folder=result_folder)
Example #13
def main_EXP_GEO_LOCATION_QIF_LIB_SETTING_create_channel_and_g_mat_and_data():
    # grid

    # diagonal of the grid
    diag = euclid(point(0, 0), point(CELL_SIZE * WIDTH, CELL_SIZE * HEIGHT))

    # loss function, just euclidean distance
    loss = euclid_cell

    # some sanity checks
    sanity_checks(considered_cell=132)

    max_vuln = f(CELL_SIZE)  # maximum allowed posterior g-vulnerability
    hard_max_loss = 2 * CELL_SIZE  # loss(x,y) > hard_max_loss => C[x,y] = 0
    n_secrets = n_outputs = n_guesses = WIDTH * HEIGHT
    pi_dic = pn.read_pickle(
        path=
        "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/file_prior_distr.pkl"
    )
    # print("\n\n\npi dictionary ---> pi[cell]:cell_probability")
    print(pi_dic)

    pi_mat = np.zeros((WIDTH, HEIGHT))
    for i_ter in range(WIDTH):
        for j_ter in range(HEIGHT):
            cell_id = WIDTH * i_ter + j_ter
            pi_mat[i_ter, j_ter] = pi_dic[cell_id]
    print(
        "\n\n\n Table for pi where up is south, down is north, left is west, right is east."
    )
    headers = [
        "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
        "13", "14", "15", "16", "17", "18", "19"
    ]
    table_pi_mat = tabulate(pi_mat, headers, tablefmt="fancy_grid")
    print(table_pi_mat)

    # pi_mat_map = np.flip(pi_mat, 0)
    # print("\n\n\n")
    # print("\n\n\n Table for pi where up is north, down is south, left is west, right is east.")
    # table_pi_mat_map = tabulate(pi_mat_map, headers, tablefmt="fancy_grid")
    # print(table_pi_mat_map)

    pi = pi_mat.flatten()  # probab.uniform(n_secrets)  # uniform prior
    # print("\n\n\npi ---> such that pi[i] = prob_cell[i]")
    print(pi)

    ############################

    list_of_cells_probs = []
    for id_cell_ind in range(len(pi_dic)):
        list_of_cells_probs.append(pi_dic[id_cell_ind])

    # sanity check

    for i in range(len(pi)):
        if pi[i] != list_of_cells_probs[i]:
            sys.exit("ERROR in prior")

    ############################

    gmat1 = create_gain_matrix()
    gmat2 = create_gain_matrix2()

    for i in range(gmat1.shape[0]):
        for j in range(gmat1.shape[1]):
            if gmat1[i, j] != gmat2[i, j]:
                sys.exit("ERROR: gain matrices gmat1 and gmat2 differ")

    print(euclid_cell(13, 20))
    print(euclid_cell(20, 13))

    # solve
    C = mechanism.g_vuln.min_loss_given_max_vuln(pi, n_outputs, n_guesses,
                                                 max_vuln, gain, loss,
                                                 hard_max_loss)
    # print("\n\nC:\n", C)
    # print("\n\nmax_vuln:", max_vuln)
    # print("\n\nVg(pi, C)", measure.g_vuln.posterior(gain, pi, C))
    # print("\n\nUtility C:", utility.expected_distance(loss, pi, C))
    # print("-----------------\n")
    # """
    # # Inverse problem
    # max_loss = 300
    # C = mechanism.g_vuln.min_vuln_given_max_loss(pi, n_outputs, n_guesses, max_loss, gain, loss, hard_max_loss)
    # print("C:\n", C)
    # print("max_vuln:", max_vuln)
    # print("Vg(pi, C)", measure.g_vuln.posterior(gain, pi, C))
    # print("Utility:", utility.expected_distance(loss, pi, C))
    # """
    #
    # C_copy = copy.deepcopy(C)
    # for i_ter in tqdm(range(C.shape[0])):
    #     prob_observables_given_secret = C_copy[i_ter, :]
    #     # print(np.sum(prob_observables_given_secret))
    #     prob_observables_given_secret_norm = tuple(
    #         p / sum(prob_observables_given_secret) for p in prob_observables_given_secret)
    #     C_copy[i_ter, :] = prob_observables_given_secret_norm
    #
    # print("\n\nVg(pi, C_copy)", measure.g_vuln.posterior(gain, pi, C_copy))
    # print("\n\nUtility C_copy:", utility.expected_distance(loss, pi, C_copy))
    # print("-----------------\n")
    #
    # for i in range(C_copy.shape[0]):
    #     print(sum(C_copy[i, :]))
    #
    # for r in range(10):
    #     print("\n\n\n###########################################\n\n\n")
    #
    # C_copy_transposed = np.transpose(C_copy)
    # for j_ter in range(C_copy_transposed.shape[1]):
    #     sum_ = sum(C_copy_transposed[:, j_ter])
    #     print(sum_)
    # pn.to_pickle(obj=C_copy_transposed, path=CHANNEL_PATH, protocol=2)
    #
    # g_mat = create_gain_matrix()
    # g_mat_rows = np.arange(start=0, stop=HEIGHT ** 2, step=1)
    # g_mat_cols = np.arange(start=0, stop=WIDTH ** 2, step=1)
    # pn.to_pickle(obj=g_mat, path=G_MAT_PATH, protocol=2)
    # pn.to_pickle(obj=g_mat_rows, path=G_MAT_ROWS_PATH, protocol=2)
    # pn.to_pickle(obj=g_mat_cols, path=G_MAT_COLS_PATH, protocol=2)

    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        err_hndl(str_="array_sizes_not_matching", add=inspect.stack()[0][3])

    for size_list_iterator in range(len(TRAINING_SET_SIZE)):
        training_set_size = TRAINING_SET_SIZE[size_list_iterator]
        validation_set_size = VALIDATION_SET_SIZE[size_list_iterator]

        for train_iteration in range(TRAIN_ITERATIONS):
            training_set_mat = create_single_dataset(size=training_set_size,
                                                     C=C,
                                                     pi=pi)

            training_and_validation_and_test_set_store_folder = DATA_FOLDER + str(
                training_set_size) + "_training_and_" + str(
                    validation_set_size
                ) + "_validation_store_folder_train_iteration_" + str(
                    train_iteration) + "/"

            utilities.createFolder(
                path=training_and_validation_and_test_set_store_folder)

            training_df = pn.DataFrame(data=training_set_mat,
                                       columns=["O_train", "S_train"])
            pn.to_pickle(
                obj=training_df.values,
                path=training_and_validation_and_test_set_store_folder +
                "/training_set.pkl",
                protocol=2)

            ################################################################################################################

            validation_set_mat = create_single_dataset(
                size=validation_set_size, C=C, pi=pi)

            validation_df = pn.DataFrame(data=validation_set_mat,
                                         columns=["O_val", "S_val"])
            pn.to_pickle(
                obj=validation_df.values,
                path=training_and_validation_and_test_set_store_folder +
                "/validation_set.pkl",
                protocol=2)

            # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  create test sets  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    if CREATE_TEST_SET:
        list_unq = []
        print(
            "\n####################################################################################"
        )
        print(
            "#################################  create test sets  ################################"
        )
        print(
            "####################################################################################\n"
        )
        test_set_size = TEST_SET_SIZE[0]
        for test_iteration in range(TEST_ITERATIONS):
            test_set_mat = create_single_dataset(size=test_set_size,
                                                 C=C,
                                                 pi=pi)
            list_unq.append(len(np.unique(test_set_mat[:, 0])))

            test_set_store_folder = DATA_FOLDER + str(
                test_set_size) + "_size_test_sets/"
            utilities.createFolder(path=test_set_store_folder)

            test_df = pn.DataFrame(data=test_set_mat,
                                   columns=["O_test", "S_test"])
            pn.to_pickle(obj=test_df.values,
                         path=test_set_store_folder + "/test_set_" +
                         str(test_iteration) + ".pkl",
                         protocol=2)

        print(list_unq)
Example #14
def main_EMPIRICAL_ESTIMATES_create_data():
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  geometric distribution loading  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    print("\n####################################################################################")
    print("#########################  geometric distribution loading  #########################")
    print("####################################################################################\n")

    utilities.createFolder(DATA_FOLDER)

    channel_matrix_df = pn.read_pickle(path=CHANNEL_PATH)

    channel_matrix = channel_matrix_df.values
    print(channel_matrix.shape)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  create training sets  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    #   X are the observables and y are the secrets (cols 0 and 1 respectively); stratify w.r.t. the secret
    #   split training and test data

    if len(TEST_SET_SIZE) != len(TRAINING_SET_SIZE) or len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        err_hndl(str_="array_sizes_not_matching", add=inspect.stack()[0][3])

    for size_list_iterator in range(len(TRAINING_SET_SIZE)):
        training_set_size = TRAINING_SET_SIZE[size_list_iterator]
        validation_set_size = VALIDATION_SET_SIZE[size_list_iterator]
        test_set_size = TEST_SET_SIZE[size_list_iterator]

        for train_iteration in range(TRAIN_ITERATIONS):
            training_set_mat = linear_geometric_mechanism.sample_from_distribution(
                channel_matrix_df_path=CHANNEL_PATH,
                rndmstt=utilities.create_new_rndm_state(),
                samples_per_secret=int(
                    training_set_size / len(
                        channel_matrix_df.columns.values)))

            training_and_validation_and_test_set_store_folder = DATA_FOLDER + str(
                training_set_size) + "_training_and_" + str(validation_set_size) + "_validation_and_" + str(
                test_set_size) + "_test_store_folder_train_iteration_" + str(train_iteration) + "/"

            utilities.createFolder(path=training_and_validation_and_test_set_store_folder)

            training_df = pn.DataFrame(data=training_set_mat, columns=["O_train", "S_train"])
            training_df.to_pickle(path=training_and_validation_and_test_set_store_folder + "/training_set.pkl")

            ################################################################################################################

            validation_set_mat = linear_geometric_mechanism.sample_from_distribution(
                channel_matrix_df_path=CHANNEL_PATH,
                rndmstt=utilities.create_new_rndm_state(),
                samples_per_secret=int(
                    validation_set_size / len(
                        channel_matrix_df.columns.values)))

            validation_df = pn.DataFrame(data=validation_set_mat, columns=["O_val", "S_val"])
            validation_df.to_pickle(path=training_and_validation_and_test_set_store_folder + "/validation_set.pkl")

            # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  create test sets  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

            if CREATE_TEST_SET:
                print("\n####################################################################################")
                print("#################################  create test sets  ################################")
                print("####################################################################################\n")
                for test_iteration in range(TEST_ITERATIONS):
                    test_set_mat = linear_geometric_mechanism.sample_from_distribution(
                        channel_matrix_df_path=CHANNEL_PATH,
                        rndmstt=utilities.create_new_rndm_state(),
                        samples_per_secret=int(
                            test_set_size / len(
                                channel_matrix_df.columns.values)))

                    test_set_store_folder = training_and_validation_and_test_set_store_folder + str(
                        test_set_size) + "_size_test_sets/"
                    utilities.createFolder(path=test_set_store_folder)

                    test_df = pn.DataFrame(data=test_set_mat, columns=["O_test", "S_test"])
                    test_df.to_pickle(
                        path=test_set_store_folder + "/test_set_" + str(test_iteration) + ".pkl")
Example #15
import time
import numpy as np
import pandas as pn
from tqdm import tqdm
from scipy import stats
from utilities_pckg import utilities
from qif import channel, measure, mechanism, probab

TRAINING_SET_SIZE = [10000, 30000, 50000]
TEST_SET_SIZE = [50000]
VALIDATION_SET_SIZE = [1000, 3000, 5000]
TEST_ITERATIONS = 50
TRAIN_ITERATIONS = 5

BIS_EXP_G_VULN_DP_FOLDER = "/home/comete/mromanel/MILES_EXP/BIS_EXP_G_VULN_DP_FOLDER/"
utilities.createFolder(BIS_EXP_G_VULN_DP_FOLDER)
DATA_FOLDER = BIS_EXP_G_VULN_DP_FOLDER + "DATA_FOLDER/"
utilities.createFolder(DATA_FOLDER)

DATA_FOLDER_TEST = DATA_FOLDER + str(TEST_SET_SIZE[0]) + "_size_test_set/"
utilities.createFolder(DATA_FOLDER_TEST)

G_OBJ = BIS_EXP_G_VULN_DP_FOLDER + "G_OBJ/"
utilities.createFolder(G_OBJ)

#   real counts, replace with those from the real db
#   order: 0, 1, 2, 3, 4 ---> 164  55  36  35  13
real_counts = np.array([164, 55, 36, 35, 13])  # true counts

# real_counts = np.array([40, 55, 36, 35, 13])  # fake counts for safety check
Example #16
from geometric_mechanisms import linear_geometric_mechanism
from utilities_pckg import g_vuln_computation, utilities
from utilities_pckg.runtime_error_handler import runtime_error_handler as err_hndl
import pandas as pn
import inspect

EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH = "/home/comete/mromanel/MILES_EXP/EXP_G_VULN_MULTIPLE_GUESSES/"
CHANNEL_PATH = EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH + "channel_df_norm.pkl"

DATA_FOLDER = EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH + "DATA_FOLDER/"
utilities.createFolder(DATA_FOLDER)

TRAINING_SET_SIZE = [10000, 30000, 50000]
TEST_SET_SIZE = [10000, 30000, 50000]
VALIDATION_SET_SIZE = [1000, 3000, 5000]
TEST_ITERATIONS = 100
TRAIN_ITERATIONS = 10

CREATE_TEST_SET = True


def main_EXP_G_VULN_MULTIPLE_GUESSES_create_data():
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  geometric distribution loading  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    print(
        "\n####################################################################################"
    )
    print(
        "#########################  geometric distribution loading  #########################"
    )
    print(
        "####################################################################################\n"
    )
Example #17
from utilities_pckg import g_function_manager, utilities
import pandas as pn

EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH = "/home/comete/mromanel/MILES_EXP/EXP_G_VULN_MULTIPLE_GUESSES/"
CHANNEL_PATH = EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH + "channel_df_norm.pkl"
G_MAT_FOLDER = EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH + "G_MAT_FOLDER/"
utilities.createFolder(G_MAT_FOLDER)
N_GUESSES = 2


def main_EXP_G_VULN_MULTIPLE_GUESSES_create_g_matrix():
    channel_colnames = pn.read_pickle(path=CHANNEL_PATH).columns.values
    g_function_manager.create_g_function_matrix_n_guesses(
        list_unique_secrets=channel_colnames,
        n_guesses=N_GUESSES,
        save_g_path=G_MAT_FOLDER)
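
# Note (a hedged reading, not confirmed by the snippet): with N_GUESSES = 2 a
# standard multiple-guesses gain is g(w, x) = 1 when the true secret x belongs
# to the 2-element guess tuple w and 0 otherwise; create_g_function_matrix_n_guesses
# presumably materializes such a matrix over the channel's secret labels.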