def run():
    # The following code implements Stage 1: dataset creation and dimensionality reduction.
    # Please note that Stage 1 must be completed before Stage 2.
    localtime = time.asctime(time.localtime(time.time()))
    print("Local current time :", localtime)
    print("Stage 1: creating the five artificial dataset and reading the MNIST natural dataset, then generate datasets with reduced dimentionality, using LLE and PCA\n")

    # Note: running 5000 samples may take an awfully long time, while 1000 samples takes only around 30 minutes.
    print("Now generating the five artificial datasets and reading the MNIST dataset")
    swiss_roll_dataset, swiss_roll_labels = dg.get_swiss_roll_dataset_with_labels2(5000)
    helix_dataset, helix_labels = dg.get_helix_dataset_with_label2(5000)
    twin_peaks_dataset, twin_peak_labels = dg.get_twin_peaks_with_label2(5000)
    broken_swiss_dataset, broken_swiss_labels = dg.get_broken_swiss_roll_dataset_with_label2(5000)
    hd_dataset, hd_labels = dg.get_hd_dataset_with_label2(5000)
    MNIST_images, MNIST_labels = evaluation.get_natural_dataset_samples(5000)
    original_datasets = {"swiss_roll": swiss_roll_dataset, "helix": helix_dataset, "twin_peaks": twin_peaks_dataset,
                         "broken_swiss_roll": broken_swiss_dataset, "hd": hd_dataset, "MNIST": MNIST_images}
    pk.dump(original_datasets, open('original_datasets.p', 'wb'))
    print("Finished! \n")

    print("Now getting labels for all datasets")
    datasets_labels = {"swiss_roll": swiss_roll_labels, "helix": helix_labels, "twin_peaks": twin_peak_labels,
                         "broken_swiss_roll": broken_swiss_labels, "hd": hd_labels, "MNIST": MNIST_labels}
    pk.dump(datasets_labels, open('datasets_labels.p', 'wb'))
    print("Finished! \n")

    # The following code reduces dimensionality using PCA and LLE
    print("Now using PCA to reduce dimensionality of each dataset")
    pca_reduced_swiss_roll = evaluation.pca_dim_reduction(swiss_roll_dataset, 2)
    pca_reduced_helix = evaluation.pca_dim_reduction(helix_dataset, 1)
    pca_reduced_twin_peaks = evaluation.pca_dim_reduction(twin_peaks_dataset, 2)
    pca_reduced_broken_swiss = evaluation.pca_dim_reduction(broken_swiss_dataset, 2)
    pca_reduced_hd = evaluation.pca_dim_reduction(hd_dataset, 2)
    pca_reduced_MNIST_images = evaluation.pca_dim_reduction(MNIST_images, 20)
    pca_reduced_datasets = {"swiss_roll": pca_reduced_swiss_roll, "helix": pca_reduced_helix, "twin_peaks": pca_reduced_twin_peaks,
                         "broken_swiss_roll": pca_reduced_broken_swiss, "hd": pca_reduced_hd, "MNIST": pca_reduced_MNIST_images}
    # pca_reduced_datasets = [pca_reduced_swiss_roll, pca_reduced_helix, pca_reduced_twin_peaks, pca_reduced_broken_swiss,
    #                         pca_reduced_hd, pca_reduced_MNIST_images]
    pk.dump(pca_reduced_datasets, open('pca_reduced_datasets.p', 'wb'))
    print("Finished! \n")

    lle_reduced_datasets_under_diff_k = []  # results for different k values; index i holds the result for k = i + 5
    print("Now using LLE to reduce dimensionality of each dataset. Note that the parameter k ranges from 5 to 15 so this step is gonna take a while")
    for k in range(5, 16):
        lle_reduced_swiss_roll = lle.locally_linear_embedding(np.array(swiss_roll_dataset, np.float64), k, 2)[0].tolist()
        lle_reduced_helix = lle.locally_linear_embedding(np.array(helix_dataset, np.float64), k, 1)[0].tolist()
        lle_reduced_twin_peaks = lle.locally_linear_embedding(np.array(twin_peaks_dataset, np.float64), k, 2)[0].tolist()
        lle_reduced_broken_swiss = lle.locally_linear_embedding(np.array(broken_swiss_dataset, np.float64), k, 2)[0].tolist()
        lle_reduced_hd = lle.locally_linear_embedding(np.array(hd_dataset, np.float64), k, 5)[0].tolist()
        lle_reduced_MNIST_images = lle.locally_linear_embedding(np.array(MNIST_images, np.float64), k, 20)[0].tolist()
        curr_k_results = {"swiss_roll": lle_reduced_swiss_roll, "helix": lle_reduced_helix,
                                "twin_peaks": lle_reduced_twin_peaks,
                                "broken_swiss_roll": lle_reduced_broken_swiss, "hd": lle_reduced_hd,
                                "MNIST": lle_reduced_MNIST_images}
        lle_reduced_datasets_under_diff_k.append(curr_k_results)
    pk.dump(lle_reduced_datasets_under_diff_k, open('lle_reduced_datasets_under_diff_k.p', 'wb'))
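    # The embedding for a specific neighbourhood size can be looked up by offsetting the index,
    # e.g. lle_reduced_datasets_under_diff_k[10 - 5]["swiss_roll"] is the swiss roll embedding for k = 10.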
    print("Finished! \n")
    localtime = time.asctime(time.localtime(time.time()))
    print("Local current time :", localtime)
def generate_original_datasets():
    print("Now generating the five artificial datasets and reading the MNIST dataset")
    swiss_roll_dataset, swiss_roll_labels = dg.get_swiss_roll_dataset_with_labels2(5000)
    helix_dataset, helix_labels = dg.get_helix_dataset_with_label2(5000)
    twin_peaks_dataset, twin_peak_labels = dg.get_twin_peaks_with_label2(5000)
    broken_swiss_dataset, broken_swiss_labels = dg.get_broken_swiss_roll_dataset_with_label2(5000)
    hd_dataset, hd_labels = dg.get_hd_dataset_with_label2(5000)
    MNIST_images, MNIST_labels = evaluation.get_natural_dataset_samples(5000)
    original_datasets = {"swiss_roll": swiss_roll_dataset, "helix": helix_dataset, "twin_peaks": twin_peaks_dataset,
                         "broken_swiss_roll": broken_swiss_dataset, "hd": hd_dataset, "MNIST": MNIST_images}
    pk.dump(original_datasets, open('original_datasets.p', 'wb'))
    print("Finished! \n")
def handle_demand():
    print('1 Load Demand')
    print('2 Generate Demand')
    selection = input('Please Select: ')
    
    if selection == '1':
        return read_csv_dataset(input('Filename of demand dataset: '))
    elif selection == '2':
        dg.generate_demand()
        print('Demand generated')
        return read_csv_dataset('demand_data.csv')
    else:
        raise ValueError('Invalid selection')  # Propagate the error to main()
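
# read_csv_dataset is referenced above but not defined in this snippet; a minimal sketch, assuming it
# simply wraps pandas.read_csv and that the file has a header row. The real helper may differ.
import pandas as pd

def read_csv_dataset_sketch(filename):
    # Load the CSV into a DataFrame and report its shape before handing it back.
    df = pd.read_csv(filename)
    print('Loaded', filename, 'with shape', df.shape)
    return df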
Example #4
def main():
    # X = np.arange(9).reshape(9,1).astype(np.float64)
    X = np.array(dg.get_swiss_roll_dataset(5000), np.float64)
    print("The input data:")
    print(X) # for test only
    Y, error = locally_linear_embedding(X, 5, 2)
    print("The output data:")
    print(Y) # for test only
    print(error) # for test only
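
# As a sanity check, the custom locally_linear_embedding used in main() could be compared against
# scikit-learn's implementation; this sketch is not part of the original pipeline.
def main_sklearn_check():
    from sklearn.manifold import LocallyLinearEmbedding

    X = np.array(dg.get_swiss_roll_dataset(5000), np.float64)
    # n_neighbors and n_components mirror the k=5, d=2 call in main().
    embedding = LocallyLinearEmbedding(n_neighbors=5, n_components=2)
    print(embedding.fit_transform(X))
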
def handle_grades():
    print('1 Load Grades')
    print('2 Generate Grades')
    selection = input('Please Select: ')

    if selection == '1':
        amt_loading = input('How many grade datasets would you like to load? ')
        return amt_loading, [read_csv_dataset(input('Filename of course grades dataset: '))
                             for _ in range(int(amt_loading))]
    elif selection == '2':
        return dg.generate_grades(input('How many grade datasets would you like to generate? '))
    else:
        raise ValueError('Invalid selection')  # Propagate the error to main()
Example #6
import Dataset_Generator as dg
import Evaluation as eval
import Plot_Graph as ploter

hd_dataset = dg.get_hd_dataset(5000)
reduced_hd = eval.pca_dim_reduction(hd_dataset, 3)
ploter.plot3D(reduced_hd)

broken_swiss_roll_dataset = dg.get_broken_swiss_roll_dataset(5000)
ploter.plot3D(broken_swiss_roll_dataset)
reduced_broken_swiss = eval.pca_dim_reduction(broken_swiss_roll_dataset, 2)
ploter.plot2D(reduced_broken_swiss)

helix_dataset = dg.get_helix_dataset(5000)
ploter.plot3D(helix_dataset)
reduced_helix = eval.pca_dim_reduction(helix_dataset, 2)
ploter.plot2D(reduced_helix)

swiss_roll_dataset = dg.get_swiss_roll_dataset(5000)
ploter.plot3D(swiss_roll_dataset)
reduced_swiss = eval.pca_dim_reduction(swiss_roll_dataset, 2)
ploter.plot2D(reduced_swiss)

twin_peaks_dataset = dg.get_twin_peaks(5000)
ploter.plot3D(twin_peaks_dataset)
reduced_twin_peaks = eval.pca_dim_reduction(twin_peaks_dataset, 2)
ploter.plot2D(reduced_twin_peaks)
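
# Plot_Graph.plot2D / plot3D are used above but not shown; a minimal matplotlib sketch of what they
# might do, assuming each dataset is a list of 2- or 3-element points. The real module may differ.
import matplotlib.pyplot as plt

def plot2D_sketch(points):
    # Scatter the points in the plane.
    plt.scatter([p[0] for p in points], [p[1] for p in points], s=2)
    plt.show()

def plot3D_sketch(points):
    # Scatter the points in a 3D projection.
    ax = plt.figure().add_subplot(projection='3d')
    ax.scatter([p[0] for p in points], [p[1] for p in points], [p[2] for p in points], s=2)
    plt.show()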

# ***********************************scripts to evaluate Trust

# Swiss roll
assert NUMBER_OF_SAMPLES % MAX_GAME_SIZE == 0,\
    "The number of samples must be evenly divisible by max game size."

# Generate the datasets
for i in range(len(player_number)):
    for j in range(int(NUMBER_OF_SAMPLES / MAX_GAME_SIZE)):

        # Print a message
        print('+++Starting to generate a dataset of games with size ' + str(PURE_STRATEGIES_PER_PLAYER[i]) + ' - number: ' + str(j + 1))

        # Set the subfolder to save the data in
        save_subfolder = str(DATASET_NUMBER) + "/" + str(PURE_STRATEGIES_PER_PLAYER[i]).rstrip(")]").lstrip("[(").replace(",", "x").replace(" ", "") + "/"

        # Run the dataset generator
        Dataset_Generator.multi_process_generator(games_dataset_name=games_dataset_name[i] + "_" + str(j),
                                                  equilibria_dataset_name=equilibria_dataset_name[i] + "_" + str(j),
                                                  number_of_samples=MAX_GAME_SIZE,
                                                  max_equilibria_per_game=MAXIMUM_EQUILIBRIA_PER_GAME,
                                                  player_number=player_number[i],
                                                  strategies_per_player=PURE_STRATEGIES_PER_PLAYER[i],
                                                  discard_non_mixed_strategy_games=DISCARD_NON_MIXED_STRATEGY_GAMES,
                                                  filter_pure_strategies=FILTER_PURE_STRATEGIES,
                                                  discard_single_equilibrium_games=DISCARD_SINGLE_EQUILIBRIUM_GAMES,
                                                  use_gambit=USE_GAMBIT,
                                                  cpu_cores=CPU_CORES,
                                                  timeout_per_sample=TIMEOUT_PER_SAMPLE,
                                                  game_type="Random",
                                                  save_subfolder=save_subfolder)

    print('***Finished generating ' + str(NUMBER_OF_SAMPLES) + ' samples of size ' + str(PURE_STRATEGIES_PER_PLAYER[i]) + ' in ' + str(j + 1) + ' chunks\n')
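
# The constants driving the loop above (DATASET_NUMBER, NUMBER_OF_SAMPLES, MAX_GAME_SIZE, player_number,
# PURE_STRATEGIES_PER_PLAYER, the dataset name lists, and the flags passed to multi_process_generator)
# are defined elsewhere in the original script. The commented values below are purely hypothetical
# placeholders that only illustrate the expected shapes: parallel lists indexed by i, and
# NUMBER_OF_SAMPLES evenly divisible by MAX_GAME_SIZE as required by the assert.
# DATASET_NUMBER = 1
# NUMBER_OF_SAMPLES = 100000
# MAX_GAME_SIZE = 10000
# player_number = [2, 3]
# PURE_STRATEGIES_PER_PLAYER = [(3, 3), (2, 2, 2)]
# games_dataset_name = ['Games_2P', 'Games_3P']
# equilibria_dataset_name = ['Equilibria_2P', 'Equilibria_3P']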