import time
import pickle as pk
import numpy as np

# Module imports assumed from sibling files in this project
# (Dataset_Generator and Evaluation appear elsewhere in the repo);
# the LLE module name is an assumption.
import Dataset_Generator as dg
import Evaluation as evaluation
import LLE as lle  # must expose locally_linear_embedding(X, k, d)


def run():
    # Stage 1: dataset creation and dimensionality reduction.
    # Note that stage 1 must be completed before stage 2.
    localtime = time.asctime(time.localtime(time.time()))
    print("Local current time:", localtime)
    print("Stage 1: creating the five artificial datasets and reading the MNIST natural dataset, "
          "then generating datasets with reduced dimensionality using LLE and PCA\n")

    # Note: running 5000 samples may take a very long time, while 1000 samples
    # takes only around 30 minutes.
    print("Now generating the five artificial datasets and reading the MNIST dataset")
    swiss_roll_dataset, swiss_roll_labels = dg.get_swiss_roll_dataset_with_labels2(5000)
    helix_dataset, helix_labels = dg.get_helix_dataset_with_label2(5000)
    twin_peaks_dataset, twin_peak_labels = dg.get_twin_peaks_with_label2(5000)
    broken_swiss_dataset, broken_swiss_labels = dg.get_broken_swiss_roll_dataset_with_label2(5000)
    hd_dataset, hd_labels = dg.get_hd_dataset_with_label2(5000)
    MNIST_images, MNIST_labels = evaluation.get_natural_dataset_samples(5000)

    original_datasets = {"swiss_roll": swiss_roll_dataset, "helix": helix_dataset,
                         "twin_peaks": twin_peaks_dataset, "broken_swiss_roll": broken_swiss_dataset,
                         "hd": hd_dataset, "MNIST": MNIST_images}
    with open('original_datasets.p', 'wb') as f:
        pk.dump(original_datasets, f)
    print("Finished!\n")

    print("Now saving the labels for all datasets")
    datasets_labels = {"swiss_roll": swiss_roll_labels, "helix": helix_labels,
                       "twin_peaks": twin_peak_labels, "broken_swiss_roll": broken_swiss_labels,
                       "hd": hd_labels, "MNIST": MNIST_labels}
    with open('datasets_labels.p', 'wb') as f:
        pk.dump(datasets_labels, f)
    print("Finished!\n")

    # Reduce the dimensionality of each dataset with PCA.
    print("Now using PCA to reduce dimensionality of each dataset")
    pca_reduced_swiss_roll = evaluation.pca_dim_reduction(swiss_roll_dataset, 2)
    pca_reduced_helix = evaluation.pca_dim_reduction(helix_dataset, 1)
    pca_reduced_twin_peaks = evaluation.pca_dim_reduction(twin_peaks_dataset, 2)
    pca_reduced_broken_swiss = evaluation.pca_dim_reduction(broken_swiss_dataset, 2)
    pca_reduced_hd = evaluation.pca_dim_reduction(hd_dataset, 2)
    pca_reduced_MNIST_images = evaluation.pca_dim_reduction(MNIST_images, 20)
    pca_reduced_datasets = {"swiss_roll": pca_reduced_swiss_roll, "helix": pca_reduced_helix,
                            "twin_peaks": pca_reduced_twin_peaks, "broken_swiss_roll": pca_reduced_broken_swiss,
                            "hd": pca_reduced_hd, "MNIST": pca_reduced_MNIST_images}
    with open('pca_reduced_datasets.p', 'wb') as f:
        pk.dump(pca_reduced_datasets, f)
    print("Finished!\n")

    # Reduce the dimensionality of each dataset with LLE. Each entry of this
    # list holds the results for one neighborhood size, where index i is the
    # result for k = i + 5.
    lle_reduced_datasets_under_diff_k = []
    print("Now using LLE to reduce dimensionality of each dataset. "
          "Note that the parameter k ranges from 5 to 15, so this step will take a while")
    for k in range(5, 16):
        lle_reduced_swiss_roll = lle.locally_linear_embedding(np.array(swiss_roll_dataset, np.float64), k, 2)[0].tolist()
        lle_reduced_helix = lle.locally_linear_embedding(np.array(helix_dataset, np.float64), k, 1)[0].tolist()
        lle_reduced_twin_peaks = lle.locally_linear_embedding(np.array(twin_peaks_dataset, np.float64), k, 2)[0].tolist()
        lle_reduced_broken_swiss = lle.locally_linear_embedding(np.array(broken_swiss_dataset, np.float64), k, 2)[0].tolist()
        lle_reduced_hd = lle.locally_linear_embedding(np.array(hd_dataset, np.float64), k, 5)[0].tolist()
        lle_reduced_MNIST_images = lle.locally_linear_embedding(np.array(MNIST_images, np.float64), k, 20)[0].tolist()
        curr_k_results = {"swiss_roll": lle_reduced_swiss_roll, "helix": lle_reduced_helix,
                          "twin_peaks": lle_reduced_twin_peaks, "broken_swiss_roll": lle_reduced_broken_swiss,
                          "hd": lle_reduced_hd, "MNIST": lle_reduced_MNIST_images}
        lle_reduced_datasets_under_diff_k.append(curr_k_results)
    with open('lle_reduced_datasets_under_diff_k.p', 'wb') as f:
        pk.dump(lle_reduced_datasets_under_diff_k, f)
    print("Finished!\n")

    localtime = time.asctime(time.localtime(time.time()))
    print("Local current time:", localtime)
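# A minimal sketch (not part of the original file) of how stage 2 can reload
# the stage-1 outputs pickled by run(). The file names match the pk.dump
# calls above; the helper name itself is hypothetical.
def load_stage1_outputs():
    with open('original_datasets.p', 'rb') as f:
        original_datasets = pk.load(f)
    with open('datasets_labels.p', 'rb') as f:
        datasets_labels = pk.load(f)
    with open('pca_reduced_datasets.p', 'rb') as f:
        pca_reduced_datasets = pk.load(f)
    with open('lle_reduced_datasets_under_diff_k.p', 'rb') as f:
        # Index i holds the LLE result dict for k = i + 5 (k in 5..15).
        lle_reduced_datasets_under_diff_k = pk.load(f)
    return (original_datasets, datasets_labels,
            pca_reduced_datasets, lle_reduced_datasets_under_diff_k)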
def generate_original_datasets():
    print("Now generating the five artificial datasets and reading the MNIST dataset")
    swiss_roll_dataset, swiss_roll_labels = dg.get_swiss_roll_dataset_with_labels2(5000)
    helix_dataset, helix_labels = dg.get_helix_dataset_with_label2(5000)
    twin_peaks_dataset, twin_peak_labels = dg.get_twin_peaks_with_label2(5000)
    broken_swiss_dataset, broken_swiss_labels = dg.get_broken_swiss_roll_dataset_with_label2(5000)
    hd_dataset, hd_labels = dg.get_hd_dataset_with_label2(5000)
    MNIST_images, MNIST_labels = evaluation.get_natural_dataset_samples(5000)
    original_datasets = {"swiss_roll": swiss_roll_dataset, "helix": helix_dataset,
                         "twin_peaks": twin_peaks_dataset, "broken_swiss_roll": broken_swiss_dataset,
                         "hd": hd_dataset, "MNIST": MNIST_images}
    with open('original_datasets.p', 'wb') as f:
        pk.dump(original_datasets, f)
    print("Finished!\n")
def handle_demand():
    print('1 Load Demand')
    print('2 Generate Demand')
    selection = input('Please Select: ')
    if selection == '1':
        return read_csv_dataset(input('Filename of demand dataset: '))
    elif selection == '2':
        dg.generate_demand()
        print('Demand generated')
        return read_csv_dataset('demand_data.csv')
    else:
        # A bare `raise` fails outside an except block; raise an explicit
        # error for main() to handle instead.
        raise ValueError('Invalid selection: ' + selection)
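# `read_csv_dataset` is used above but not defined in this fragment; the
# following is a minimal sketch of a compatible helper, assuming the file is
# a plain CSV and pandas is available (the real project may define it
# differently).
def read_csv_dataset(filename):
    import pandas as pd
    # Return the dataset as a pandas DataFrame.
    return pd.read_csv(filename)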
def main():
    # X = np.arange(9).reshape(9, 1).astype(np.float64)
    X = np.array(dg.get_swiss_roll_dataset(5000), np.float64)
    print("The input data:")
    print(X)  # for test only
    Y, error = locally_linear_embedding(X, 5, 2)
    print("The output data:")
    print(Y)  # for test only
    print(error)  # for test only
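# Optional cross-check (assumption: scikit-learn is installed). sklearn's
# reference implementation returns the same (Y, error) pair, so it can be
# used to sanity-check the custom locally_linear_embedding above; this
# helper is illustrative and not part of the original file.
def sklearn_cross_check():
    from sklearn.manifold import locally_linear_embedding as sk_lle
    X = np.array(dg.get_swiss_roll_dataset(5000), np.float64)
    Y_ref, err_ref = sk_lle(X, n_neighbors=5, n_components=2)
    print("sklearn embedding shape:", Y_ref.shape)
    print("sklearn reconstruction error:", err_ref)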
def handle_grades():
    print('1 Load Grades')
    print('2 Generate Grades')
    selection = input('Please Select: ')
    if selection == '1':
        amt_loading = input('How many grade datasets would you like to load? ')
        return amt_loading, [read_csv_dataset(input('Filename of course grades dataset: '))
                             for _ in range(int(amt_loading))]
    elif selection == '2':
        return dg.generate_grades(input('How many grade datasets would you like to generate? '))
    else:
        # A bare `raise` fails outside an except block; raise an explicit
        # error for main() to handle instead.
        raise ValueError('Invalid selection: ' + selection)
import Dataset_Generator as dg
import Evaluation as evaluation  # renamed from `eval` to avoid shadowing the builtin
import Plot_Graph as ploter

hd_dataset = dg.get_hd_dataset(5000)
reduced_hd = evaluation.pca_dim_reduction(hd_dataset, 3)
ploter.plot3D(reduced_hd)

broken_swiss_roll_dataset = dg.get_broken_swiss_roll_dataset(5000)
ploter.plot3D(broken_swiss_roll_dataset)
reduced_broken_swiss = evaluation.pca_dim_reduction(broken_swiss_roll_dataset, 2)
ploter.plot2D(reduced_broken_swiss)

helix_dataset = dg.get_helix_dataset(5000)
ploter.plot3D(helix_dataset)
reduced_helix = evaluation.pca_dim_reduction(helix_dataset, 2)
ploter.plot2D(reduced_helix)

swiss_roll_dataset = dg.get_swiss_roll_dataset(5000)
ploter.plot3D(swiss_roll_dataset)
reduced_swiss = evaluation.pca_dim_reduction(swiss_roll_dataset, 2)
ploter.plot2D(reduced_swiss)

twin_peaks_dataset = dg.get_twin_peaks(5000)
ploter.plot3D(twin_peaks_dataset)
reduced_twin_peaks = evaluation.pca_dim_reduction(twin_peaks_dataset, 2)
ploter.plot2D(reduced_twin_peaks)

# *********************************** scripts to evaluate Trust
# Swiss roll
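# Plot_Graph is assumed to expose the plot2D/plot3D helpers used above; the
# following is a minimal sketch of compatible equivalents with matplotlib,
# for reference only (the real module may differ).
import matplotlib.pyplot as plt


def plot3D(points):
    # Scatter a list of (x, y, z) points in a 3D projection.
    xs, ys, zs = zip(*points)
    ax = plt.figure().add_subplot(projection='3d')
    ax.scatter(xs, ys, zs, s=2)
    plt.show()


def plot2D(points):
    # Scatter a list of (x, y) points.
    xs, ys = zip(*points)
    plt.scatter(xs, ys, s=2)
    plt.show()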
assert NUMBER_OF_SAMPLES % MAX_GAME_SIZE == 0, \
    "The number of samples must be evenly divisible by max game size."

# Generate the datasets, in chunks of MAX_GAME_SIZE samples per run
for i in range(len(player_number)):
    for j in range(int(NUMBER_OF_SAMPLES / MAX_GAME_SIZE)):
        # Print a message
        print('+++Starting to generate a dataset of games with size '
              + str(PURE_STRATEGIES_PER_PLAYER[i]) + ' - number: ' + str(j + 1))

        # Set the subfolder to save the data in, e.g. "<dataset>/3x3/"
        save_subfolder = str(DATASET_NUMBER) + "/" + \
            str(PURE_STRATEGIES_PER_PLAYER[i]).rstrip(")]").lstrip("[(").replace(",", "x").replace(" ", "") + "/"

        # Run the dataset generator
        Dataset_Generator.multi_process_generator(
            games_dataset_name=games_dataset_name[i] + "_" + str(j),
            equilibria_dataset_name=equilibria_dataset_name[i] + "_" + str(j),
            number_of_samples=MAX_GAME_SIZE,
            max_equilibria_per_game=MAXIMUM_EQUILIBRIA_PER_GAME,
            player_number=player_number[i],
            strategies_per_player=PURE_STRATEGIES_PER_PLAYER[i],
            discard_non_mixed_strategy_games=DISCARD_NON_MIXED_STRATEGY_GAMES,
            filter_pure_strategies=FILTER_PURE_STRATEGIES,
            discard_single_equilibrium_games=DISCARD_SINGLE_EQUILIBRIUM_GAMES,
            use_gambit=USE_GAMBIT,
            cpu_cores=CPU_CORES,
            timeout_per_sample=TIMEOUT_PER_SAMPLE,
            game_type="Random",
            save_subfolder=save_subfolder)

        # Each chunk generates MAX_GAME_SIZE samples (not NUMBER_OF_SAMPLES,
        # which is the total across all chunks).
        print('***Finished generating ' + str(MAX_GAME_SIZE) + ' samples of size '
              + str(PURE_STRATEGIES_PER_PLAYER[i]) + ' - number: ' + str(j + 1) + '\n')
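# Illustrative configuration only: the constants used by the loop above are
# defined elsewhere in the original project, and none of the values below
# come from the source. They merely show shapes/types consistent with how
# the loop indexes them (parallel lists indexed by i), kept commented out so
# they cannot clash with the real definitions.
# DATASET_NUMBER = 1
# NUMBER_OF_SAMPLES = 10000          # must be divisible by MAX_GAME_SIZE
# MAX_GAME_SIZE = 1000               # samples generated per chunk
# MAXIMUM_EQUILIBRIA_PER_GAME = 10
# player_number = [2, 2]
# PURE_STRATEGIES_PER_PLAYER = [(3, 3), (4, 4)]
# games_dataset_name = ['Games_3x3', 'Games_4x4']
# equilibria_dataset_name = ['Equilibria_3x3', 'Equilibria_4x4']
# DISCARD_NON_MIXED_STRATEGY_GAMES = True
# FILTER_PURE_STRATEGIES = False
# DISCARD_SINGLE_EQUILIBRIUM_GAMES = False
# USE_GAMBIT = True
# CPU_CORES = 4
# TIMEOUT_PER_SAMPLE = 60            # seconds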