def train_all(self):
    """Run the selected classification algorithm on every dataset listed in
    the dataset combobox and save each result next to its input files.

    For each dataset name the canonical file scheme is assumed:
    ``<data_dir>/<name>.train.csv``, ``<name>.test.csv`` -> ``<name>.result.csv``.
    Progress and elapsed time are printed to the console; a message box
    informs the user when all runs are done.
    """
    # Only execute if we have data
    if len(self.dataset_combobox['values']) == 0:
        messagebox.showerror("Error:", "No data are there to be used.")
        return
    # Get some params from the GUI
    data_dir = self.data_folder_textfield.get()
    # BUGFIX: was np.arange(self.k_slider.get()), i.e. 0..k-1, which
    # includes the invalid k=0 and disagrees with train(). Use 1..k,
    # matching the kset built in the single-dataset train() method.
    kset_val = np.arange(1, self.k_slider.get() + 1)
    l_val = self.l_slider.get()
    algorithm_val = self.algorithm_combobox.get()
    for dataset_name in self.dataset_combobox['values']:
        print('## Running dataset', dataset_name, ' ##')
        train_data = dataset.parse(data_dir + "/" + dataset_name + ".train.csv")
        test_data = dataset.parse(data_dir + "/" + dataset_name + ".test.csv")
        output_path = data_dir + "/" + dataset_name + ".result.csv"
        start_time = time.time()
        # Execute per dataset. The sklearn backend returns 3 values; all
        # other backends additionally return distance data `dd` (unused here).
        if algorithm_val == "sklearn":
            k_best, f_rate, self.result_data = self.classify_function(
                train_data,
                test_data,
                output_path,
                kset=kset_val,
                l=l_val,
                algorithm=algorithm_val)
        else:
            k_best, f_rate, self.result_data, dd = self.classify_function(
                train_data,
                test_data,
                output_path,
                kset=kset_val,
                l=l_val,
                algorithm=algorithm_val)
        end_time = time.time() - start_time
        # Other data will be print to the console via the print(...)
        # statements in the algorithm impl
        print('Elapsed time:', end_time)  # Print the elapsed time
        print("## -------- ##\n")
    # Inform the user that the plot was finished
    messagebox.showinfo(
        "Information:",
        "The classifications were done and the results were saved at the canonical locations."
    )
import model as gradtype_model
import utils as gradtype_utils

# Training schedule constants.
TOTAL_EPOCHS = 2000000
RESHUFFLE_EPOCHS = 50
# Save weights every `SAVE_EPOCHS` epochs
SAVE_EPOCHS = 1000

#
# Prepare dataset
#

print('Loading dataset')
datasets = dataset.parse()
# Build one flat sequence from all parsed datasets, then the sampling table
# used for skip-gram training (presumably frequency-based; defined in the
# project's `dataset` module — TODO confirm).
full_sequence = dataset.gen_full_sequence(datasets)
sampling_table = dataset.gen_sampling_table(full_sequence)

#
# Load model
#

encoder = gradtype_model.create_encoder()
# Resume from the latest checkpoint matching this prefix, if any;
# returns the epoch to continue from (per the helper's name — verify).
start_epoch = gradtype_utils.load_weights(encoder, 'gradtype-skipgrams-')
adam = Adam(lr=0.001)
encoder.compile(adam, loss='binary_crossentropy', metrics=['accuracy'])

#
def _build_review_stat(self):
    """Parse TEST_DATA and fold every token into a fresh ReviewStat."""
    stat = ReviewStat()
    for tok in dataset.parse(TEST_DATA):
        stat.add(tok)
    return stat
def train(self):
    """Run one classification on the currently selected dataset and show
    training/test/result plots (and optionally a decision grid) in the GUI.

    Reads all parameters from the widgets on ``self`` (dataset combobox,
    data folder field, algorithm combobox, k/l sliders, grid checkbox),
    writes the result to ``<data_dir>/<dataset>.result.csv`` and embeds
    matplotlib canvases into ``self.frame``.
    """
    # Only execute if there are datasets
    if len(self.dataset_combobox['values']) == 0:
        messagebox.showerror("Error:", "No dataset was selected.")
        return
    # Extract some information from the GUI and compute the dataset file
    # names (assuming they follow the canonical name scheme)
    dataset_name = self.dataset_combobox.get()
    data_dir = self.data_folder_textfield.get()
    algo_val = self.algorithm_combobox.get()
    display_grid = self.grid_checkbox_var.get()
    # Grid is only supported for brute_sort and 2D datasets
    if display_grid and algo_val != "brute_sort":
        messagebox.showerror("Error:", "The grid is only supported for brutesort!")
        return
    if display_grid and not dataset_name.endswith("2d"):
        messagebox.showerror(
            "Error:", "The grid is only supported for 2D datasets.")
        return
    self.train_data = dataset.parse(data_dir + "/" + dataset_name + ".train.csv")
    self.test_data = dataset.parse(data_dir + "/" + dataset_name + ".test.csv")
    # The result data will be stored here
    output_path = data_dir + "/" + dataset_name + ".result.csv"
    # Plot the training and test data with matplotlib and embedd them into
    # the window
    self.train_data_plot = FigureCanvasTkAgg(visual.display_2d_dataset(
        self.train_data, "Training data:", micro=True), master=self.frame)
    # Display test data only if grid is disabled; with the grid enabled the
    # canvas is created later from the grid instead (guards above guarantee
    # display_grid implies algo_val == "brute_sort").
    if not display_grid:
        self.test_data_plot = FigureCanvasTkAgg(visual.display_2d_dataset(
            self.test_data, "Test data:", micro=True), master=self.frame)
    # Position the components
    self.train_data_plot._tkcanvas.grid(column=0, row=15, padx=10, pady=4)
    start_time = time.time()
    # Actually run the algorithm with the parameters from the GUI.
    # Note: non-sklearn backends return an extra value `dd` (distance data
    # — used below to build the grid; exact contents defined by
    # classify_function, TODO confirm).
    if algo_val != "sklearn":
        # Take grid into consideration
        k_best, f_rate, self.result_data, dd = self.classify_function(
            self.train_data,
            self.test_data,
            output_path,
            kset=np.arange(1, self.k_slider.get() + 1),
            l=self.l_slider.get(),
            algorithm=algo_val)
        end_time = time.time(
        ) - start_time  # The time the algorithm did take - don't measure the grid time
        if algo_val == "brute_sort" and display_grid:
            # If displayed, plot it into the test data plot
            grid = self.grid_function(dd, k_best, 100)  # Hardcoded grid-size of 100
            self.test_data_plot = FigureCanvasTkAgg(
                visual.display_2d_dataset(grid, "Grid:", micro=True),
                master=self.frame)
            self.test_data = grid  # Set test data to grid
    else:  # Else plot as normal
        k_best, f_rate, self.result_data = self.classify_function(
            self.train_data,
            self.test_data,
            output_path,
            kset=np.arange(1, self.k_slider.get() + 1),
            l=self.l_slider.get(),
            algorithm=algo_val)
        end_time = time.time(
        ) - start_time  # The time the algorithm did take
    self.test_data_plot._tkcanvas.grid(column=1, row=15, padx=10, pady=4)
    print("Elapsed time:", end_time, "s")
    # Plot the result data
    self.result_data_plot = FigureCanvasTkAgg(visual.display_2d_dataset(
        self.result_data, "Result data:", micro=True), master=self.frame)
    self.result_data_plot._tkcanvas.grid(column=2, row=15, padx=10, pady=4)
    # Plot some stats about the current run
    self.data_label = Message(
        self.frame,
        anchor="w",
        text="Time: {:.4f}s \nFailure rate: {:.4f}\n k*: {}".format(
            end_time, f_rate, k_best),
        width=125)
    self.data_label.grid(column=3, row=15, padx=10, pady=4, sticky="NW")
    # Inform the user that the plot was finished
    messagebox.showinfo(
        "Information:",
        "The classification was done and the results were saved at " + output_path + ".")