def train_all(self):
        """Classify every dataset listed in the dataset combobox.

        For each dataset name the files ``<dir>/<name>.train.csv`` and
        ``<dir>/<name>.test.csv`` are parsed and handed to
        ``self.classify_function`` together with the output path
        ``<dir>/<name>.result.csv`` and the parameters chosen in the GUI.
        The elapsed time of every run is printed to the console, and the
        result data of the most recent run is kept in ``self.result_data``.

        Shows an error dialog and returns early when the combobox holds no
        dataset names; shows an information dialog once all runs finished.
        """
        dataset_names = self.dataset_combobox['values']

        # Only execute if we have data
        if not dataset_names:
            messagebox.showerror("Error:", "No data are there to be used.")
            return

        # Get some params from the GUI
        data_dir = self.data_folder_textfield.get()

        # Candidate k values 1..k_max, the same range `train` passes.
        # A plain arange(k_max) would start at 0 and omit k_max itself.
        kset_val = np.arange(1, self.k_slider.get() + 1)
        l_val = self.l_slider.get()
        algorithm_val = self.algorithm_combobox.get()

        for dataset_name in dataset_names:
            print('## Running dataset', dataset_name, ' ##')

            base_path = data_dir + "/" + dataset_name
            train_data = dataset.parse(base_path + ".train.csv")
            test_data = dataset.parse(base_path + ".test.csv")

            output_path = base_path + ".result.csv"

            start_time = time.time()

            # Execute per dataset. The classifier returns
            # (k_best, f_rate, result_data) for "sklearn" and one extra
            # trailing element otherwise; only the result data is kept here.
            _k_best, _f_rate, self.result_data, *_extra = (
                self.classify_function(
                    train_data,
                    test_data,
                    output_path,
                    kset=kset_val,
                    l=l_val,
                    algorithm=algorithm_val))

            elapsed = time.time() - start_time

            # Other data will be printed to the console via the print(...)
            # statements in the algorithm implementation

            print('Elapsed time:', elapsed)  # Print the elapsed time

            print("## -------- ##\n")

        # Inform the user that all classifications were finished
        messagebox.showinfo(
            "Information:",
            "The classifications were done and the results were saved at the canonical locations."
        )
# Example no. 2
# 0
import model as gradtype_model
import utils as gradtype_utils

# Training schedule constants.
# NOTE(review): TOTAL_EPOCHS and RESHUFFLE_EPOCHS are not used in the visible
# part of this file; presumably they are the total number of epochs to train
# and the interval at which the training data is reshuffled - confirm against
# the training loop.
TOTAL_EPOCHS = 2000000

RESHUFFLE_EPOCHS = 50

# Save weights every `SAVE_EPOCHS` epochs
SAVE_EPOCHS = 1000

#
# Prepare dataset
#

# NOTE(review): `dataset` is not imported in the visible part of this file.
# Parse the datasets (default arguments), build one full sequence from them
# and derive a sampling table from that sequence.
print('Loading dataset')
datasets = dataset.parse()
full_sequence = dataset.gen_full_sequence(datasets)
sampling_table = dataset.gen_sampling_table(full_sequence)

#
# Load model
#

encoder = gradtype_model.create_encoder()
# The return value of load_weights is kept as the starting epoch; presumably
# it restores saved weights whose file names use this prefix and reports the
# epoch to resume from - TODO confirm in gradtype_utils.
start_epoch = gradtype_utils.load_weights(encoder, 'gradtype-skipgrams-')

# NOTE(review): `Adam` is not imported in the visible part of this file. If it
# is the Keras optimizer, `lr` is the legacy argument name (newer releases use
# `learning_rate`) - confirm against the Keras version in use.
adam = Adam(lr=0.001)

# Compile the encoder with binary cross-entropy loss and track accuracy.
encoder.compile(adam, loss='binary_crossentropy', metrics=['accuracy'])

#
 def _build_review_stat(self):
     """Return a ``ReviewStat`` filled with every token parsed from ``TEST_DATA``."""
     # Parse first, then construct the accumulator, so a parse failure
     # surfaces before any ReviewStat is created.
     parsed_tokens = dataset.parse(TEST_DATA)
     stat = ReviewStat()
     for tok in parsed_tokens:
         stat.add(tok)
     return stat
    def train(self):
        """Classify the selected dataset and show plots and run statistics.

        Reads the dataset name, data folder, algorithm and grid option from
        the GUI, parses ``<dir>/<name>.train.csv`` and
        ``<dir>/<name>.test.csv`` and runs ``self.classify_function`` with
        k candidates ``1..k_slider`` and the ``l`` slider value. The training
        plot, the test plot (or the grid plot when the grid option is on) and
        the result plot are embedded into ``self.frame`` in row 15, followed
        by a label with elapsed time, failure rate and the best k.

        Shows an error dialog and returns early when no datasets are listed,
        when the grid is requested for an algorithm other than "brute_sort",
        or when the grid is requested for a dataset whose name does not end
        in "2d".
        """
        # Only execute if there are datasets
        if not self.dataset_combobox['values']:
            messagebox.showerror("Error:", "No dataset was selected.")
            return

        # Extract the run configuration from the GUI
        dataset_name = self.dataset_combobox.get()
        data_dir = self.data_folder_textfield.get()
        algo_val = self.algorithm_combobox.get()
        display_grid = self.grid_checkbox_var.get()

        # Grid is only supported for brute_sort and 2D datasets
        if display_grid and algo_val != "brute_sort":
            messagebox.showerror("Error:",
                                 "The grid is only supported for brutesort!")
            return

        if display_grid and not dataset_name.endswith("2d"):
            messagebox.showerror(
                "Error:", "The grid is only supported for 2D datasets.")
            return

        # Dataset files are assumed to follow the canonical name scheme
        # <dir>/<name>.<kind>.csv
        base_path = data_dir + "/" + dataset_name
        self.train_data = dataset.parse(base_path + ".train.csv")
        self.test_data = dataset.parse(base_path + ".test.csv")

        # Handed to the classifier as the location for the result data
        output_path = base_path + ".result.csv"

        # Plot the training data with matplotlib and embed it into the window
        self.train_data_plot = FigureCanvasTkAgg(
            visual.display_2d_dataset(
                self.train_data, "Training data:", micro=True),
            master=self.frame)

        # The test data is only plotted when the grid is disabled; with the
        # grid enabled, the grid plot created below takes its place.
        if not display_grid:
            self.test_data_plot = FigureCanvasTkAgg(
                visual.display_2d_dataset(
                    self.test_data, "Test data:", micro=True),
                master=self.frame)

        # Position the components (via the public Tk widget accessor)
        self.train_data_plot.get_tk_widget().grid(
            column=0, row=15, padx=10, pady=4)

        start_time = time.time()

        # Actually run the algorithm with the parameters from the GUI.
        # "sklearn" returns (k_best, f_rate, result_data); the other
        # algorithms append one more element, which the grid needs.
        k_best, f_rate, self.result_data, *extra = self.classify_function(
            self.train_data,
            self.test_data,
            output_path,
            kset=np.arange(1, self.k_slider.get() + 1),
            l=self.l_slider.get(),
            algorithm=algo_val)

        # The time the algorithm did take - don't measure the grid time
        elapsed = time.time() - start_time

        # If requested, plot the grid into the test data slot
        if algo_val == "brute_sort" and display_grid:
            grid = self.grid_function(extra[0], k_best,
                                      100)  # Hardcoded grid-size of 100

            self.test_data_plot = FigureCanvasTkAgg(
                visual.display_2d_dataset(grid, "Grid:", micro=True),
                master=self.frame)
            self.test_data = grid  # Set test data to grid

        self.test_data_plot.get_tk_widget().grid(
            column=1, row=15, padx=10, pady=4)

        print("Elapsed time:", elapsed, "s")

        # Plot the result data
        self.result_data_plot = FigureCanvasTkAgg(
            visual.display_2d_dataset(
                self.result_data, "Result data:", micro=True),
            master=self.frame)

        self.result_data_plot.get_tk_widget().grid(
            column=2, row=15, padx=10, pady=4)

        # Show some stats about the current run
        self.data_label = Message(
            self.frame,
            anchor="w",
            text="Time: {:.4f}s \nFailure rate: {:.4f}\n k*: {}".format(
                elapsed, f_rate, k_best),
            width=125)

        self.data_label.grid(column=3, row=15, padx=10, pady=4, sticky="NW")

        # Inform the user that the plot was finished
        messagebox.showinfo(
            "Information:",
            "The classification was done and the results were saved at " +
            output_path + ".")