Exemple #1
0
    def fit(self, x, y, trainer_args=None):
        """Fit the label encoder, hold out a validation split and train a generated graph.

        Args:
            x: A numpy.ndarray instance with the samples (training data, possibly
               combined with validation data).
            y: A numpy.ndarray instance with the labels belonging to `x`.
            trainer_args: Optional dictionary forwarded to `train`. When None,
               `{'max_no_improvement_num': 30}` is used.
        """
        validate_xy(x, y)
        self.y_encoder.fit(y)
        encoded_y = self.y_encoder.transform(y)

        # Hold-out size: a Constant.VALIDATION_SET_SIZE share of the samples,
        # clamped to the range [1, 500].
        holdout = int(len(encoded_y) * Constant.VALIDATION_SET_SIZE)
        holdout = max(min(holdout, 500), 1)
        x_train, x_test, y_train, y_test = train_test_split(
            x, encoded_y, test_size=holdout, random_state=42)

        # The transformer is built from the training portion only.
        self.data_transformer = self.data_transformer_class(x_train)
        train_loader = self.data_transformer.transform_train(x_train, y_train)
        test_loader = self.data_transformer.transform_test(x_test, y_test)

        # The generator gets the class count and the per-sample shape.
        sample_shape = x_train.shape[1:]
        self.generator = self._init_generator(self.y_encoder.n_classes,
                                              sample_shape)
        graph = self.generator.generate()

        if trainer_args is None:
            trainer_args = {'max_no_improvement_num': 30}
        # Only the third element returned by train() is kept.
        _first, _second, self.graph = train(
            None, graph, train_loader, test_loader, trainer_args,
            self.metric, self.loss, self.verbose, self.path)
    def fit(self, x, y, x_test=None, y_test=None, time_limit=None):
        """Prepare the data, save the classifier and run the architecture search.

        Args:
            x: Training samples; converted with `np.array`.
            y: Training labels; converted with `np.array` and flattened.
            x_test: Optional held-out samples. Only used when `y_test` is
                supplied as well; otherwise a split of `x`/`y` replaces it.
            y_test: Optional held-out labels. Only used when `x_test` is
                supplied as well; otherwise a split of `x`/`y` replaces it.
            time_limit: Forwarded to `self.cnn.fit` as its last argument.
                Defaults to 24 * 60 * 60 (24 hours in seconds) when None.
        """
        x = np.array(x)
        y = np.array(y).flatten()
        validate_xy(x, y)
        y = self.transform_y(y)
        if x_test is None or y_test is None:
            # No complete held-out set was supplied: carve one out of the
            # training data. Its size is a Constant.VALIDATION_SET_SIZE share
            # of the samples, clamped to the range [1, 500].
            validation_set_size = int(len(y) * Constant.VALIDATION_SET_SIZE)
            validation_set_size = max(min(validation_set_size, 500), 1)
            x_train, x_test, y_train, y_test = train_test_split(
                x, y, test_size=validation_set_size, random_state=42)
        else:
            # NOTE(review): a caller-supplied y_test is used as given; unlike
            # y it is not passed through transform_y -- confirm this is
            # intended.
            x_train = x
            y_train = y

        # The transformer is created once, from the full x, and then reused
        # on later calls.
        if self.data_transformer is None:
            self.data_transformer = ImageDataTransformer(x,
                                                         augment=self.augment)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier with a single write through the project helper.
        # An additional pickle.dump(self, open(...)) to the same path would
        # only leave an unclosed file handle behind.
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)
Exemple #3
0
    def fit(self, x, y, trainer_args=None):
        """Encode the labels, split off validation data and train a generated graph.

        Args:
            x: A numpy.ndarray instance holding the samples (training data,
               optionally together with validation data).
            y: A numpy.ndarray instance holding the labels for `x`.
            trainer_args: Optional dictionary passed on to `train`; falls back
               to `{'max_no_improvement_num': 30}` when None.
        """
        validate_xy(x, y)
        self.y_encoder.fit(y)
        y = self.y_encoder.transform(y)

        # Validation share of the data, but never fewer than 1 and never
        # more than 500 samples.
        n_validation = max(
            min(int(len(y) * Constant.VALIDATION_SET_SIZE), 500), 1)
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=n_validation, random_state=42)

        # Build the transformer from the training part, then wrap both parts.
        transformer = self.data_transformer_class(x_train)
        self.data_transformer = transformer
        train_loader = self.data_transformer.transform_train(x_train, y_train)
        test_loader = self.data_transformer.transform_test(x_test, y_test)

        self.generator = self._init_generator(
            self.y_encoder.n_classes, x_train.shape[1:])
        initial_graph = self.generator.generate()

        if trainer_args is None:
            trainer_args = {'max_no_improvement_num': 30}
        # train() returns three values; only the last one is stored.
        _unused_a, _unused_b, self.graph = train(
            None, initial_graph, train_loader, test_loader, trainer_args,
            self.metric, self.loss, self.verbose, self.path)
Exemple #4
0
    def fit(self,
            x,
            y,
            x_test=None,
            y_test=None,
            batch_size=None,
            time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural
        architecture for it. The training data should be passed through `x`
        and `y`.

        Args:
            x: The training data; run through `text_preprocess` and then
                converted with `np.array`.
            y: The labels of the training data; converted with `np.array`.
            x_test: Optional testing data. Only used when `y_test` is supplied
                as well; otherwise a split of `x`/`y` replaces it.
            y_test: Optional labels of the testing data. Only used when
                `x_test` is supplied as well.
            batch_size: int, the batch size for the training loader. Defaults
                to `Constant.MAX_BATCH_SIZE` when None.
            time_limit: The time limit for the search in seconds. Defaults to
                24 * 60 * 60 when None.
        """
        x = text_preprocess(x, path=self.path)

        x = np.array(x)
        y = np.array(y)
        validate_xy(x, y)
        y = self.transform_y(y)

        if batch_size is None:
            batch_size = Constant.MAX_BATCH_SIZE
        # Divide training data into training and testing data.
        if x_test is None or y_test is None:
            # NOTE(review): this takes the smaller of
            # Constant.VALIDATION_SET_SIZE and 20% of the sample count, so it
            # mixes a value that presumably is a fraction with an absolute
            # count -- confirm the intended semantics.
            x_train, x_test, y_train, y_test = train_test_split(
                x,
                y,
                test_size=min(Constant.VALIDATION_SET_SIZE, int(len(y) * 0.2)),
                random_state=42)
        else:
            # NOTE(review): caller-supplied x_test / y_test are used as given
            # (no text_preprocess / transform_y) -- confirm this is intended.
            x_train = x
            y_train = y

        # Wrap the data into DataLoaders
        if self.data_transformer is None:
            self.data_transformer = TextDataTransformer()

        train_data = self.data_transformer.transform_train(
            x_train, y_train, batch_size=batch_size)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier with a single write through the project helper.
        # An additional pickle.dump(self, open(...)) to the same path would
        # only leave an unclosed file handle behind.
        pickle_to_file(self, os.path.join(self.path, 'text_classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)
Exemple #5
0
    def fit(self, x, y, x_test=None, y_test=None, time_limit=None):
        """Resize image input if needed, split the data, save the classifier and start the search.

        Args:
            x: Training samples; converted with `np.array`. When the first
                sample has three dimensions the data is resized first.
            y: Training labels; converted with `np.array` and flattened.
            x_test: Optional held-out samples; resized together with `x`.
                Only used when `y_test` is supplied as well.
            y_test: Optional held-out labels. Only used when `x_test` is
                supplied as well.
            time_limit: Forwarded to `self.cnn.fit` as its last argument.
                Defaults to 24 * 60 * 60 when None.
        """
        x = np.array(x)

        # Three dimensions per sample are treated as image data to resize.
        needs_resize = len(x.shape) != 0 and len(x[0].shape) == 3
        if needs_resize:
            if self.verbose:
                print("Preprocessing the images.")
            target_size = compute_image_resize_params(x)
            self.resize_height, self.resize_width = target_size
            x = resize_image_data(x, self.resize_height, self.resize_width)
            if x_test is not None:
                x_test = resize_image_data(
                    x_test, self.resize_height, self.resize_width)
            if self.verbose:
                print("Preprocessing finished.")

        y = np.array(y).flatten()
        validate_xy(x, y)
        y = self.transform_y(y)

        if x_test is not None and y_test is not None:
            # A complete held-out set was supplied: train on everything.
            x_train, y_train = x, y
        else:
            # Otherwise hold out a Constant.VALIDATION_SET_SIZE share of the
            # samples, clamped to the range [1, 500].
            n_holdout = int(len(y) * Constant.VALIDATION_SET_SIZE)
            n_holdout = max(min(n_holdout, 500), 1)
            x_train, x_test, y_train, y_test = train_test_split(
                x, y, test_size=n_holdout, random_state=42)

        # The transformer is created once, from the full x.
        if self.data_transformer is None:
            self.data_transformer = ImageDataTransformer(
                x, augment=self.augment)

        # Wrap both parts into loaders.
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier before the search starts.
        save_path = os.path.join(self.path, 'classifier')
        pickle_to_file(self, save_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)
Exemple #6
0
    def fit(self, x, y, time_limit=None):
        """Search for a neural architecture for the given data and train it.

        The data passed through `x` and `y` is split into a training and a
        validation set by this method.

        Args:
            x: A numpy.ndarray instance with the samples (training and
               validation data together).
            y: A two-dimensional array: column 0 holds the labels, column 1
               holds per-sample flags. The flags are re-attached as the last
               column after the labels have been transformed.
            time_limit: The time limit for the search in seconds. When None,
               24 * 60 * 60 (24 hours) is used.

        Effects:
            Saves the classifier under `self.path` and runs `self.cnn.fit`.
        """
        validate_xy(x, y)
        labels, flags = y[:, 0], y[:, 1]
        labels = self.transform_y(labels)

        # Validation share of the samples, clamped to the range [1, 500].
        n_valid = int(len(labels) * Constant.VALIDATION_SET_SIZE)
        n_valid = max(min(n_valid, 500), 1)

        # Append the flags as one extra column next to the transformed labels.
        import numpy as np
        flag_column = flags.reshape(-1, 1)
        y = np.concatenate((labels, flag_column), axis=1)

        x_train, x_valid, y_train, y_valid = train_test_split(
            x, y, test_size=n_valid, random_state=42)

        # The transformer is initialised from the full x, not only x_train.
        self.init_transformer(x)

        # Wrap both parts into loaders.
        train_data = self.data_transformer.transform_train(x_train, y_train)
        valid_data = self.data_transformer.transform_test(x_valid, y_valid)

        # Save the classifier before the search starts.
        save_path = os.path.join(self.path, 'classifier')
        pickle_to_file(self, save_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     valid_data, time_limit)
Exemple #7
0
    def fit(self, x, y, time_limit=None):
        """Search for a neural architecture for the given data and train it.

        The data passed through `x` and `y` is split into a training and a
        validation set by this method.

        Args:
            x: A numpy.ndarray instance with the samples (training and
               validation data together).
            y: A numpy.ndarray instance with the labels belonging to `x`.
            time_limit: The time limit for the search in seconds. When None,
               24 * 60 * 60 (24 hours) is used.

        Effects:
            Saves the classifier under `self.path` and runs `self.cnn.fit`.
        """
        validate_xy(x, y)
        y = self.transform_y(y)

        # Validation share of the samples, clamped to the range [1, 500].
        n_valid = int(len(y) * Constant.VALIDATION_SET_SIZE)
        n_valid = max(min(n_valid, 500), 1)
        x_train, x_valid, y_train, y_valid = train_test_split(
            x, y, test_size=n_valid, random_state=42)

        # NOTE(review): the transformer is initialised after the split and
        # from the full x rather than x_train -- open question from the
        # developers whether this ordering is intended.
        self.init_transformer(x)

        # Wrap both parts into loaders.
        train_data = self.data_transformer.transform_train(x_train, y_train)
        valid_data = self.data_transformer.transform_test(x_valid, y_valid)

        # Save the classifier before the search starts.
        save_path = os.path.join(self.path, 'classifier')
        pickle_to_file(self, save_path)

        # NOTE(review): open question from the developers -- the 24 hour
        # default could live in the signature instead of a None check here.
        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape,
                     train_data, valid_data, time_limit)
    def fit(self, x_train, y_train, time_limit=None):
        """Preprocess the images, hold out a validation split and train a generated graph.

        Args:
            x_train: A numpy.ndarray instance containing the training data,
                or the training data combined with the validation data.
            y_train: A numpy.ndarray instance containing the labels for
                `x_train`.
            time_limit: Despite its name, this value is forwarded to `train`
                as the trainer-argument dictionary. When None,
                `{'max_no_improvement_num': 30}` is used.
        """
        validate_xy(x_train, y_train)

        # Resize parameters come from the raw input, before preprocessing.
        self.resize_shape = compute_image_resize_params(x_train)
        x_train = self.preprocess(x_train)

        self.y_encoder.fit(y_train)
        y_train = self.transform_y(y_train)

        # Validation share of the samples, clamped to the range [1, 500].
        n_holdout = int(len(y_train) * Constant.VALIDATION_SET_SIZE)
        n_holdout = max(min(n_holdout, 500), 1)
        x_fit, x_test, y_fit, y_test = train_test_split(
            x_train, y_train, test_size=n_holdout, random_state=42)

        # The transformer is built from the training portion only.
        self.data_transformer = ImageDataTransformer(x_fit)
        train_loader = self.data_transformer.transform_train(x_fit, y_fit)
        test_loader = self.data_transformer.transform_test(x_test, y_test)

        # The generator gets the class count and the per-sample shape.
        self.generator = self._init_generator(
            self.y_encoder.n_classes, x_fit.shape[1:])
        graph = self.generator.generate()

        if time_limit is None:
            time_limit = {'max_no_improvement_num': 30}
        # train() returns three values; only the last one is stored.
        _first, _second, self.graph = train(
            None, graph, train_loader, test_loader, time_limit,
            self.metric, self.loss, self.verbose, self.path)
Exemple #9
0
    def fit(self, x, y, time_limit=None):
        """Split the data, save the classifier and run the architecture search.

        Args:
            x: The samples; split into a training and a testing part here.
            y: The labels belonging to `x`; passed through `transform_y`.
            time_limit: Forwarded to `self.cnn.fit` as its last argument.
                When None, 24 * 60 * 60 is used.
        """
        validate_xy(x, y)
        y = self.transform_y(y)

        # Testing share of the samples, clamped to the range [1, 500].
        n_test = int(len(y) * Constant.VALIDATION_SET_SIZE)
        n_test = max(min(n_test, 500), 1)
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=n_test, random_state=42)

        # The transformer is initialised from the full x, not only x_train.
        self.init_transformer(x)

        # Wrap both parts into loaders.
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier before the search starts.
        save_path = os.path.join(self.path, 'classifier')
        pickle_to_file(self, save_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape,
                     train_data, test_data, time_limit)
Exemple #10
0
    def fit(self, x, y, time_limit=None):
        """Hold out testing data, persist the classifier and start the search.

        Args:
            x: The samples; divided into training and testing data here.
            y: The labels for `x`; transformed with `transform_y` first.
            time_limit: Passed to `self.cnn.fit` as its final argument;
                24 * 60 * 60 is substituted when None.
        """
        validate_xy(x, y)
        transformed_y = self.transform_y(y)

        # A Constant.VALIDATION_SET_SIZE share of the data is held out, with
        # at least 1 and at most 500 samples.
        held_out = max(
            min(int(len(transformed_y) * Constant.VALIDATION_SET_SIZE), 500),
            1)
        x_train, x_test, y_train, y_test = train_test_split(
            x, transformed_y, test_size=held_out, random_state=42)

        # Initialised from the complete x rather than the training part.
        self.init_transformer(x)

        # Wrap the data into loaders.
        transformer = self.data_transformer
        train_data = transformer.transform_train(x_train, y_train)
        test_data = transformer.transform_test(x_test, y_test)

        # Save the classifier.
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        n_output = self.get_n_output_node()
        self.cnn.fit(n_output, x_train.shape, train_data, test_data,
                     time_limit)