Beispiel #1
0
    def final_fit(self, x_train=None, y_train=None, x_test=None, y_test=None, trainer_args=None, retrain=False):
        """Final training after the best architecture has been found.

        When the test set is incomplete (either `x_test` or `y_test` is None),
        a hold-out set is split off the training data with a fixed random
        seed. Both the training and the test inputs are then run through
        `text_preprocess` and `self.transform_y` before being wrapped by
        `self.data_transformer` and handed to `self.cnn.final_fit`.

        Args:
            x_train: A numpy.ndarray of training data.
            y_train: A numpy.ndarray of training targets.
            x_test: A numpy.ndarray of testing data.
            y_test: A numpy.ndarray of testing targets.
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
                Defaults to `{'max_no_improvement_num': 30}` when None.
            retrain: A boolean of whether to reinitialize the weights of the model.
        """
        if trainer_args is None:
            trainer_args = {'max_no_improvement_num': 30}

        # Split whenever the test set is incomplete, not only when x_test is
        # missing: a lone x_test without y_test cannot be used for evaluation
        # and would otherwise feed None into transform_y below.
        if x_test is None or y_test is None:
            holdout_size = min(Constant.VALIDATION_SET_SIZE, int(len(y_train) * 0.2))
            x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                                test_size=holdout_size,
                                                                random_state=42)

        x_train = text_preprocess(x_train, path=self.path)
        x_test = text_preprocess(x_test, path=self.path)

        y_train = self.transform_y(y_train)
        y_test = self.transform_y(y_test)

        train_data = self.data_transformer.transform_train(x_train, y_train, batch_size=Constant.MAX_BATCH_SIZE)
        test_data = self.data_transformer.transform_test(x_test, y_test, batch_size=Constant.MAX_BATCH_SIZE)

        self.cnn.final_fit(train_data, test_data, trainer_args, retrain)
Beispiel #2
0
    def fit(self,
            x,
            y,
            x_test=None,
            y_test=None,
            batch_size=None,
            time_limit=None):
        """Preprocess the text input and delegate the search to the parent class.

        `x` is passed through `text_preprocess` and converted to a numpy
        array; `y` is converted to a numpy array and flattened. The results
        are forwarded to the parent class's `fit` together with `x_test`,
        `y_test` and `time_limit`.

        Args:
            x: The training data.
            y: The labels of the training data.
            x_test: The testing data; forwarded to the parent unchanged.
            y_test: The labels of the testing data; forwarded to the parent unchanged.
            batch_size: Accepted for interface compatibility; not used by this method.
            time_limit: The time limit for the search in seconds; forwarded to the parent.
        """
        features = np.array(text_preprocess(x))
        labels = np.array(y).flatten()

        super().fit(features, labels, x_test, y_test, time_limit)
Beispiel #3
0
    def fit(self,
            x,
            y,
            x_test=None,
            y_test=None,
            batch_size=None,
            time_limit=None):
        """Find the best neural architecture and train it.

        The training text is run through `text_preprocess`, the labels through
        `self.transform_y`, and both are wrapped by `self.data_transformer`
        before the search is started via `self.cnn.fit`. The classifier is
        pickled to `<self.path>/text_classifier` before the search begins.

        Args:
            x: The training data.
            y: The labels of the training data.
            x_test: The testing data. When either `x_test` or `y_test` is None,
                a hold-out set is split off the training data instead.
            y_test: The labels of the testing data.
            batch_size: int, the batch size used for the training data.
                Defaults to `Constant.MAX_BATCH_SIZE` when None.
            time_limit: The time limit for the search in seconds.
                Defaults to 24 hours when None.
        """
        x = text_preprocess(x, path=self.path)

        x = np.array(x)
        y = np.array(y)
        validate_xy(x, y)
        y = self.transform_y(y)

        if batch_size is None:
            batch_size = Constant.MAX_BATCH_SIZE

        # Divide training data into training and testing data.
        if x_test is None or y_test is None:
            x_train, x_test, y_train, y_test = train_test_split(
                x,
                y,
                test_size=min(Constant.VALIDATION_SET_SIZE, int(len(y) * 0.2)),
                random_state=42)
        else:
            x_train = x
            y_train = y
            # Caller-supplied test data must go through the same preprocessing
            # and label transformation as the training data; otherwise the two
            # loaders would carry differently encoded inputs and targets.
            # NOTE(review): assumes callers pass raw (unprocessed) test data,
            # as final_fit does - confirm against call sites.
            x_test = np.array(text_preprocess(x_test, path=self.path))
            y_test = self.transform_y(np.array(y_test))

        # Wrap the data into DataLoaders
        if self.data_transformer is None:
            self.data_transformer = TextDataTransformer()

        train_data = self.data_transformer.transform_train(
            x_train, y_train, batch_size=batch_size)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier. The file is opened in a context manager so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'text_classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file targets the same path, so the explicit
        # dump above is presumably redundant - confirm and drop one of them.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)
Beispiel #4
0
    def fit(self, x, y, time_limit=None):
        """Preprocess the text input and delegate the search to the parent class.

        `x` is passed through `text_preprocess` and converted to a numpy
        array; `y` is converted to a numpy array and flattened. Both are then
        forwarded to the parent class's `fit` together with `time_limit`.

        Args:
            x: The training data.
            y: The labels of the training data.
            time_limit: The time limit for the search in seconds; forwarded to the parent.
        """
        features = np.array(text_preprocess(x))
        labels = np.array(y).flatten()

        super().fit(features, labels, time_limit)
Beispiel #5
0
    def fit(self,
            x_train=None,
            y_train=None,
            batch_size=None,
            time_limit=None):
        """Find the best neural architecture and train it.

        The training data is validated, the labels are transformed with
        `self.transform_y`, a searcher is created and saved on first use, and
        a hold-out set is split off the training data with a fixed random
        seed. The classifier is pickled to `<self.path>/classifier` before the
        search is started via `self.cnn.fit`.

        Args:
            x_train: The training data. Treated as an empty list when None.
                Passed through `text_preprocess` only when `self.augment` is truthy.
            y_train: The labels of the training data. Treated as an empty list when None.
            batch_size: int, the batch size used for the training data.
                Defaults to `Constant.MAX_BATCH_SIZE` when None.
            time_limit: The time limit for the search in seconds.
                Defaults to 24 hours when None.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []
        if self.augment:
            x_train = text_preprocess(x_train, path=self.path)

        x_train = np.array(x_train)
        y_train = np.array(y_train)
        _validate(x_train, y_train)
        y_train = self.transform_y(y_train)

        if batch_size is None:
            batch_size = Constant.MAX_BATCH_SIZE

        # Create the searcher and save on disk
        if not self.searcher:
            input_shape = x_train.shape[1:]
            self.searcher_args['n_output_node'] = self.get_n_output_node()
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['verbose'] = self.verbose
            searcher = Searcher(**self.searcher_args)
            self.save_searcher(searcher)
            # From here on self.searcher only flags that a searcher exists.
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(
            x_train,
            y_train,
            test_size=min(Constant.VALIDATION_SET_SIZE,
                          int(len(y_train) * 0.2)),
            random_state=42)

        # Wrap the data into DataLoaders
        train_data = text_dataloader(x_train,
                                     y_train,
                                     batch_size=batch_size,
                                     shuffle=True)
        test_data = text_dataloader(x_test, y_test, shuffle=True)

        # Save the classifier. The file is opened in a context manager so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file targets the same path, so the explicit
        # dump above is presumably redundant - confirm and drop one of them.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)
Beispiel #6
0
 def evaluate(self, x_test, y_test):
     """Return the score of `self.metric` between the predicted values and `y_test`.

     When `self.augment` is truthy, `x_test` is first passed through
     `text_preprocess`; the predictions come from `self.predict`.

     Args:
         x_test: The testing data to predict on.
         y_test: The expected targets for `x_test`.

     Returns:
         Whatever `self.metric().evaluate(y_test, y_predict)` returns.
     """
     if self.augment:
         x_test = text_preprocess(x_test, path=self.path)
     y_predict = self.predict(x_test)
     return self.metric().evaluate(y_test, y_predict)
Beispiel #7
0
 def preprocess(self, x):
     """Pass `x` through `text_preprocess` (default arguments) and return the result."""
     processed = text_preprocess(x)
     return processed
Beispiel #8
0
 def preprocess(self, x):
     """Pass `x` through `text_preprocess` (default arguments) and return the result."""
     processed = text_preprocess(x)
     return processed