Example #1
    def __init__(self,
                 verbose=False,
                 path=None,
                 resume=False,
                 searcher_args=None):
        super().__init__(verbose)

        if searcher_args is None:
            searcher_args = {}

        if path is None:
            path = temp_folder_generator()

        self.cnn = CnnModule(self.loss, self.metric, searcher_args, path,
                             verbose)

        self.path = path
        if has_file(os.path.join(self.path, 'text_classifier')) and resume:
            classifier = pickle_from_file(
                os.path.join(self.path, 'text_classifier'))
            self.__dict__ = classifier.__dict__
        else:
            self.y_encoder = None
            self.data_transformer = None
            self.verbose = verbose
Example #2
    def __init__(self, verbose=False, path=None, resume=False, searcher_args=None,
                 search_type=BayesianSearcher):
        """Initialize the instance.

        The classifier will be loaded from the files in 'path' if parameter 'resume' is True.
        Otherwise, a new one is created.

        Args:
            verbose: A boolean of whether the search process will be printed to stdout.
            path: A string. The path to a directory, where the intermediate results are saved.
            resume: A boolean. If True, the classifier will continue to previous work saved in path.
                Otherwise, the classifier will start a new search.
            searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
            search_type: A constant denoting the type of hyperparameter search algorithm that must be used.
        """
        super().__init__(verbose)

        if searcher_args is None:
            searcher_args = {}

        if path is None:
            path = rand_temp_folder_generator()

        self.path = path
        ensure_dir(path)
        if resume:
            classifier = pickle_from_file(os.path.join(self.path, 'classifier'))
            self.__dict__ = classifier.__dict__
            self.cnn = pickle_from_file(os.path.join(self.path, 'module'))
        else:
            self.y_encoder = None
            self.data_transformer = None
            self.verbose = verbose
            self.cnn = CnnModule(self.loss, self.metric, searcher_args, path, verbose, search_type)
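A minimal usage sketch of the resume behavior described in the docstring above, assuming a concrete subclass such as autokeras' ImageClassifier is importable at the top level (the path value is hypothetical):

from autokeras import ImageClassifier

# First run: a fresh search; the classifier and its CnnModule are pickled
# under 'path' (files 'classifier' and 'module') as the search progresses.
clf = ImageClassifier(verbose=True, path='/tmp/ak_run')

# Later run: resume=True unpickles both files from the same path and
# continues the previous search instead of starting a new one.
clf = ImageClassifier(verbose=True, path='/tmp/ak_run', resume=True)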
Example #3
def test_batch_dataset(_, _1):
    Constant.MAX_ITER_NUM = 1
    Constant.MAX_MODEL_NUM = 4
    Constant.SEARCH_MAX_ITER = 1
    Constant.T_MIN = 0.8
    data_path = 'tests/resources'
    clean_dir(TEST_TEMP_DIR)
    csv_file_path = os.path.join(data_path, "images_test/images_name.csv")
    image_path = os.path.join(data_path, "images_test/Color_images")
    train_dataset = BatchDataset(csv_file_path, image_path, has_target=True)
    test_dataset = BatchDataset(csv_file_path, image_path, has_target=True)
    train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    cnn = CnnModule(classification_loss, Accuracy, {}, TEST_TEMP_DIR, True)
    cnn.fit(2, (4, 250, 250, 3), train_dataloader, test_dataloader, 12 * 60 * 60)
    clean_dir(TEST_TEMP_DIR)
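For readability, here is the same CnnModule wiring from the test above with every positional argument named in a comment; the argument roles are inferred from this test and the other examples on this page, not from a documented signature:

cnn = CnnModule(
    classification_loss,  # loss function used to train candidate models
    Accuracy,             # metric class used to compare candidates
    {},                   # searcher_args forwarded to the searcher
    TEST_TEMP_DIR,        # directory for intermediate search results
    True)                 # verbose
cnn.fit(
    2,                    # n_output_node: number of output classes
    (4, 250, 250, 3),     # input shape: (n_samples, height, width, channels)
    train_dataloader,     # DataLoader yielding training batches
    test_dataloader,      # DataLoader yielding validation batches
    12 * 60 * 60)         # search time limit in seconds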
Example #5
    def __init__(self,
                 verbose=False,
                 path=None,
                 resume=False,
                 searcher_args=None,
                 augment=None):
        """Initialize the instance.

        The classifier will be loaded from the files in 'path' if parameter 'resume' is True.
        Otherwise, a new one is created.

        Args:
            verbose: A boolean of whether the search process will be printed to stdout.
            path: A string. The path to a directory, where the intermediate results are saved.
            resume: A boolean. If True, the classifier will continue to previous work saved in path.
                Otherwise, the classifier will start a new search.
            searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
            augment: A boolean value indicating whether the data needs augmentation. If not defined, it
                will use the value of Constant.DATA_AUGMENTATION, which is True by default.

        """
        super().__init__(verbose)

        if searcher_args is None:
            searcher_args = {}

        if path is None:
            path = temp_folder_generator()

        if augment is None:
            augment = Constant.DATA_AUGMENTATION

        self.path = path
        if has_file(os.path.join(self.path, 'classifier')) and resume:
            classifier = pickle_from_file(os.path.join(self.path,
                                                       'classifier'))
            self.__dict__ = classifier.__dict__
            self.cnn = pickle_from_file(os.path.join(self.path, 'module'))
        else:
            self.y_encoder = None
            self.data_transformer = None
            self.verbose = verbose
            self.augment = augment
            self.cnn = CnnModule(self.loss, self.metric, searcher_args, path,
                                 verbose)

        self.resize_height = None
        self.resize_width = None
Example #7
class DeepTaskSupervised(SearchSupervised):
    """
    Inherits from SearchSupervised class.
    
    Attributes:
        verbose: A boolean value indicating the verbosity mode. (optional, default = False)
        path: A string indicating the path to a directory where the intermediate results are saved. (optional, default = None)
        resume: A boolean. If True, the classifier will continue to previous work saved in path.
            Otherwise, the classifier will start a new search. (optional, default = False)
        searcher_args: A dictionary containing the parameters for the searcher's __init__ function. (optional, default = None)
        search_type: A constant denoting the type of hyperparameter search algorithm that must be used. (optional, default = BayesianSearcher)
    """
    def __init__(self,
                 verbose=False,
                 path=None,
                 resume=False,
                 searcher_args=None,
                 search_type=BayesianSearcher):
        """Initialize the instance of a DeepTaskSupervised class.

        The classifier will be loaded from the files in 'path' if parameter 'resume' is True.
        Otherwise, a new one is created.
        
        Args:
            verbose: A boolean of whether the search process will be printed to stdout.
            path: A string. The path to a directory, where the intermediate results are saved.
            resume: A boolean. If True, the classifier will continue to previous work saved in path.
                Otherwise, the classifier will start a new search.
            searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
            search_type: A constant denoting the type of hyperparameter search algorithm that must be used.
        """
        super().__init__(verbose)

        if searcher_args is None:
            searcher_args = {}

        if path is None:
            path = rand_temp_folder_generator()

        self.path = path
        ensure_dir(path)
        if resume:
            classifier = pickle_from_file(os.path.join(self.path,
                                                       'classifier'))
            self.__dict__ = classifier.__dict__
            self.cnn = pickle_from_file(os.path.join(self.path, 'module'))
        else:
            self.y_encoder = None
            self.data_transformer = None
            self.verbose = verbose
            self.cnn = CnnModule(self.loss, self.metric, searcher_args, path,
                                 verbose, search_type)

    def fit(self, x, y, time_limit=None):
        """Find the best neural architecture for classifying the training data and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset must be in numpy.ndarray format.
        The training and validation data should be passed through `x` and `y`. This method will automatically
        split the data into training and validation sets.

        Args:
            x: A numpy.ndarray instance containing the training data or the training data combined with the
               validation data.
            y: A numpy.ndarray instance containing the labels of the training data, or the labels of the
               training data combined with the validation labels.
            time_limit: The time limit for the search in seconds. (optional, default = None, which is treated as 24 hours)
            
        Effects:
            Trains a model that fits the data using the best neural architecture
        """
        validate_xy(x, y)
        y = self.transform_y(y)
        # Divide training data into training and validation data.
        validation_set_size = int(len(y) * Constant.VALIDATION_SET_SIZE)
        validation_set_size = min(validation_set_size, 500)
        validation_set_size = max(validation_set_size, 1)
        x_train, x_valid, y_train, y_valid = train_test_split(
            x, y, test_size=validation_set_size, random_state=42)
        self.init_transformer(x)
        # Transform x_train

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        valid_data = self.data_transformer.transform_test(x_valid, y_valid)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     valid_data, time_limit)

    def final_fit(self,
                  x_train,
                  y_train,
                  x_test,
                  y_test,
                  trainer_args=None,
                  retrain=False):
        """Final training after found the best architecture.

        Args:
            x_train: A numpy.ndarray of training data.
            y_train: A numpy.ndarray of training targets.
            x_test: A numpy.ndarray of testing data.
            y_test: A numpy.ndarray of testing targets.
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
            retrain: A boolean indicating whether or not to reinitialize the weights of the model.
        """
        x_train = self.preprocess(x_train)
        x_test = self.preprocess(x_test)
        if trainer_args is None:
            trainer_args = {'max_no_improvement_num': 30}

        y_train = self.transform_y(y_train)
        y_test = self.transform_y(y_test)

        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        self.cnn.final_fit(train_data, test_data, trainer_args, retrain)

    @property
    @abstractmethod
    def metric(self):
        pass

    @property
    @abstractmethod
    def loss(self):
        pass

    @abstractmethod
    def get_n_output_node(self):
        pass

    @staticmethod
    def transform_y(y_train):
        return y_train

    @staticmethod
    def inverse_transform_y(output):
        return output

    @abstractmethod
    def init_transformer(self, x):
        pass

    @abstractmethod
    def preprocess(self, x):
        pass

    def export_keras_model(self, model_file_name):
        """Exports the best Keras model to the given filename.
        
        Args:
            model_file_name: A string of the filename to which the best model will be exported
        
        Effects:
            Save the architecture, weights, and optimizer state of the best model
        """
        self.cnn.best_model.produce_keras_model().save(model_file_name)

    def predict(self, x_test, raw=False):
        """Return predict results for the testing data.

        Args:
            x_test: An instance of numpy.ndarray containing the testing data.
            raw: A boolean of whether to return the raw network outputs without applying
                inverse_transform_y. (optional, default = False)

        Returns:
            A numpy.ndarray containing the predictions for the testing data.
        """
        x_test = self.preprocess(x_test)
        test_loader = self.data_transformer.transform_test(x_test)
        if raw:
            return self.cnn.predict(test_loader)
        return self.inverse_transform_y(self.cnn.predict(test_loader))

    def evaluate(self, x_test, y_test):
        """Return the accuracy score between predict value and `y_test`.
        
        Predict the labels for the testing data.
        Calculate the accuracy metric between the predicted and actual labels of the testing data.
        
        Args:
            x_test: An instance of numpy.ndarray containing the testing data
            y_test: An instance of numpy.ndarray containing the labels of the testing data
            
        Returns:
            A float value of the accuracy of the predictions given the labels for the testing data
        """
        y_predict = self.predict(x_test)
        return self.metric().evaluate(y_predict, y_test)
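Taken together, the methods above suggest the following lifecycle for a concrete subclass; this is a sketch under the assumption that SomeTaskClassifier is a hypothetical subclass implementing the abstract members, and x/y/x_test/y_test are numpy arrays:

clf = SomeTaskClassifier(verbose=True)             # hypothetical concrete subclass
clf.fit(x, y, time_limit=60 * 60)                  # architecture search, 1 hour
clf.final_fit(x, y, x_test, y_test, retrain=True)  # retrain the best model found
print(clf.evaluate(x_test, y_test))                # metric score on held-out data
clf.export_keras_model('best_model.h5')            # save the best model as Keras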
Example #8
class DeepSupervised(Supervised):
    def __init__(self,
                 verbose=False,
                 path=None,
                 resume=False,
                 searcher_args=None,
                 search_type=BayesianSearcher):
        """Initialize the instance.

        The classifier will be loaded from the files in 'path' if parameter 'resume' is True.
        Otherwise, a new one is created.

        Args:
            verbose: A boolean of whether the search process will be printed to stdout.
            path: A string. The path to a directory, where the intermediate results are saved.
            resume: A boolean. If True, the classifier will continue to previous work saved in path.
                Otherwise, the classifier will start a new search.
            searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
            search_type: A constant denoting the type of hyperparameter search algorithm that must be used.
        """
        super().__init__(verbose)

        if searcher_args is None:
            searcher_args = {}

        if path is None:
            path = rand_temp_folder_generator()

        self.path = path
        ensure_dir(path)
        if resume:
            classifier = pickle_from_file(os.path.join(self.path,
                                                       'classifier'))
            self.__dict__ = classifier.__dict__
            self.cnn = pickle_from_file(os.path.join(self.path, 'module'))
        else:
            self.y_encoder = None
            self.data_transformer = None
            self.verbose = verbose
            self.cnn = CnnModule(self.loss, self.metric, searcher_args, path,
                                 verbose, search_type)

    def fit(self, x, y, time_limit=None):
        validate_xy(x, y)
        y = self.transform_y(y)
        # Divide training data into training and testing data.
        validation_set_size = int(len(y) * Constant.VALIDATION_SET_SIZE)
        validation_set_size = min(validation_set_size, 500)
        validation_set_size = max(validation_set_size, 1)
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=validation_set_size, random_state=42)
        self.init_transformer(x)
        # Transform x_train

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)

    def final_fit(self,
                  x_train,
                  y_train,
                  x_test,
                  y_test,
                  trainer_args=None,
                  retrain=False):
        """Final training after found the best architecture.

        Args:
            x_train: A numpy.ndarray of training data.
            y_train: A numpy.ndarray of training targets.
            x_test: A numpy.ndarray of testing data.
            y_test: A numpy.ndarray of testing targets.
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
            retrain: A boolean of whether to reinitialize the weights of the model.
        """
        x_train = self.preprocess(x_train)
        x_test = self.preprocess(x_test)
        if trainer_args is None:
            trainer_args = {'max_no_improvement_num': 30}

        y_train = self.transform_y(y_train)
        y_test = self.transform_y(y_test)

        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        self.cnn.final_fit(train_data, test_data, trainer_args, retrain)

    @property
    @abstractmethod
    def metric(self):
        pass

    @property
    @abstractmethod
    def loss(self):
        pass

    @abstractmethod
    def get_n_output_node(self):
        pass

    def transform_y(self, y_train):
        return y_train

    def inverse_transform_y(self, output):
        return output

    @abstractmethod
    def init_transformer(self, x):
        pass

    @abstractmethod
    def preprocess(self, x):
        pass

    def export_keras_model(self, model_file_name):
        """ Exports the best Keras model to the given filename. """
        self.cnn.best_model.produce_keras_model().save(model_file_name)

    def predict(self, x_test):
        """Return predict results for the testing data.

        Args:
            x_test: An instance of numpy.ndarray containing the testing data.

        Returns:
            A numpy.ndarray containing the results.
        """
        x_test = self.preprocess(x_test)
        test_loader = self.data_transformer.transform_test(x_test)
        return self.inverse_transform_y(self.cnn.predict(test_loader))

    def evaluate(self, x_test, y_test):
        """Return the accuracy score between predict value and `y_test`."""
        y_predict = self.predict(x_test)
        return self.metric().evaluate(y_test, y_predict)
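For clarity, a sketch of the minimal surface a concrete subclass of DeepSupervised has to implement; the class name is hypothetical, and the bodies are plausible stand-ins borrowed from the other examples on this page rather than a real autokeras subclass:

class MyClassifier(DeepSupervised):

    @property
    def loss(self):
        return classification_loss  # loss function passed to CnnModule

    @property
    def metric(self):
        return Accuracy             # metric class with an evaluate() method

    def get_n_output_node(self):
        return self.y_encoder.n_classes  # e.g. after fitting a OneHotEncoder in transform_y

    def init_transformer(self, x):
        if self.data_transformer is None:
            self.data_transformer = ImageDataTransformer(x, augment=False)

    def preprocess(self, x):
        return x                    # no-op preprocessing for this sketch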
Example #9
class ImageSupervised(Supervised):
    """The image classifier class.

    It is used for image classification. It searches convolutional neural network architectures
    for the best configuration for the dataset.

    Attributes:
        path: A path to the directory to save the classifier.
        y_encoder: An instance of OneHotEncoder for `y_train` (array of categorical labels).
        verbose: A boolean value indicating the verbosity mode.
        searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
        augment: A boolean value indicating whether the data needs augmentation. If not defined, it
                will use the value of Constant.DATA_AUGMENTATION, which is True by default.
    """

    def __init__(self, verbose=False, path=None, resume=False, searcher_args=None, augment=None):
        """Initialize the instance.

        The classifier will be loaded from the files in 'path' if parameter 'resume' is True.
        Otherwise, a new one is created.

        Args:
            verbose: A boolean of whether the search process will be printed to stdout.
            path: A string. The path to a directory, where the intermediate results are saved.
            resume: A boolean. If True, the classifier will continue to previous work saved in path.
                Otherwise, the classifier will start a new search.
            searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
            augment: A boolean value indicating whether the data needs augmentation. If not defined, it
                will use the value of Constant.DATA_AUGMENTATION, which is True by default.

        """
        super().__init__(verbose)

        if searcher_args is None:
            searcher_args = {}

        if path is None:
            path = temp_folder_generator()

        if augment is None:
            augment = Constant.DATA_AUGMENTATION

        self.path = path
        if has_file(os.path.join(self.path, 'classifier')) and resume:
            classifier = pickle_from_file(os.path.join(self.path, 'classifier'))
            self.__dict__ = classifier.__dict__
            self.cnn = pickle_from_file(os.path.join(self.path, 'module'))
        else:
            self.y_encoder = None
            self.data_transformer = None
            self.verbose = verbose
            self.augment = augment
            self.cnn = CnnModule(self.loss, self.metric, searcher_args, path, verbose)

        self.resize_height = None
        self.resize_width = None

    @property
    @abstractmethod
    def metric(self):
        pass

    @property
    @abstractmethod
    def loss(self):
        pass

    def fit(self, x, y, x_test=None, y_test=None, time_limit=None):
        x = np.array(x)

        if len(x.shape) != 0 and len(x[0].shape) == 3:
            if self.verbose:
                print("Preprocessing the images.")
            self.resize_height, self.resize_width = compute_image_resize_params(x)
            x = resize_image_data(x, self.resize_height, self.resize_width)
            if x_test is not None:
                x_test = resize_image_data(x_test, self.resize_height, self.resize_width)
            if self.verbose:
                print("Preprocessing finished.")

        y = np.array(y).flatten()
        validate_xy(x, y)
        y = self.transform_y(y)
        if x_test is None or y_test is None:
            # Divide training data into training and testing data.
            validation_set_size = int(len(y) * Constant.VALIDATION_SET_SIZE)
            validation_set_size = min(validation_set_size, 500)
            validation_set_size = max(validation_set_size, 1)
            x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                                test_size=validation_set_size,
                                                                random_state=42)
        else:
            x_train = x
            y_train = y
        # Transform x_train
        if self.data_transformer is None:
            self.data_transformer = ImageDataTransformer(x, augment=self.augment)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data, test_data, time_limit)

    @abstractmethod
    def get_n_output_node(self):
        pass

    def transform_y(self, y_train):
        return y_train

    def predict(self, x_test):
        """Return predict results for the testing data.

        Args:
            x_test: An instance of numpy.ndarray containing the testing data.

        Returns:
            A numpy.ndarray containing the results.
        """
        if Constant.LIMIT_MEMORY:
            pass
        test_loader = self.data_transformer.transform_test(x_test)
        model = self.cnn.best_model.produce_model()
        model.eval()

        outputs = []
        with torch.no_grad():
            for inputs in test_loader:
                outputs.append(model(inputs).numpy())
        output = reduce(lambda x, y: np.concatenate((x, y)), outputs)
        return self.inverse_transform_y(output)

    def inverse_transform_y(self, output):
        return output

    def evaluate(self, x_test, y_test):
        """Return the accuracy score between predict value and `y_test`."""
        if len(x_test.shape) != 0 and len(x_test[0].shape) == 3:
            x_test = resize_image_data(x_test, self.resize_height, self.resize_width)
        y_predict = self.predict(x_test)
        return self.metric().evaluate(y_test, y_predict)

    def final_fit(self, x_train, y_train, x_test, y_test, trainer_args=None, retrain=False):
        """Final training after found the best architecture.

        Args:
            x_train: A numpy.ndarray of training data.
            y_train: A numpy.ndarray of training targets.
            x_test: A numpy.ndarray of testing data.
            y_test: A numpy.ndarray of testing targets.
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
            retrain: A boolean of whether to reinitialize the weights of the model.
        """
        if trainer_args is None:
            trainer_args = {'max_no_improvement_num': 30}

        if len(x_train.shape) != 0 and len(x_train[0].shape) == 3:
            x_train = resize_image_data(x_train, self.resize_height, self.resize_width)
            if x_test is not None:
                x_test = resize_image_data(x_test, self.resize_height, self.resize_width)

        y_train = self.transform_y(y_train)
        y_test = self.transform_y(y_test)

        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        self.cnn.final_fit(train_data, test_data, trainer_args, retrain)

    def export_keras_model(self, model_file_name):
        """ Exports the best Keras model to the given filename. """
        self.cnn.best_model.produce_keras_model().save(model_file_name)

    def export_autokeras_model(self, model_file_name):
        """ Creates and Exports the AutoKeras model to the given filename. """
        portable_model = PortableImageSupervised(graph=self.cnn.best_model,
                                                 y_encoder=self.y_encoder,
                                                 data_transformer=self.data_transformer,
                                                 metric=self.metric,
                                                 inverse_transform_y_method=self.inverse_transform_y,
                                                 resize_params=(self.resize_height, self.resize_width))
        pickle_to_file(portable_model, model_file_name)
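The exported portable model can be loaded back with pickle_from_file; this round trip follows the old autokeras 0.x README, so treat the import path as an assumption:

from autokeras.utils import pickle_from_file

model = pickle_from_file('portable_model')  # file written by export_autokeras_model above
results = model.predict(x_test)             # x_test: a placeholder numpy.ndarray of images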
Example #10
class TextClassifier(Supervised):
    """TextClassifier class.

    Attributes:
        cnn: CNN module from net_module.py.
        path: A path to the directory to save the classifier as well as intermediate results.
        y_encoder: Label encoder, used in transform_y or inverse_transform_y to encode the labels. For example,
                    if one-hot encoding is needed, y_encoder can be OneHotEncoder.
        data_transformer: A transformer class to process the data. See example as ImageDataTransformer.
        verbose: A boolean value indicating the verbosity mode which determines whether the search process
                will be printed to stdout.
    """
    def __init__(self,
                 verbose=False,
                 path=None,
                 resume=False,
                 searcher_args=None):
        """Initialize the instance.

        The classifier will be loaded from the files in 'path' if parameter 'resume' is True.
        Otherwise, a new one is created.

        Args:
            verbose: A boolean of whether the search process will be printed to stdout.
            path: A string. The path to a directory, where the intermediate results are saved.
            resume: A boolean. If True, the classifier will continue to previous work saved in path.
                Otherwise, the classifier will start a new search.
            searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
        """
        super().__init__(verbose)

        if searcher_args is None:
            searcher_args = {}

        if path is None:
            path = temp_folder_generator()

        self.cnn = CnnModule(self.loss, self.metric, searcher_args, path,
                             verbose)

        self.path = path
        if has_file(os.path.join(self.path, 'text_classifier')) and resume:
            classifier = pickle_from_file(
                os.path.join(self.path, 'text_classifier'))
            self.__dict__ = classifier.__dict__
        else:
            self.y_encoder = None
            self.data_transformer = None
            self.verbose = verbose

    def fit(self,
            x,
            y,
            x_test=None,
            y_test=None,
            batch_size=None,
            time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset must be in numpy.ndarray format, so the training data should be passed through `x` and `y`.

        Args:
            x: A numpy.ndarray instance containing the training data.
            y: A numpy.ndarray instance containing the label of the training data.
            x_test: A numpy.ndarray instance containing the testing data.
            y_test: A numpy.ndarray instance containing the labels of the testing data.
            batch_size: An integer defining the batch size.
            time_limit: The time limit for the search in seconds.
        """
        x = text_preprocess(x, path=self.path)

        x = np.array(x)
        y = np.array(y)
        validate_xy(x, y)
        y = self.transform_y(y)

        if batch_size is None:
            batch_size = Constant.MAX_BATCH_SIZE
        # Divide training data into training and testing data.
        if x_test is None or y_test is None:
            x_train, x_test, y_train, y_test = train_test_split(
                x,
                y,
                test_size=min(Constant.VALIDATION_SET_SIZE, int(len(y) * 0.2)),
                random_state=42)
        else:
            x_train = x
            y_train = y

        # Wrap the data into DataLoaders
        if self.data_transformer is None:
            self.data_transformer = TextDataTransformer()

        train_data = self.data_transformer.transform_train(
            x_train, y_train, batch_size=batch_size)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'text_classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)

    def final_fit(self,
                  x_train=None,
                  y_train=None,
                  x_test=None,
                  y_test=None,
                  trainer_args=None,
                  retrain=False):
        """Final training after found the best architecture.

        Args:
            x_train: A numpy.ndarray of training data.
            y_train: A numpy.ndarray of training targets.
            x_test: A numpy.ndarray of testing data.
            y_test: A numpy.ndarray of testing targets.
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
            retrain: A boolean of whether to reinitialize the weights of the model.
        """
        if trainer_args is None:
            trainer_args = {'max_no_improvement_num': 30}

        if x_test is None:
            x_train, x_test, y_train, y_test = train_test_split(
                x_train,
                y_train,
                test_size=min(Constant.VALIDATION_SET_SIZE,
                              int(len(y_train) * 0.2)),
                random_state=42)

        x_train = text_preprocess(x_train, path=self.path)
        x_test = text_preprocess(x_test, path=self.path)

        y_train = self.transform_y(y_train)
        y_test = self.transform_y(y_test)

        train_data = self.data_transformer.transform_train(
            x_train, y_train, batch_size=Constant.MAX_BATCH_SIZE)
        test_data = self.data_transformer.transform_test(
            x_test, y_test, batch_size=Constant.MAX_BATCH_SIZE)

        self.cnn.final_fit(train_data, test_data, trainer_args, retrain)

    def predict(self, x_test):
        """Return predict results for the testing data.

        Args:
            x_test: An instance of numpy.ndarray containing the testing data.

        Returns:
            A numpy.ndarray containing the results.
        """
        if Constant.LIMIT_MEMORY:
            pass
        test_loader = self.data_transformer.transform_test(x_test)
        model = self.cnn.best_model.produce_model()
        model.eval()

        outputs = []
        with torch.no_grad():
            for inputs in test_loader:
                outputs.append(model(inputs).numpy())
        output = reduce(lambda x, y: np.concatenate((x, y)), outputs)
        return self.inverse_transform_y(output)

    def evaluate(self, x_test, y_test):
        """Return the accuracy score between predict value and `y_test`."""
        x_test = text_preprocess(x_test, path=self.path)
        y_predict = self.predict(x_test)
        return self.metric().evaluate(y_test, y_predict)

    @property
    def metric(self):
        return Accuracy

    @property
    def loss(self):
        return classification_loss

    def transform_y(self, y_train):
        # Transform y_train.
        if self.y_encoder is None:
            self.y_encoder = OneHotEncoder()
            self.y_encoder.fit(y_train)
        y_train = self.y_encoder.transform(y_train)
        return y_train

    def inverse_transform_y(self, output):
        return self.y_encoder.inverse_transform(output)

    def load_searcher(self):
        return pickle_from_file(os.path.join(self.path, 'searcher'))

    def get_n_output_node(self):
        return self.y_encoder.n_classes
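A hypothetical end-to-end run of the TextClassifier above; the texts and labels are placeholders, and fit's x is raw text because the method runs text_preprocess internally before encoding the labels with OneHotEncoder:

import numpy as np

x_train = np.array(['a positive review', 'a negative review'])  # raw texts
y_train = np.array([1, 0])                                      # integer labels

clf = TextClassifier(verbose=True)
clf.fit(x=x_train, y=y_train, time_limit=60 * 60)   # one-hour search
y_pred = clf.predict(np.array(['another review']))  # decoded via y_encoder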