Example #1
0
def get_classification_dataloaders():
    """Build train/test data from random image-shaped arrays.

    Draws 200 training and 190 test samples of shape (28, 28, 3), each with a
    3-column target array, from ``np.random.rand`` and passes them through a
    ``DataTransformer`` created with ``augment=True``.

    Returns:
        A ``(train_data, test_data)`` tuple holding the results of
        ``transform_train`` and ``transform_test`` respectively.
    """
    image_shape = (28, 28, 3)
    n_train, n_test, n_targets = 200, 190, 3

    # Draw order (train x, train y, test x, test y) is kept on purpose so a
    # seeded RNG produces the same arrays as before.
    train_images = np.random.rand(n_train, *image_shape)
    train_targets = np.random.rand(n_train, n_targets)
    test_images = np.random.rand(n_test, *image_shape)
    test_targets = np.random.rand(n_test, n_targets)

    transformer = DataTransformer(train_images, augment=True)
    return (
        transformer.transform_train(train_images, train_targets),
        transformer.transform_test(test_images, test_targets),
    )
Example #2
0
def get_classification_data_loaders():
    """Create random train/test data wrapped by a ``DataTransformer``.

    The training split has 200 samples and the test split 190; inputs have
    shape (28, 28, 3) and targets have 3 columns, all drawn with
    ``np.random.rand``. The transformer is built from the training inputs
    with augmentation switched on.

    Returns:
        A tuple ``(train_data, test_data)`` as returned by the transformer's
        ``transform_train`` and ``transform_test`` methods.
    """
    sample_dims = (28, 28, 3)
    target_dims = (3,)

    # Keep the original draw order: train inputs, train targets, test inputs,
    # test targets.
    inputs_train = np.random.rand(200, *sample_dims)
    targets_train = np.random.rand(200, *target_dims)
    inputs_test = np.random.rand(190, *sample_dims)
    targets_test = np.random.rand(190, *target_dims)

    transformer = DataTransformer(inputs_train, augment=True)
    wrapped_train = transformer.transform_train(inputs_train, targets_train)
    wrapped_test = transformer.transform_test(inputs_test, targets_test)
    return wrapped_train, wrapped_test
Example #3
0
def get_processed_data():
    """Return a small random dataset processed by a ``DataTransformer``.

    Generates 20 training and 10 test samples of shape (28, 28, 3) with
    3-column targets using ``np.random.rand``, then transforms them with a
    ``DataTransformer`` built from the training inputs (``augment=True``).

    Returns:
        A ``(train_data, test_data)`` tuple with the outputs of
        ``transform_train`` and ``transform_test``.
    """
    n_train, n_test = 20, 10
    image_shape = (28, 28, 3)
    n_targets = 3

    # Same draw order as always: train x, train y, test x, test y.
    train_x = np.random.rand(n_train, *image_shape)
    train_y = np.random.rand(n_train, n_targets)
    test_x = np.random.rand(n_test, *image_shape)
    test_y = np.random.rand(n_test, n_targets)

    transformer = DataTransformer(train_x, augment=True)
    processed_train = transformer.transform_train(train_x, train_y)
    processed_test = transformer.transform_test(test_x, test_y)
    return processed_train, processed_test
Example #4
0
File: gan.py Project: ifuding/TC
    def fit(self, x_train):
        """Train the GAN networks (``self.net_g`` / ``self.net_d``) on ``x_train``.

        A ``DataTransformer`` is built from the data and stored on
        ``self.data_transformer``; its ``transform_train`` output is handed
        to a ``GANModelTrainer`` whose ``train_model`` method is then run.

        Args:
            x_train: ndarray containing the training data.
        """
        # The input size stays the same, so enable the cuDNN optimization.
        cudnn.benchmark = True

        transformer = DataTransformer(x_train, augment=self.augment)
        self.data_transformer = transformer
        loader = transformer.transform_train(x_train)

        trainer = GANModelTrainer(
            self.net_g,
            self.net_d,
            loader,
            binary_classification_loss,
            self.verbose,
            self.gen_training_result,
        )
        trainer.train_model()
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format,
        so the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
                `None` is treated as an empty list.
            y_train: A numpy.ndarray instance containing the label of the training data.
                It is flattened before use; `None` is treated as an empty list.
            time_limit: The time limit for the search in seconds. Defaults to 24 hours.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        y_train = self.transform_y(y_train)

        # Build the transformer only once so repeated calls reuse the first one.
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train, augment=self.augment)

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                            test_size=min(Constant.VALIDATION_SET_SIZE,
                                                                          int(len(y_train) * 0.2)),
                                                            random_state=42)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier. The file is opened in a `with` block so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file targets the same path, so one of the
        # two writes looks redundant -- confirm before removing either.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        # x_train.shape here is the shape of the training split, not of the full input.
        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data, test_data, time_limit)
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format,
        so the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
                `None` is treated as an empty list.
            y_train: A numpy.ndarray instance containing the label of the training data.
                It is flattened before use; `None` is treated as an empty list.
            time_limit: The time limit for the search in seconds. Defaults to 24 hours.

        Raises:
            TimeoutError: If the time ran out and the saved searcher has an empty history.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        y_train = self.transform_y(y_train)

        # Build the transformer only once so repeated calls reuse the first one.
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train,
                                                    augment=self.augment)

        # Create the searcher and save on disk. `self.searcher` is only a
        # flag; the Searcher object itself is persisted via save_searcher().
        if not self.searcher:
            input_shape = x_train.shape[1:]
            self.searcher_args['n_output_node'] = self.get_n_output_node()
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['verbose'] = self.verbose
            searcher = Searcher(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(
            x_train,
            y_train,
            test_size=min(Constant.VALIDATION_SET_SIZE,
                          int(len(y_train) * 0.2)),
            random_state=42)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier. The file is opened in a `with` block so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file targets the same path, so one of the
        # two writes looks redundant -- confirm before removing either.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                run_searcher_once(train_data, test_data, self.path,
                                  int(time_remain))
                if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # The time budget is used up: go through the handler below, which
            # only re-raises when the search produced no model at all.
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.load_searcher().history) == 0:
                raise TimeoutError(
                    "Search Time too short. No model was found during the search time."
                )
            elif self.verbose:
                print('Time is out.')
class ImageSupervised(Supervised):
    """The image classifier class.

    It is used for image classification. It searches convolutional neural network architectures
    for the best configuration for the dataset.

    Attributes:
        path: A path to the directory to save the classifier.
        y_encoder: An instance of OneHotEncoder for `y_train` (array of categorical labels).
            Initialized to None by this class.
        data_transformer: The DataTransformer created from the training data on the first
            call to `fit`; None until then.
        verbose: A boolean value indicating the verbosity mode.
        searcher: A boolean flag. It is True once a Searcher has been created and saved
            under `path`; the Searcher object itself is read back with `load_searcher`.
        searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
        augment: A boolean value indicating whether the data needs augmentation.
    """
    def __init__(self,
                 verbose=False,
                 path=None,
                 resume=False,
                 searcher_args=None,
                 augment=None):
        """Initialize the instance.

        The classifier will be loaded from the files in 'path' if parameter 'resume' is True.
        Otherwise it would create a new one.

        Args:
            verbose: A boolean of whether the search process will be printed to stdout.
            path: A string. The path to a directory, where the intermediate results are saved.
                If None, a folder from `temp_folder_generator()` is used.
            resume: A boolean. If True, the classifier will continue to previous work saved in path.
                Otherwise, the classifier will start a new search.
            searcher_args: A dictionary of extra keyword arguments for the searcher's
                constructor. Defaults to an empty dictionary.
            augment: A boolean value indicating whether the data needs augmentation.
                If None, `Constant.DATA_AUGMENTATION` is used.

        """
        super().__init__(verbose)
        if searcher_args is None:
            searcher_args = {}

        if path is None:
            path = temp_folder_generator()

        if augment is None:
            augment = Constant.DATA_AUGMENTATION

        if has_file(os.path.join(path, 'classifier')) and resume:
            # Adopt the whole state of the pickled classifier, but keep the
            # path given to this call rather than the pickled one.
            classifier = pickle_from_file(os.path.join(path, 'classifier'))
            self.__dict__ = classifier.__dict__
            self.path = path
        else:
            self.y_encoder = None
            self.data_transformer = None
            self.verbose = verbose
            self.searcher = False
            self.path = path
            self.searcher_args = searcher_args
            self.augment = augment
            ensure_dir(path)

    @property
    @abstractmethod
    def metric(self):
        """The metric handed to the searcher and to `train`; defined by subclasses."""
        pass

    @property
    @abstractmethod
    def loss(self):
        """The loss handed to the searcher and to `train`; defined by subclasses."""
        pass

    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format,
        so the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
                `None` is treated as an empty list.
            y_train: A numpy.ndarray instance containing the label of the training data.
                It is flattened before use; `None` is treated as an empty list.
            time_limit: The time limit for the search in seconds. Defaults to 24 hours.

        Raises:
            TimeoutError: If the time ran out and the saved searcher has an empty history.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        y_train = self.transform_y(y_train)

        # Build the transformer only once so repeated calls reuse the first one.
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train,
                                                    augment=self.augment)

        # Create the searcher and save on disk. `self.searcher` is only a
        # flag; the Searcher object itself is persisted via save_searcher().
        if not self.searcher:
            input_shape = x_train.shape[1:]
            self.searcher_args['n_output_node'] = self.get_n_output_node()
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['verbose'] = self.verbose
            searcher = Searcher(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(
            x_train,
            y_train,
            test_size=min(Constant.VALIDATION_SET_SIZE,
                          int(len(y_train) * 0.2)),
            random_state=42)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier. The file is opened in a `with` block so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file targets the same path, so one of the
        # two writes looks redundant -- confirm before removing either.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                run_searcher_once(train_data, test_data, self.path,
                                  int(time_remain))
                if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # The time budget is used up: go through the handler below, which
            # only re-raises when the search produced no model at all.
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.load_searcher().history) == 0:
                raise TimeoutError(
                    "Search Time too short. No model was found during the search time."
                )
            elif self.verbose:
                print('Time is out.')

    @abstractmethod
    def get_n_output_node(self):
        """Return the value stored as the searcher's `n_output_node` argument."""
        pass

    def transform_y(self, y_train):
        """Return `y_train` unchanged; a hook subclasses may override."""
        return y_train

    def predict(self, x_test):
        """Return predict results for the testing data.

        Args:
            x_test: An instance of numpy.ndarray containing the testing data.

        Returns:
            A numpy.ndarray containing the results, after `inverse_transform_y`.
        """
        test_loader = self.data_transformer.transform_test(x_test)
        model = self.load_searcher().load_best_model().produce_model()
        model.eval()

        with torch.no_grad():
            outputs = [model(inputs).numpy() for inputs in test_loader]
        # One concatenation over all batches instead of re-copying the
        # accumulated result once per batch.
        output = np.concatenate(outputs)
        return self.inverse_transform_y(output)

    def inverse_transform_y(self, output):
        """Return `output` unchanged; a hook subclasses may override."""
        return output

    def evaluate(self, x_test, y_test):
        """Return the accuracy score between predict value and `y_test`."""
        y_predict = self.predict(x_test)
        return accuracy_score(y_test, y_predict)

    def save_searcher(self, searcher):
        """Pickle `searcher` to the 'searcher' file under `self.path`."""
        with open(os.path.join(self.path, 'searcher'), 'wb') as searcher_file:
            pickle.dump(searcher, searcher_file)

    def load_searcher(self):
        """Load and return the searcher pickled under `self.path`."""
        return pickle_from_file(os.path.join(self.path, 'searcher'))

    def final_fit(self,
                  x_train,
                  y_train,
                  x_test,
                  y_test,
                  trainer_args=None,
                  retrain=False):
        """Final training after found the best architecture.

        Args:
            x_train: A numpy.ndarray of training data.
            y_train: A numpy.ndarray of training targets.
            x_test: A numpy.ndarray of testing data.
            y_test: A numpy.ndarray of testing targets.
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
                Defaults to `{'max_no_improvement_num': 30}`.
            retrain: A boolean of whether reinitialize the weights of the model.
        """
        if trainer_args is None:
            trainer_args = {'max_no_improvement_num': 30}

        y_train = self.transform_y(y_train)
        y_test = self.transform_y(y_test)

        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        searcher = self.load_searcher()
        graph = searcher.load_best_model()

        if retrain:
            graph.weighted = False
        # NOTE(review): the graph returned by train() is not stored or saved
        # anywhere in this method -- confirm whether it should be persisted.
        _, _1, graph = train((graph, train_data, test_data, trainer_args, None,
                              self.metric, self.loss, self.verbose))

    def get_best_model_id(self):
        """ Return an integer indicating the id of the best model."""
        return self.load_searcher().get_best_model_id()

    def export_keras_model(self, model_file_name):
        """ Exports the best Keras model to the given filename. """
        self.load_searcher().load_best_model().produce_keras_model().save(
            model_file_name)

    def export_autokeras_model(self, model_file_name):
        """ Creates and Exports the AutoKeras model to the given filename. """
        portable_model = PortableImageSupervised(graph=self.load_searcher().load_best_model(),
                                                 y_encoder=self.y_encoder,
                                                 data_transformer=self.data_transformer)
        pickle_to_file(portable_model, model_file_name)
Example #8
0
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format,
        so the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
                `None` is treated as an empty list.
            y_train: A numpy.ndarray instance containing the label of the training data.
                It is flattened before use; `None` is treated as an empty list.
            time_limit: The time limit for the search in seconds. Defaults to 24 hours.

        Raises:
            TimeoutError: If the time ran out and the saved searcher has an empty history.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        y_train = self.transform_y(y_train)

        # Build the transformer only once so repeated calls reuse the first one.
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train, augment=self.augment)

        # Create the searcher and save on disk. `self.searcher` is only a
        # flag; the Searcher object itself is persisted via save_searcher().
        if not self.searcher:
            input_shape = x_train.shape[1:]
            self.searcher_args['n_output_node'] = self.get_n_output_node()
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['verbose'] = self.verbose
            searcher = Searcher(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                            test_size=min(Constant.VALIDATION_SET_SIZE,
                                                                          int(len(y_train) * 0.2)),
                                                            random_state=42)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier. The file is opened in a `with` block so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file targets the same path, so one of the
        # two writes looks redundant -- confirm before removing either.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                run_searcher_once(train_data, test_data, self.path, int(time_remain))
                if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # The time budget is used up: go through the handler below, which
            # only re-raises when the search produced no model at all.
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.load_searcher().history) == 0:
                raise TimeoutError("Search Time too short. No model was found during the search time.")
            elif self.verbose:
                print('Time is out.')
Example #9
0
class ImageSupervised(Supervised):
    """The image classifier class.

    It is used for image classification. It searches convolutional neural network architectures
    for the best configuration for the dataset.

    Attributes:
        path: A path to the directory to save the classifier.
        y_encoder: An instance of OneHotEncoder for `y_train` (array of categorical labels).
            Initialized to None by this class.
        data_transformer: The DataTransformer created from the training data on the first
            call to `fit`; None until then.
        verbose: A boolean value indicating the verbosity mode.
        searcher: A boolean flag. It is True once a Searcher has been created and saved
            under `path`; the Searcher object itself is read back with `load_searcher`.
        searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
        augment: A boolean value indicating whether the data needs augmentation.  If not defined, then it
                will use the value of Constant.DATA_AUGMENTATION which is True by default.
    """

    def __init__(self, verbose=False, path=None, resume=False, searcher_args=None, augment=None):
        """Initialize the instance.

        The classifier will be loaded from the files in 'path' if parameter 'resume' is True.
        Otherwise it would create a new one.

        Args:
            verbose: A boolean of whether the search process will be printed to stdout.
            path: A string. The path to a directory, where the intermediate results are saved.
                If None, a folder from `temp_folder_generator()` is used.
            resume: A boolean. If True, the classifier will continue to previous work saved in path.
                Otherwise, the classifier will start a new search.
            searcher_args: A dictionary of extra keyword arguments for the searcher's
                constructor. Defaults to an empty dictionary.
            augment: A boolean value indicating whether the data needs augmentation. If not defined, then it
                will use the value of Constant.DATA_AUGMENTATION which is True by default.

        """
        super().__init__(verbose)
        if searcher_args is None:
            searcher_args = {}

        if path is None:
            path = temp_folder_generator()

        if augment is None:
            augment = Constant.DATA_AUGMENTATION

        if has_file(os.path.join(path, 'classifier')) and resume:
            # Adopt the whole state of the pickled classifier, but keep the
            # path given to this call rather than the pickled one.
            classifier = pickle_from_file(os.path.join(path, 'classifier'))
            self.__dict__ = classifier.__dict__
            self.path = path
        else:
            self.y_encoder = None
            self.data_transformer = None
            self.verbose = verbose
            self.searcher = False
            self.path = path
            self.searcher_args = searcher_args
            self.augment = augment
            ensure_dir(path)

    @property
    @abstractmethod
    def metric(self):
        """The metric class; `evaluate` instantiates it and calls `compute`."""
        pass

    @property
    @abstractmethod
    def loss(self):
        """The loss handed to the searcher and to `train`; defined by subclasses."""
        pass

    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format,
        so the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
                `None` is treated as an empty list.
            y_train: A numpy.ndarray instance containing the label of the training data.
                It is flattened before use; `None` is treated as an empty list.
            time_limit: The time limit for the search in seconds. Defaults to 24 hours.

        Raises:
            TimeoutError: If the time ran out and the saved searcher has an empty history.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        y_train = self.transform_y(y_train)

        # Build the transformer only once so repeated calls reuse the first one.
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train, augment=self.augment)

        # Create the searcher and save on disk. `self.searcher` is only a
        # flag; the Searcher object itself is persisted via save_searcher().
        if not self.searcher:
            input_shape = x_train.shape[1:]
            self.searcher_args['n_output_node'] = self.get_n_output_node()
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['verbose'] = self.verbose
            searcher = Searcher(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                            test_size=min(Constant.VALIDATION_SET_SIZE,
                                                                          int(len(y_train) * 0.2)),
                                                            random_state=42)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier. The file is opened in a `with` block so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file targets the same path, so one of the
        # two writes looks redundant -- confirm before removing either.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                run_searcher_once(train_data, test_data, self.path, int(time_remain))
                if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # The time budget is used up: go through the handler below, which
            # only re-raises when the search produced no model at all.
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.load_searcher().history) == 0:
                raise TimeoutError("Search Time too short. No model was found during the search time.")
            elif self.verbose:
                print('Time is out.')

    @abstractmethod
    def get_n_output_node(self):
        """Return the value stored as the searcher's `n_output_node` argument."""
        pass

    def transform_y(self, y_train):
        """Return `y_train` unchanged; a hook subclasses may override."""
        return y_train

    def predict(self, x_test):
        """Return predict results for the testing data.

        Args:
            x_test: An instance of numpy.ndarray containing the testing data.

        Returns:
            A numpy.ndarray containing the results, after `inverse_transform_y`.
        """
        test_loader = self.data_transformer.transform_test(x_test)
        model = self.load_searcher().load_best_model().produce_model()
        model.eval()

        with torch.no_grad():
            outputs = [model(inputs).numpy() for inputs in test_loader]
        # One concatenation over all batches instead of re-copying the
        # accumulated result once per batch.
        output = np.concatenate(outputs)
        return self.inverse_transform_y(output)

    def inverse_transform_y(self, output):
        """Return `output` unchanged; a hook subclasses may override."""
        return output

    def evaluate(self, x_test, y_test):
        """Return the value of `self.metric` computed between `y_test` and the prediction for `x_test`."""
        y_predict = self.predict(x_test)
        return self.metric().compute(y_test, y_predict)

    def save_searcher(self, searcher):
        """Pickle `searcher` to the 'searcher' file under `self.path`."""
        with open(os.path.join(self.path, 'searcher'), 'wb') as searcher_file:
            pickle.dump(searcher, searcher_file)

    def load_searcher(self):
        """Load and return the searcher pickled under `self.path`."""
        return pickle_from_file(os.path.join(self.path, 'searcher'))

    def final_fit(self, x_train, y_train, x_test, y_test, trainer_args=None, retrain=False):
        """Final training after found the best architecture.

        Args:
            x_train: A numpy.ndarray of training data.
            y_train: A numpy.ndarray of training targets.
            x_test: A numpy.ndarray of testing data.
            y_test: A numpy.ndarray of testing targets.
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
                Defaults to `{'max_no_improvement_num': 30}`.
            retrain: A boolean of whether reinitialize the weights of the model.
        """
        if trainer_args is None:
            trainer_args = {'max_no_improvement_num': 30}

        y_train = self.transform_y(y_train)
        y_test = self.transform_y(y_test)

        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        searcher = self.load_searcher()
        graph = searcher.load_best_model()

        if retrain:
            graph.weighted = False
        # NOTE(review): the graph returned by train() is not stored or saved
        # anywhere in this method -- confirm whether it should be persisted.
        _, _1, graph = train((graph, train_data, test_data, trainer_args, None, self.metric, self.loss, self.verbose))

    def get_best_model_id(self):
        """ Return an integer indicating the id of the best model."""
        return self.load_searcher().get_best_model_id()

    def export_keras_model(self, model_file_name):
        """ Exports the best Keras model to the given filename. """
        self.load_searcher().load_best_model().produce_keras_model().save(model_file_name)

    def export_autokeras_model(self, model_file_name):
        """ Creates and Exports the AutoKeras model to the given filename. """
        portable_model = PortableImageSupervised(graph=self.load_searcher().load_best_model(),
                                                 y_encoder=self.y_encoder,
                                                 data_transformer=self.data_transformer,
                                                 metric=self.metric,
                                                 inverse_transform_y_method=self.inverse_transform_y)
        pickle_to_file(portable_model, model_file_name)
Example #10
0
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format,
        so the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
                `None` is treated as an empty list.
            y_train: A numpy.ndarray instance containing the label of the training data.
                It is flattened before use; `None` is treated as an empty list.
            time_limit: The time limit for the search in seconds. Defaults to 24 hours.
        """

        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        # Transform y_train. The encoder is fitted only on the first call so
        # later calls reuse the same label mapping.
        if self.y_encoder is None:
            self.y_encoder = OneHotEncoder()
            self.y_encoder.fit(y_train)

        y_train = self.y_encoder.transform(y_train)

        # Build the transformer only once so repeated calls reuse the first one.
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train,
                                                    augment=self.augment)

        # Create the searcher and save on disk. `self.searcher` is only a
        # flag; the searcher object itself is persisted via save_searcher().
        if not self.searcher:
            input_shape = x_train.shape[1:]
            n_classes = self.y_encoder.n_classes
            self.searcher_args['n_classes'] = n_classes
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['verbose'] = self.verbose
            searcher = BayesianSearcher(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(
            x_train,
            y_train,
            test_size=min(Constant.VALIDATION_SET_SIZE,
                          int(len(y_train) * 0.2)),
            random_state=42)

        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier. The file is opened in a `with` block so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file targets the same path, so one of the
        # two writes looks redundant -- confirm before removing either.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        # The limit is only checked between searcher runs, so a run that
        # starts before the deadline is not interrupted by this loop.
        start_time = time.time()
        while time.time() - start_time <= time_limit:
            run_searcher_once(train_data, test_data, self.path)
            if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                break
Example #11
0
class ImageClassifier:
    """The image classifier class.

    It is used for image classification. It searches convolutional neural network architectures
    for the best configuration for the dataset.

    Attributes:
        path: A path to the directory to save the classifier.
        y_encoder: An instance of OneHotEncoder for y_train (array of categorical labels).
            It is None until the first call to fit.
        data_transformer: An instance of DataTransformer built from x_train.
            It is None until the first call to fit.
        verbose: A boolean value indicating the verbosity mode.
        searcher: A boolean flag. It is True once a BayesianSearcher has been created and
            saved under path; the searcher object itself is read back with load_searcher().
        searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
        augment: A boolean value indicating whether the data needs augmentation.
    """
    def __init__(self,
                 verbose=False,
                 path=Constant.DEFAULT_SAVE_PATH,
                 resume=False,
                 searcher_args=None,
                 augment=None):
        """Initialize the instance.

        The classifier will be loaded from the files in 'path' if parameter 'resume' is True
        and a saved 'classifier' file exists there. Otherwise it would create a new one.

        Args:
            verbose: A boolean of whether the search process will be printed to stdout.
            path: A string. The path to a directory, where the intermediate results are saved.
            resume: A boolean. If True, the classifier will continue to previous work saved in path.
                Otherwise, the classifier will start a new search.
            searcher_args: A dictionary of extra keyword arguments for the BayesianSearcher
                constructor. It is copied, so the caller's dictionary is left untouched.
            augment: A boolean value indicating whether the data needs augmentation.
                Defaults to Constant.DATA_AUGMENTATION when None.

        """
        if searcher_args is None:
            searcher_args = {}

        if augment is None:
            augment = Constant.DATA_AUGMENTATION

        classifier_path = os.path.join(path, 'classifier')
        if resume and has_file(classifier_path):
            # NOTE: unpickling can execute arbitrary code; only resume from a trusted path.
            classifier = pickle_from_file(classifier_path)
            self.__dict__ = classifier.__dict__
            # The directory may have been moved since the classifier was saved.
            self.path = path
        else:
            self.y_encoder = None
            self.data_transformer = None
            self.verbose = verbose
            self.searcher = False
            self.path = path
            # fit() adds keys to this dictionary, so keep a private copy.
            self.searcher_args = dict(searcher_args)
            self.augment = augment
            ensure_dir(path)

    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format.
        So the training data should be passed through x_train, y_train.

        Part of the data is held out for validation: min(Constant.VALIDATION_SET_SIZE,
        20% of the samples), split with a fixed random_state. The search loop stops when
        the time limit is exceeded or the searcher history holds Constant.MAX_MODEL_NUM
        entries.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the labels of the training data.
                It is flattened to one dimension before use.
            time_limit: The time limit for the search in seconds. Defaults to 24 hours.
        """
        x_train = np.array([] if x_train is None else x_train)
        y_train = np.array([] if y_train is None else y_train).flatten()

        _validate(x_train, y_train)

        # Transform y_train.
        if self.y_encoder is None:
            self.y_encoder = OneHotEncoder()
            self.y_encoder.fit(y_train)

        y_train = self.y_encoder.transform(y_train)

        # Transform x_train
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train,
                                                    augment=self.augment)

        # Create the searcher and save on disk
        if not self.searcher:
            self.searcher_args['n_classes'] = self.y_encoder.n_classes
            self.searcher_args['input_shape'] = x_train.shape[1:]
            self.searcher_args['path'] = self.path
            self.searcher_args['verbose'] = self.verbose
            self.save_searcher(BayesianSearcher(**self.searcher_args))
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(
            x_train,
            y_train,
            test_size=min(Constant.VALIDATION_SET_SIZE,
                          int(len(y_train) * 0.2)),
            random_state=42)

        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Persist the classifier once so that ImageClassifier(resume=True) can reload it.
        # (A second, redundant dump through an unclosed file handle was removed.)
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        while time.time() - start_time <= time_limit:
            run_searcher_once(train_data, test_data, self.path)
            # The searcher is reloaded from disk to read its up-to-date history.
            if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                break

    def predict(self, x_test):
        """Return predict result for the testing data.

        Args:
            x_test: An instance of numpy.ndarray containing the testing data.

        Returns:
            A numpy.ndarray containing the results, decoded with
            y_encoder.inverse_transform.
        """
        test_data = self.data_transformer.transform_test(x_test)
        test_loader = DataLoader(test_data,
                                 batch_size=Constant.MAX_BATCH_SIZE,
                                 shuffle=False)
        model = self.load_searcher().load_best_model().produce_model()
        model.eval()

        with torch.no_grad():
            outputs = [model(inputs).numpy() for inputs in test_loader]
        # Join all batches in one call instead of re-copying the result per batch.
        output = np.concatenate(outputs)
        return self.y_encoder.inverse_transform(output)

    def evaluate(self, x_test, y_test):
        """Return the accuracy score between predict value and test_y.

        Args:
            x_test: An instance of numpy.ndarray containing the testing data.
            y_test: The true labels for x_test.

        Returns:
            The value of accuracy_score(y_test, self.predict(x_test)).
        """
        y_predict = self.predict(x_test)
        return accuracy_score(y_test, y_predict)

    def save_searcher(self, searcher):
        """Pickle the searcher into the 'searcher' file under self.path."""
        with open(os.path.join(self.path, 'searcher'), 'wb') as searcher_file:
            pickle.dump(searcher, searcher_file)

    def load_searcher(self):
        """Load and return the searcher from the 'searcher' file under self.path."""
        return pickle_from_file(os.path.join(self.path, 'searcher'))

    def final_fit(self,
                  x_train,
                  y_train,
                  x_test,
                  y_test,
                  trainer_args=None,
                  retrain=False):
        """Final training after found the best architecture.

        Args:
            x_train: A numpy.ndarray of training data.
            y_train: A numpy.ndarray of training targets.
            x_test: A numpy.ndarray of testing data.
            y_test: A numpy.ndarray of testing targets.
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
            retrain: A boolean. If True, graph.weighted is set to False before training
                (presumably so that the model weights are re-initialized).
        """
        if trainer_args is None:
            trainer_args = {}

        y_train = self.y_encoder.transform(y_train)
        y_test = self.y_encoder.transform(y_test)

        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        searcher = self.load_searcher()
        graph = searcher.load_best_model()

        if retrain:
            graph.weighted = False
        # NOTE(review): the values returned by train() were bound to unused names
        # and never written back to the searcher; confirm whether the trained
        # graph should be persisted here.
        train((graph, train_data, test_data, trainer_args, None, self.verbose))

    def get_best_model_id(self):
        """
        Returns:
            An integer. The best model id.
        """
        return self.load_searcher().get_best_model_id()
Example #12
0
File: gan.py Project: ifuding/TC
class DCGAN(Unsupervised):
    """Deep Convolutional Generative Adversarial Network.

    Holds a Generator and a Discriminator network. fit() trains both through
    GANModelTrainer and generate() writes the generator output to
    'evaluation.png' via vutils.save_image.
    """
    def __init__(self,
                 nz=100,
                 ngf=32,
                 ndf=32,
                 nc=3,
                 verbose=False,
                 gen_training_result=None,
                 augment=Constant.DATA_AUGMENTATION):
        """Store the hyper-parameters and build both networks.

        Args:
            nz: size of the latent z vector
            ngf: number of generator filters in the first conv layer
            ndf: number of discriminator filters in the first conv layer
            nc: number of input channels
            verbose: A boolean of whether the search process will be printed to stdout.
            gen_training_result: A tuple of (path, size) to denote where to output
                the intermediate result with size
            augment: A boolean value indicating whether the data needs augmentation.
        """
        super().__init__(verbose)
        self.nz, self.ngf, self.ndf, self.nc = nz, ngf, ndf, nc
        self.verbose = verbose
        self.gen_training_result = gen_training_result
        self.augment = augment
        # Created in fit() from the training data.
        self.data_transformer = None
        # The discriminator is constructed before the generator.
        self.net_d = Discriminator(self.nc, self.ndf)
        self.net_g = Generator(self.nc, self.nz, self.ngf)

    def fit(self, x_train):
        """Train the generator and the discriminator.

        Args:
            x_train: ndarray containing the training data

        Returns:
            None.
        """
        # The input size stays the same, so enable the cudnn optimization.
        cudnn.benchmark = True
        transformer = DataTransformer(x_train, augment=self.augment)
        self.data_transformer = transformer
        trainer = GANModelTrainer(self.net_g,
                                  self.net_d,
                                  transformer.transform_train(x_train),
                                  binary_classification_loss,
                                  self.verbose,
                                  self.gen_training_result)
        trainer.train_model()

    def generate(self, input_sample=None):
        """Run the generator and save its output as an image.

        Args:
            input_sample: A torch.Tensor or numpy.ndarray fed to the generator.
                When None, a random normal tensor of shape
                (gen_training_result[1], nz, 1, 1) is drawn on get_device().

        Raises:
            TypeError: If input_sample is neither a torch.Tensor nor a numpy.ndarray.
        """
        if input_sample is None:
            sample_count = self.gen_training_result[1]
            input_sample = torch.randn(sample_count, self.nz, 1, 1,
                                       device=get_device())
        if not isinstance(input_sample, torch.Tensor):
            if not isinstance(input_sample, np.ndarray):
                raise TypeError(
                    "Input should be a torch.tensor or a numpy.ndarray")
            input_sample = torch.from_numpy(input_sample)

        self.net_g.eval()
        with torch.no_grad():
            generated_fake = self.net_g(input_sample)

        detached = generated_fake.detach()
        target = '%s/evaluation.png' % self.gen_training_result[0]
        vutils.save_image(detached, target, normalize=True)
Example #13
0
from autokeras.net_transformer import default_transform
from autokeras.preprocessor import OneHotEncoder, DataTransformer
from autokeras.utils import ModelTrainer


if __name__ == '__main__':
    # Load the CIFAR-10 train/test splits.
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    print('Start Encoding')
    # Fit the label encoder on the training labels only, then encode both splits.
    encoder = OneHotEncoder()
    encoder.fit(y_train)
    y_train, y_test = encoder.transform(y_train), encoder.transform(y_test)

    # The transformer is built from the training images with augmentation on.
    data_transformer = DataTransformer(x_train, augment=True)
    train_data = data_transformer.transform_train(x_train, y_train)
    test_data = data_transformer.transform_test(x_test, y_test)

    print('Start Generating')
    # Default classifier graph for 10 classes and the per-sample input shape.
    generator = DefaultClassifierGenerator(10, x_train.shape[1:])
    keras_model = generator.generate().produce_model()

    print('Start Training')
    trainer = ModelTrainer(keras_model, train_data, test_data, True)
    loss, acc = trainer.train_model(max_no_improvement_num=100,
                                    batch_size=128)
    print(loss, acc)
Example #14
0
def get_classification_train_data_loaders():
    """Return transformed training data built from random input.

    Draws a (200, 32, 32, 3) array of uniform random values, builds an
    augmenting DataTransformer from it, and returns whatever
    transform_train produces for that array (no labels are passed).
    """
    random_images = np.random.rand(200, 32, 32, 3)
    transformer = DataTransformer(random_images, augment=True)
    return transformer.transform_train(random_images)
                              'max_iter_num': 5
                          }})
    #clf = ImageClassifier(verbose=True, path='d:/tmp/autokeras/', searcher_args={'trainer_args':{'max_iter_num':5}})

    # 3. Fitting
    # time_limit: in seconds; the search stops automatically once this time has elapsed.
    clf.fit(x_train, y_train, time_limit=24 * 60 * 60)

    # 3-1. Load saved model (after step 3 has been run once, comment out step 3)

    # If you reloaded a saved clf, y_encoder and data_transformer must be re-created as follows.
    from autokeras.preprocessor import OneHotEncoder, DataTransformer
    from autokeras.constant import Constant
    clf.y_encoder = OneHotEncoder()
    clf.y_encoder.fit(y_train)
    clf.data_transformer = DataTransformer(x_train,
                                           augment=Constant.DATA_AUGMENTATION)

    #print(clf.get_best_model_id())

    searcher = clf.load_searcher()
    #print(searcher.history)

    # 3-2. fitting finally and saving model
    clf.final_fit(x_train,
                  y_train,
                  x_test,
                  y_test,
                  retrain=False,
                  trainer_args={'max_iter_num': 10})
    y = clf.evaluate(x_test, y_test)
    print(y)