Example No. 1
    def search(self, train_data, test_data):
        if not self.history:
            self.init_search()

        # Start the new process for training.
        graph, father_id, model_id = self.training_queue.pop(0)
        if self.verbose:
            print('Training model ', model_id)
        pool = multiprocessing.Pool(1)
        train_results = pool.map_async(
            train, [(graph, train_data, test_data, self.trainer_args,
                     os.path.join(self.path,
                                  str(model_id) + '.png'))])

        # Do the search in current thread.
        if not self.training_queue:
            new_graph, new_father_id = self.maximize_acq()
            new_model_id = self.model_count
            self.model_count += 1
            self.training_queue.append(
                (new_graph, new_father_id, new_model_id))

        accuracy, loss, graph = train_results.get()[0]
        pool.terminate()
        pool.join()
        self.add_model(accuracy, loss, graph, model_id)
        self.search_tree.add_child(father_id, model_id)
        self.gpr.fit(self.x_queue, self.y_queue)
        self.x_queue = []
        self.y_queue = []

        pickle_to_file(self, os.path.join(self.path, 'searcher'))
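
All of these snippets persist objects through a pickle_to_file helper that the excerpts never define. A minimal sketch of what such a helper plausibly looks like, together with a matching loader (the two wrappers below are an assumption, not the verified library implementation):

import pickle

def pickle_to_file(obj, path):
    # Serialize any picklable object (a graph, a searcher, a classifier) to disk.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def pickle_from_file(path):
    # Inverse helper: restore a previously saved object.
    with open(path, 'rb') as f:
        return pickle.load(f)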
Example No. 2
    def fit(self, x, y, x_test=None, y_test=None, time_limit=None):
        x = np.array(x)
        y = np.array(y).flatten()
        validate_xy(x, y)
        y = self.transform_y(y)
        if x_test is None or y_test is None:
            # Divide training data into training and testing data.
            validation_set_size = int(len(y) * Constant.VALIDATION_SET_SIZE)
            validation_set_size = min(validation_set_size, 500)
            validation_set_size = max(validation_set_size, 1)
            x_train, x_test, y_train, y_test = train_test_split(
                x, y, test_size=validation_set_size, random_state=42)
        else:
            x_train = x
            y_train = y
        # Transform x_train
        if self.data_transformer is None:
            self.data_transformer = ImageDataTransformer(x,
                                                         augment=self.augment)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)
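
The clamped validation-split sizing in fit is easy to exercise in isolation. A runnable sketch with hypothetical data; the 0.08 stands in for Constant.VALIDATION_SET_SIZE, whose real value is not shown here:

import numpy as np
from sklearn.model_selection import train_test_split

VALIDATION_SET_SIZE = 0.08  # assumed stand-in for Constant.VALIDATION_SET_SIZE

x = np.random.rand(1000, 28, 28, 1)      # hypothetical image data
y = np.random.randint(0, 10, size=1000)  # hypothetical labels

# Clamp the validation count to [1, 500], as in fit() above.
validation_set_size = int(len(y) * VALIDATION_SET_SIZE)
validation_set_size = min(validation_set_size, 500)
validation_set_size = max(validation_set_size, 1)

# An integer test_size is interpreted by scikit-learn as an absolute sample count.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=validation_set_size, random_state=42)
print(len(x_train), len(x_test))  # 920 80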
Example No. 3
    def add_model(self, metric_value, loss, graph, model_id):
        if self.verbose:
            print('\nSaving model.')

        pickle_to_file(graph, os.path.join(self.path, str(model_id) + '.h5'))

        # Update best_model text file
        ret = {'model_id': model_id, 'loss': loss, 'metric_value': metric_value}
        self.history.append(ret)
        if model_id == self.get_best_model_id():
            with open(os.path.join(self.path, 'best_model.txt'), 'w') as file:
                file.write('best model: ' + str(model_id))

        if self.verbose:
            idx = ['model_id', 'loss', 'metric_value']
            header = ['Model ID', 'Loss', 'Metric Value']
            line = '|'.join(x.center(24) for x in header)
            print('+' + '-' * len(line) + '+')
            print('|' + line + '|')
            for i, r in enumerate(self.history):
                print('+' + '-' * len(line) + '+')
                line = '|'.join(str(r[x]).center(24) for x in idx)
                print('|' + line + '|')
            print('+' + '-' * len(line) + '+')

        descriptor = graph.extract_descriptor()
        self.x_queue.append(descriptor)
        self.y_queue.append(metric_value)

        return ret
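
The verbose branch above renders the history as an ASCII table. The same logic lifted into a standalone, testable function (the function name is ours):

def print_history_table(history,
                        keys=('model_id', 'loss', 'metric_value'),
                        headers=('Model ID', 'Loss', 'Metric Value')):
    # Render a list of history dicts exactly as the verbose branch does.
    line = '|'.join(h.center(24) for h in headers)
    print('+' + '-' * len(line) + '+')
    print('|' + line + '|')
    for record in history:
        print('+' + '-' * len(line) + '+')
        row = '|'.join(str(record[k]).center(24) for k in keys)
        print('|' + row + '|')
    print('+' + '-' * len(line) + '+')

print_history_table([{'model_id': 0, 'loss': 0.42, 'metric_value': 0.91}])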
Example No. 4
    def add_model(self, metric_value, loss, graph, model_id):
        """Append the information of evaluated architecture to history."""
        if self.verbose:
            print('\nSaving model.')

        graph.clear_operation_history()
        pickle_to_file(graph, os.path.join(self.path, str(model_id) + '.graph'))

        ret = {'model_id': model_id, 'loss': loss, 'metric_value': metric_value}
        self.neighbour_history.append(ret)
        self.history.append(ret)

        # Update best_model text file
        if model_id == self.get_best_model_id():
            with open(os.path.join(self.path, 'best_model.txt'), 'w') as file:
                file.write('best model: ' + str(model_id))

        if self.verbose:
            idx = ['model_id', 'loss', 'metric_value']
            header = ['Model ID', 'Loss', 'Metric Value']
            line = '|'.join(x.center(24) for x in header)
            print('+' + '-' * len(line) + '+')
            print('|' + line + '|')

            if self.history:
                r = self.history[-1]
                print('+' + '-' * len(line) + '+')
                line = '|'.join(str(r[x]).center(24) for x in idx)
                print('|' + line + '|')
            print('+' + '-' * len(line) + '+')

        return ret
Example No. 5
    def search(self, train_data, test_data, timeout=60 * 60 * 24):
        start_time = time.time()
        torch.cuda.empty_cache()
        if not self.history:
            self.init_search()

        # Start the new process for training.
        graph, father_id, model_id = self.training_queue.pop(0)
        if self.verbose:
            print('Training model ', model_id)
        multiprocessing.set_start_method('spawn', force=True)
        pool = multiprocessing.Pool(1)
        train_results = pool.map_async(
            train, [(graph, train_data, test_data, self.trainer_args,
                     os.path.join(self.path,
                                  str(model_id) + '.png'), self.metric,
                     self.loss, self.verbose)])

        # Do the search in current thread.
        try:
            if not self.training_queue:
                new_graph, new_father_id = self.bo.optimize_acq(
                    self.search_tree.adj_list.keys(), self.descriptors,
                    timeout)
                new_model_id = self.model_count
                self.model_count += 1
                self.training_queue.append(
                    (new_graph, new_father_id, new_model_id))
                self.descriptors.append(new_graph.extract_descriptor())

                if self.verbose:
                    print('Father ID: ', new_father_id)
                    print(new_graph.operation_history)
            remaining_time = timeout - (time.time() - start_time)
            if remaining_time > 0:
                metric_value, loss, graph = train_results.get(
                    timeout=remaining_time)[0]
            else:
                raise TimeoutError
        except multiprocessing.TimeoutError as e:
            # if no model found in the time limit, raise TimeoutError
            if self.model_count == 0:
                # convert multiprocessing.TimeoutError to the built-in TimeoutError for a cleaner user-facing error
                raise TimeoutError("search Timeout") from e
            # else return the result found in the time limit
            else:
                return
        finally:
            # terminate and join the subprocess to prevent any resource leak
            pool.terminate()
            pool.join()

        self.add_model(metric_value, loss, graph, model_id)
        self.search_tree.add_child(father_id, model_id)
        self.bo.fit(self.x_queue, self.y_queue)
        self.x_queue = []
        self.y_queue = []

        pickle_to_file(self, os.path.join(self.path, 'searcher'))
        self.export_json(os.path.join(self.path, 'history.json'))
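
This version of search trains in a one-worker pool while the acquisition step runs in the main thread, then collects the result under a deadline. The skeleton of that pattern, reduced to a self-contained script (slow_job stands in for train):

import multiprocessing
import time

def slow_job(n):
    # Stand-in for train(): burn some time, return a result.
    time.sleep(1)
    return n * n

if __name__ == '__main__':
    multiprocessing.set_start_method('spawn', force=True)
    pool = multiprocessing.Pool(1)
    async_result = pool.map_async(slow_job, [7])
    try:
        # ... do other work here, like optimize_acq() above ...
        value = async_result.get(timeout=5)[0]  # block up to the remaining budget
        print('result:', value)
    except multiprocessing.TimeoutError:
        print('worker exceeded the time budget')
    finally:
        pool.terminate()  # prevent resource leaks, as in the finally block above
        pool.join()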
Example No. 6
    def add_model(self, metric_value, loss, graph, model_id):
        if self.verbose:
            print('Saving model.')

        pickle_to_file(graph, os.path.join(self.path, str(model_id) + '.h5'))

        if self.verbose:
            print('Model ID:', model_id)
            print('Loss:', loss)
            print('Metric Value:', metric_value)

        ret = {
            'model_id': model_id,
            'loss': loss,
            'metric_value': metric_value
        }
        self.history.append(ret)
        # Update best_model text file
        if model_id == self.get_best_model_id():
            with open(os.path.join(self.path, 'best_model.txt'), 'w') as file:
                file.write('best model: ' + str(model_id))

        descriptor = graph.extract_descriptor()
        self.x_queue.append(descriptor)
        self.y_queue.append(metric_value)

        return ret
Example No. 7
    def search(self, x_train, y_train, x_test, y_test):
        """Override parent's search function. First model is randomly generated"""
        if not self.history:
            model = DefaultClassifierGenerator(self.n_classes,
                                               self.input_shape).generate()
            self.add_model(model, x_train, y_train, x_test, y_test)
            pickle_to_file(self, os.path.join(self.path, 'searcher'))

        else:
            model = self.load_best_model()
            new_graphs = transform(Graph(model, False))
            new_models = []
            for graph in new_graphs:
                nm_graph = Graph(model, True)
                for args in graph.operation_history:
                    getattr(nm_graph, args[0])(*list(args[1:]))
                # Append once per transformed graph, not once per operation.
                new_models.append(nm_graph.produce_model())
            new_models = self._remove_duplicate(new_models)

            for model in new_models:
                if self.model_count < constant.MAX_MODEL_NUM:
                    self.add_model(model, x_train, y_train, x_test, y_test)
                    pickle_to_file(self, os.path.join(self.path, 'searcher'))

            backend.clear_session()

        return self.load_best_model()
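
The inner loop replays a recorded operation_history onto a fresh graph by dispatching each (method_name, *args) tuple with getattr. The replay pattern in miniature, on a hypothetical class:

class Canvas:
    # Hypothetical object whose recorded mutations we replay.
    def __init__(self):
        self.ops = []

    def draw(self, shape):
        self.ops.append(('draw', shape))

    def erase(self, shape):
        self.ops.append(('erase', shape))

# Each entry is (method_name, *args), like graph.operation_history above.
history = [('draw', 'circle'), ('erase', 'circle'), ('draw', 'square')]

canvas = Canvas()
for entry in history:
    getattr(canvas, entry[0])(*entry[1:])
print(canvas.ops)  # [('draw', 'circle'), ('erase', 'circle'), ('draw', 'square')]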
Example No. 8
    def add_model(self, metric_value, loss, graph, model_id):
        if self.verbose:
            print('\nSaving model.')

        pickle_to_file(graph, os.path.join(self.path, str(model_id) + '.h5'))

        # Update best_model text file
        ret = {
            'model_id': model_id,
            'loss': loss,
            'metric_value': metric_value
        }
        self.history.append(ret)
        if model_id == self.get_best_model_id():
            with open(os.path.join(self.path, 'best_model.txt'), 'w') as file:
                file.write('best model: ' + str(model_id))

        if self.verbose:
            idx = ['model_id', 'loss', 'metric_value']
            header = ['Model ID', 'Loss', 'Metric Value']
            line = '|'.join(x.center(24) for x in header)
            print('+' + '-' * len(line) + '+')
            print('|' + line + '|')
            for i, r in enumerate(self.history):
                print('+' + '-' * len(line) + '+')
                line = '|'.join(str(r[x]).center(24) for x in idx)
                print('|' + line + '|')
            print('+' + '-' * len(line) + '+')

        descriptor = graph.extract_descriptor()
        self.x_queue.append(descriptor)
        self.y_queue.append(metric_value)

        return ret
Example No. 9
    def add_model(self, accuracy, loss, graph, model_id):
        if self.verbose:
            print('Saving model.')

        pickle_to_file(graph, os.path.join(self.path, str(model_id) + '.h5'))

        if self.verbose:
            print('Model ID:', model_id)
            print('Loss:', loss)
            print('Accuracy:', accuracy)

        ret = {'model_id': model_id, 'loss': loss, 'accuracy': accuracy}
        descriptor = graph.extract_descriptor()
        self.history.append(ret)
        # Update best_model text file
        if model_id == self.get_best_model_id():
            with open(os.path.join(self.path, 'best_model.txt'), 'w') as file:
                file.write('best model: ' + str(model_id))

        self.descriptors[descriptor] = True
        self.x_queue.append(descriptor)
        self.y_queue.append(accuracy)

        return ret
Example No. 10
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format.
        So the training data should be passed through x_train, y_train.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the labels of the training data.
            time_limit: The time limit for the search in seconds.
        """

        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        # Transform y_train.
        if self.y_encoder is None:
            self.y_encoder = OneHotEncoder()
            self.y_encoder.fit(y_train)

        y_train = self.y_encoder.transform(y_train)

        # Create the searcher and save on disk
        if not self.searcher:
            input_shape = x_train.shape[1:]
            n_classes = self.y_encoder.n_classes
            self.searcher_args['n_classes'] = n_classes
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['verbose'] = self.verbose
            searcher = BayesianSearcher(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(x_train,
                                                            y_train,
                                                            test_size=0.25,
                                                            random_state=42)

        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        while time.time() - start_time <= time_limit:
            run_searcher_once(x_train, y_train, x_test, y_test, self.path)
            if len(self.load_searcher().history) >= constant.MAX_MODEL_NUM:
                break
Example No. 11
    def fit(self,
            x,
            y,
            x_test=None,
            y_test=None,
            batch_size=None,
            time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format.
        So the training data should be passed through `x_train`, `y_train`.

        Args:
            x: A numpy.ndarray instance containing the training data.
            y: A numpy.ndarray instance containing the labels of the training data.
            x_test: A numpy.ndarray instance containing the testing data.
            y_test: A numpy.ndarray instance containing the labels of the testing data.
            batch_size: An integer defining the batch size.
            time_limit: The time limit for the search in seconds.
        """
        x = text_preprocess(x, path=self.path)

        x = np.array(x)
        y = np.array(y)
        validate_xy(x, y)
        y = self.transform_y(y)

        if batch_size is None:
            batch_size = Constant.MAX_BATCH_SIZE
        # Divide training data into training and testing data.
        if x_test is None or y_test is None:
            x_train, x_test, y_train, y_test = train_test_split(
                x,
                y,
                test_size=min(Constant.VALIDATION_SET_SIZE, int(len(y) * 0.2)),
                random_state=42)
        else:
            x_train = x
            y_train = y

        # Wrap the data into DataLoaders
        if self.data_transformer is None:
            self.data_transformer = TextDataTransformer()

        train_data = self.data_transformer.transform_train(
            x_train, y_train, batch_size=batch_size)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'text_classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)
Example No. 12
 def export_autokeras_model(self, model_file_name):
     """ Creates and Exports the AutoKeras model to the given filename. """
     portable_model = PortableImageRegressor(graph=self.cnn.best_model,
                                             y_encoder=self.y_encoder,
                                             data_transformer=self.data_transformer,
                                             resize_params=self.resize_shape,
                                             path=self.path)
     pickle_to_file(portable_model, model_file_name)
Example No. 13
 def export_autokeras_model(self, model_file_name):
     """ Creates and Exports the AutoKeras model to the given filename. """
     portable_model = PortableImageSupervised(graph=self.load_searcher().load_best_model(),
                                              y_encoder=self.y_encoder,
                                              data_transformer=self.data_transformer,
                                              metric=self.metric,
                                              inverse_transform_y_method=self.inverse_transform_y)
     pickle_to_file(portable_model, model_file_name)
Example No. 14
 def export_autokeras_model(self, model_file_name):
     """ Creates and Exports the AutoKeras model to the given filename. """
     portable_model = PortableImageSupervised(graph=self.cnn.best_model,
                                              y_encoder=self.y_encoder,
                                              data_transformer=self.data_transformer,
                                              metric=self.metric,
                                              inverse_transform_y_method=self.inverse_transform_y,
                                              resize_params=(self.resize_height, self.resize_width))
     pickle_to_file(portable_model, model_file_name)
Example No. 15
 def save_preprocessors(self, path):
     configs = {}
     weights = {}
     for block in self._blocks:
         if isinstance(block, base.Preprocessor):
             configs[block.name] = block.get_config()
             weights[block.name] = block.get_weights()
     preprocessors = {'configs': configs, 'weights': weights}
     utils.pickle_to_file(preprocessors, path)
Example No. 16
    def search(self, x_train, y_train, x_test, y_test):
        """Override parent's search function. First model is randomly generated"""
        while self.model_count < constant.MAX_MODEL_NUM:
            model = RandomConvClassifierGenerator(self.n_classes,
                                                  self.input_shape).generate()
            self.add_model(model, x_train, y_train, x_test, y_test)
            pickle_to_file(self, os.path.join(self.path, 'searcher'))
            backend.clear_session()

        return self.load_best_model()
Example No. 17
    def fit(self,
            n_output_node,
            input_shape,
            train_data,
            test_data,
            time_limit=24 * 60 * 60):
        """ Search the best network.

        Args:
            n_output_node: An integer value representing the number of output nodes in the final layer.
            input_shape: A tuple expressing the shape of every training entry. For example,
                the MNIST dataset would be (28, 28, 1).
            train_data: A PyTorch DataLoader instance representing the training data.
            test_data: A PyTorch DataLoader instance representing the testing data.
            time_limit: An integer value representing the time limit, in seconds, on searching for models.
        """
        # Create the searcher and save on disk

        if not self.searcher:
            input_shape = input_shape[1:]
            self.searcher_args['n_output_node'] = n_output_node
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['generators'] = self.generators
            self.searcher_args['verbose'] = self.verbose
            pickle_to_file(self, os.path.join(self.path, 'module'))
            if self.search_type == Constant.BAYESIAN_SEARCH:
                self.searcher = BayesianSearcher(**self.searcher_args)
            elif self.search_type == Constant.GRID_SEARCH:
                self.searcher = GridSearcher(**self.searcher_args)
            else:
                self.searcher = GreedySearcher(**self.searcher_args)

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                self.searcher.search(train_data, test_data, int(time_remain))
                pickle_to_file(self, os.path.join(self.path, 'module'))
                if len(self.searcher.history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # if no search executed during the time_limit, then raise an error
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.searcher.history) == 0:
                raise TimeoutError(
                    "Search Time too short. No model was found during the search time."
                )
            elif self.verbose:
                print('Time is out.')
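
The try/while/except structure here is a reusable time-budget loop: search repeatedly, stop early at the model cap, and distinguish "budget exhausted after finding models" from "budget too short to find any". A generic sketch of the same control flow (all names are ours):

import time

def run_with_budget(search_once, history, time_limit, max_models, verbose=True):
    # search_once(seconds) performs one search step and appends results to history.
    start_time = time.time()
    time_remain = time_limit
    try:
        while time_remain > 0:
            search_once(int(time_remain))
            if len(history) >= max_models:
                return
            time_remain = time_limit - (time.time() - start_time)
        raise TimeoutError
    except TimeoutError:
        if not history:
            raise TimeoutError('Search time too short. No model was found.')
        if verbose:
            print('Time is out.')

history = []
run_with_budget(lambda t: history.append('model'), history, time_limit=5, max_models=3)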
Example No. 18
 def export_autokeras_model(self, model_file_name):
     """
     Creates and Exports the AutoKeras model to the given filename.
     Args:
         model_file_name: the name of the model to safe
     """
     portable_model = PortableRegressor(
         graph=self.cnn.best_model,
         y_encoder=self.y_encoder,
         data_transformer=self.data_transformer,
         path=self.path)
     pickle_to_file(portable_model, model_file_name)
Example No. 19
    def save_preprocessors(self, path):
        """Save the preprocessors in the hypermodel in a single file.

        Args:
            path: String. The path to a single file.
        """
        if self.contains_hyper_block():
            self._plain_graph_hm.save_preprocessors(path)
            return
        preprocessors = {}
        for block in self._blocks:
            if isinstance(block, preprocessor.Preprocessor):
                preprocessors[block.name] = block.get_state()
        utils.pickle_to_file(preprocessors, path)
Example No. 20
 def export_autokeras_model(self, model_file_name):
     """ Creates and Exports the AutoKeras model to the given filename.
     
     Args:
         model_file_name: A string containing the name of the file to which the model should be saved
     
     Effects:
         Saves the AutoKeras model to a file.
     """
     portable_model = PortableImageClassifier(graph=self.cnn.best_model,
                                              y_encoder=self.y_encoder,
                                              data_transformer=self.data_transformer,
                                              resize_params=self.resize_shape,
                                              path=self.path)
     pickle_to_file(portable_model, model_file_name)
Example No. 21
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format.
        So the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the labels of the training data.
            time_limit: The time limit for the search in seconds.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        y_train = self.transform_y(y_train)

        # Transform x_train
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train, augment=self.augment)

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                            test_size=min(Constant.VALIDATION_SET_SIZE,
                                                                          int(len(y_train) * 0.2)),
                                                            random_state=42)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data, test_data, time_limit)
Example No. 22
    def save_preprocessors(self, path):
        """Save the preprocessors in the hypermodel in a single file.

        Args:
            path: String. The path to a single file.
        """
        if self.contains_hyper_block():
            self._plain_graph_hm.save_preprocessors(path)
            return
        configs = {}
        weights = {}
        for block in self._blocks:
            if isinstance(block, preprocessor.Preprocessor):
                configs[block.name] = block.get_config()
                weights[block.name] = block.get_weights()
        preprocessors = {'configs': configs, 'weights': weights}
        utils.pickle_to_file(preprocessors, path)
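
save_preprocessors writes a single {'configs': ..., 'weights': ...} dict to one pickle file. A hypothetical counterpart for reading it back (load_preprocessors is our name; the source does not show the loading side):

import pickle

def load_preprocessors(path):
    # Read back the dict written by save_preprocessors above.
    with open(path, 'rb') as f:
        preprocessors = pickle.load(f)
    return preprocessors['configs'], preprocessors['weights']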
Example No. 23
    def fit(self, x, y, x_test=None, y_test=None, time_limit=None):
        x = np.array(x)

        if len(x.shape) != 0 and len(x[0].shape) == 3:
            if self.verbose:
                print("Preprocessing the images.")
            self.resize_height, self.resize_width = compute_image_resize_params(
                x)
            x = resize_image_data(x, self.resize_height, self.resize_width)
            if x_test is not None:
                x_test = resize_image_data(x_test, self.resize_height,
                                           self.resize_width)
            if self.verbose:
                print("Preprocessing finished.")

        y = np.array(y).flatten()
        validate_xy(x, y)
        y = self.transform_y(y)
        if x_test is None or y_test is None:
            # Divide training data into training and testing data.
            validation_set_size = int(len(y) * Constant.VALIDATION_SET_SIZE)
            validation_set_size = min(validation_set_size, 500)
            validation_set_size = max(validation_set_size, 1)
            x_train, x_test, y_train, y_test = train_test_split(
                x, y, test_size=validation_set_size, random_state=42)
        else:
            x_train = x
            y_train = y
        # Transform x_train
        if self.data_transformer is None:
            self.data_transformer = ImageDataTransformer(x,
                                                         augment=self.augment)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)
Example No. 24
    def fit(self, x, y, time_limit=None):
        """Find the best neural architecture for classifying the training data and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset must be in numpy.ndarray format.
        The training and validation data should be passed through `x`, `y`; this method
        automatically splits them into training and validation sets.

        Args:
            x: A numpy.ndarray instance containing the training data or the training data combined with the
               validation data.
            y: A numpy.ndarray instance containing the labels of the training data, or the labels of the
               training data combined with the validation labels.
            time_limit: The time limit for the search in seconds (optional; defaults to None, which is treated as 24 hours).
            
        Effects:
            Trains a model that fits the data using the best neural architecture
        """
        validate_xy(x, y)
        y, flags = y[:, 0], y[:, 1]
        y = self.transform_y(y)
        # Divide training data into training and validation data.
        validation_set_size = int(len(y) * Constant.VALIDATION_SET_SIZE)
        validation_set_size = min(validation_set_size, 500)
        validation_set_size = max(validation_set_size, 1)
        import numpy as np
        y = np.concatenate((y, flags.reshape(-1, 1)), axis=1)
        x_train, x_valid, y_train, y_valid = train_test_split(
            x, y, test_size=validation_set_size, random_state=42)
        self.init_transformer(x)
        # Transform x_train

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        valid_data = self.data_transformer.transform_test(x_valid, y_valid)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     valid_data, time_limit)
Example No. 25
    def fit(self, x, y, time_limit=None):
        """Find the best neural architecture for classifying the training data and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset must be in numpy.ndarray format.
        The training and validation data should be passed through `x`, `y`; this method
        automatically splits them into training and validation sets.

        Args:
            x: A numpy.ndarray instance containing the training data or the training data combined with the
               validation data.
            y: A numpy.ndarray instance containing the labels of the training data, or the labels of the
               training data combined with the validation labels.
            time_limit: The time limit for the search in seconds (optional; defaults to None, which is treated as 24 hours).
            
        Effects:
            Trains a model that fits the data using the best neural architecture
        """
        validate_xy(x, y)
        y = self.transform_y(y)
        # Divide training data into training and validation data.
        validation_set_size = int(len(y) * Constant.VALIDATION_SET_SIZE)
        validation_set_size = min(validation_set_size, 500)
        validation_set_size = max(validation_set_size, 1)
        x_train, x_valid, y_train, y_valid = train_test_split(x, y,
                                                            test_size=validation_set_size,
                                                            random_state=42)
        # TODO(developers): why is the transformer initialized after the data has been split?
        self.init_transformer(x)
        # Transform x_train

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        valid_data = self.data_transformer.transform_test(x_valid, y_valid)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        # TODO(developers): why apply the 24-hour default here rather than in the function signature?
        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data, valid_data, time_limit)
Example No. 26
    def fit(self, n_output_node, input_shape, train_data, test_data, time_limit=24 * 60 * 60):
        """ Search the best network.

        Args:
            n_output_node: An integer value representing the number of output nodes in the final layer.
            input_shape: A tuple expressing the shape of every training entry. For example,
                the MNIST dataset would be (28, 28, 1).
            train_data: A PyTorch DataLoader instance representing the training data.
            test_data: A PyTorch DataLoader instance representing the testing data.
            time_limit: An integer value representing the time limit, in seconds, on searching for models.
        """
        # Create the searcher and save on disk

        if not self.searcher:
            input_shape = input_shape[1:]
            self.searcher_args['n_output_node'] = n_output_node
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['generators'] = self.generators
            self.searcher_args['verbose'] = self.verbose
            pickle_to_file(self, os.path.join(self.path, 'module'))
            self.searcher = self.search_type(**self.searcher_args)

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                self.searcher.search(train_data, test_data, int(time_remain))
                pickle_to_file(self, os.path.join(self.path, 'module'))
                if len(self.searcher.history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # if no search executed during the time_limit, then raise an error
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.searcher.history) == 0:
                raise TimeoutError("Search Time too short. No model was found during the search time.")
            elif self.verbose:
                print('Time is out.')
Example No. 27
    def final_fit(self,
                  train_data,
                  test_data,
                  trainer_args=None,
                  retrain=False):
        """Final training after found the best architecture.

        Args:
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
            retrain: A boolean of whether reinitialize the weights of the model.
            train_data: A DataLoader instance representing the training data.
            test_data: A DataLoader instance representing the testing data.
        """
        graph = self.searcher.load_best_model()

        if retrain:
            graph.weighted = False
        _, _1, graph = train(None, graph, train_data, test_data, trainer_args,
                             self.metric, self.loss, self.verbose, self.path)
        self.searcher.replace_model(graph, self.searcher.get_best_model_id())
        pickle_to_file(self, os.path.join(self.path, 'module'))
Example No. 28
    def final_fit(self,
                  x_train,
                  y_train,
                  x_test,
                  y_test,
                  trainer_args=None,
                  retrain=True):
        x_train = self.preprocess(x_train)
        x_test = self.preprocess(x_test)

        self.encoder.fit(y_train)
        y_train = self.encoder.transform(y_train)
        y_test = self.encoder.transform(y_test)

        self.data_transformer = ImageDataTransformer(x_train,
                                                     augment=self.augment)
        train_data = self.data_transformer.transform_train(x_train,
                                                           y_train,
                                                           batch_size=1)
        test_data = self.data_transformer.transform_test(x_test,
                                                         y_test,
                                                         batch_size=1)

        self.net = CnnGenerator(self.encoder.n_classes, x_train.shape[1:]) \
            .generate(model_len=self.Length, model_width=self.Width).produce_model()

        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        self.model_trainer = ModelTrainer(self.net,
                                          path=self.path,
                                          loss_function=classification_loss,
                                          metric=Accuracy,
                                          train_data=train_data,
                                          test_data=test_data,
                                          verbose=True)

        self.model_trainer.train_model(self.Epochs, 3)
        print('Finished Final Fit')
Example No. 29
    def final_fit(self, train_data, test_data, trainer_args=None, retrain=False):
        """Final training after found the best architecture.

        Args:
            train_data: A DataLoader instance representing the training data.
            test_data: A DataLoader instance representing the testing data.
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
            retrain: A boolean indicating whether to reinitialize the weights of the model.
        """
        graph = self.searcher.load_best_model()

        if retrain:
            graph.weighted = False
        _, _1, graph = train(None, graph,
                             train_data,
                             test_data,
                             trainer_args,
                             self.metric,
                             self.loss,
                             self.verbose,
                             self.path)
        self.searcher.replace_model(graph, self.searcher.get_best_model_id())
        pickle_to_file(self, os.path.join(self.path, 'module'))
Example No. 30
    def search(self, train_data, test_data):
        torch.cuda.empty_cache()
        if not self.history:
            self.init_search()

        # Start the new process for training.
        graph, father_id, model_id = self.training_queue.pop(0)
        if self.verbose:
            print('Training model ', model_id)
        multiprocessing.set_start_method('spawn', force=True)
        pool = multiprocessing.Pool(1)
        train_results = pool.map_async(
            train, [(graph, train_data, test_data, self.trainer_args,
                     os.path.join(self.path,
                                  str(model_id) + '.png'), self.verbose)])

        # Do the search in current thread.
        if not self.training_queue:
            new_graph, new_father_id = self.maximize_acq()
            new_model_id = self.model_count
            self.model_count += 1
            self.training_queue.append(
                (new_graph, new_father_id, new_model_id))
            descriptor = new_graph.extract_descriptor()
            self.descriptors.append(descriptor)

        accuracy, loss, graph = train_results.get()[0]
        pool.terminate()
        pool.join()
        self.add_model(accuracy, loss, graph, model_id)
        self.search_tree.add_child(father_id, model_id)
        self.gpr.fit(self.x_queue, self.y_queue)
        self.x_queue = []
        self.y_queue = []

        pickle_to_file(self, os.path.join(self.path, 'searcher'))
        self.export_json(os.path.join(self.path, 'history.json'))
Example No. 31
    def fit(self, x, y, time_limit=None):
        validate_xy(x, y)
        y = self.transform_y(y)
        # Divide training data into training and testing data.
        validation_set_size = int(len(y) * Constant.VALIDATION_SET_SIZE)
        validation_set_size = min(validation_set_size, 500)
        validation_set_size = max(validation_set_size, 1)
        x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                            test_size=validation_set_size,
                                                            random_state=42)
        self.init_transformer(x)
        # Transform x_train

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data, test_data, time_limit)
Example No. 32
    def fit(self, x, y, time_limit=None):
        validate_xy(x, y)
        y = self.transform_y(y)
        # Divide training data into training and testing data.
        validation_set_size = int(len(y) * Constant.VALIDATION_SET_SIZE)
        validation_set_size = min(validation_set_size, 500)
        validation_set_size = max(validation_set_size, 1)
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=validation_set_size, random_state=42)
        self.init_transformer(x)
        # Transform x_train

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)
Example No. 33
    def search(self, x_train, y_train, x_test, y_test):
        if not self.history:
            model = DefaultClassifierGenerator(self.n_classes,
                                               self.input_shape).generate(
                                                   self.default_model_len,
                                                   self.default_model_width)
            history_item = self.add_model(model, x_train, y_train, x_test,
                                          y_test)
            self.search_tree.add_child(-1, history_item['model_id'])

            graph = Graph(model)
            self.init_search_queue = []
            # for child_graph in transform(graph):
            #     self.init_search_queue.append((child_graph, history_item['model_id']))
            self.init_gpr_x.append(graph.extract_descriptor())
            self.init_gpr_y.append(history_item['accuracy'])
            pickle_to_file(self, os.path.join(self.path, 'searcher'))
            return

        if self.init_search_queue:
            graph, father_id = self.init_search_queue.pop()
            model = graph.produce_model()
            history_item = self.add_model(model, x_train, y_train, x_test,
                                          y_test)
            self.search_tree.add_child(father_id, history_item['model_id'])
            self.init_gpr_x.append(graph.extract_descriptor())
            self.init_gpr_y.append(history_item['accuracy'])
            pickle_to_file(self, os.path.join(self.path, 'searcher'))
            return

        if not self.init_search_queue and not self.gpr.first_fitted:
            self.gpr.first_fit(self.init_gpr_x, self.init_gpr_y)

        new_model, father_id = self.maximize_acq()

        history_item = self.add_model(new_model, x_train, y_train, x_test,
                                      y_test)
        self.search_tree.add_child(father_id, history_item['model_id'])
        self.gpr.incremental_fit(
            Graph(new_model).extract_descriptor(), history_item['accuracy'])
        pickle_to_file(self, os.path.join(self.path, 'searcher'))
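
This searcher warms up a Gaussian-process surrogate on (descriptor, accuracy) pairs with first_fit and updates it with incremental_fit. Using scikit-learn's GaussianProcessRegressor as a stand-in for the custom gpr object (sklearn refits from scratch rather than updating incrementally), the bookkeeping looks roughly like this:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

# Hypothetical architecture descriptors and their observed accuracies.
init_gpr_x = [np.random.rand(4) for _ in range(3)]
init_gpr_y = [0.81, 0.84, 0.88]

gpr = GaussianProcessRegressor()
gpr.fit(np.vstack(init_gpr_x), np.array(init_gpr_y))  # analogue of first_fit

# Predict the metric for a new candidate descriptor; the acquisition step
# (maximize_acq above) would trade this mean off against the uncertainty.
mean, std = gpr.predict(np.random.rand(1, 4), return_std=True)
print(float(mean[0]), float(std[0]))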
Example No. 34
    def search(self, train_data, test_data, timeout=60 * 60 * 24):
        start_time = time.time()
        torch.cuda.empty_cache()
        if not self.history:
            self.init_search()

        # Start the new process for training.
        graph, father_id, model_id = self.training_queue.pop(0)
        if self.verbose:
            print('\n')
            print('╒' + '=' * 46 + '╕')
            print('|' + 'Training model {}'.format(model_id).center(46) + '|')
            print('╘' + '=' * 46 + '╛')
        mp.set_start_method('spawn', force=True)
        pool = mp.Pool(1)
        train_results = pool.map_async(train, [(graph, train_data, test_data, self.trainer_args,
                                                os.path.join(self.path, str(model_id) + '.png'),
                                                self.metric, self.loss, self.verbose)])

        # Do the search in current thread.
        try:
            if not self.training_queue:
                new_graph, new_father_id = self.bo.optimize_acq(self.search_tree.adj_list.keys(),
                                                                self.descriptors,
                                                                timeout)
                # Did not find a new architecture
                if new_father_id is None:
                    return
                new_model_id = self.model_count
                self.model_count += 1
                self.training_queue.append((new_graph, new_father_id, new_model_id))
                self.descriptors.append(new_graph.extract_descriptor())

                if self.verbose:
                    cell_size = [24, 49]
                    header = ['Father Model ID', 'Added Operation']
                    line = '|'.join(str(x).center(cell_size[i]) for i, x in enumerate(header))
                    print('\n' + '+' + '-' * len(line) + '+')
                    print('|' + line + '|')
                    print('+' + '-' * len(line) + '+')
                    for i in range(len(new_graph.operation_history)):
                        if i == len(new_graph.operation_history) // 2:
                            r = [new_father_id, new_graph.operation_history[i]]
                        else:
                            r = [' ', new_graph.operation_history[i]]
                        line = '|'.join(str(x).center(cell_size[i]) for i, x in enumerate(r))
                        print('|' + line + '|')
                    print('+' + '-' * len(line) + '+')
            remaining_time = timeout - (time.time() - start_time)
            if remaining_time > 0:
                metric_value, loss, graph = train_results.get(timeout=remaining_time)[0]
            else:
                raise TimeoutError
        except (mp.TimeoutError, TimeoutError) as e:
            raise TimeoutError from e
        finally:
            # terminate and join the subprocess to prevent any resource leak
            pool.close()
            pool.join()
        self.add_model(metric_value, loss, graph, model_id)
        self.search_tree.add_child(father_id, model_id)
        self.bo.fit(self.x_queue, self.y_queue)
        self.x_queue = []
        self.y_queue = []

        pickle_to_file(self, os.path.join(self.path, 'searcher'))
        self.export_json(os.path.join(self.path, 'history.json'))
Example No. 35
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format.
        So the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the labels of the training data.
            time_limit: The time limit for the search in seconds.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        y_train = self.transform_y(y_train)

        # Transform x_train
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train,
                                                    augment=self.augment)

        # Create the searcher and save on disk
        if not self.searcher:
            input_shape = x_train.shape[1:]
            self.searcher_args['n_output_node'] = self.get_n_output_node()
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['verbose'] = self.verbose
            searcher = Searcher(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(
            x_train,
            y_train,
            test_size=min(Constant.VALIDATION_SET_SIZE,
                          int(len(y_train) * 0.2)),
            random_state=42)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                run_searcher_once(train_data, test_data, self.path,
                                  int(time_remain))
                if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # if no search executed during the time_limit, then raise an error
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.load_searcher().history) == 0:
                raise TimeoutError(
                    "Search Time too short. No model was found during the search time."
                )
            elif self.verbose:
                print('Time is out.')
Example No. 36
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format.
        So the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the labels of the training data.
            time_limit: The time limit for the search in seconds.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        y_train = self.transform_y(y_train)

        # Transform x_train
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train, augment=self.augment)

        # Create the searcher and save on disk
        if not self.searcher:
            input_shape = x_train.shape[1:]
            self.searcher_args['n_output_node'] = self.get_n_output_node()
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['verbose'] = self.verbose
            searcher = Searcher(**self.searcher_args)
            self.save_searcher(searcher)
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                            test_size=min(Constant.VALIDATION_SET_SIZE,
                                                                          int(len(y_train) * 0.2)),
                                                            random_state=42)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier
        pickle_to_file(self, os.path.join(self.path, 'classifier'))

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                run_searcher_once(train_data, test_data, self.path, int(time_remain))
                if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # if no search executed during the time_limit, then raise an error
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.load_searcher().history) == 0:
                raise TimeoutError("Search Time too short. No model was found during the search time.")
            elif self.verbose:
                print('Time is out.')
Example No. 37
 def replace_model(self, graph, model_id):
     pickle_to_file(graph, os.path.join(self.path, str(model_id) + '.h5'))
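
replace_model simply re-pickles a graph under its model ID. A matching loader, mirroring the '.h5' naming convention used above (the function name is ours; it is not shown in the source):

import os
import pickle

def load_model_by_id(path, model_id):
    # Read back the graph written by replace_model above.
    with open(os.path.join(path, str(model_id) + '.h5'), 'rb') as f:
        return pickle.load(f)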