Ejemplo n.º 1
0
    def fit(self,
            n_output_node,
            input_shape,
            train_data,
            test_data,
            time_limit=24 * 60 * 60):
        """Run the architecture search until time runs out or enough models exist.

        On the first call (``self.searcher`` is falsy) a ``Searcher`` is built
        from ``self.searcher_args`` and handed to ``self._save_searcher``;
        ``self.searcher`` is then set to ``True`` and only serves as a flag.

        Args:
            n_output_node: Number of output nodes of the final layer; forwarded
                to the ``Searcher`` constructor.
            input_shape: A shape tuple. Its first element is dropped
                (``input_shape[1:]``) before it is given to the ``Searcher``.
            train_data: Training data forwarded to ``searcher.search``.
            test_data: Testing data forwarded to ``searcher.search``.
            time_limit: Search budget in seconds (defaults to 24 hours).

        Raises:
            TimeoutError: If the budget is used up while the stored searcher's
                history is still empty.
        """
        # First use: configure, build and persist the searcher.
        if not self.searcher:
            self.searcher_args.update({
                'n_output_node': n_output_node,
                'input_shape': input_shape[1:],
                'path': self.path,
                'metric': self.metric,
                'loss': self.loss,
                'verbose': self.verbose,
            })
            self._save_searcher(Searcher(**self.searcher_args))
            self.searcher = True

        began_at = time.time()
        remaining = time_limit
        try:
            while remaining > 0:
                # Reload the searcher from disk for every round.
                stored = pickle_from_file(
                    os.path.join(self.path, 'searcher'))
                stored.search(train_data, test_data, int(remaining))
                model_count = len(self._load_searcher().history)
                if model_count >= Constant.MAX_MODEL_NUM:
                    break
                remaining = time_limit - (time.time() - began_at)
            # Budget exhausted: route through the handler below.
            if remaining <= 0:
                raise TimeoutError
        except TimeoutError:
            if not len(self._load_searcher().history):
                raise TimeoutError(
                    "Search Time too short. No model was found during the search time."
                )
            if self.verbose:
                print('Time is out.')
Ejemplo n.º 2
0
    def fit(self,
            n_output_node,
            input_shape,
            train_data,
            test_data,
            time_limit=24 * 60 * 60):
        """Search for models until the time budget or the model cap is reached.

        When ``self.searcher`` is falsy, a ``Searcher`` is created from
        ``self.searcher_args``, passed to ``self._save_searcher`` and
        ``self.searcher`` is flipped to ``True``.

        Args:
            n_output_node: Number of output nodes; forwarded to ``Searcher``.
            input_shape: A shape tuple; its first element is stripped
                (``input_shape[1:]``) before being given to ``Searcher``.
            train_data: Training data forwarded to ``_run_searcher_once``.
            test_data: Testing data forwarded to ``_run_searcher_once``.
            time_limit: Search budget in seconds (defaults to 24 hours).

        Raises:
            TimeoutError: If the budget runs out and the loaded searcher's
                history is empty.
        """
        # Build and persist the searcher on first use.
        if not self.searcher:
            self.searcher_args.update({
                'n_output_node': n_output_node,
                'input_shape': input_shape[1:],
                'path': self.path,
                'metric': self.metric,
                'loss': self.loss,
                'verbose': self.verbose,
            })
            self._save_searcher(Searcher(**self.searcher_args))
            self.searcher = True

        began_at = time.time()
        remaining = time_limit
        try:
            while remaining > 0:
                _run_searcher_once(train_data, test_data, self.path,
                                   int(remaining))
                model_count = len(self._load_searcher().history)
                if model_count >= Constant.MAX_MODEL_NUM:
                    break
                remaining = time_limit - (time.time() - began_at)
            # Budget exhausted: let the handler decide whether that is fatal.
            if remaining <= 0:
                raise TimeoutError
        except TimeoutError:
            if not len(self._load_searcher().history):
                raise TimeoutError(
                    "Search Time too short. No model was found during the search time."
                )
            if self.verbose:
                print('Time is out.')
Ejemplo n.º 3
0
class NetworkModule:
    """Drive a Searcher to find a network, then optionally train it further.

    Attributes:
        loss: A function of two parameters: the predictions and the ground truth.
        metric: An instance of one of the Metric subclasses.
        searcher_args: A dictionary of keyword arguments for the Searcher's __init__.
        searcher: The Searcher instance; None until ``fit`` creates it.
        path: A string. Directory in which the module is pickled.
        verbose: A boolean. When true, progress is printed to stdout.
        generators: A list of NetworkGenerator (or subclass) instances.
    """

    def __init__(self, loss, metric, searcher_args, path, verbose=False):
        self.searcher_args = searcher_args
        self.searcher = None
        self.path = path
        self.verbose = verbose
        self.loss = loss
        self.metric = metric
        self.generators = []

    def _dump_to_disk(self):
        """Pickle this module to the 'module' file inside ``self.path``."""
        pickle_to_file(self, os.path.join(self.path, 'module'))

    def fit(self,
            n_output_node,
            input_shape,
            train_data,
            test_data,
            time_limit=24 * 60 * 60):
        """Search for the best network within the given time budget.

        Args:
            n_output_node: An integer, the number of output nodes in the final layer.
            input_shape: A shape tuple; its first element is dropped
                (``input_shape[1:]``) before it is given to the Searcher.
            train_data: A PyTorch DataLoader instance with the training data.
            test_data: A PyTorch DataLoader instance with the testing data.
            time_limit: An integer, the search budget in seconds.

        Raises:
            TimeoutError: If the budget runs out before the searcher's
                history contains any model.
        """
        # Lazily build the searcher and persist the module.
        if not self.searcher:
            self.searcher_args.update({
                'n_output_node': n_output_node,
                'input_shape': input_shape[1:],
                'path': self.path,
                'metric': self.metric,
                'loss': self.loss,
                'generators': self.generators,
                'verbose': self.verbose,
            })
            self.searcher = Searcher(**self.searcher_args)
            self._dump_to_disk()

        began_at = time.time()
        remaining = time_limit
        try:
            while remaining > 0:
                self.searcher.search(train_data, test_data, int(remaining))
                # Persist progress after every search round.
                self._dump_to_disk()
                if len(self.searcher.history) >= Constant.MAX_MODEL_NUM:
                    break
                remaining = time_limit - (time.time() - began_at)
            # Budget exhausted: route through the handler below.
            if remaining <= 0:
                raise TimeoutError
        except TimeoutError:
            if not len(self.searcher.history):
                raise TimeoutError(
                    "Search Time too short. No model was found during the search time."
                )
            if self.verbose:
                print('Time is out.')

    def final_fit(self,
                  train_data,
                  test_data,
                  trainer_args=None,
                  retrain=False):
        """Train the best architecture found so far and store the result.

        Args:
            train_data: A DataLoader instance with the training data.
            test_data: A DataLoader instance with the testing data.
            trainer_args: A dictionary of parameters for the ModelTrainer constructor.
            retrain: A boolean; when true the graph is marked as unweighted
                (``weighted = False``) before training.
        """
        best_graph = self.searcher.load_best_model()
        if retrain:
            best_graph.weighted = False

        _unused_a, _unused_b, trained_graph = train(
            None, best_graph, train_data, test_data, trainer_args,
            self.metric, self.loss, self.verbose, self.path)
        self.searcher.replace_model(trained_graph,
                                    self.searcher.get_best_model_id())
        self._dump_to_disk()

    @property
    def best_model(self):
        """The best model as returned by the searcher's ``load_best_model``."""
        return self.searcher.load_best_model()
Ejemplo n.º 4
0
    def fit(self, x_train=None, y_train=None, time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format.
        So the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data.
            y_train: A numpy.ndarray instance containing the label of the training data.
                It is flattened before validation.
            time_limit: The time limit for the search in seconds. Defaults to
                24 hours when None.

        Raises:
            TimeoutError: If the time limit is reached while the stored
                searcher's history is still empty.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []

        x_train = np.array(x_train)
        y_train = np.array(y_train).flatten()

        _validate(x_train, y_train)

        y_train = self.transform_y(y_train)

        # Create the data transformer once; later calls reuse it.
        if self.data_transformer is None:
            self.data_transformer = DataTransformer(x_train,
                                                    augment=self.augment)

        # Create the searcher and save on disk
        if not self.searcher:
            # Drop the leading (sample-count) dimension of the array shape.
            input_shape = x_train.shape[1:]
            self.searcher_args['n_output_node'] = self.get_n_output_node()
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['verbose'] = self.verbose
            searcher = Searcher(**self.searcher_args)
            self.save_searcher(searcher)
            # From here on self.searcher is only a flag; the object lives on disk.
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(
            x_train,
            y_train,
            test_size=min(Constant.VALIDATION_SET_SIZE,
                          int(len(y_train) * 0.2)),
            random_state=42)

        # Wrap the data into DataLoaders
        train_data = self.data_transformer.transform_train(x_train, y_train)
        test_data = self.data_transformer.transform_test(x_test, y_test)

        # Save the classifier. The file is opened in a ``with`` block so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file receives the same path, so the dump
        # above looks redundant - confirm before removing either call.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                run_searcher_once(train_data, test_data, self.path,
                                  int(time_remain))
                if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # Time budget exhausted: raise so the handler below decides
            # whether that is an error (no model found) or just a notice.
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.load_searcher().history) == 0:
                raise TimeoutError(
                    "Search Time too short. No model was found during the search time."
                )
            elif self.verbose:
                print('Time is out.')
Ejemplo n.º 5
0
    def fit_dataset(self,
                    train_root,
                    train_csv_file,
                    test_root,
                    test_csv_file,
                    time_limit=None,
                    batch_size=8):
        """Search for the best architecture using datasets described by CSV files.

        Both datasets are built with ``MyData`` and wrapped in
        ``torch.utils.data.DataLoader`` instances (training data shuffled,
        testing data not). The searcher is created with a fixed input shape
        of ``(224, 224, 3)`` and 4 output nodes.

        Args:
            train_root: Value passed to ``MyData`` as ``root`` for the training set.
            train_csv_file: Value passed to ``MyData`` as ``csv_file`` for the training set.
            test_root: Value passed to ``MyData`` as ``root`` for the testing set.
            test_csv_file: Value passed to ``MyData`` as ``csv_file`` for the testing set.
            time_limit: The time limit for the search in seconds. Defaults to
                24 hours when None.
            batch_size: Batch size used by both DataLoaders. Defaults to 8,
                the value that was previously hard-coded.

        Raises:
            TimeoutError: If the time limit is reached while the stored
                searcher's history is still empty.
        """
        # loading data
        train_dataset = MyData(csv_file=train_csv_file,
                               root=train_root,
                               test=False)
        train_data = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            pin_memory=True)
        test_dataset = MyData(csv_file=test_csv_file,
                              root=test_root,
                              test=True)
        test_data = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=False,
            pin_memory=True)

        # Create the searcher and save on disk
        if not self.searcher:
            # NOTE(review): input shape and output-node count are hard-coded
            # here - presumably specific to the dataset MyData loads; confirm.
            input_shape = (224, 224, 3)
            self.searcher_args['n_output_node'] = 4
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['verbose'] = self.verbose
            searcher = Searcher(**self.searcher_args)
            self.save_searcher(searcher)
            # From here on self.searcher is only a flag; the object lives on disk.
            self.searcher = True

        # Save the classifier. The file is opened in a ``with`` block so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file receives the same path, so the dump
        # above looks redundant - confirm before removing either call.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                run_searcher_once(train_data, test_data, self.path,
                                  int(time_remain))
                if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # Time budget exhausted: raise so the handler below decides
            # whether that is an error (no model found) or just a notice.
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.load_searcher().history) == 0:
                raise TimeoutError(
                    "Search Time too short. No model was found during the search time."
                )
            elif self.verbose:
                print('Time is out.')
Ejemplo n.º 6
0
    def fit(self,
            x_train=None,
            y_train=None,
            batch_size=None,
            time_limit=None):
        """Find the best neural architecture and train it.

        Based on the given dataset, the function will find the best neural architecture for it.
        The dataset is in numpy.ndarray format.
        So the training data should be passed through `x_train`, `y_train`.

        Args:
            x_train: A numpy.ndarray instance containing the training data. When
                ``self.augment`` is truthy it is first run through ``text_preprocess``.
            y_train: A numpy.ndarray instance containing the label of the training data.
            batch_size: Batch size of the training DataLoader. Defaults to
                ``Constant.MAX_BATCH_SIZE`` when None.
            time_limit: The time limit for the search in seconds. Defaults to
                24 hours when None.
        """
        if y_train is None:
            y_train = []
        if x_train is None:
            x_train = []
        if self.augment:
            x_train = text_preprocess(x_train, path=self.path)

        x_train = np.array(x_train)
        y_train = np.array(y_train)
        _validate(x_train, y_train)
        y_train = self.transform_y(y_train)

        if batch_size is None:
            batch_size = Constant.MAX_BATCH_SIZE
        # Create the searcher and save on disk
        if not self.searcher:
            # Drop the leading (sample-count) dimension of the array shape.
            input_shape = x_train.shape[1:]
            self.searcher_args['n_output_node'] = self.get_n_output_node()
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['verbose'] = self.verbose
            searcher = Searcher(**self.searcher_args)
            self.save_searcher(searcher)
            # From here on self.searcher is only a flag; the object lives on disk.
            self.searcher = True

        # Divide training data into training and testing data.
        x_train, x_test, y_train, y_test = train_test_split(
            x_train,
            y_train,
            test_size=min(Constant.VALIDATION_SET_SIZE,
                          int(len(y_train) * 0.2)),
            random_state=42)

        # Wrap the data into DataLoaders
        train_data = text_dataloader(x_train,
                                     y_train,
                                     batch_size=batch_size,
                                     shuffle=True)
        # NOTE(review): the test loader does not receive batch_size and is
        # shuffled - confirm both are intended.
        test_data = text_dataloader(x_test, y_test, shuffle=True)

        # Save the classifier. The file is opened in a ``with`` block so the
        # handle is closed deterministically instead of being leaked.
        classifier_path = os.path.join(self.path, 'classifier')
        with open(classifier_path, 'wb') as classifier_file:
            pickle.dump(self, classifier_file)
        # NOTE(review): pickle_to_file receives the same path, so the dump
        # above looks redundant - confirm before removing either call.
        pickle_to_file(self, classifier_path)

        if time_limit is None:
            time_limit = 24 * 60 * 60

        # The search itself is delegated to self.cnn; it receives the full
        # post-split array shape of x_train.
        self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data,
                     test_data, time_limit)