Example 1
    def search(self, train_data, test_data, timeout=60 * 60 * 24):
        """Run the search loop of training, generating and updating once.

        The function will run the training and generate in parallel.
        Then it will update the controller.
        The training is just pop out a graph from the training_queue and train it.
        The generate will call the self.generate function.
        The update will call the self.update function.

        Args:
            train_data: An instance of DataLoader.
            test_data: An instance of Dataloader.
            timeout: An integer, time limit in seconds.
        """
        torch.cuda.empty_cache()
        if not self.history:
            self.init_search()

        self._timeout = time.time() + timeout if timeout is not None else sys.maxsize
        self.trainer_args['timeout'] = timeout
        # Start the new process for training.
        graph, other_info, model_id = self.training_queue.pop(0)
        if self.verbose:
            print('\n')
            print('+' + '-' * 46 + '+')
            print('|' + 'Training model {}'.format(model_id).center(46) + '|')
            print('+' + '-' * 46 + '+')
        # Temporary solution to support GOOGLE Colab
        if get_system() == Constant.SYS_GOOGLE_COLAB:
            # When using Google Colab, use single process for searching and training.
            self.sp_search(graph, other_info, model_id, train_data, test_data)
        else:
            # Use two processes
            self.mp_search(graph, other_info, model_id, train_data, test_data)
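
As context for how this method is meant to be driven, here is a minimal sketch of an outer fit-style loop; the run_search helper and its arguments are assumptions for illustration, not part of the repo's API.

import time

def run_search(searcher, train_data, test_data, total_budget=60 * 60 * 24):
    # Hypothetical driver loop: keep calling search() until the overall
    # time budget is exhausted, passing the remaining time as the
    # per-iteration timeout.
    start = time.time()
    while True:
        remaining = total_budget - (time.time() - start)
        if remaining <= 0:
            break
        try:
            searcher.search(train_data, test_data, timeout=remaining)
        except TimeoutError:
            break  # the budget ran out mid-iteration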
Example 2
    def search(self, train_data, test_data, timeout=60 * 60 * 24):
        """Run the search loop of training, generating and updating once.

        The function will run the training and generate in parallel.
        Then it will update the controller.
        The training is just pop out a graph from the training_queue and train it.
        The generate will call teh self.generate function.
        The update will call the self.update function.

        Args:
            train_data: An instance of DataLoader.
            test_data: An instance of Dataloader.
            timeout: An integer, time limit in seconds.
        """
        start_time = time.time()
        torch.cuda.empty_cache()
        if not self.history:
            self.init_search()

        # Start the new process for training.
        graph, other_info, model_id = self.training_queue.pop(0)
        if self.verbose:
            print('\n')
            print('+' + '-' * 46 + '+')
            print('|' + 'Training model {}'.format(model_id).center(46) + '|')
            print('+' + '-' * 46 + '+')
        # Temporary solution to support GOOGLE Colab
        if get_system() == Constant.SYS_GOOGLE_COLAB:
            ctx = mp.get_context('fork')
        else:
            ctx = mp.get_context('spawn')
        q = ctx.Queue()
        p = ctx.Process(target=train,
                        args=(q, graph, train_data, test_data,
                              self.trainer_args, self.metric, self.loss,
                              self.verbose, self.path))
        try:
            p.start()
            # Do the search in current thread.
            searched = False
            generated_graph = None
            generated_other_info = None
            if not self.training_queue:
                searched = True

                remaining_time = timeout - (time.time() - start_time)
                generated_other_info, generated_graph = self.generate(
                    remaining_time, q)
                new_model_id = self.model_count
                self.model_count += 1
                self.training_queue.append(
                    (generated_graph, generated_other_info, new_model_id))
                self.descriptors.append(generated_graph.extract_descriptor())

            remaining_time = timeout - (time.time() - start_time)
            if remaining_time <= 0:
                raise TimeoutError
            metric_value, loss, graph = q.get(timeout=remaining_time)

            if self.verbose and searched:
                verbose_print(generated_other_info, generated_graph)

            self.add_model(metric_value, loss, graph, model_id)
            self.update(other_info, graph, metric_value, model_id)

            self.export_json(os.path.join(self.path, 'history.json'))

        except (TimeoutError, queue.Empty) as e:
            raise TimeoutError from e
        except RuntimeError as e:
            if not re.search('out of memory', str(e)):
                raise e
            if self.verbose:
                print('\nCurrent model size is too big. '
                      'Discontinuing training of this model to search for other models.')
            Constant.MAX_MODEL_SIZE = graph.size() - 1
            return
        finally:
            # terminate and join the subprocess to prevent any resource leak
            p.terminate()
            p.join()
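
The fork/spawn split above exists presumably because 'spawn' re-imports the main module in the child, which is problematic in notebook environments like Colab, while 'fork' is unavailable on Windows. Below is a self-contained sketch of the same pattern, a training subprocess reporting through a context-bound Queue with a terminate/join cleanup; the worker here is a stand-in, not the repo's train function.

import multiprocessing as mp

def _train_worker(q):
    # Stand-in for the real train() target: do the work, then report
    # the result back through the queue.
    q.put(('metric_value', 0.9))

def run_one_training():
    ctx = mp.get_context('spawn')  # 'fork' is not available on Windows
    q = ctx.Queue()
    p = ctx.Process(target=_train_worker, args=(q,))
    try:
        p.start()
        return q.get(timeout=60)  # raises queue.Empty if the child hangs
    finally:
        p.terminate()  # always reclaim the subprocess
        p.join()

if __name__ == '__main__':
    print(run_one_training())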
Example 3
def test_get_system():
    sys_name = get_system()
    assert sys_name in (Constant.SYS_GOOGLE_COLAB,
                        Constant.SYS_LINUX,
                        Constant.SYS_WINDOWS)
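
For reference, here is a plausible sketch of what a helper like get_system might check; the constant values and detection logic are assumptions for illustration, and the actual implementation in the repo may differ.

import platform
import sys

SYS_GOOGLE_COLAB = 'google_colab'  # hypothetical constant values
SYS_LINUX = 'linux'
SYS_WINDOWS = 'windows'

def get_system():
    # Hypothetical detection: Colab is identified by its injected module,
    # otherwise fall back to the host OS name.
    if 'google.colab' in sys.modules:
        return SYS_GOOGLE_COLAB
    if platform.system() == 'Windows':
        return SYS_WINDOWS
    return SYS_LINUX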
Example 4
    def search(self, train_data, test_data, timeout=60 * 60 * 24):
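        """Run one iteration of the search: train the next queued model in a
        subprocess while optimizing the acquisition function in this process.
        """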
        start_time = time.time()
        torch.cuda.empty_cache()
        if not self.history:
            self.init_search()

        # Start the new process for training.
        graph, father_id, model_id = self.training_queue.pop(0)
        if self.verbose:
            print('\n')
            print('+' + '-' * 46 + '+')
            print('|' + 'Training model {}'.format(model_id).center(46) + '|')
            print('+' + '-' * 46 + '+')
        # Temporary solution to support GOOGLE Colab
        if get_system() == Constant.SYS_GOOGLE_COLAB:
            ctx = mp.get_context('fork')
        else:
            ctx = mp.get_context('spawn')
        q = ctx.Queue()
        p = ctx.Process(target=train,
                        args=(q, (graph, train_data, test_data,
                                  self.trainer_args, self.metric, self.loss,
                                  self.verbose, self.path)))
        try:
            p.start()
            # Do the search in current thread.
            searched = False
            new_graph = None
            new_father_id = None
            if not self.training_queue:
                searched = True

                while new_father_id is None:
                    remaining_time = timeout - (time.time() - start_time)
                    new_graph, new_father_id = self.bo.optimize_acq(
                        self.search_tree.adj_list.keys(), self.descriptors,
                        remaining_time)
                new_model_id = self.model_count
                self.model_count += 1
                self.training_queue.append(
                    (new_graph, new_father_id, new_model_id))
                self.descriptors.append(new_graph.extract_descriptor())

            remaining_time = timeout - (time.time() - start_time)
            if remaining_time <= 0:
                raise TimeoutError
            metric_value, loss, graph = q.get(timeout=remaining_time)

            if self.verbose and searched:
                verbose_print(new_father_id, new_graph)

            self.add_model(metric_value, loss, graph, model_id)
            self.search_tree.add_child(father_id, model_id)
            self.bo.fit(self.x_queue, self.y_queue)
            self.x_queue = []
            self.y_queue = []

            pickle_to_file(self, os.path.join(self.path, 'searcher'))
            self.export_json(os.path.join(self.path, 'history.json'))

        except (TimeoutError, queue.Empty) as e:
            raise TimeoutError from e
        except RuntimeError as e:
            if not re.search('out of memory', str(e)):
                raise e
            if self.verbose:
                print('\nCurrent model size is too big. '
                      'Discontinuing training of this model to search for other models.')
            Constant.MAX_MODEL_SIZE = graph.size() - 1
            return
        finally:
            # terminate and join the subprocess to prevent any resource leak
            p.terminate()
            p.join()
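
The RuntimeError branch above is a pragmatic out-of-memory guard: PyTorch raises RuntimeError with 'out of memory' in the message, so the searcher lowers the model-size cap below the failing model and moves on. A minimal standalone sketch of the same pattern follows; the names are illustrative, not from the repo.

import re

def train_with_oom_guard(train_fn, model_size, limits):
    # Hypothetical wrapper: skip a model that exhausts GPU memory and
    # tighten the size cap so later candidates stay smaller.
    try:
        return train_fn()
    except RuntimeError as e:
        if not re.search('out of memory', str(e)):
            raise
        limits['max_model_size'] = model_size - 1
        return None  # caller moves on to the next candidate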