Code example #1
File: optimizer.py  Project: Herolin12/NASLib
    def new_epoch(self, epoch):
        # We sample as many architectures as we need
        if epoch < self.population_size:
            logger.info("Start sampling architectures to fill the population")
            # If there is no scope defined, let's use the search space default one

            model = torch.nn.Module()  # hacky way to get arch and accuracy checkpointable

            model.arch = sample_random_architecture(self.search_space,
                                                    self.scope)
            model.accuracy = model.arch.query(self.performance_metric,
                                              self.dataset)

            self.population.append(model)
            self._update_history(model)
            log_every_n_seconds(
                logging.INFO,
                "Population size {}".format(len(self.population)))
        else:
            sample = []
            while len(sample) < self.sample_size:
                candidate = np.random.choice(list(self.population))
                sample.append(candidate)

            parent = max(sample, key=lambda x: x.accuracy)

            child = torch.nn.Module()  # hacky way to get arch and accuracy checkpointable
            child.arch = mutate(parent.arch)
            child.accuracy = child.arch.query(self.performance_metric,
                                              self.dataset)

            self.population.append(child)
            self._update_history(child)
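
Usage note: the method above is one step of a regularized-evolution style search; it first fills the population with randomly sampled architectures and afterwards mutates the best member of a random tournament of sample_size models. A minimal driver sketch, assuming an already constructed optimizer that exposes only what the snippet shows (new_epoch, population, and a per-model accuracy attribute):

    # Hypothetical driver loop: run the evolution for a fixed epoch budget,
    # then read off the best architecture in the current population.
    for epoch in range(100):
        optimizer.new_epoch(epoch)

    best = max(optimizer.population, key=lambda m: m.accuracy)
    print("Best queried accuracy:", best.accuracy)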
Code example #2
File: trainer.py  Project: jackyvan/NASLib
    def search(self, resume_from=""):
        """
        Start the architecture search.

        Generates a json file with training statistics.

        Args:
            resume_from (str): Checkpoint file to resume from. If not given,
                train from scratch.
        """
        logger.info("Start training")
        self.optimizer.before_training()
        checkpoint_freq = self.config.search.checkpoint_freq
        if self.optimizer.using_step_function:
            self.scheduler = self.build_search_scheduler(
                self.optimizer.op_optimizer, self.config)

            start_epoch = self._setup_checkpointers(resume_from,
                                                    period=checkpoint_freq,
                                                    scheduler=self.scheduler)
        else:
            start_epoch = self._setup_checkpointers(resume_from,
                                                    period=checkpoint_freq)

        self.train_queue, self.valid_queue, _ = self.build_search_dataloaders(
            self.config)

        for e in range(start_epoch, self.epochs):
            self.optimizer.new_epoch(e)

            start_time = time.time()
            if self.optimizer.using_step_function:
                for step, (data_train, data_val) in enumerate(
                        zip(self.train_queue, self.valid_queue)):
                    data_train = (data_train[0].to(self.device),
                                  data_train[1].to(self.device,
                                                   non_blocking=True))
                    data_val = (data_val[0].to(self.device),
                                data_val[1].to(self.device, non_blocking=True))

                    stats = self.optimizer.step(data_train, data_val)
                    logits_train, logits_val, train_loss, val_loss = stats

                    self._store_accuracies(logits_train, data_train[1],
                                           'train')
                    self._store_accuracies(logits_val, data_val[1], 'val')

                    log_every_n_seconds(
                        logging.INFO,
                        "Epoch {}-{}, Train loss: {:.5f}, validation loss: {:.5f}, learning rate: {}"
                        .format(e, step, train_loss, val_loss,
                                self.scheduler.get_last_lr()),
                        n=5)

                    if torch.cuda.is_available():
                        log_first_n(logging.INFO,
                                    "cuda consumption\n {}".format(
                                        torch.cuda.memory_summary()),
                                    n=3)

                    self.train_loss.update(float(train_loss.detach().cpu()))
                    self.val_loss.update(float(val_loss.detach().cpu()))

                self.scheduler.step()

                end_time = time.time()

                self.errors_dict.train_acc.append(self.train_top1.avg)
                self.errors_dict.train_loss.append(self.train_loss.avg)
                self.errors_dict.valid_acc.append(self.val_top1.avg)
                self.errors_dict.valid_loss.append(self.val_loss.avg)
                self.errors_dict.runtime.append(end_time - start_time)
            else:
                end_time = time.time()
                train_acc, train_loss, valid_acc, valid_loss = \
                    self.optimizer.train_statistics()
                self.errors_dict.train_acc.append(train_acc)
                self.errors_dict.train_loss.append(train_loss)
                self.errors_dict.valid_acc.append(valid_acc)
                self.errors_dict.valid_loss.append(valid_loss)
                self.errors_dict.runtime.append(end_time - start_time)
                self.train_top1.avg = train_acc
                self.val_top1.avg = valid_acc

            self.periodic_checkpointer.step(e)

            anytime_results = self.optimizer.test_statistics()
            if anytime_results:
                # record anytime performance
                self.errors_dict.arch_eval.append(anytime_results)
                log_every_n_seconds(logging.INFO,
                                    "Epoch {}, Anytime results: {}".format(
                                        e, anytime_results),
                                    n=5)

            self._log_to_json()
            self._log_and_reset_accuracies(e)

        self.optimizer.after_training()
        logger.info("Training finished")
Code example #3
File: trainer.py  Project: jackyvan/NASLib
    def evaluate(
        self,
        retrain=True,
        search_model="",
        resume_from="",
        best_arch=None,
    ):
        """
        Evaluate the final architecture as given from the optimizer.

        If the search space has an interface to a benchmark then query that.
        Otherwise train as defined in the config.

        Args:
            retrain (bool): Reset the weights from the architecture search.
            search_model (str): Path to the checkpoint file that was created during
                search. If not provided, try to load 'model_final.pth' from the search directory.
            resume_from (str): Resume retraining from the given checkpoint file.
            best_arch: Parsed model to evaluate directly, ignoring the final model
                from the optimizer.
        """
        logger.info("Start evaluation")
        if not best_arch:

            if not search_model:
                search_model = os.path.join(self.config.save, "search",
                                            "model_final.pth")
            self._setup_checkpointers(search_model)  # required to load the architecture

            best_arch = self.optimizer.get_final_architecture()
        logger.info("Final architecture:\n" + best_arch.modules_str())

        if best_arch.QUERYABLE:
            metric = Metric.TEST_ACCURACY
            result = best_arch.query(metric=metric,
                                     dataset=self.config.dataset)
            logger.info("Queried results ({}): {}".format(metric, result))
        else:
            best_arch.to(self.device)
            if retrain:
                logger.info("Starting retraining from scratch")
                best_arch.reset_weights(inplace=True)

                self.train_queue, self.valid_queue, self.test_queue = self.build_eval_dataloaders(
                    self.config)

                optim = self.build_eval_optimizer(best_arch.parameters(),
                                                  self.config)
                scheduler = self.build_eval_scheduler(optim, self.config)

                start_epoch = self._setup_checkpointers(
                    resume_from,
                    search=False,
                    period=self.config.evaluation.checkpoint_freq,
                    model=best_arch,  # checkpointables start here
                    optim=optim,
                    scheduler=scheduler)

                grad_clip = self.config.evaluation.grad_clip
                loss = torch.nn.CrossEntropyLoss()

                best_arch.train()
                self.train_top1.reset()
                self.train_top5.reset()
                self.val_top1.reset()
                self.val_top5.reset()

                # Enable drop path
                best_arch.update_edges(update_func=lambda edge: edge.data.set(
                    'op', DropPathWrapper(edge.data.op)),
                                       scope=best_arch.OPTIMIZER_SCOPE,
                                       private_edge_data=True)

                # train from scratch
                epochs = self.config.evaluation.epochs
                for e in range(start_epoch, epochs):
                    if torch.cuda.is_available():
                        log_first_n(logging.INFO,
                                    "cuda consumption\n {}".format(
                                        torch.cuda.memory_summary()),
                                    n=20)

                    # update drop path probability
                    drop_path_prob = self.config.evaluation.drop_path_prob * e / epochs
                    best_arch.update_edges(
                        update_func=lambda edge: edge.data.set(
                            'drop_path_prob', drop_path_prob),
                        scope=best_arch.OPTIMIZER_SCOPE,
                        private_edge_data=True)

                    # Train queue
                    for i, (input_train,
                            target_train) in enumerate(self.train_queue):
                        input_train = input_train.to(self.device)
                        target_train = target_train.to(self.device,
                                                       non_blocking=True)

                        optim.zero_grad()
                        logits_train = best_arch(input_train)
                        train_loss = loss(logits_train, target_train)
                        if hasattr(best_arch,
                                   'auxilary_logits'):  # darts specific stuff
                            log_first_n(logging.INFO,
                                        "Auxiliary is used",
                                        n=10)
                            auxiliary_loss = loss(best_arch.auxilary_logits(),
                                                  target_train)
                            train_loss += self.config.evaluation.auxiliary_weight * auxiliary_loss
                        train_loss.backward()
                        if grad_clip:
                            torch.nn.utils.clip_grad_norm_(
                                best_arch.parameters(), grad_clip)
                        optim.step()

                        self._store_accuracies(logits_train, target_train,
                                               'train')
                        log_every_n_seconds(
                            logging.INFO,
                            "Epoch {}-{}, Train loss: {:.5}, learning rate: {}"
                            .format(e, i, train_loss, scheduler.get_last_lr()),
                            n=5)

                    # Validation queue
                    if self.valid_queue:
                        for i, (input_valid,
                                target_valid) in enumerate(self.valid_queue):

                            input_valid = input_valid.cuda().float()
                            target_valid = target_valid.cuda().float()

                            # just log the validation accuracy
                            with torch.no_grad():
                                logits_valid = best_arch(input_valid)
                                self._store_accuracies(logits_valid,
                                                       target_valid, 'val')

                    scheduler.step()
                    self.periodic_checkpointer.step(e)
                    self._log_and_reset_accuracies(e)

            # Disable drop path
            best_arch.update_edges(update_func=lambda edge: edge.data.set(
                'op', edge.data.op.get_embedded_ops()),
                                   scope=best_arch.OPTIMIZER_SCOPE,
                                   private_edge_data=True)

            # measure final test accuracy
            top1 = utils.AverageMeter()
            top5 = utils.AverageMeter()

            best_arch.eval()

            for i, data_test in enumerate(self.test_queue):
                input_test, target_test = data_test
                input_test = input_test.to(self.device)
                target_test = target_test.to(self.device, non_blocking=True)

                n = input_test.size(0)

                with torch.no_grad():
                    logits = best_arch(input_test)

                    prec1, prec5 = utils.accuracy(logits,
                                                  target_test,
                                                  topk=(1, 5))
                    top1.update(prec1.data.item(), n)
                    top5.update(prec5.data.item(), n)

                log_every_n_seconds(logging.INFO,
                                    "Inference batch {} of {}.".format(
                                        i, len(self.test_queue)),
                                    n=5)

            logger.info(
                "Evaluation finished. Test accuracies: top-1 = {:.5}, top-5 = {:.5}"
                .format(top1.avg, top5.avg))
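
Note on the drop path schedule: during retraining, the drop path probability grows linearly with the epoch index (config drop_path_prob * e / epochs, as in the loop above). A worked sketch of that schedule with illustrative config values:

    # Illustrative values: drop_path_prob = 0.2 in the config, 100 retraining epochs.
    max_prob, epochs = 0.2, 100
    schedule = [max_prob * e / epochs for e in range(epochs)]
    assert schedule[0] == 0.0                  # no paths dropped in the first epoch
    assert abs(schedule[-1] - 0.198) < 1e-12   # just below max_prob in the last epoch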
Code example #4
File: trainer_multi.py  Project: automl/NASLib
    def evaluate(
            self,
            retrain=True,
            search_model="",
            resume_from="",
            best_arch=None,
    ):
        """
        Evaluate the final architecture as given from the optimizer.

        If the search space has an interface to a benchmark then query that.
        Otherwise train as defined in the config.

        Args:
            retrain (bool): Reset the weights from the architecture search.
            search_model (str): Path to the checkpoint file that was created during
                search. If not provided, try to load 'model_final.pth' from the search directory.
            resume_from (str): Resume retraining from the given checkpoint file.
            best_arch: Parsed model to evaluate directly, ignoring the final model
                from the optimizer.
        """

        #best_arch.to(self.device)
        self.config.evaluation.resume_from = resume_from
        if retrain:
            if self.config.gpu is not None:
                logger.warning(
                    'You have chosen a specific GPU. This will completely '
                    'disable data parallelism.')

            if self.config.evaluation.dist_url == "env://" and self.config.evaluation.world_size == -1:
                self.config.evaluation.world_size = int(os.environ["WORLD_SIZE"])

            self.config.evaluation.distributed = \
                self.config.evaluation.world_size > 1 or self.config.evaluation.multiprocessing_distributed
            ngpus_per_node = torch.cuda.device_count()

            if self.config.evaluation.multiprocessing_distributed:
                # Since we have ngpus_per_node processes per node, the
                # total world_size needs to be adjusted
                self.config.evaluation.world_size = ngpus_per_node * self.config.evaluation.world_size
                # Use torch.multiprocessing.spawn to launch distributed
                # processes: the main_worker process function
                mp.spawn(self.main_worker, nprocs=ngpus_per_node,
                         args=(ngpus_per_node, self.config.evaluation,
                               search_model, best_arch))
            else:
                # Simply call main_worker function
                self.main_worker(self.config.gpu, ngpus_per_node,
                                 self.config.evaluation,
                                 search_model, best_arch)

        if not self.QUERYABLE:
            # Disable drop path
            best_arch.update_edges(
                update_func=lambda edge: edge.data.set('op', edge.data.op.get_embedded_ops()),
                scope=best_arch.OPTIMIZER_SCOPE,
                private_edge_data=True
            )

            # measure final test accuracy
            top1 = utils.AverageMeter()
            top5 = utils.AverageMeter()

            best_arch.eval()

            for i, data_test in enumerate(self.test_queue):
                input_test, target_test = data_test
                input_test = input_test.to(self.device)
                target_test = target_test.to(self.device, non_blocking=True)

                n = input_test.size(0)

                with torch.no_grad():
                    logits = best_arch(input_test)

                    prec1, prec5 = utils.accuracy(logits, target_test, topk=(1, 5))
                    top1.update(prec1.data.item(), n)
                    top5.update(prec5.data.item(), n)

                log_every_n_seconds(
                    logging.INFO,
                    "Inference batch {} of {}.".format(
                        i, len(self.test_queue)
                    ), n=5
                )

            logger.info("Evaluation finished. Test accuracies: top-1 = {:.5}, \
                        top-5 = {:.5}".format(top1.avg, top5.avg))
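
Note on the launch pattern: in multiprocessing-distributed mode, the configured world_size is first interpreted as the number of nodes and is scaled to the total number of processes (ngpus_per_node * world_size), and torch.multiprocessing.spawn then starts one main_worker per local GPU, passing the local GPU index as the first argument. A small self-contained sketch of that pattern (the worker body and the node/GPU counts are illustrative):

    import torch.multiprocessing as mp

    def worker(gpu, ngpus_per_node, nnodes):
        # mp.spawn passes the local index (0 .. nprocs-1) as the first argument;
        # the global rank is then node_rank * ngpus_per_node + gpu (see example #5).
        print("local gpu", gpu, "of a world of", nnodes * ngpus_per_node)

    if __name__ == "__main__":
        mp.spawn(worker, nprocs=4, args=(4, 2))  # e.g. 4 GPUs per node, 2 nodes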
Code example #5
File: trainer_multi.py  Project: automl/NASLib
    def main_worker(self, gpu, ngpus_per_node, args, search_model, best_arch):
        logger.info("Start evaluation")
        if not best_arch:
            if not search_model:
                search_model = os.path.join(self.config.save, "search", "model_final.pth")
            self._setup_checkpointers(search_model)      # required to load the architecture

            best_arch = self.optimizer.get_final_architecture()
        logger.info("Final architecture:\n" + best_arch.modules_str())

        if best_arch.QUERYABLE:
            metric = Metric.TEST_ACCURACY
            result = best_arch.query(
                metric=metric, dataset=self.config.dataset
            )
            logger.info("Queried results ({}): {}".format(metric, result))
            self.QUERYABLE = True
            return

        best_arch.reset_weights(inplace=True)
        logger.info("Starting retraining from scratch")

        args.gpu = gpu
        if gpu is not None:
            logger.info("Use GPU: {} for training".format(args.gpu))

        if args.distributed:
            if args.dist_url == "env://" and args.rank == -1:
                args.rank = int(os.environ["RANK"])
            if args.multiprocessing_distributed:
                # For multiprocessing distributed training, rank needs to be the
                # global rank among all processes
                args.rank = args.rank * ngpus_per_node + gpu
            dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                    world_size=args.world_size, rank=args.rank)

        if not torch.cuda.is_available():
            logger.warning("Using CPU, this will be slow!")
        elif args.distributed:
            # For multiprocessing distributed, DistributedDataParallel constructor
            # should always set the single device scope, otherwise,
            # DistributedDataParallel will use all available devices
            if args.gpu is not None:
                torch.cuda.set_device(args.gpu)
                best_arch.cuda(args.gpu)
                # When using a single GPU per process and per
                # DistributedDataParallel, we need to divide the batch size
                # ourselves based on the total number of GPUs we have
                args.batch_size = int(args.batch_size / ngpus_per_node)
                args.workers = int((args.workers + ngpus_per_node - 1) /
                                   ngpus_per_node)
                best_arch = \
                    torch.nn.parallel.DistributedDataParallel(best_arch,
                                                              device_ids=[args.gpu])
            else:
                best_arch.cuda()
                # DistributedDataParallel will divide and allocate batch_size to all
                # available GPUs if device_ids are not set
                best_arch = torch.nn.parallel.DistributedDataParallel(best_arch)
        elif args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            best_arch = best_arch.cuda(args.gpu)
        else:
            # DataParallel will divide and allocate batch_size to all available GPUs
            best_arch = torch.nn.DataParallel(best_arch).cuda()

        cudnn.benchmark = True

        self.train_queue, self.valid_queue, self.test_queue =\
            self.build_eval_dataloaders(self.config)

        optim = self.build_eval_optimizer(best_arch.parameters(), self.config)
        scheduler = self.build_eval_scheduler(optim, self.config)

        start_epoch = self._setup_checkpointers(args.resume_from,
            search=False,
            period=self.config.evaluation.checkpoint_freq,
            model=best_arch,    # checkpointables start here
            optim=optim,
            scheduler=scheduler
        )

        grad_clip = self.config.evaluation.grad_clip
        loss = torch.nn.CrossEntropyLoss()

        best_arch.train()
        self.train_top1.reset()
        self.train_top5.reset()
        self.val_top1.reset()
        self.val_top5.reset()

        # Enable drop path
        if isinstance(best_arch, torch.nn.DataParallel):
            best_arch.module.update_edges(
                update_func=lambda edge: edge.data.set('op', DropPathWrapper(edge.data.op)),
                scope=best_arch.module.OPTIMIZER_SCOPE,
                private_edge_data=True
            )
        else:
            best_arch.update_edges(
                update_func=lambda edge: edge.data.set('op', DropPathWrapper(edge.data.op)),
                scope=best_arch.OPTIMIZER_SCOPE,
                private_edge_data=True
            )

        # train from scratch
        epochs = self.config.evaluation.epochs
        for e in range(start_epoch, epochs):
            # update drop path probability
            drop_path_prob = self.config.evaluation.drop_path_prob * e / epochs
            if isinstance(best_arch, torch.nn.DataParallel):
                best_arch.module.update_edges(
                    update_func=lambda edge: edge.data.set('drop_path_prob', drop_path_prob),
                    scope=best_arch.module.OPTIMIZER_SCOPE,
                    private_edge_data=True
                )
            else:
                best_arch.update_edges(
                    update_func=lambda edge: edge.data.set('drop_path_prob', drop_path_prob),
                    scope=best_arch.OPTIMIZER_SCOPE,
                    private_edge_data=True
                )

            # Train queue
            for i, (input_train, target_train) in enumerate(self.train_queue):
                input_train = input_train.to(self.device)
                target_train = target_train.to(self.device, non_blocking=True)

                optim.zero_grad()
                logits_train = best_arch(input_train)
                train_loss = loss(logits_train, target_train)
                if hasattr(best_arch, 'auxilary_logits'):   # darts specific stuff
                    log_first_n(logging.INFO, "Auxiliary is used", n=10)
                    auxiliary_loss = loss(best_arch.auxilary_logits(), target_train)
                    train_loss += self.config.evaluation.auxiliary_weight * auxiliary_loss
                train_loss.backward()
                if grad_clip:
                    torch.nn.utils.clip_grad_norm_(best_arch.parameters(), grad_clip)
                optim.step()

                self._store_accuracies(logits_train, target_train, 'train')
                log_every_n_seconds(
                    logging.INFO,
                    "Epoch {}-{}, Train loss: {:.5}, learning rate: {}".format(
                        e, i, train_loss, scheduler.get_last_lr()
                    ), n=5
                )

                if torch.cuda.is_available():
                    log_first_n(
                        logging.INFO,
                        "cuda consumption\n {}".format(
                            torch.cuda.memory_summary()
                        ), n=3
                    )

            # Validation queue
            if self.valid_queue:
                for i, (input_valid, target_valid) in enumerate(self.valid_queue):

                    input_valid = input_valid.to(self.device).float()
                    target_valid = target_valid.to(self.device, non_blocking=True).float()

                    # just log the validation accuracy
                    logits_valid = best_arch(input_valid)
                    self._store_accuracies(logits_valid, target_valid, 'val')

            scheduler.step()
            self.periodic_checkpointer.step(e)
            self._log_and_reset_accuracies(e)
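
Note on the per-process split: in the single-GPU-per-process DistributedDataParallel branch above, the global batch size and the dataloader workers are divided among the local GPUs. A worked sketch of that arithmetic with illustrative values:

    # Illustrative values: global batch size 96, 8 dataloader workers, 4 GPUs per node.
    batch_size, workers, ngpus_per_node = 96, 8, 4
    per_proc_batch = int(batch_size / ngpus_per_node)                         # 24 samples per process
    per_proc_workers = int((workers + ngpus_per_node - 1) / ngpus_per_node)   # 2 workers per process (ceiling)
    assert (per_proc_batch, per_proc_workers) == (24, 2)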