Example #1
 def _backup(self):
     """Backup result worker folder."""
     if self.need_backup and self.backup_base_path is not None:
         backup_worker_path = FileOps.join_path(
             self.backup_base_path, self.get_worker_subpath())
         FileOps.copy_folder(
             self.get_local_worker_path(self.step_name, self.worker_id), backup_worker_path)
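These snippets assume FileOps.copy_folder(src, dst) recursively copies src into dst, creating the destination if needed. A minimal local-filesystem stand-in with the same call shape (a sketch; the real FileOps may also handle remote paths, which shutil does not):

 import shutil

 def copy_folder(src, dst):
     # Recursively copy src into dst; tolerate an existing destination.
     shutil.copytree(src, dst, dirs_exist_ok=True)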
Example #2
 def _init_model(self):
     """Load model desc from save path and parse to model."""
     model = self.trainer.model
     if self.trainer.config.is_detection_trainer:
         model_desc = self.trainer.model_desc
     else:
         model_desc = self._get_model_desc()
     if model_desc:
         ModelConfig.model_desc = model_desc
     pretrained_model_file = self._get_pretrained_model_file()
     if not model:
         if not model_desc:
             raise Exception(
                 "Failed to init model: cannot get model description.")
         model = ModelZoo.get_model(model_desc, pretrained_model_file)
     if model:
         if zeus.is_torch_backend():
             import torch
             if self.trainer.use_cuda:
                 model = model.cuda()
             if General._parallel and General.devices_per_trainer > 1:
                 # Wrap the local (possibly cuda-moved) model, not self.trainer.model.
                 model = torch.nn.DataParallel(model)
         if zeus.is_tf_backend():
             if pretrained_model_file:
                 model_folder = os.path.dirname(pretrained_model_file)
                 FileOps.copy_folder(model_folder,
                                     self.trainer.get_local_worker_path())
     return model
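The torch branch follows the usual move-then-wrap order: the model goes to the GPU before being wrapped for multi-device training. A standalone illustration of that ordering (the Linear model here is a hypothetical placeholder):

 import torch

 model = torch.nn.Linear(8, 2)
 if torch.cuda.is_available():
     model = model.cuda()
     if torch.cuda.device_count() > 1:
         model = torch.nn.DataParallel(model)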
Example #3
 def save_results(self):
     """Save the results of evolution, containing the information of population and elitism."""
     _path = FileOps.join_path(self.local_output_path, General.step_name)
     FileOps.make_dir(_path)
     arch_file = FileOps.join_path(_path, 'arch.txt')
     arch_child = FileOps.join_path(_path, 'arch_child.txt')
     sel_arch_file = FileOps.join_path(_path, 'selected_arch.npy')
     sel_arch = []
     with open(arch_file, 'a') as fw_a, open(arch_child, 'a') as fw_ac:
         writer_a = csv.writer(fw_a, lineterminator='\n')
         writer_ac = csv.writer(fw_ac, lineterminator='\n')
         writer_ac.writerow(
             ['Population Iteration: ' + str(self.evolution_count + 1)])
         for c in range(self.individual_num):
             writer_ac.writerow(
                 self._log_data(net_info_type='active_only',
                                pop=self.pop[c],
                                value=self.pop[c].fitness))

         writer_a.writerow(
             ['Population Iteration: ' + str(self.evolution_count + 1)])
         for c in range(self.elitism_num):
             writer_a.writerow(
                 self._log_data(net_info_type='active_only',
                                pop=self.elitism[c],
                                value=self.elit_fitness[c]))
             sel_arch.append(self.elitism[c].gene)
     sel_arch = np.stack(sel_arch)
     np.save(sel_arch_file, sel_arch)
     if self.backup_base_path is not None:
         FileOps.copy_folder(self.local_output_path, self.backup_base_path)
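Note that arch.txt and arch_child.txt are opened in append mode, so each evolution iteration adds a new 'Population Iteration' block instead of overwriting earlier ones, while np.save stores the elite genes as one stacked array. A small sketch of the stacking step (shapes are hypothetical):

 import numpy as np

 genes = [np.zeros(4, dtype=int) for _ in range(3)]  # one gene per elite
 sel_arch = np.stack(genes)  # shape (3, 4): elites x gene length
 np.save('selected_arch.npy', sel_arch)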
Example #4
 def _init_dataloader(self):
     """Init dataloader from timm."""
     if (self.distributed and hvd.local_rank() == 0
             and 'remote_data_dir' in self.config.dataset):
         FileOps.copy_folder(self.config.dataset.remote_data_dir,
                             self.config.dataset.data_dir)
     if self.distributed:
         hvd.join()
     args = self.config.dataset
     train_dir = os.path.join(self.config.dataset.data_dir, 'train')
     dataset_train = Dataset(train_dir)
     world_size, rank = None, None
     if self.distributed:
         world_size, rank = hvd.size(), hvd.rank()
     self.trainer.train_loader = create_loader(
         dataset_train,
         input_size=tuple(args.input_size),
         batch_size=args.batch_size,
         is_training=True,
         use_prefetcher=self.config.prefetcher,
         rand_erase_prob=args.reprob,
         rand_erase_mode=args.remode,
         rand_erase_count=args.recount,
         color_jitter=args.color_jitter,
         auto_augment=args.aa,
         interpolation='random',
         mean=tuple(args.mean),
         std=tuple(args.std),
         num_workers=args.workers,
         distributed=self.distributed,
         world_size=world_size,
         rank=rank)
     valid_dir = os.path.join(self.config.dataset.data_dir, 'val')
     dataset_eval = Dataset(valid_dir)
     self.trainer.valid_loader = create_loader(
         dataset_eval,
         input_size=tuple(args.input_size),
         batch_size=4 * args.batch_size,
         is_training=False,
         use_prefetcher=self.config.prefetcher,
         interpolation=args.interpolation,
         mean=tuple(args.mean),
         std=tuple(args.std),
         num_workers=args.workers,
         distributed=self.distributed,
         world_size=world_size,
         rank=rank)
     self.trainer.batch_num_train = len(self.trainer.train_loader)
     self.trainer.batch_num_valid = len(self.trainer.valid_loader)
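Only local rank 0 copies the remote dataset to the node, and hvd.join() serves as a barrier so the other ranks wait for the copy to finish before building loaders. The pattern in isolation (remote_dir and data_dir are hypothetical placeholders):

 import horovod.torch as hvd

 hvd.init()
 if hvd.local_rank() == 0:
     FileOps.copy_folder(remote_dir, data_dir)  # stage data once per node
 hvd.join()  # all ranks block until every rank reaches this point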
Example #5
 def _output_records(self,
                     step_name,
                     records,
                     desc=True,
                     weights_file=False,
                     performance=False):
     """Dump records."""
     columns = ["worker_id", "performance", "desc"]
     outputs = []
     for record in records:
         record = record.serialize()
         _record = {}
         for key in columns:
             _record[key] = record[key]
         outputs.append(deepcopy(_record))
     data = pd.DataFrame(outputs)
     step_path = FileOps.join_path(TaskOps().local_output_path, step_name)
     FileOps.make_dir(step_path)
     _file = FileOps.join_path(step_path, "output.csv")
     try:
         data.to_csv(_file, index=False)
     except Exception:
         logging.error("Failed to save output file, file={}".format(_file))
     for record in outputs:
         worker_id = record["worker_id"]
         worker_path = TaskOps().get_local_worker_path(step_name, worker_id)
         outputs_globs = []
         if desc:
             outputs_globs += glob.glob(
                 FileOps.join_path(worker_path, "desc_*.json"))
         if weights_file:
             outputs_globs += glob.glob(
                 FileOps.join_path(worker_path, "model_*"))
         if performance:
             outputs_globs += glob.glob(
                 FileOps.join_path(worker_path, "performance_*.json"))
         for _file in outputs_globs:
             if os.path.isfile(_file):
                 FileOps.copy_file(_file, step_path)
             elif os.path.isdir(_file):
                 FileOps.copy_folder(
                     _file,
                     FileOps.join_path(step_path, os.path.basename(_file)))
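The glob patterns decide which per-worker artifacts are collected into the step folder: desc_*.json, model_* (a file or a directory, hence the isfile/isdir branch), and performance_*.json. The summary CSV itself is a plain pandas dump, for instance (records are hypothetical):

 import pandas as pd

 outputs = [
     {"worker_id": 1, "performance": {"accuracy": 0.91}, "desc": "net-a"},
     {"worker_id": 2, "performance": {"accuracy": 0.89}, "desc": "net-b"},
 ]
 pd.DataFrame(outputs).to_csv("output.csv", index=False)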
Example #6
 def backup_output_path(self):
     """Back up output to local path."""
     backup_path = TaskOps().backup_base_path
     if backup_path is None:
         return
     FileOps.copy_folder(TaskOps().local_output_path, backup_path)
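All of these backup helpers share the same guard: when no backup_base_path is configured they return without copying, so backups stay strictly opt-in. Condensed into one hypothetical helper (FileOps as above):

 def backup(local_path, backup_path):
     # No-op when no backup location is configured.
     if backup_path is None:
         return
     FileOps.copy_folder(local_path, backup_path)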