Example #1
0
 def copy_pareto_output(self, step_name=None, worker_ids=None):
     """Copy files related to pareto from worker folders to the output folder.

     For every worker id a sub-folder named after the id is created under
     ``<local_output_path>/<step_name>`` and ``copy_search_file`` is called
     with the worker folder as source and that sub-folder as destination.

     :param step_name: name of the step; nothing is done when it is empty
         or when its output folder does not exist.
     :type step_name: str or None
     :param worker_ids: ids of the workers to copy from; ``None`` or an
         empty iterable copies nothing.
     :type worker_ids: iterable or None
     """
     # The step name has to be validated before it is joined into a path:
     # os.path.join(path, None) raises TypeError.
     if not step_name:
         return
     taskops = TaskOps()
     local_output_path = os.path.join(taskops.local_output_path, step_name)
     if not os.path.exists(local_output_path):
         return
     for worker_id in worker_ids or ():
         worker_name = str(worker_id)
         des_dir = os.path.join(local_output_path, worker_name)
         FileOps.make_dir(des_dir)
         local_worker_path = taskops.get_worker_subpath(
             step_name, worker_name)
         src_dir = FileOps.join_path(taskops.local_base_path,
                                     local_worker_path)
         copy_search_file(src_dir, des_dir)
Example #2
0
 def csv_to_records(self,
                    csv_file_path,
                    step_name=None,
                    record_name='best'):
     """Transfer a csv file to a list of records.

     The file that is read is ``<folder>/<record_name>.csv``. ``folder`` is
     ``csv_file_path`` when no ``step_name`` is given; whenever ``step_name``
     is given it is ``<TaskOps().local_output_path>/<step_name>``.

     :param csv_file_path: folder holding the csv file (it is joined with
         the file name, despite the parameter name).
     :param step_name: name of the step whose output folder is used.
     :param record_name: base name of the csv file, without extension.
     :return: one ``ReportRecord().load_dict(...)`` result per csv row, or
         an empty list when no location is given or the file is missing.
     :rtype: list
     """
     local_output_path = ''
     if not csv_file_path and not step_name:
         return []
     elif csv_file_path:
         local_output_path = csv_file_path
     # NOTE(review): the parenthesised part is true for every value that can
     # reach this point (an empty path does not exist, a non-empty path is
     # truthy), so step_name always overrides csv_file_path. Kept unchanged
     # because the intended precedence is not clear from this code.
     if (not os.path.exists(local_output_path)
             or local_output_path) and step_name:
         local_output_path = os.path.join(TaskOps().local_output_path,
                                          step_name)
     csv_file_path = os.path.join(local_output_path,
                                  "{}.csv".format(record_name))
     logging.info("csv_file_path: {}".format(csv_file_path))
     if not os.path.isfile(csv_file_path):
         return []
     # Parse the file once; header and rows come from the same frame.
     data = pd.read_csv(csv_file_path)
     csv_header = data.columns.values
     records = [
         ReportRecord().load_dict(dict(zip(csv_header, row)))
         for row in data.values
     ]
     logging.info("csv_to_records: {}".format(records))
     return records
Example #3
0
    def _init_model(self):
        """Initialize model if fully training a model.

        The model description is taken from the first entry of the csv list
        in ``cfg.model_desc_file`` sorted by ``'mAP'``, or directly from
        ``cfg.model_desc``. It is then merged with the config template
        through ``update_config``.

        :return: result of ``update_config(config, model_desc)``
        :raises ValueError: if the configuration has neither
            ``model_desc_file`` nor ``model_desc``
        """
        config = Config(self.cfg.config_template)
        config['total_epochs'] = self.cfg.epoch
        if 'model_desc_file' in self.cfg:
            _model_desc_file = self.cfg.model_desc_file.replace(
                "{local_base_path}",
                TaskOps().local_base_path)
            _total_list = ListDict.load_csv(_model_desc_file)
            # Sort once and reuse the first entry for both fields.
            best = _total_list.sort('mAP')[0]
            pre_arch = best['arch']
            # The second '_'-separated token of the arch string names the
            # pretrained backbone.
            pretrained = pre_arch.split('_')[1]
            pre_worker_id = best['pre_worker_id']
            # NOTE(review): the description always carries pre_worker_id=-1;
            # the id read from the csv is only used to extract the backbone.
            model_desc = dict(arch=pre_arch,
                              pre_arch=pretrained,
                              pre_worker_id=-1)
            logging.info(
                "Initialize fully train model from: {}".format(model_desc))
            # NOTE(review): the cfg attribute is spelled 'regnition' while
            # the config key is 'reignition'; both spellings are kept.
            if self.cfg.regnition:
                # re-write config from previous result
                config['model']['backbone']['reignition'] = True
                config['model']['pretrained'] = os.path.join(
                    self.output_path, pretrained + '_imagenet.pth')
            else:
                config['model']['pretrained'] = extract_backbone_from_pth(
                    self.output_path, pre_worker_id, pretrained)
        elif 'model_desc' in self.cfg:
            model_desc = self.cfg.model_desc
        else:
            raise ValueError('Missing model description!')
        model_desc = update_config(config, model_desc)
        return model_desc
Example #4
0
 def _append_record_to_csv(self,
                           record_name=None,
                           step_name=None,
                           record=None,
                           mode='a'):
     """Transfer record to csv file.

     The record is written as one row of
     ``<TaskOps().local_output_path>/<step_name>/<record_name>.csv``.
     Values that are dicts or lists are written as their ``str()`` form.
     The caller's ``record`` is left unmodified. Any exception raised while
     converting or writing is logged and swallowed.

     :param record_name: base name of the csv file, without extension.
     :param step_name: name of the step whose output folder is used.
     :param record: mapping of column name to value.
     :param mode: file mode passed to ``DataFrame.to_csv``; with ``'a'`` and
         an existing non-empty file the row is appended without a header.
     """
     local_output_path = os.path.join(TaskOps().local_output_path,
                                      step_name)
     logging.debug(
         "recode to csv, local_output_path={}".format(local_output_path))
     # NOTE(review): this returns only when record_name is empty AND the
     # step folder already exists; with an empty record_name and a missing
     # folder the code goes on to write "None.csv". Kept as is - confirm
     # the intended condition.
     if not record_name and os.path.exists(local_output_path):
         return
     file_path = os.path.join(local_output_path,
                              "{}.csv".format(record_name))
     FileOps.make_base_dir(file_path)
     try:
         # Build a separate row so the caller's record is not rewritten.
         row = {
             key: (str(record[key])
                   if isinstance(record[key], (dict, list))
                   else record[key])
             for key in record
         }
         data = pd.DataFrame([row])
         if not os.path.exists(file_path):
             data.to_csv(file_path, index=False)
         elif os.path.getsize(file_path) and mode == 'a':
             # Appending to a non-empty file: do not repeat the header.
             data.to_csv(file_path, index=False, mode=mode, header=False)
         else:
             data.to_csv(file_path, index=False, mode=mode)
     except Exception as ex:
         logging.info(
             'Can not transfer record to csv file Error: {}'.format(ex))
0
 def _save_worker_record(cls, record):
     """Save the "desc" and "performance" entries of a record as json files.

     Files are written into the folder returned by
     ``TaskOps().get_local_worker_path(step_name, worker_id)``:

     * a list-valued "desc" is written as one ``desc_<index>.json`` per item;
     * any other "desc" is written to ``desc_<worker_id>.json``;
     * "performance" is written to ``performance_<worker_id>.json``.

     Empty or missing entries are skipped. Failures are logged, not raised.

     :param record: mapping providing ``step_name``, ``worker_id`` and
         optionally ``desc`` / ``performance`` through ``get``.
     """
     step_name = record.get('step_name')
     worker_id = record.get('worker_id')
     _path = TaskOps().get_local_worker_path(step_name, worker_id)
     for record_name in ["desc", "performance"]:
         record_value = record.get(record_name)
         if not record_value:
             continue
         # Kept outside the try so the error message can name the file.
         _file = None
         try:
             # for cars/darts save multi-desc
             if isinstance(record_value, list) and record_name == "desc":
                 for idx, value in enumerate(record_value):
                     _file = FileOps.join_path(
                         _path, "desc_{}.json".format(idx))
                     with open(_file, "w") as f:
                         # Each file holds its own item, not the whole list.
                         json.dump(value, f)
             else:
                 _file = FileOps.join_path(
                     _path, "{}_{}.json".format(record_name, worker_id))
                 with open(_file, "w") as f:
                     json.dump(record_value, f)
         except Exception as ex:
             logging.error(
                 "Failed to save {}, file={}, desc={}, msg={}".format(
                     record_name, _file, record_value, str(ex)))
Example #6
0
    def __init__(self, search_space=None, **kwargs):
        """Set up the SpNas search state from ``self.config``.

        Copies the sampling settings from the config, resolves the
        ``{local_base_path}`` placeholder in the configured file names and,
        when ``last_search_result`` is configured, derives ``init_code``
        from the previous search results.

        :param search_space: search space, passed on to the base class.
        :param kwargs: extra keyword arguments for the base class.
        """
        super(SpNas, self).__init__(search_space, **kwargs)
        self.search_space = search_space
        # self.codec = Codec(self.config.codec, search_space)
        self.sample_level = self.config.sample_level
        self.max_sample = self.config.max_sample
        self.max_optimal = self.config.max_optimal
        self._total_list_file = self.config.total_list.replace(
            "{local_base_path}", TaskOps().local_base_path)
        self.serial_settings = self.config.serial_settings

        self._total_list = ListDict()
        self.sample_count = 0
        self.init_code = None
        self.output_path = TaskOps().local_output_path

        if self.config.last_search_result:
            prev_result_file = self.config.last_search_result.replace(
                "{local_base_path}", TaskOps().local_base_path)
            assert FileOps.exists(prev_result_file), \
                "Not found serial results!"
            prev_results = ListDict.load_csv(prev_result_file)
            pre_worker_id, pre_arch = self.select_from_remote(
                self.max_optimal, prev_results)
            # re-write config template
            if self.config.regnition:
                # NOTE(review): self.codec is not assigned in this method
                # (the assignment above is commented out); presumably the
                # base class provides it - confirm.
                template = self.codec.config_template
                template['model']['backbone']['reignition'] = True
                pretrained_pth = os.path.join(
                    self.output_path, pre_arch + '_imagenet.pth')
                assert FileOps.exists(pretrained_pth), \
                    "Not found {} pretrained .pth file!".format(pre_arch)
                template['model']['pretrained'] = pretrained_pth
                pre_worker_id = -1
            # update config template
            self.init_code = {
                'arch': pre_arch,
                'pre_arch': pre_arch.split('_')[1],
                'pre_worker_id': pre_worker_id,
            }

        logging.info(
            "inited SpNas {}-level search...".format(self.sample_level))
Example #7
0
 def _get_current_step_records(self):
     """Return the records the current step works on.

     For the first step with no ``models_folder`` configured, a single new
     ``ReportRecord(step_name, 0)`` is returned. Otherwise the records are
     loaded from the configured ``models_folder``, or from the output folder
     of the previous step when none is configured. Every returned record
     gets its ``step_name`` set to the current step.

     :return: list of records
     """
     step_name = self.task.step_name
     models_folder = PipeStepConfig.pipe_step.get("models_folder")
     cur_index = PipelineConfig.steps.index(step_name)
     if cur_index < 1 and not models_folder:
         records = [ReportRecord(step_name, 0)]
     else:
         if not models_folder:
             # Fall back to the output folder of the previous step.
             previous_step = PipelineConfig.steps[cur_index - 1]
             models_folder = FileOps.join_path(
                 TaskOps().local_output_path, previous_step)
         models_folder = models_folder.replace(
             "{local_base_path}", TaskOps().local_base_path)
         records = Report().load_records_from_model_folder(models_folder)
     logging.debug("Records: {}".format(records))
     for item in records:
         item.step_name = step_name
     return records
Example #8
0
 def _save_model_desc_file(self, id, desc):
     """Write selected entries of a model description to a json file.

     Only the keys "type", "modules" and "custom" are kept. The file is
     ``<local_output_path>/nas/model_desc_<id>.json``.

     :param id: value used in the file name.
     :param desc: mapping holding the model description.
     """
     task_ops = TaskOps(UserConfig().data.general)
     file_name = "model_desc_{}.json".format(id)
     desc_file = os.path.join(task_ops.local_output_path, "nas", file_name)
     FileOps.make_base_dir(desc_file)
     kept_keys = ("type", "modules", "custom")
     selected = {key: desc[key] for key in desc if key in kept_keys}
     with open(desc_file, "w") as handle:
         json.dump(selected, handle)
Example #9
0
 def _output_records(self,
                     step_name,
                     records,
                     desc=True,
                     weights_file=False,
                     performance=False):
     """Dump records to ``output.csv`` and copy worker files to the step folder.

     The "worker_id", "performance" and "desc" fields of every serialized
     record are written to ``<local_output_path>/<step_name>/output.csv``.
     For every record the selected files of its worker folder are then
     copied into the step folder.

     :param step_name: name of the step.
     :param records: objects providing ``serialize()``.
     :param desc: copy files matching ``desc_*.json``.
     :param weights_file: copy files matching ``model_*.pth``.
     :param performance: copy files matching ``performance_*.json``.
     """
     columns = ("worker_id", "performance", "desc")
     outputs = []
     for item in records:
         serialized = item.serialize()
         outputs.append(deepcopy({key: serialized[key] for key in columns}))
     step_path = None
     data = pd.DataFrame(outputs)
     step_path = FileOps.join_path(TaskOps().local_output_path, step_name)
     FileOps.make_dir(step_path)
     csv_file = FileOps.join_path(step_path, "output.csv")
     try:
         data.to_csv(csv_file, index=False)
     except Exception:
         logging.error(
             "Failed to save output file, file={}".format(csv_file))
     # Glob patterns for the enabled file kinds, in copy order.
     selection = (
         (desc, "desc_*.json"),
         (weights_file, "model_*.pth"),
         (performance, "performance_*.json"),
     )
     patterns = [pattern for enabled, pattern in selection if enabled]
     for output in outputs:
         worker_path = TaskOps().get_local_worker_path(
             step_name, output["worker_id"])
         # Collect every match first, then copy.
         matched = []
         for pattern in patterns:
             matched += glob.glob(FileOps.join_path(worker_path, pattern))
         for found in matched:
             FileOps.copy_file(found, step_path)
Example #10
0
 def _load_pretrained_model(cls, network, model, model_checkpoint):
     if not model_checkpoint and network._model_type == NetTypes.TORCH_VISION_MODEL:
         model_file_name = get_torchvision_model_file(network._model_name)
         full_path = "{}/torchvision_models/checkpoints/{}".format(
             TaskOps().model_zoo_path, model_file_name)
     else:
         full_path = model_checkpoint
     logging.info("load model weights from file.")
     logging.debug("Weights file: {}".format(full_path))
     if not os.path.isfile(full_path):
         raise "Pretrained model is not existed, model={}".format(full_path)
     checkpoint = torch.load(full_path)
     model.load_state_dict(checkpoint)
     return model
Example #11
0
 def _start_cluster(self):
     """Set and start dask distributed cluster.

     Creates the distributor and its client for the master address, then
     registers a ``WorkerEnv`` plugin on the client.
     """
     self.md = ClusterDaskDistributor(self.dask_env.master_address)
     self.client = self.md.get_client()
     # BATCH_CURRENT_HOST takes precedence over BATCH_CUSTOM0_HOSTS;
     # local_host stays None when neither variable is set.
     local_host = None
     for env_name in ("BATCH_CURRENT_HOST", "BATCH_CUSTOM0_HOSTS"):
         if env_name in os.environ:
             local_host = os.environ[env_name]
             break
     plugin = WorkerEnv(
         self.dask_env.slave_proc_num,
         self.dask_env.slave_gpus_per_proc,
         local_host,
         os.getpid(),
         TaskOps(self.cfg).temp_path,
     )
     self.client.register_worker_plugin(plugin)
Example #12
0
 def __init__(self):
     """Init master attrs, set up and start the dask cluster and the evaluator pool.

     The process exits with status 0 when the dask environment fails to
     start or when this node is not the master.
     """
     general_cfg = copy.deepcopy(UserConfig().data.general)
     self.cfg = general_cfg
     self.task_count = 0
     self.eval_count = general_cfg.worker.eval_count
     self.dask_env = DaskEnv(
         UserConfig().data.env,
         self.__master_path__,
         general_cfg.worker.gpus_per_job,
         TaskOps(general_cfg).temp_path,
     )
     started = self.dask_env.start()
     if not (started and self.dask_env.is_master):
         sys.exit(0)
     self._start_cluster()
     self._start_evaluator_multiprocess()
     self.t_queue = Queue()
     # now save GPU and Dloop Evaluator result.
     self.e_queue = utils.PairDictQueue()
Example #13
0
 def update(self, record):
     """Update sampler.

     Loads the pickled performance info of the worker named in ``record``,
     appends it to the total list when present, saves the output and, when
     a backup path is set, copies the output folder to it.

     :param record: mapping providing ``step_name`` and ``worker_id``.
     """
     step_name, worker_id = record.get("step_name"), record.get("worker_id")
     result_dir = TaskOps().get_local_worker_path(step_name, worker_id)
     performance_file = self.performance_path(result_dir)
     logging.info(
         "SpNas.update(), performance file={}".format(performance_file))
     info = FileOps.load_pickle(performance_file)
     if info is None:
         logging.info(
             "SpNas.update(), file is not exited, performance file={}".format(
                 performance_file))
     else:
         self._total_list.append(info)
     self.save_output(self.output_path)
     if self.backup_base_path is not None:
         FileOps.copy_folder(self.output_path, self.backup_base_path)
Example #14
0
def set_torch_home():
    """Set TORCH_HOME to the absolute ``torchvision_models`` path of the model zoo."""
    model_zoo_path = TaskOps(DefaultConfig().data.general).model_zoo_path
    torch_home = "{}/torchvision_models".format(model_zoo_path)
    os.environ['TORCH_HOME'] = os.path.abspath(torch_home)
Example #15
0
 def __init__(self):
     """Create a default ``TaskOps`` instance and keep it as ``self.task``."""
     self.task = TaskOps()
Example #16
0
 def backup_output_path(self):
     """Copy the local output folder to the backup path, if one is set."""
     destination = TaskOps().backup_base_path
     if destination is not None:
         FileOps.copy_folder(TaskOps().local_output_path, destination)
Example #17
0
 def __init__(self):
     """Create ``self.task`` as a ``TaskOps`` built from ``UserConfig().data.general``."""
     self.task = TaskOps(UserConfig().data.general)