def _get_current_step_records(self):
     """Collect report records for the current step, either from the previous step's model folder or from a single configured model."""
     step_name = General.step_name
     models_folder = PipeStepConfig.pipe_step.get("models_folder")
     cur_index = PipelineConfig.steps.index(step_name)
     if cur_index >= 1 or models_folder:
         if not models_folder:
             models_folder = FileOps.join_path(
                 TaskOps().local_output_path,
                 PipelineConfig.steps[cur_index - 1])
         models_folder = models_folder.replace("{local_base_path}",
                                               TaskOps().local_base_path)
         records = ReportServer().load_records_from_model_folder(
             models_folder)
     else:
         records = self._load_single_model_records()
     final_records = []
     for record in records:
         if not record.weights_file:
             logger.error("Model file is not existed, id={}".format(
                 record.worker_id))
         else:
             record.step_name = General.step_name
             final_records.append(record)
     logging.debug("Records: {}".format(final_records))
     return final_records
Example #2
 def _simulate_tiny_pipeline(self, cfg_tiny):
     """Simulate tiny pipeline by using one sample one epoch."""
     report = ReportServer()
     for i, step_name in enumerate(PipelineConfig.steps):
         step_cfg = cfg_tiny.get(step_name)
         if step_cfg.pipe_step.type != 'SearchPipeStep':
             continue
         step_cfg.trainer.distributed = False
         step_cfg.trainer.epochs = 1
         self.restrict_config.trials[step_name] = 1
         General.step_name = step_name
         PipeStepConfig.from_dict(step_cfg)
         pipestep = PipeStep()
         if i == 0:
             pipestep.do()
             record = report.get_step_records(step_name)[-1]
             self.epoch_time = record.runtime
             _worker_path = TaskOps().local_base_path
             if os.path.exists(_worker_path):
                 os.system('rm -rf {}'.format(_worker_path))
         if step_cfg.pipe_step.type == 'SearchPipeStep':
             self.params_dict[step_name]['max_samples'] = pipestep.generator.search_alg.max_samples
         _file = os.path.join(TaskOps().step_path, ".generator")
         if os.path.exists(_file):
             os.system('rm {}'.format(_file))
Example #3
def query_task_info():
    """Get task message."""
    from vega.common.task_ops import TaskOps
    return {
        "result": "success",
        "task_id": TaskOps().task_id,
        "base_path": os.path.abspath(TaskOps().task_cfg.local_base_path),
    }
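
A minimal usage sketch for the example above; the values in the comment are illustrative, not real task IDs:

info = query_task_info()
# e.g. {"result": "success", "task_id": "<task id>", "base_path": "/abs/path/to/task"}
print(info["task_id"], info["base_path"])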
Example #4
 def _get_abs_path(cls, _path):
     """Expand the "{local_base_path}" placeholder and return an absolute path."""
     if "{local_base_path}" in _path:
         from vega.common.task_ops import TaskOps
         return os.path.abspath(
             _path.replace("{local_base_path}",
                           TaskOps().local_base_path))
     return _path
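
A hedged expansion example for the helper above; the base path is a placeholder:

# Assuming TaskOps().local_base_path == "/home/user/tasks/t1":
#   cls._get_abs_path("{local_base_path}/output")  ->  "/home/user/tasks/t1/output"
#   cls._get_abs_path("/tmp/models")               ->  "/tmp/models"  (returned unchanged)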
Example #5
def _backup_config(args):
    """Back up the user's config file to the task output directory."""
    _file = args.config_file
    from vega.common.task_ops import TaskOps
    from vega.common.file_ops import FileOps
    dest_file = FileOps.join_path(TaskOps().local_output_path, os.path.basename(_file))
    FileOps.make_base_dir(dest_file)
    FileOps.copy_file(_file, dest_file)
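
A hedged invocation sketch; the argparse namespace and config path below are placeholders built only for illustration:

import argparse

args = argparse.Namespace(config_file="./my_pipeline.yml")  # placeholder config file
_backup_config(args)  # copies my_pipeline.yml into TaskOps().local_output_path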
Example #6
 def __init__(self):
     self._load_config()
     vega.set_backend(General.backend, General.device_category)
     init_log(level=General.logger.level,
              log_file=f"{General.step_name}_worker_{self.worker_id}.log",
              log_path=TaskOps().local_log_path)
     self.report_client = ReportClient()
Example #7
 def restore(cls):
     """Restore generator from file."""
     step_path = TaskOps().step_path
     _file = os.path.join(step_path, ".generator")
     if os.path.exists(_file):
         with open(_file, "rb") as f:
             return pickle.load(f)
     else:
         return None
Example #8
def _calc_forward_latency_davinci(model,
                                  input,
                                  sess_config=None,
                                  num=10,
                                  evaluate_config=None):
    """Model forward latency calculation.

    :param model: network model
    :type model: torch or tf module
    :param input: input tensor
    :type input: Tensor of torch or tf
    :param num: number of forward passes
    :type num: int
    :param evaluate_config: configuration for evaluation on the Davinci device
    :type evaluate_config: dict
    :return: forward latency
    :rtype: float
    """
    from vega.evaluator.tools.evaluate_davinci_bolt import evaluate
    from vega.common.task_ops import TaskOps
    # backend = evaluate_config.get("backend")
    hardware = evaluate_config.get("hardware")
    remote_host = evaluate_config.get("remote_host")
    worker_path = TaskOps().local_base_path
    save_data_file = os.path.join(worker_path, "input.bin")

    latency = 0.
    now_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
    job_id = "pre_evaluate_" + now_time
    logging.info("The job id of evaluate service is {}.".format(job_id))
    if vega.is_torch_backend():
        import torch
        input_shape = input.shape
        if torch.is_tensor(input):
            input = input.cpu().numpy()
        input.tofile(save_data_file)
        for index in range(num):
            reuse_model = index != 0  # reuse the converted model after the first pass
            results = evaluate("pytorch", hardware, remote_host, model, None,
                               save_data_file, input_shape, reuse_model,
                               job_id)
            latency += float(results.get("latency"))
    elif vega.is_tf_backend():
        input_shape = input.shape.as_list()
        test_data = np.random.random(input_shape).astype(np.float32)
        test_data.tofile(save_data_file)
        for index in range(num):
            reuse_model = index != 0  # reuse the converted model after the first pass
            results = evaluate("tensorflow", hardware, remote_host, model,
                               None, save_data_file, input_shape, reuse_model,
                               job_id)
            latency += float(results.get("latency"))
    return latency / num
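
A hedged call sketch for the torch backend; the hardware, remote host, and input shape are placeholders rather than values from the source:

import torch

evaluate_config = {
    "hardware": "Davinci",                   # placeholder evaluate-service target
    "remote_host": "http://127.0.0.1:8888",  # placeholder evaluate-service address
}
dummy_input = torch.randn(1, 3, 224, 224)    # placeholder input tensor
# `model` is assumed to be an existing torch.nn.Module
avg_latency = _calc_forward_latency_davinci(model, dummy_input, num=10,
                                            evaluate_config=evaluate_config)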
Example #9
def _show_performance():
    output_file = FileOps.join_path(TaskOps().local_output_path,
                                    General.step_name, "output.csv")
    try:
        data = pd.read_csv(output_file)
    except Exception:
        logging.info("  Result file output.csv is not existed or empty.")
        return
    if data.shape[1] < 2 or data.shape[0] == 0:
        logging.info("  Result file output.csv is empty.")
        return
    logging.info("-" * 48)
    data = json.loads(data.to_json())
    logging.info("  result: {}".format(data["performance"]["0"]))
    logging.info("-" * 48)
Example #10
def convert_to_coco_api(ds):
    """Convert to coco dataset."""
    coco_ds = COCO()
    ann_id = 1
    dataset = {'images': [], 'categories': [], 'annotations': []}
    categories = set()
    for img_idx in range(len(ds)):
        img, targets = ds[img_idx]
        image_id = targets["image_id"].item()
        img_dict = {}
        img_dict['id'] = image_id
        img_dict['height'] = img.shape[-2]
        img_dict['width'] = img.shape[-1]
        dataset['images'].append(img_dict)
        bboxes = targets["boxes"]
        bboxes[:, 2:] -= bboxes[:, :2]
        bboxes = bboxes.tolist()
        labels = targets['labels'].tolist()
        areas = targets['area'].tolist()
        iscrowd = targets['iscrowd'].tolist()
        if 'masks' in targets:
            masks = targets['masks']
            masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
        if 'keypoints' in targets:
            keypoints = targets['keypoints']
            keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
        num_objs = len(bboxes)
        for i in range(num_objs):
            ann = {}
            ann['image_id'] = image_id
            ann['bbox'] = bboxes[i]
            ann['category_id'] = labels[i]
            categories.add(labels[i])
            ann['area'] = areas[i]
            ann['iscrowd'] = iscrowd[i]
            ann['id'] = ann_id
            if 'keypoints' in targets:
                ann['keypoints'] = keypoints[i]
                ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3])
            dataset['annotations'].append(ann)
            ann_id += 1
    dataset['categories'] = [{'id': i} for i in sorted(categories)]
    coco_ds.dataset = dataset
    coco_ds.createIndex()
    instances_val = os.path.join(TaskOps().local_output_path, 'instances.json')
    with open(instances_val, 'w') as f:
        json.dump(coco_ds.dataset, f)
    logging.info("dump detection instances json file: {}".format(instances_val))
    return coco_ds
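
A hedged usage note, assuming `val_dataset` is a torchvision-style detection dataset whose __getitem__ returns an (image_tensor, target_dict) pair with the keys read above:

coco_gt = convert_to_coco_api(val_dataset)  # builds an in-memory pycocotools COCO object
# coco_gt.dataset is also dumped to <local_output_path>/instances.json as a side effect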
Example #11
def load_master_ip():
    """Get the ip and port that write in a system path.

    here will not download anything from S3.
    """
    temp_folder = TaskOps().temp_path
    FileOps.make_dir(temp_folder)
    file_path = os.path.join(temp_folder, 'ip_address.txt')
    if os.path.isfile(file_path):
        with open(file_path, 'r') as f:
            ip = f.readline().strip()
            port = f.readline().strip()
            logging.info("get write ip, ip={}, port={}".format(ip, port))
            return ip, port
    else:
        return None, None
Example #12
def save_master_ip(ip_address, port, args):
    """Write the ip and port in a system path.

    :param str ip_address: The `ip_address` need to write.
    :param str port: The `port` need to write.
    :param argparse.ArgumentParser args: `args` is a argparse that should
         contain `init_method`, `rank` and `world_size`.

    """
    temp_folder = TaskOps().temp_path
    FileOps.make_dir(temp_folder)
    file_path = os.path.join(temp_folder, 'ip_address.txt')
    logging.info("write ip, file path={}".format(file_path))
    with open(file_path, 'w') as f:
        f.write(ip_address + "\n")
        f.write(port + "\n")
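
A minimal round-trip sketch combining this helper with load_master_ip from Example #11; the address and port are placeholders, and `args` is unused in the body shown:

save_master_ip("127.0.0.1", "8000", args=None)  # placeholder address and port
ip, port = load_master_ip()                     # -> ("127.0.0.1", "8000") once the file exists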
Example #13
def _init_env(cfg_path):
    """Init config and evn parameters.

    :param cfg_path: config file path
    """
    logging.getLogger().setLevel(logging.DEBUG)
    UserConfig().load(cfg_path)
    # load general
    General.from_dict(UserConfig().data.get("general"), skip_check=False)
    init_log(level=General.logger.level,
             log_file="pipeline.log",
             log_path=TaskOps().local_log_path)
    General.env = env_args()
    if not General.env:
        General.env = init_cluster_args()
    setattr(PipelineConfig, "steps", UserConfig().data.pipeline)
    set_backend(General.backend, General.device_category)
Example #14
 def summary(self):
     """Summary all record from result cache, and get performance."""
     if not self.result_record:
         return {"mAP": -1, "AP_small": -1, "AP_medium": -1, "AP_large": -1}
     det_json_file = os.path.join(TaskOps().local_output_path,
                                  'det_json_file.json')
     with open(det_json_file, 'w') as f:
         json.dump(self.result_record, f)
     eval_result = self.print_scores(det_json_file, self.anno_path)
     ap_result = eval_result.pop('AP(bbox)')
     ap_result = list(ap_result)
     ap_result = {
         "mAP": ap_result[0] * 100,
         "AP50": ap_result[1] * 100,
         "AP_small": ap_result[3] * 100,
         "AP_medium": ap_result[4] * 100,
         "AP_large": ap_result[5] * 100
     }
     if eval_result:
         ap_result.update(eval_result)
     return ap_result
Example #15
 def _save_worker_record(self, record):
     """Dump a worker's desc/hps/performance entries to its local worker directory as JSON files."""
     step_name = record.get('step_name')
     worker_id = record.get('worker_id')
     _path = TaskOps().get_local_worker_path(step_name, worker_id)
     for record_name in ["desc", "hps", "performance"]:
         _file_name = None
         _file = None
         record_value = remove_np_value(record.get(record_name))
         if record_value is None:
             if record_name == "desc":
                 record_value = {}
             else:
                 continue
         _file = None
         try:
             # for cars/darts save multi-desc
             if isinstance(record_value, list) and record_name == "desc":
                 for idx, value in enumerate(record_value):
                     _file_name = "desc_{}.json".format(idx)
                     _file = FileOps.join_path(_path, _file_name)
                     with open(_file, "w") as f:
                         json.dump(value, f)
             else:
                 if 'multi_task' in record:
                     worker_id = record.get('multi_task') if record.get(
                         'multi_task') is not None else worker_id
                 _file_name = None
                 if record_name == "desc":
                     _file_name = "desc_{}.json".format(worker_id)
                 if record_name == "hps":
                     _file_name = "hps_{}.json".format(worker_id)
                 if record_name == "performance":
                     _file_name = "performance_{}.json".format(worker_id)
                 _file = FileOps.join_path(_path, _file_name)
                 with open(_file, "w") as f:
                     json.dump(record_value, f)
         except Exception as ex:
             logger.error(
                 "Failed to save {}, file={}, desc={}, msg={}".format(
                     record_name, _file, record_value, str(ex)))
Example #16
 def _load_single_model_records(self):
     """Build a single report record from the configured model desc and pretrained weights file."""
     model_desc = PipeStepConfig.model.model_desc
     model_desc_file = PipeStepConfig.model.model_desc_file
     if model_desc_file:
         model_desc_file = model_desc_file.replace(
             "{local_base_path}",
             TaskOps().local_base_path)
         model_desc = Config(model_desc_file)
     if not model_desc:
         logger.error("Model desc or Model desc file is None.")
         return []
     model_file = PipeStepConfig.model.pretrained_model_file
     if not model_file:
         logger.error("Model file is None.")
         return []
     if not os.path.exists(model_file):
         logger.error("Model file is not existed.")
         return []
     return [
         ReportRecord().load_dict(
             dict(worker_id="1", desc=model_desc, weights_file=model_file))
     ]
Example #17
 def __init__(self, anno_path=None, category=None):
     self.anno_path = anno_path or os.path.join(TaskOps().local_output_path,
                                                'instances.json')
     self.category = category or []
     self.result_record = []
Example #18
 def dump(self):
     """Dump generator to file."""
     step_path = TaskOps().step_path
     _file = os.path.join(step_path, ".generator")
     with open(_file, "wb") as f:
         pickle.dump(self, f, protocol=pickle.HIGHEST_PROTOCOL)
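
A hedged pairing sketch for dump() above and the restore() classmethod in Example #7, assuming both are defined on the same generator class (the class name below is an assumption):

generator.dump()                # pickle the generator to <step_path>/.generator
restored = Generator.restore()  # assumed class name; returns the pickled instance, or None if the file is absent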
Example #19
def _init_env():
    if sys.version_info < (3, 6):
        sys.exit('Sorry, Python < 3.6 is not supported.')
    init_log(level=General.logger.level, log_path=TaskOps().local_log_path)
    General.env = init_cluster_args()
    _print_task_id()