def init_res(self,
                 stages_pattern,
                 return_patterns=None,
                 return_stages=None):
        if return_patterns and return_stages:
            msg = f"The 'return_patterns' would be ignored when 'return_stages' is set."
            logger.warning(msg)
            return_stages = None

        if return_stages is True:
            return_patterns = stages_pattern
        # return_stages is int or bool
        if type(return_stages) is int:
            return_stages = [return_stages]
        if isinstance(return_stages, list):
            if max(return_stages) > len(stages_pattern) or min(
                    return_stages) < 0:
                msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}."
                logger.warning(msg)
                return_stages = [
                    val for val in return_stages
                    if val >= 0 and val < len(stages_pattern)
                ]
            return_patterns = [stages_pattern[i] for i in return_stages]

        if return_patterns:
            self.update_res(return_patterns)
Exemple #2
0
def override(dl, ks, v):
    """
    Recursively replace dict of list

    Args:
        dl(dict or list): dict or list to be replaced
        ks(list): list of keys
        v(str): value to be replaced
    """
    def str2num(v):
        try:
            return eval(v)
        except Exception:
            return v

    assert isinstance(dl, (list, dict)), ("{} should be a list or a dict")
    assert len(ks) > 0, ('lenght of keys should larger than 0')
    if isinstance(dl, list):
        k = str2num(ks[0])
        if len(ks) == 1:
            assert k < len(dl), ('index({}) out of range({})'.format(k, dl))
            dl[k] = str2num(v)
        else:
            override(dl[k], ks[1:], v)
    else:
        if len(ks) == 1:
            # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl))
            if not ks[0] in dl:
                logger.warning('A new filed ({}) detected!'.format(ks[0]))
            dl[ks[0]] = str2num(v)
        else:
            if not ks[0] in dl:
                logger.warning('A new filed ({}) detected!'.format(ks[0]))
                dl[ks[0]] = {}
            override(dl[ks[0]], ks[1:], v)
Exemple #3
0
    def __init__(self, class_num, **op_dict):
        """Build OpSampler

        Raises:
            Exception: The parameter \"prob\" of operator(s) are be set error.
        """
        if not class_num:
            msg = "Please set \"Arch.class_num\" in config if use \"OpSampler\"."
            logger.error(Exception(msg))
            raise Exception(msg)

        if len(op_dict) < 1:
            msg = f"ConfigWarning: No operator in \"OpSampler\". \"OpSampler\" has been skipped."
            logger.warning(msg)

        self.ops = {}
        total_prob = 0
        for op_name in op_dict:
            param = op_dict[op_name]
            if "prob" not in param:
                msg = f"ConfigWarning: Parameter \"prob\" should be set when use operator in \"OpSampler\". The operator \"{op_name}\"'s prob has been set \"0\"."
                logger.warning(msg)
            prob = param.pop("prob", 0)
            total_prob += prob
            param.update({"class_num": class_num})
            op = eval(op_name)(**param)
            self.ops.update({op: prob})

        if total_prob > 1:
            msg = f"ConfigError: The total prob of operators in \"OpSampler\" should be less 1."
            logger.error(Exception(msg))
            raise Exception(msg)

        # add "None Op" when total_prob < 1, "None Op" do nothing
        self.ops[None] = 1 - total_prob
Exemple #4
0
def create_model(architecture, image, classes_num, config, is_train):
    """
    Create a model

    Args:
        architecture(dict): architecture information,
            name(such as ResNet50) is needed
        image(variable): model input variable
        classes_num(int): num of classes
        config(dict): model config

    Returns:
        out(variable): model output variable
    """
    name = architecture["name"]
    params = architecture.get("params", {})

    if "data_format" in config:
        params["data_format"] = config["data_format"]
        data_format = config["data_format"]
    input_image_channel = config.get('image_shape', [3, 224, 224])[0]
    if input_image_channel != 3:
        logger.warning(
            "Input image channel is changed to {}, maybe for better speed-up".
            format(input_image_channel))
        params["input_image_channel"] = input_image_channel
    if "is_test" in params:
        params['is_test'] = not is_train
    model = architectures.__dict__[name](class_dim=classes_num, **params)

    out = model(image)
    return out
Exemple #5
0
    def normal_predict(self):
        image_list = get_image_list(self.args.image_file)
        batch_input_list = []
        img_name_list = []
        cnt = 0
        for idx, img_path in enumerate(image_list):
            img = cv2.imread(img_path)
            if img is None:
                logger.warning(
                    "Image file failed to read and has been skipped. The path: {}".
                    format(img_path))
                continue
            else:
                img = img[:, :, ::-1]
                img = preprocess(img, args)
                batch_input_list.append(img)
                img_name = img_path.split("/")[-1]
                img_name_list.append(img_name)
                cnt += 1

            if cnt % args.batch_size == 0 or (idx + 1) == len(image_list):
                batch_outputs = self.predict(np.array(batch_input_list))
                batch_result_list = postprocess(batch_outputs, self.args.top_k)

                for number, result_dict in enumerate(batch_result_list):
                    filename = img_name_list[number]
                    clas_ids = result_dict["clas_ids"]
                    scores_str = "[{}]".format(", ".join("{:.2f}".format(
                        r) for r in result_dict["scores"]))
                    print(
                        "File:{}, Top-{} result: class id(s): {}, score(s): {}".
                        format(filename, self.args.top_k, clas_ids,
                               scores_str))
                batch_input_list = []
                img_name_list = []
Exemple #6
0
def build_optimizer(config, epochs, step_each_epoch, model_list=None):
    config = copy.deepcopy(config)
    # step1 build lr
    lr = build_lr_scheduler(config.pop('lr'), epochs, step_each_epoch)
    logger.debug("build lr ({}) success..".format(lr))
    # step2 build regularization
    if 'regularizer' in config and config['regularizer'] is not None:
        if 'weight_decay' in config:
            logger.warning(
                "ConfigError: Only one of regularizer and weight_decay can be set in Optimizer Config. \"weight_decay\" has been ignored."
            )
        reg_config = config.pop('regularizer')
        reg_name = reg_config.pop('name') + 'Decay'
        reg = getattr(paddle.regularizer, reg_name)(**reg_config)
        config["weight_decay"] = reg
        logger.debug("build regularizer ({}) success..".format(reg))
    # step3 build optimizer
    optim_name = config.pop('name')
    if 'clip_norm' in config:
        clip_norm = config.pop('clip_norm')
        grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
    else:
        grad_clip = None
    optim = getattr(optimizer, optim_name)(learning_rate=lr,
                                           grad_clip=grad_clip,
                                           **config)(model_list=model_list)
    logger.debug("build optimizer ({}) success..".format(optim))
    return optim, lr
def main(args):
    benchmark_file_list = args.benchmark_file_list
    model_infos = parse_model_infos(benchmark_file_list)
    right_models = []
    wrong_models = []

    for model_info in model_infos:
        try:
            pretrained_url = model_info["pretrain_path"]
            fname = _download(pretrained_url, args.pretrained_dir)
            pretrained_path = os.path.splitext(fname)[0]
            if pretrained_url.endswith("tar"):
                path = _decompress(fname)
                pretrained_path = os.path.join(
                    os.path.dirname(pretrained_path), path)

            args.config = model_info["config_path"]
            args.override = [
                "pretrained_model={}".format(pretrained_path),
                "VALID.batch_size=256",
                "VALID.num_workers=16",
                "load_static_weights=True",
                "print_interval=100",
            ]

            manager = Manager()
            return_dict = manager.dict()

            # A hack method to avoid name conflict.
            # Multi-process maybe a better method here.
            # More details can be seen in branch 2.0-beta.
            # TODO: fluid needs to be removed in the future.
            with paddle.utils.unique_name.guard():
                eval.main(args, return_dict)

            top1_acc = return_dict.get("top1_acc", 0.0)
        except Exception as e:
            logger.error(e)
            top1_acc = 0.0
        diff = abs(top1_acc - model_info["top1_acc"])
        if diff > 0.001:
            err_info = "[{}]Top-1 acc diff should be <= 0.001 but got diff {}, gt acc: {}, eval acc: {}".format(
                model_info["model_name"], diff, model_info["top1_acc"],
                top1_acc)
            logger.warning(err_info)
            wrong_models.append(model_info["model_name"])
        else:
            right_models.append(model_info["model_name"])

    logger.info("[number of right models: {}, they are: {}".format(
        len(right_models), right_models))
    logger.info("[number of wrong models: {}, they are: {}".format(
        len(wrong_models), wrong_models))
Exemple #8
0
def _mkdir_if_not_exist(path):
    """
    mkdir if not exists, ignore the exception when multiprocess mkdir together
    """
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno == errno.EEXIST and os.path.isdir(path):
                logger.warning(
                    'be happy if some process has already created {}'.format(
                        path))
            else:
                raise OSError('Failed to mkdir {}'.format(path))
Exemple #9
0
def load_params(exe, prog, path, ignore_params=None):
    """
    Load model from the given path.
    Args:
        exe (fluid.Executor): The fluid.Executor object.
        prog (fluid.Program): load weight to which Program object.
        path (string): URL string or loca model path.
        ignore_params (list): ignore variable to load when finetuning.
            It can be specified by finetune_exclude_pretrained_params
            and the usage can refer to the document
            docs/advanced_tutorials/TRANSFER_LEARNING.md
    """
    if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
        raise ValueError("Model pretrain path {} does not "
                         "exists.".format(path))

    logger.info(
        logger.coloring('Loading parameters from {}...'.format(path),
                        'HEADER'))

    ignore_set = set()
    state = _load_state(path)

    # ignore the parameter which mismatch the shape
    # between the model and pretrain weight.
    all_var_shape = {}
    for block in prog.blocks:
        for param in block.all_parameters():
            all_var_shape[param.name] = param.shape
    ignore_set.update([
        name for name, shape in all_var_shape.items()
        if name in state and shape != state[name].shape
    ])

    if ignore_params:
        all_var_names = [var.name for var in prog.list_vars()]
        ignore_list = filter(
            lambda var: any([re.match(name, var) for name in ignore_params]),
            all_var_names)
        ignore_set.update(list(ignore_list))

    if len(ignore_set) > 0:
        for k in ignore_set:
            if k in state:
                logger.warning(
                    'variable {} is already excluded automatically'.format(k))
                del state[k]

    paddle.static.set_program_state(prog, state)
 def __init__(self,
              step_each_epoch,
              decay_epochs,
              values,
              epochs,
              warmup_epoch=0,
              warmup_start_lr=0.0,
              last_epoch=-1,
              **kwargs):
     super().__init__()
     if warmup_epoch >= epochs:
         msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
         logger.warning(msg)
         warmup_epoch = epochs
     self.boundaries = [step_each_epoch * e for e in decay_epochs]
     self.values = values
     self.last_epoch = last_epoch
     self.warmup_steps = round(warmup_epoch * step_each_epoch)
     self.warmup_start_lr = warmup_start_lr
Exemple #11
0
def create_model(architecture, image, classes_num, config, is_train):
    """
    Create a model

    Args:
        architecture(dict): architecture information,
            name(such as ResNet50) is needed
        image(variable): model input variable
        classes_num(int): num of classes
        config(dict): model config

    Returns:
        out(variable): model output variable
    """
    use_pure_fp16 = config.get("use_pure_fp16", False)
    name = architecture["name"]
    params = architecture.get("params", {})

    data_format = "NCHW"
    if "data_format" in config:
        params["data_format"] = config["data_format"]
        data_format = config["data_format"]
    input_image_channel = config.get('image_shape', [3, 224, 224])[0]
    if input_image_channel != 3:
        logger.warning(
            "Input image channel is changed to {}, maybe for better speed-up".
            format(input_image_channel))
        params["input_image_channel"] = input_image_channel
    if "is_test" in params:
        params['is_test'] = not is_train
    model = architectures.__dict__[name](class_dim=classes_num, **params)

    if use_pure_fp16 and not config.get("use_dali", False):
        image = image.astype('float16')
    if data_format == "NHWC":
        image = paddle.tensor.transpose(image, [0, 2, 3, 1])
        image.stop_gradient = True
    out = model(image)
    if config.get("use_pure_fp16", False):
        cast_model_to_fp16(paddle.static.default_main_program())
        out = out.astype('float32')
    return out
 def __init__(self,
              learning_rate,
              step_each_epoch,
              epochs,
              eta_min=0.0,
              warmup_epoch=0,
              warmup_start_lr=0.0,
              last_epoch=-1,
              **kwargs):
     super().__init__()
     if warmup_epoch >= epochs:
         msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
         logger.warning(msg)
         warmup_epoch = epochs
     self.learning_rate = learning_rate
     self.T_max = (epochs - warmup_epoch) * step_each_epoch
     self.eta_min = eta_min
     self.last_epoch = last_epoch
     self.warmup_steps = round(warmup_epoch * step_each_epoch)
     self.warmup_start_lr = warmup_start_lr
Exemple #13
0
    def __init__(self, interpolation=None, backend="cv2"):
        _cv2_interp_from_str = {
            'nearest': cv2.INTER_NEAREST,
            'bilinear': cv2.INTER_LINEAR,
            'area': cv2.INTER_AREA,
            'bicubic': cv2.INTER_CUBIC,
            'lanczos': cv2.INTER_LANCZOS4
        }
        _pil_interp_from_str = {
            'nearest': Image.NEAREST,
            'bilinear': Image.BILINEAR,
            'bicubic': Image.BICUBIC,
            'box': Image.BOX,
            'lanczos': Image.LANCZOS,
            'hamming': Image.HAMMING
        }

        def _pil_resize(src, size, resample):
            pil_img = Image.fromarray(src)
            pil_img = pil_img.resize(size, resample)
            return np.asarray(pil_img)

        if backend.lower() == "cv2":
            if isinstance(interpolation, str):
                interpolation = _cv2_interp_from_str[interpolation.lower()]
            # compatible with opencv < version 4.4.0
            elif interpolation is None:
                interpolation = cv2.INTER_LINEAR
            self.resize_func = partial(cv2.resize, interpolation=interpolation)
        elif backend.lower() == "pil":
            if isinstance(interpolation, str):
                interpolation = _pil_interp_from_str[interpolation.lower()]
            self.resize_func = partial(_pil_resize, resample=interpolation)
        else:
            logger.warning(
                f"The backend of Resize only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead."
            )
            self.resize_func = cv2.resize
def parse_pattern_str(
    pattern: str, parent_layer: nn.Layer
) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]:
    """parse the string type pattern.

    Args:
        pattern (str): The pattern to discribe layer.
        parent_layer (nn.Layer): The root layer relative to the pattern.

    Returns:
        Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order:
                                                                [
                                                                    {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist},
                                                                    {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist},
                                                                    ...
                                                                ]
    """

    pattern_list = pattern.split(".")
    if not pattern_list:
        msg = f"The pattern('{pattern}') is illegal. Please check and retry."
        logger.warning(msg)
        return None

    layer_list = []
    while len(pattern_list) > 0:
        if '[' in pattern_list[0]:
            target_layer_name = pattern_list[0].split('[')[0]
            target_layer_index = pattern_list[0].split('[')[1].split(']')[0]
        else:
            target_layer_name = pattern_list[0]
            target_layer_index = None

        target_layer = getattr(parent_layer, target_layer_name, None)

        if target_layer is None:
            msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')."
            logger.warning(msg)
            return None

        if target_layer_index and target_layer:
            if int(target_layer_index) < 0 or int(target_layer_index) >= len(
                    target_layer):
                msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0."
                logger.warning(msg)
                return None

            target_layer = target_layer[target_layer_index]

        layer_list.append({
            "layer": target_layer,
            "name": target_layer_name,
            "index": target_layer_index
        })

        pattern_list = pattern_list[1:]
        parent_layer = target_layer
    return layer_list
    def stop_after(self, stop_layer_name: str) -> bool:
        """stop forward and backward after 'stop_layer_name'.

        Args:
            stop_layer_name (str): The name of layer that stop forward and backward after this layer.

        Returns:
            bool: 'True' if successful, 'False' otherwise.
        """

        layer_list = parse_pattern_str(stop_layer_name, self)
        if not layer_list:
            return False

        parent_layer = self
        for layer_dict in layer_list:
            name, index = layer_dict["name"], layer_dict["index"]
            if not set_identity(parent_layer, name, index):
                msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'."
                logger.warning(msg)
                return False
            parent_layer = layer_dict["layer"]

        return True
Exemple #16
0
def run(dataloader,
        exe,
        program,
        feeds,
        fetchs,
        epoch=0,
        mode='train',
        config=None,
        vdl_writer=None,
        lr_scheduler=None):
    """
    Feed data to the model and fetch the measures and loss

    Args:
        dataloader(paddle io dataloader):
        exe():
        program():
        fetchs(dict): dict of measures and the loss
        epoch(int): epoch of training or validation
        model(str): log only

    Returns:
    """
    fetch_list = [f[0] for f in fetchs.values()]
    metric_list = [
        ("lr", AverageMeter('lr', 'f', postfix=",", need_avg=False)),
        ("batch_time", AverageMeter('batch_cost', '.5f', postfix=" s,")),
        ("reader_time", AverageMeter('reader_cost', '.5f', postfix=" s,")),
    ]
    topk_name = 'top{}'.format(config.topk)
    metric_list.insert(0, ("loss", fetchs["loss"][1]))
    use_mix = config.get("use_mix", False) and mode == "train"
    if not use_mix:
        metric_list.insert(0, (topk_name, fetchs[topk_name][1]))
        metric_list.insert(0, ("top1", fetchs["top1"][1]))

    metric_list = OrderedDict(metric_list)

    for m in metric_list.values():
        m.reset()

    use_dali = config.get('use_dali', False)
    dataloader = dataloader if use_dali else dataloader()
    tic = time.time()

    idx = 0
    batch_size = None
    while True:
        # The DALI maybe raise RuntimeError for some particular images, such as ImageNet1k/n04418357_26036.JPEG
        try:
            batch = next(dataloader)
        except StopIteration:
            break
        except RuntimeError:
            logger.warning(
                "Except RuntimeError when reading data from dataloader, try to read once again..."
            )
            continue
        idx += 1
        # ignore the warmup iters
        if idx == 5:
            metric_list["batch_time"].reset()
            metric_list["reader_time"].reset()

        metric_list['reader_time'].update(time.time() - tic)

        if use_dali:
            batch_size = batch[0]["feed_image"].shape()[0]
            feed_dict = batch[0]
        else:
            batch_size = batch[0].shape()[0]
            feed_dict = {
                key.name: batch[idx]
                for idx, key in enumerate(feeds.values())
            }
        metrics = exe.run(program=program,
                          feed=feed_dict,
                          fetch_list=fetch_list)

        for name, m in zip(fetchs.keys(), metrics):
            metric_list[name].update(np.mean(m), batch_size)
        metric_list["batch_time"].update(time.time() - tic)
        if mode == "train":
            metric_list['lr'].update(lr_scheduler.get_lr())

        fetchs_str = ' '.join([
            str(metric_list[key].mean)
            if "time" in key else str(metric_list[key].value)
            for key in metric_list
        ])
        ips_info = " ips: {:.5f} images/sec.".format(
            batch_size / metric_list["batch_time"].avg)
        fetchs_str += ips_info

        if lr_scheduler is not None:
            if lr_scheduler.update_specified:
                curr_global_counter = lr_scheduler.step_each_epoch * epoch + idx
                update = max(
                    0, curr_global_counter - lr_scheduler.update_start_step
                ) % lr_scheduler.update_step_interval == 0
                if update:
                    lr_scheduler.step()
            else:
                lr_scheduler.step()

        if vdl_writer:
            global total_step
            logger.scaler('loss', metrics[0][0], total_step, vdl_writer)
            total_step += 1
        if mode == 'valid':
            if idx % config.get('print_interval', 10) == 0:
                logger.info("{:s} step:{:<4d} {:s}".format(
                    mode, idx, fetchs_str))
        else:
            epoch_str = "epoch:{:<3d}".format(epoch)
            step_str = "{:s} step:{:<4d}".format(mode, idx)

            if idx % config.get('print_interval', 10) == 0:
                logger.info("{:s} {:s} {:s}".format(
                    logger.coloring(epoch_str, "HEADER")
                    if idx == 0 else epoch_str,
                    logger.coloring(step_str, "PURPLE"),
                    logger.coloring(fetchs_str, 'OKGREEN')))

        tic = time.time()

    end_str = ' '.join([str(m.mean) for m in metric_list.values()] +
                       [metric_list["batch_time"].total])
    ips_info = "ips: {:.5f} images/sec.".format(
        batch_size * metric_list["batch_time"].count /
        metric_list["batch_time"].sum)
    if mode == 'valid':
        logger.info("END {:s} {:s} {:s}".format(mode, end_str, ips_info))
    else:
        end_epoch_str = "END epoch:{:<3d}".format(epoch)
        logger.info("{:s} {:s} {:s} {:s}".format(end_epoch_str, mode, end_str,
                                                 ips_info))
    if use_dali:
        dataloader.reset()

    # return top1_acc in order to save the best model
    if mode == 'valid':
        return fetchs["top1"][1].avg
Exemple #17
0
def run(dataloader,
        exe,
        program,
        feeds,
        fetchs,
        epoch=0,
        mode='train',
        config=None,
        vdl_writer=None,
        lr_scheduler=None,
        profiler_options=None):
    """
    Feed data to the model and fetch the measures and loss

    Args:
        dataloader(paddle io dataloader):
        exe():
        program():
        fetchs(dict): dict of measures and the loss
        epoch(int): epoch of training or evaluation
        model(str): log only

    Returns:
    """
    fetch_list = [f[0] for f in fetchs.values()]
    metric_dict = OrderedDict([("lr",
                                AverageMeter('lr',
                                             'f',
                                             postfix=",",
                                             need_avg=False))])

    for k in fetchs:
        metric_dict[k] = fetchs[k][1]

    metric_dict["batch_time"] = AverageMeter('batch_cost',
                                             '.5f',
                                             postfix=" s,")
    metric_dict["reader_time"] = AverageMeter('reader_cost',
                                              '.5f',
                                              postfix=" s,")

    for m in metric_dict.values():
        m.reset()

    use_dali = config["Global"].get('use_dali', False)
    tic = time.time()

    if not use_dali:
        dataloader = dataloader()

    idx = 0
    batch_size = None
    while True:
        # The DALI maybe raise RuntimeError for some particular images, such as ImageNet1k/n04418357_26036.JPEG
        try:
            batch = next(dataloader)
        except StopIteration:
            break
        except RuntimeError:
            logger.warning(
                "Except RuntimeError when reading data from dataloader, try to read once again..."
            )
            continue
        idx += 1
        # ignore the warmup iters
        if idx == 5:
            metric_dict["batch_time"].reset()
            metric_dict["reader_time"].reset()

        metric_dict['reader_time'].update(time.time() - tic)

        profiler.add_profiler_step(profiler_options)

        if use_dali:
            batch_size = batch[0]["data"].shape()[0]
            feed_dict = batch[0]
        else:
            batch_size = batch[0].shape()[0]
            feed_dict = {
                key.name: batch[idx]
                for idx, key in enumerate(feeds.values())
            }

        metrics = exe.run(program=program,
                          feed=feed_dict,
                          fetch_list=fetch_list)

        for name, m in zip(fetchs.keys(), metrics):
            metric_dict[name].update(np.mean(m), batch_size)
        metric_dict["batch_time"].update(time.time() - tic)
        if mode == "train":
            metric_dict['lr'].update(lr_scheduler.get_lr())

        fetchs_str = ' '.join([
            str(metric_dict[key].mean)
            if "time" in key else str(metric_dict[key].value)
            for key in metric_dict
        ])
        ips_info = " ips: {:.5f} images/sec.".format(
            batch_size / metric_dict["batch_time"].avg)
        fetchs_str += ips_info

        if lr_scheduler is not None:
            lr_scheduler.step()

        if vdl_writer:
            global total_step
            logger.scaler('loss', metrics[0][0], total_step, vdl_writer)
            total_step += 1
        if mode == 'eval':
            if idx % config.get('print_interval', 10) == 0:
                logger.info("{:s} step:{:<4d} {:s}".format(
                    mode, idx, fetchs_str))
        else:
            epoch_str = "epoch:{:<3d}".format(epoch)
            step_str = "{:s} step:{:<4d}".format(mode, idx)

            if idx % config.get('print_interval', 10) == 0:
                logger.info("{:s} {:s} {:s}".format(epoch_str, step_str,
                                                    fetchs_str))

        tic = time.time()

    end_str = ' '.join([str(m.mean) for m in metric_dict.values()] +
                       [metric_dict["batch_time"].total])
    ips_info = "ips: {:.5f} images/sec.".format(batch_size /
                                                metric_dict["batch_time"].avg)
    if mode == 'eval':
        logger.info("END {:s} {:s} {:s}".format(mode, end_str, ips_info))
    else:
        end_epoch_str = "END epoch:{:<3d}".format(epoch)
        logger.info("{:s} {:s} {:s} {:s}".format(end_epoch_str, mode, end_str,
                                                 ips_info))
    if use_dali:
        dataloader.reset()

    # return top1_acc in order to save the best model
    if mode == 'eval':
        return fetchs["top1"][1].avg
Exemple #18
0
def main(args):
    image_path_list = get_image_list(args.image_file)
    headers = {"Content-type": "application/json"}

    cnt = 0
    predict_time = 0
    all_score = 0.0
    start_time = time.time()

    batch_input_list = []
    img_name_list = []
    cnt = 0
    for idx, img_path in enumerate(image_path_list):
        img = cv2.imread(img_path)
        if img is None:
            logger.warning(
                "Image file failed to read and has been skipped. The path: {}".
                format(img_path))
            continue
        else:
            img = img[:, :, ::-1]
            data = preprocess(img, args)
            batch_input_list.append(data)
            img_name = img_path.split('/')[-1]
            img_name_list.append(img_name)
            cnt += 1
        if cnt % args.batch_size == 0 or (idx + 1) == len(image_path_list):
            batch_input = np.array(batch_input_list)
            b64str, revert_shape = np_to_b64(batch_input)
            data = {
                "images": b64str,
                "revert_params": {
                    "shape": revert_shape,
                    "dtype": str(batch_input.dtype)
                },
                "top_k": args.top_k
            }
            try:
                r = requests.post(url=args.server_url,
                                  headers=headers,
                                  data=json.dumps(data))
                r.raise_for_status
                if r.json()["status"] != "000":
                    msg = r.json()["msg"]
                    raise Exception(msg)
            except Exception as e:
                logger.error("{}, in file(s): {} etc.".format(
                    e, img_name_list[0]))
                continue
            else:
                results = r.json()["results"]
                batch_result_list = results["prediction"]
                elapse = results["elapse"]

                cnt += len(batch_result_list)
                predict_time += elapse

                for number, result_list in enumerate(batch_result_list):
                    all_score += result_list["scores"][0]
                    result_str = ""
                    for i in range(len(result_list["clas_ids"])):
                        result_str += "{}: {:.2f}\t".format(
                            result_list["clas_ids"][i],
                            result_list["scores"][i])
                    logger.info("File:{}, The top-{} result(s): {}".format(
                        img_name_list[number], args.top_k, result_str))

            finally:
                batch_input_list = []
                img_name_list = []

    total_time = time.time() - start_time
    logger.info("The average time of prediction cost: {:.3f} s/image".format(
        predict_time / cnt))
    logger.info("The average time cost: {:.3f} s/image".format(total_time /
                                                               cnt))
    logger.info("The average top-1 score: {:.3f}".format(all_score / cnt))
Exemple #19
0
    def __init__(self, config, mode="train"):
        assert mode in ["train", "eval", "infer", "export"]
        self.mode = mode
        self.config = config
        self.eval_mode = self.config["Global"].get("eval_mode",
                                                   "classification")
        if "Head" in self.config["Arch"] or self.config["Arch"].get(
                "is_rec", False):
            self.is_rec = True
        else:
            self.is_rec = False

        # set seed
        seed = self.config["Global"].get("seed", False)
        if seed or seed == 0:
            assert isinstance(seed, int), "The 'seed' must be a integer!"
            paddle.seed(seed)
            np.random.seed(seed)
            random.seed(seed)

        # init logger
        self.output_dir = self.config['Global']['output_dir']
        log_file = os.path.join(self.output_dir, self.config["Arch"]["name"],
                                f"{mode}.log")
        init_logger(log_file=log_file)
        print_config(config)

        # init train_func and eval_func
        assert self.eval_mode in ["classification", "retrieval"], logger.error(
            "Invalid eval mode: {}".format(self.eval_mode))
        self.train_epoch_func = train_epoch
        self.eval_func = getattr(evaluation, self.eval_mode + "_eval")

        self.use_dali = self.config['Global'].get("use_dali", False)

        # for visualdl
        self.vdl_writer = None
        if self.config['Global'][
                'use_visualdl'] and mode == "train" and dist.get_rank() == 0:
            vdl_writer_path = os.path.join(self.output_dir, "vdl")
            if not os.path.exists(vdl_writer_path):
                os.makedirs(vdl_writer_path)
            self.vdl_writer = LogWriter(logdir=vdl_writer_path)

        # set device
        assert self.config["Global"]["device"] in [
            "cpu", "gpu", "xpu", "npu", "mlu"
        ]
        self.device = paddle.set_device(self.config["Global"]["device"])
        logger.info('train with paddle {} and device {}'.format(
            paddle.__version__, self.device))

        # AMP training
        self.amp = True if "AMP" in self.config and self.mode == "train" else False
        if self.amp and self.config["AMP"] is not None:
            self.scale_loss = self.config["AMP"].get("scale_loss", 1.0)
            self.use_dynamic_loss_scaling = self.config["AMP"].get(
                "use_dynamic_loss_scaling", False)
        else:
            self.scale_loss = 1.0
            self.use_dynamic_loss_scaling = False
        if self.amp:
            AMP_RELATED_FLAGS_SETTING = {
                'FLAGS_max_inplace_grad_add': 8,
            }
            if paddle.is_compiled_with_cuda():
                AMP_RELATED_FLAGS_SETTING.update(
                    {'FLAGS_cudnn_batchnorm_spatial_persistent': 1})
            paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)

        if "class_num" in config["Global"]:
            global_class_num = config["Global"]["class_num"]
            if "class_num" not in config["Arch"]:
                config["Arch"]["class_num"] = global_class_num
                msg = f"The Global.class_num will be deprecated. Please use Arch.class_num instead. Arch.class_num has been set to {global_class_num}."
            else:
                msg = "The Global.class_num will be deprecated. Please use Arch.class_num instead. The Global.class_num has been ignored."
            logger.warning(msg)
        #TODO(gaotingquan): support rec
        class_num = config["Arch"].get("class_num", None)
        self.config["DataLoader"].update({"class_num": class_num})
        # build dataloader
        if self.mode == 'train':
            self.train_dataloader = build_dataloader(self.config["DataLoader"],
                                                     "Train", self.device,
                                                     self.use_dali)
        if self.mode == "eval" or (self.mode == "train" and
                                   self.config["Global"]["eval_during_train"]):
            if self.eval_mode == "classification":
                self.eval_dataloader = build_dataloader(
                    self.config["DataLoader"], "Eval", self.device,
                    self.use_dali)
            elif self.eval_mode == "retrieval":
                self.gallery_query_dataloader = None
                if len(self.config["DataLoader"]["Eval"].keys()) == 1:
                    key = list(self.config["DataLoader"]["Eval"].keys())[0]
                    self.gallery_query_dataloader = build_dataloader(
                        self.config["DataLoader"]["Eval"], key, self.device,
                        self.use_dali)
                else:
                    self.gallery_dataloader = build_dataloader(
                        self.config["DataLoader"]["Eval"], "Gallery",
                        self.device, self.use_dali)
                    self.query_dataloader = build_dataloader(
                        self.config["DataLoader"]["Eval"], "Query",
                        self.device, self.use_dali)

        # build loss
        if self.mode == "train":
            loss_info = self.config["Loss"]["Train"]
            self.train_loss_func = build_loss(loss_info)
        if self.mode == "eval" or (self.mode == "train" and
                                   self.config["Global"]["eval_during_train"]):
            loss_config = self.config.get("Loss", None)
            if loss_config is not None:
                loss_config = loss_config.get("Eval")
                if loss_config is not None:
                    self.eval_loss_func = build_loss(loss_config)
                else:
                    self.eval_loss_func = None
            else:
                self.eval_loss_func = None

        # build metric
        if self.mode == 'train':
            metric_config = self.config.get("Metric")
            if metric_config is not None:
                metric_config = metric_config.get("Train")
                if metric_config is not None:
                    if hasattr(
                            self.train_dataloader, "collate_fn"
                    ) and self.train_dataloader.collate_fn is not None:
                        for m_idx, m in enumerate(metric_config):
                            if "TopkAcc" in m:
                                msg = f"'TopkAcc' metric can not be used when setting 'batch_transform_ops' in config. The 'TopkAcc' metric has been removed."
                                logger.warning(msg)
                                break
                        metric_config.pop(m_idx)
                    self.train_metric_func = build_metrics(metric_config)
                else:
                    self.train_metric_func = None
        else:
            self.train_metric_func = None

        if self.mode == "eval" or (self.mode == "train" and
                                   self.config["Global"]["eval_during_train"]):
            metric_config = self.config.get("Metric")
            if self.eval_mode == "classification":
                if metric_config is not None:
                    metric_config = metric_config.get("Eval")
                    if metric_config is not None:
                        self.eval_metric_func = build_metrics(metric_config)
            elif self.eval_mode == "retrieval":
                if metric_config is None:
                    metric_config = [{"name": "Recallk", "topk": (1, 5)}]
                else:
                    metric_config = metric_config["Eval"]
                self.eval_metric_func = build_metrics(metric_config)
        else:
            self.eval_metric_func = None

        # build model
        self.model = build_model(self.config)
        # set @to_static for benchmark, skip this by default.
        apply_to_static(self.config, self.model)

        # load_pretrain
        if self.config["Global"]["pretrained_model"] is not None:
            if self.config["Global"]["pretrained_model"].startswith("http"):
                load_dygraph_pretrain_from_url(
                    self.model, self.config["Global"]["pretrained_model"])
            else:
                load_dygraph_pretrain(
                    self.model, self.config["Global"]["pretrained_model"])

        # build optimizer
        if self.mode == 'train':
            self.optimizer, self.lr_sch = build_optimizer(
                self.config["Optimizer"], self.config["Global"]["epochs"],
                len(self.train_dataloader), [self.model])

        # for amp training
        if self.amp:
            self.scaler = paddle.amp.GradScaler(
                init_loss_scaling=self.scale_loss,
                use_dynamic_loss_scaling=self.use_dynamic_loss_scaling)
            amp_level = self.config['AMP'].get("level", "O1")
            if amp_level not in ["O1", "O2"]:
                msg = "[Parameter Error]: The optimize level of AMP only support 'O1' and 'O2'. The level has been set 'O1'."
                logger.warning(msg)
                self.config['AMP']["level"] = "O1"
                amp_level = "O1"
            self.model, self.optimizer = paddle.amp.decorate(
                models=self.model,
                optimizers=self.optimizer,
                level=amp_level,
                save_dtype='float32')

        # for distributed
        world_size = dist.get_world_size()
        self.config["Global"]["distributed"] = world_size != 1
        if world_size != 4 and self.mode == "train":
            msg = f"The training strategy in config files provided by PaddleClas is based on 4 gpus. But the number of gpus is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use config files in PaddleClas to train."
            logger.warning(msg)
        if self.config["Global"]["distributed"]:
            dist.init_parallel_env()
            self.model = paddle.DataParallel(self.model)

        # build postprocess for infer
        if self.mode == 'infer':
            self.preprocess_func = create_operators(
                self.config["Infer"]["transforms"])
            self.postprocess_func = build_postprocess(
                self.config["Infer"]["PostProcess"])