def inference():
    args = parse_args()
    print(f'args: {args}')

    assert args.trained_dataset_name in ['COCO', 'VOC'], 'Unsupported dataset!'
    assert args.model in models.__dict__.keys(), 'Unsupported model!'
    assert args.decoder in decode.__dict__.keys(), 'Unsupported decoder!'

    if args.use_gpu:
        # only use one GPU for inference
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        assert torch.cuda.is_available(), 'need gpu to run inference!'
        torch.cuda.empty_cache()

    if args.seed:
        seed = args.seed
        os.environ['PYTHONHASHSEED'] = str(seed)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if args.use_gpu:
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            # for cudnn
            cudnn.enabled = True
            cudnn.deterministic = True
            cudnn.benchmark = False

    model = models.__dict__[args.model](
        **{
            'num_classes': args.trained_num_classes,
        })
    decoder = decode.__dict__[args.decoder]()

    if args.use_gpu:
        model = model.cuda()
        decoder = decoder.cuda()

    if args.trained_model_path:
        saved_model = torch.load(args.trained_model_path,
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    model.eval()

    flops, params = compute_flops_and_params(args, model)
    print(f'model: {args.model}, flops: {flops}, params: {params}')

    resized_img, origin_img, scale = load_image_for_detection_inference(args)
    resized_img = torch.tensor(resized_img)

    if args.use_gpu:
        resized_img = resized_img.cuda()

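    # HWC image -> 1xCxHxW float tensor; the decoder turns the raw network
    # outputs into scores, classes and boxes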
    out_tuples = model(resized_img.permute(2, 0, 1).float().unsqueeze(0))
    scores, classes, boxes = decoder(*out_tuples)
    scores, classes, boxes = scores.cpu(), classes.cpu(), boxes.cpu()
    boxes /= scale

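    # drop the batch dimension, then remove padded (-1) detections and
    # boxes below the score threshold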
    scores = scores.squeeze(0)
    classes = classes.squeeze(0)
    boxes = boxes.squeeze(0)

    scores = scores[classes > -1]
    boxes = boxes[classes > -1]
    classes = classes[classes > -1]

    boxes = boxes[scores > args.min_score_threshold]
    classes = classes[scores > args.min_score_threshold]
    scores = scores[scores > args.min_score_threshold]

    # clip boxes
    origin_h, origin_w = origin_img.shape[0], origin_img.shape[1]
    boxes[:, 0] = torch.clamp(boxes[:, 0], min=0)
    boxes[:, 1] = torch.clamp(boxes[:, 1], min=0)
    boxes[:, 2] = torch.clamp(boxes[:, 2], max=origin_w)
    boxes[:, 3] = torch.clamp(boxes[:, 3], max=origin_h)

    if args.trained_dataset_name == 'COCO':
        dataset_classes_name = COCO_CLASSES
        dataset_classes_color = COCO_CLASSES_COLOR
    else:
        dataset_classes_name = VOC_CLASSES
        dataset_classes_color = VOC_CLASSES_COLOR

    # draw all pred boxes
    for per_score, per_class_index, per_box in zip(scores, classes, boxes):
        per_score = per_score.numpy().astype(np.float32)
        per_class_index = per_class_index.numpy().astype(np.int32)
        per_box = per_box.numpy().astype(np.int32)

        class_name, class_color = dataset_classes_name[
            per_class_index], dataset_classes_color[per_class_index]

        left_top, right_bottom = (per_box[0], per_box[1]), (per_box[2],
                                                            per_box[3])
        cv2.rectangle(origin_img,
                      left_top,
                      right_bottom,
                      color=class_color,
                      thickness=2,
                      lineType=cv2.LINE_AA)

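        # draw a filled label background above the box, then write the
        # class name and score on it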
        text = f'{class_name}:{per_score:.3f}'
        text_size = cv2.getTextSize(text,
                                    cv2.FONT_HERSHEY_SIMPLEX,
                                    0.5,
                                    thickness=1)[0]
        fill_right_bottom = (max(left_top[0] + text_size[0], right_bottom[0]),
                             left_top[1] - text_size[1] - 3)
        cv2.rectangle(origin_img,
                      left_top,
                      fill_right_bottom,
                      color=class_color,
                      thickness=-1,
                      lineType=cv2.LINE_AA)
        cv2.putText(origin_img,
                    text, (left_top[0], left_top[1] - 2),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    color=(0, 0, 0),
                    thickness=1,
                    lineType=cv2.LINE_AA)

    if args.save_image_path:
        cv2.imwrite(os.path.join(args.save_image_path, 'detection_result.jpg'),
                    origin_img)

    if args.show_image:
        cv2.namedWindow("detection_result", cv2.WINDOW_AUTOSIZE)
        cv2.imshow('detection_result', origin_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return


def main():
    assert torch.cuda.is_available(), 'need gpu to train network!'
    torch.cuda.empty_cache()

    args = parse_args()
    sys.path.append(args.work_dir)
    from train_config import config
    log_dir = os.path.join(args.work_dir, 'log')
    checkpoint_dir = os.path.join(args.work_dir, 'checkpoints')
    resume_model = os.path.join(checkpoint_dir, 'latest.pth')

    set_seed(config.seed)

    local_rank = args.local_rank
    # initialize the distributed process group
    if config.distributed:
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        torch.cuda.set_device(local_rank)

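    # worker_init_fn derives a per-worker seed from the global seed, local
    # rank and worker count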
    init_fn = functools.partial(worker_seed_init_fn,
                                num_workers=config.num_workers,
                                local_rank=local_rank,
                                seed=config.seed)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        config.train_dataset, shuffle=True) if config.distributed else None
    train_loader = DataLoader(config.train_dataset,
                              batch_size=config.batch_size,
                              shuffle=(train_sampler is None),
                              pin_memory=True,
                              num_workers=config.num_workers,
                              sampler=train_sampler,
                              worker_init_fn=init_fn)
    val_sampler = torch.utils.data.distributed.DistributedSampler(
        config.val_dataset, shuffle=False) if config.distributed else None
    val_loader = DataLoader(config.val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=config.num_workers,
                            sampler=val_sampler)

    if (config.distributed and local_rank == 0) or not config.distributed:
        # automatically create the checkpoint and log folders
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

    logger = get_logger('train', log_dir)

    for key, value in config.__dict__.items():
        if not key.startswith('__'):
            if key not in ['model', 'criterion']:
                log_info = f'{key}: {value}'
                logger.info(log_info) if (
                    config.distributed
                    and local_rank == 0) or not config.distributed else None

    gpus_type, gpus_num = torch.cuda.get_device_name(
    ), torch.cuda.device_count()
    log_info = f'gpus_type: {gpus_type}, gpus_num: {gpus_num}'
    logger.info(log_info) if (config.distributed and local_rank
                              == 0) or not config.distributed else None

    model = config.model.cuda()
    criterion = config.criterion

    for name in criterion.keys():
        criterion[name] = criterion[name].cuda()

    # parameters need to be updated by the optimizer
    # buffers don't need to be updated by the optimizer
    for name, param in model.named_parameters():
        log_info = f'name: {name}, grad: {param.requires_grad}'
        logger.info(log_info) if (config.distributed and local_rank
                                  == 0) or not config.distributed else None

    for name, buffer in model.named_buffers():
        log_info = f'name: {name}, grad: {buffer.requires_grad}'
        logger.info(log_info) if (config.distributed and local_rank
                                  == 0) or not config.distributed else None

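    # build the optimizer and lr scheduler, then wrap the model for the
    # configured training mode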
    optimizer = build_optimizer(config, model)
    scheduler = build_scheduler(config, optimizer)
    model = build_training_mode(config, model, optimizer)

    start_epoch, best_top1 = 1, 0.0
    # automatically resume training if a checkpoint exists
    if os.path.exists(resume_model):
        checkpoint = torch.load(resume_model, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        saved_epoch = checkpoint['epoch']
        start_epoch += saved_epoch
        best_top1, loss, lr = checkpoint['best_top1'], checkpoint[
            'loss'], checkpoint['lr']

        log_info = f'resuming model from {resume_model}. resume_epoch: {saved_epoch}, best_top1: {best_top1:.3f}%, loss: {loss:.4f}, lr: {lr:.6f}'
        logger.info(log_info) if (config.distributed and local_rank
                                  == 0) or not config.distributed else None

    # calculate training time
    start_time = time.time()

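    # train one epoch with knowledge distillation, then evaluate on the
    # validation set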
    for epoch in range(start_epoch, config.epochs + 1):
        torch.cuda.empty_cache()
        train_sampler.set_epoch(epoch) if config.distributed else None
        top1, top5, loss = train_KD(train_loader, model, criterion, optimizer,
                                    scheduler, epoch, logger, config)
        log_info = f'train: epoch {epoch:0>3d}, top1: {top1:.2f}%, top5: {top5:.2f}%, total_loss: {loss:.2f}'
        logger.info(log_info) if (config.distributed and local_rank
                                  == 0) or not config.distributed else None

        top1, top5, loss = validate_KD(val_loader, model, criterion)
        log_info = f'eval: epoch: {epoch:0>3d}, top1: {top1:.2f}%, top5: {top5:.2f}%, total_loss: {loss:.2f}'
        logger.info(log_info) if (config.distributed and local_rank
                                  == 0) or not config.distributed else None

        if (config.distributed and local_rank == 0) or not config.distributed:
            # save best top1 model and each epoch checkpoint
            if top1 > best_top1:
                torch.save(model.module.student.state_dict(),
                           os.path.join(checkpoint_dir, 'best_student.pth'))
                best_top1 = top1

            torch.save(
                {
                    'epoch': epoch,
                    'best_top1': best_top1,
                    'loss': loss,
                    'lr': scheduler.get_lr()[0],
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                }, os.path.join(checkpoint_dir, 'latest.pth'))

            if os.path.exists(
                    os.path.join(checkpoint_dir, 'best_student.pth')):
                os.rename(
                    os.path.join(checkpoint_dir, 'best_student.pth'),
                    os.path.join(
                        checkpoint_dir,
                        f'{config.student}-epoch{epoch}-top1{best_top1:.3f}.pth'
                    ))

    training_time = (time.time() - start_time) / 3600
    flops, params = compute_flops_and_params(config, model)
    log_info = f'train done. teacher: {config.teacher}, student: {config.student}, total_flops: {flops}, total_params: {params}, training time: {training_time:.3f} hours, best_top1: {best_top1:.3f}%'
    logger.info(log_info) if (config.distributed and local_rank
                              == 0) or not config.distributed else None
Example #3
def inference():
    args = parse_args()
    print(f'args: {args}')

    assert args.trained_dataset_name in ['COCO', 'VOC'], 'Unsupported dataset!'
    assert args.model in models.__dict__.keys(), 'Unsupported model!'
    assert args.decoder in decode.__dict__.keys(), 'Unsupported decoder!'

    if args.use_gpu:
        # only use one GPU for inference
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        assert torch.cuda.is_available(), 'need gpu to run inference!'
        torch.cuda.empty_cache()

    if args.seed:
        seed = args.seed
        os.environ['PYTHONHASHSEED'] = str(seed)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if args.use_gpu:
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            # for cudnn
            cudnn.enabled = True
            cudnn.deterministic = True
            cudnn.benchmark = False

    model = models.__dict__[args.model](
        **{
            'num_classes': args.trained_num_classes,
        })
    decoder = decode.__dict__[args.decoder]()

    if args.use_gpu:
        model = model.cuda()
        decoder = decoder.cuda()

    if args.trained_model_path:
        saved_model = torch.load(args.trained_model_path,
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    model.eval()

    flops, params = compute_flops_and_params(args, model)
    print(f'model: {args.model}, flops: {flops}, params: {params}')

    video = cv2.VideoCapture(args.test_video_path)

    if not video.isOpened():
        raise IOError("Couldn't open video!")

    # video_FourCC = int(video.get(cv2.CAP_PROP_FOURCC))
    video_FourCC = cv2.VideoWriter_fourcc(*"XVID")
    video_fps = video.get(cv2.CAP_PROP_FPS)
    video_size = (
        int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )

    if args.save_video_path:
        print(f'save_video_info:{video_FourCC},{video_fps},{video_size}')
        out_video = cv2.VideoWriter(
            os.path.join(args.save_video_path, 'detection_result.avi'),
            video_FourCC, video_fps, video_size)

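    # read the video frame by frame, run detection on each frame and overlay
    # the running FPS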
    frame_num = 0
    time_sum = 0
    end = time.time()
    while True:
        return_value, frame = video.read()

        if not return_value:
            break

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized_img, origin_img, scale = load_image_for_detection_inference(
            img, args)
        resized_img = torch.tensor(resized_img)

        if args.use_gpu:
            resized_img = resized_img.cuda()

        out_tuples = model(resized_img.permute(2, 0, 1).float().unsqueeze(0))
        scores, classes, boxes = decoder(*out_tuples)
        scores, classes, boxes = scores.cpu(), classes.cpu(), boxes.cpu()
        boxes /= scale

        scores = scores.squeeze(0)
        classes = classes.squeeze(0)
        boxes = boxes.squeeze(0)

        scores = scores[classes > -1]
        boxes = boxes[classes > -1]
        classes = classes[classes > -1]

        boxes = boxes[scores > args.min_score_threshold]
        classes = classes[scores > args.min_score_threshold]
        scores = scores[scores > args.min_score_threshold]

        # clip boxes
        origin_h, origin_w = origin_img.shape[0], origin_img.shape[1]
        boxes[:, 0] = torch.clamp(boxes[:, 0], min=0)
        boxes[:, 1] = torch.clamp(boxes[:, 1], min=0)
        boxes[:, 2] = torch.clamp(boxes[:, 2], max=origin_w)
        boxes[:, 3] = torch.clamp(boxes[:, 3], max=origin_h)

        if args.trained_dataset_name == 'COCO':
            dataset_classes_name = COCO_CLASSES
            dataset_classes_color = COCO_CLASSES_COLOR
        else:
            dataset_classes_name = VOC_CLASSES
            dataset_classes_color = VOC_CLASSES_COLOR

        # draw all pred boxes
        for per_score, per_class_index, per_box in zip(scores, classes, boxes):
            per_score = per_score.numpy().astype(np.float32)
            per_class_index = per_class_index.numpy().astype(np.int32)
            per_box = per_box.numpy().astype(np.int32)

            class_name, class_color = dataset_classes_name[
                per_class_index], dataset_classes_color[per_class_index]

            left_top, right_bottom = (per_box[0], per_box[1]), (per_box[2],
                                                                per_box[3])
            cv2.rectangle(origin_img,
                          left_top,
                          right_bottom,
                          color=class_color,
                          thickness=2,
                          lineType=cv2.LINE_AA)

            text = f'{class_name}:{per_score:.3f}'
            text_size = cv2.getTextSize(text,
                                        cv2.FONT_HERSHEY_SIMPLEX,
                                        0.5,
                                        thickness=1)[0]
            fill_right_bottom = (max(left_top[0] + text_size[0],
                                     right_bottom[0]),
                                 left_top[1] - text_size[1] - 3)
            cv2.rectangle(origin_img,
                          left_top,
                          fill_right_bottom,
                          color=class_color,
                          thickness=-1,
                          lineType=cv2.LINE_AA)
            cv2.putText(origin_img,
                        text, (left_top[0], left_top[1] - 2),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        color=(0, 0, 0),
                        thickness=1,
                        lineType=cv2.LINE_AA)

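        # accumulate per-frame latency to report a running-average FPS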
        frame_num += 1
        time_sum += time.time() - end
        end = time.time()
        fps = 1. / (time_sum / frame_num)
        fps_text = f'FPS:{fps:.2f}'

        cv2.putText(origin_img,
                    fps_text, (30, 30),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    color=(51, 204, 51),
                    thickness=1,
                    lineType=cv2.LINE_AA)

        if args.save_video_path:
            out_video.write(origin_img)

        if args.show_video:
            cv2.imshow('detection_result', origin_img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break

    video.release()
    if args.save_video_path:
        out_video.release()

    return
Example #4
def main():
    assert torch.cuda.is_available(), 'need gpu to test network!'
    torch.cuda.empty_cache()

    args = parse_args()
    sys.path.append(args.work_dir)
    from test_config import config
    log_dir = os.path.join(args.work_dir, 'log')

    set_seed(config.seed)

    collater = DetectionCollater()
    val_loader = DataLoader(config.val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=config.num_workers,
                            collate_fn=collater.next)

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    logger = get_logger('test', log_dir)

    for key, value in config.__dict__.items():
        if not key.startswith('__'):
            if key not in [
                    'model', 'criterion', 'decoder', 'train_dataset',
                    'val_dataset'
            ]:
                log_info = f'{key}: {value}'
                logger.info(log_info)

    gpus_type, gpus_num = torch.cuda.get_device_name(
    ), torch.cuda.device_count()
    log_info = f'gpus_type: {gpus_type}, gpus_num: {gpus_num}'
    logger.info(log_info)

    model = config.model
    decoder = config.decoder

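    # load the trained weights on CPU first; the model is moved to GPU after
    # the flops/params check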
    if config.trained_model_path:
        saved_model = torch.load(os.path.join(BASE_DIR,
                                              config.trained_model_path),
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    flops, params = compute_flops_and_params(config, model)
    log_info = f'model: {config.network}, flops: {flops}, params: {params}'
    logger.info(log_info)

    model = model.cuda()
    decoder = decoder.cuda()
    model = nn.DataParallel(model)

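    # evaluate detection metrics on the validation set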
    result_dict = validate_detection(config.val_dataset, val_loader, model,
                                     decoder, config)
    log_info = 'eval_result: '
    if result_dict:
        for key, value in result_dict.items():
            log_info += f'{key}: {value}, '
    else:
        log_info += 'no target detected in testset images!'
    logger.info(log_info)

    return
Example #5
def inference():
    args = parse_args()
    print(f'args: {args}')

    assert args.model in backbones.__dict__.keys(), 'Unsupported model!'

    if args.use_gpu:
        # only use one GPU for inference
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        assert torch.cuda.is_available(), 'need gpu to run inference!'
        torch.cuda.empty_cache()

    if args.seed:
        seed = args.seed
        os.environ['PYTHONHASHSEED'] = str(seed)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if args.use_gpu:
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            # for cudnn
            cudnn.enabled = True
            cudnn.deterministic = True
            cudnn.benchmark = False

    model = backbones.__dict__[args.model](
        **{
            'num_classes': args.trained_num_classes,
        })

    if args.use_gpu:
        model = model.cuda()

    if args.trained_model_path:
        saved_model = torch.load(args.trained_model_path,
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    model.eval()

    flops, params = compute_flops_and_params(args, model)
    print(f'model: {args.model}, flops: {flops}, params: {params}')

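    # ImageNet-style preprocessing: resize, center-crop to the input size and
    # normalize with ImageNet mean/std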
    origin_img = Image.open(args.test_image_path).convert('RGB')
    img = origin_img
    transform = transforms.Compose([
        transforms.Resize(int(args.input_image_size * (256 / 224))),
        transforms.CenterCrop(args.input_image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    img = transform(img)

    if args.use_gpu:
        img = img.cuda()

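    # convert the PIL RGB image to an OpenCV BGR array for drawing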
    origin_img = cv2.cvtColor(np.asarray(origin_img), cv2.COLOR_RGB2BGR)

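    # forward pass; softmax over classes gives the top-1 score and predicted
    # class index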
    output = model(img.unsqueeze(0))
    output = F.softmax(output, dim=1)
    pred_score, pred_class = output.max(dim=1)
    pred_score, pred_class = pred_score.item(), pred_class.item()
    color = [random.randint(0, 255) for _ in range(3)]
    print(f'score: {pred_score:.3f}, class: {pred_class}, color: {color}')

    text = f'{pred_class}:{pred_score:.3f}'
    cv2.putText(origin_img,
                text, (30, 30),
                cv2.FONT_HERSHEY_PLAIN,
                1.5,
                color=color,
                thickness=1)

    if args.save_image_path:
        cv2.imwrite(
            os.path.join(args.save_image_path, 'classification_result.jpg'),
            origin_img)

    if args.show_image:
        cv2.namedWindow("classification_result", cv2.WINDOW_AUTOSIZE)
        cv2.imshow('classification_result', origin_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return


def main():
    assert torch.cuda.is_available(), 'need gpu to test network!'
    torch.cuda.empty_cache()

    args = parse_args()
    sys.path.append(args.work_dir)
    from test_config import config
    log_dir = os.path.join(args.work_dir, 'log')

    set_seed(config.seed)

    local_rank = args.local_rank
    # initialize the distributed process group
    if config.distributed:
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        torch.cuda.set_device(local_rank)

    val_sampler = torch.utils.data.distributed.DistributedSampler(
        config.val_dataset, shuffle=False) if config.distributed else None
    val_loader = DataLoader(config.val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=False,
                            num_workers=config.num_workers,
                            sampler=val_sampler)

    if (config.distributed and local_rank == 0) or not config.distributed:
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

    global logger
    logger = get_logger('test', log_dir)

    for key, value in config.__dict__.items():
        if not key.startswith('__'):
            if key not in ['model', 'criterion']:
                log_info = f'{key}: {value}'
                logger.info(log_info) if (
                    config.distributed
                    and local_rank == 0) or not config.distributed else None

    gpus_type, gpus_num = torch.cuda.get_device_name(
    ), torch.cuda.device_count()
    log_info = f'gpus_type: {gpus_type}, gpus_num: {gpus_num}'
    logger.info(log_info) if (config.distributed and local_rank
                              == 0) or not config.distributed else None

    model = config.model
    criterion = config.criterion

    if config.trained_model_path:
        saved_model = torch.load(os.path.join(BASE_DIR,
                                              config.trained_model_path),
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    flops, params = compute_flops_and_params(config, model)
    log_info = f'model: {config.network}, flops: {flops}, params: {params}'
    logger.info(log_info) if (config.distributed and local_rank
                              == 0) or not config.distributed else None

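    # move the model and criterion to GPU, then wrap with
    # DistributedDataParallel (distributed) or DataParallel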
    model = model.cuda()
    criterion = criterion.cuda()
    if config.distributed:
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[local_rank],
                                                    output_device=local_rank)
    else:
        model = nn.DataParallel(model)

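    # evaluate top1/top5 accuracy, loss and per-image timing on the
    # validation set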
    top1, top5, loss, per_image_load_time, per_image_inference_time = validate_classification(
        val_loader, model, criterion, config)
    log_info = f'top1: {top1:.3f}%, top5: {top5:.3f}%, loss: {loss:.4f}, per_image_load_time: {per_image_load_time:.3f}ms, per_image_inference_time: {per_image_inference_time:.3f}ms'
    logger.info(log_info) if (config.distributed and local_rank
                              == 0) or not config.distributed else None

    return