Example #1
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters,
                                                 warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq,
                                                   header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
        sys.stdout.flush()

    return metric_logger
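
Every example in this listing logs through a utils.MetricLogger built from utils.SmoothedValue meters, in the style of the torchvision reference training scripts. The real helpers also synchronize meters across distributed processes; the following is only a minimal single-process sketch of the interface these snippets assume (the device keyword some examples pass is simply absorbed here).

import time
from collections import defaultdict, deque


class SmoothedValue:
    """Track a value over a sliding window and over the whole run."""

    def __init__(self, window_size=20, fmt="{value:.4f} ({global_avg:.4f})"):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    @property
    def value(self):
        return self.deque[-1]

    @property
    def avg(self):
        return sum(self.deque) / len(self.deque)

    @property
    def global_avg(self):
        return self.total / max(self.count, 1)

    def __str__(self):
        return self.fmt.format(value=self.value, avg=self.avg,
                               global_avg=self.global_avg)


class MetricLogger:
    """Collect named SmoothedValue meters and report them every print_freq steps."""

    def __init__(self, delimiter="\t", **kwargs):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if hasattr(v, "item"):  # accept 0-dim tensors as well as plain numbers
                v = v.item()
            self.meters[k].update(float(v))

    def __getattr__(self, attr):
        meters = self.__dict__.get("meters", {})
        if attr in meters:
            return meters[attr]
        raise AttributeError(attr)

    def __str__(self):
        return self.delimiter.join(
            "{}: {}".format(name, meter) for name, meter in self.meters.items())

    def log_every(self, iterable, print_freq, header=""):
        # Yield each item, then report the meters after every print_freq items.
        start = time.time()
        for i, obj in enumerate(iterable):
            yield obj
            if i % print_freq == 0:
                print("{} [{}/{}]  {}".format(header, i, len(iterable), self))
        print("{} Total time: {:.1f}s".format(header, time.time() - start))

    def synchronize_between_processes(self):
        # No-op in this single-process sketch; the real helper all-reduces each meter.
        pass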
Example #2
def evaluate(model,
             criterion,
             data_loader,
             device,
             print_freq=100,
             log_suffix=""):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = f"Test: {log_suffix}"

    num_processed_samples = 0
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, print_freq,
                                                     header):
            image = image.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(image)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = image.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            num_processed_samples += batch_size
    # gather the stats from all processes

    num_processed_samples = utils.reduce_across_processes(
        num_processed_samples)
    if (hasattr(data_loader.dataset, "__len__")
            and len(data_loader.dataset) != num_processed_samples
            and torch.distributed.get_rank() == 0):
        # See FIXME above
        warnings.warn(
            f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet.")

    metric_logger.synchronize_between_processes()

    print(
        f"{header} Acc@1 {metric_logger.acc1.global_avg:.3f} Acc@5 {metric_logger.acc5.global_avg:.3f}"
    )
    return metric_logger.acc1.global_avg
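
The utils.accuracy helper used above is the standard top-k accuracy computation. A sketch of what such a helper typically looks like (an assumption, not taken from the snippet's own utils module):

import torch


def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy (in percent) from logits and integer class labels."""
    with torch.inference_mode():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk highest-scoring classes per sample, shape (maxk, B).
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum()
            res.append(correct_k * (100.0 / batch_size))
        return res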
Example #3
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    print("[*] Successfully get coco api from dataset")

    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)

        outputs = [{k: v.to(cpu_device)
                    for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs)
        }
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
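
For context, a hedged sketch of how train_one_epoch (Example #1) and this COCO-style evaluate are typically wired together. get_model, train_loader, val_loader, num_classes and num_epochs are placeholders, not names defined in the snippets above.

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = get_model(num_classes).to(device)  # placeholder model factory

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

for epoch in range(num_epochs):
    # Per-iteration warm-up only happens inside train_one_epoch for epoch 0.
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=10)
    lr_scheduler.step()
    evaluate(model, val_loader, device)  # COCO evaluation as in this example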
Example #4
def extract_features(model, data_loader):
    metric_logger = utils.MetricLogger(delimiter="  ")
    features = None
    for samples, index in metric_logger.log_every(data_loader, 10):
        samples = samples.cuda(non_blocking=True)
        index = index.cuda(non_blocking=True)
        feats = model(samples.float()).clone()

        # init storage feature matrix
        if dist.get_rank() == 0 and features is None:
            features = torch.zeros(len(data_loader.dataset), feats.shape[-1])
            if args.use_cuda:
                features = features.cuda(non_blocking=True)
            print(f"Storing features into tensor of shape {features.shape}")

        # get indexes from all processes
        y_all = torch.empty(dist.get_world_size(),
                            index.size(0),
                            dtype=index.dtype,
                            device=index.device)
        y_l = list(y_all.unbind(0))
        y_all_reduce = torch.distributed.all_gather(y_l, index, async_op=True)
        y_all_reduce.wait()
        index_all = torch.cat(y_l)

        # share features between processes
        feats_all = torch.empty(
            dist.get_world_size(),
            feats.size(0),
            feats.size(1),
            dtype=feats.dtype,
            device=feats.device,
        )
        output_l = list(feats_all.unbind(0))
        output_all_reduce = torch.distributed.all_gather(output_l,
                                                         feats,
                                                         async_op=True)
        output_all_reduce.wait()

        # update storage feature matrix
        if dist.get_rank() == 0:
            if args.use_cuda:
                features.index_copy_(0, index_all, torch.cat(output_l))
            else:
                features.index_copy_(0, index_all.cpu(),
                                     torch.cat(output_l).cpu())
    return features
Example #5
def train_one_epoch(epoch,
                    model,
                    data_loader,
                    optimizer,
                    device,
                    lr_update=None,
                    lr_scheduler=None,
                    print_freq=100):
    model.train()
    # logger
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print('----' * 20)
    print("[Epoch {}] The number of batch: {}".format(epoch, len(data_loader)))
    for idx, (images, targets) in enumerate(
            metric_logger.log_every(data_loader, print_freq, header)):
        # get data
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # forward
        loss_dict = model(images, targets)
        losses = loss_dict['loss_classifier'] + loss_dict['loss_box_reg'] \
                 + loss_dict['loss_mask'] + loss_dict['loss_objectness'] \
                 + loss_dict['loss_rpn_box_reg']
        # backprop
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        # sum loss
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        # logging
        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
        # learning rate scheduler
        curr_itr = idx + epoch * len(data_loader) + 1
        if lr_scheduler is not None and lr_update is not None:
            if curr_itr % lr_update == 0:
                print("+++ LR Update !")
                lr_scheduler.step()
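
Several of these snippets call utils.reduce_dict to average the per-process loss dictionary before logging. A sketch of such a helper, consistent with the torchvision reference scripts (it is a no-op when only one process is running):

import torch
import torch.distributed as dist


def reduce_dict(input_dict, average=True):
    """All-reduce the scalar-tensor values of a dict across processes (for logging)."""
    if not (dist.is_available() and dist.is_initialized()):
        return input_dict
    world_size = dist.get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        # Sort the keys so every rank reduces the values in the same order.
        names = sorted(input_dict.keys())
        values = torch.stack([input_dict[k] for k in names], dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        return {k: v for k, v in zip(names, values)}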
Example #6
def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler,
                    device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr',
                            utils.SmoothedValue(window_size=1, fmt='{value}'))
    header = 'Epoch: [{}]'.format(epoch)

    for data, target in metric_logger.log_every(data_loader, print_freq,
                                                header):

        global n_iter
        n_iter = n_iter + 1

        optimizer.zero_grad()

        target = target.to(device)

        output = model(data)
        loss = criterion(output, target)
        loss = loss.mean()

        #visualization
        segmap = torch.argmax(output['out'], dim=1)

        loss.backward()
        optimizer.step()

        lr_scheduler.step()

        metric_logger.update(loss=loss.item(),
                             lr=optimizer.param_groups[0]["lr"])

        if n_iter % args.print_freq == 0:
            if args.tensorboard and utils.is_main_process():
                args.writer.add_scalar('SupLoss', loss.item(), n_iter)
                if n_iter % (args.print_freq * 100) == 0:
                    grid = torchvision.utils.make_grid(data[:1])
                    grid = (grid - grid.min()) / (grid.max() - grid.min() +
                                                  1e-5)
                    args.writer.add_image('sup images', grid, n_iter)

                    segmap = args.colormap[segmap[0].detach().cpu().numpy()]
                    segmap = segmap / 255.
                    args.writer.add_image('sup segmaps',
                                          segmap.transpose((2, 0, 1)), n_iter)
Example #7
def evaluate(model, data_loader, device, num_classes):
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'
    with torch.no_grad():
        for image, target in metric_logger.log_every(data_loader, 100, header):
            image, target = image.to(device), target.to(device)
            output = model(image)
            output = {"out": output[0], "aux": output[1]}
            output = output['out']

            confmat.update(target.flatten(), output.argmax(1).flatten())

        confmat.reduce_from_all_processes()

    return confmat
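
The segmentation examples accumulate predictions into a utils.ConfusionMatrix and derive per-class IoU from it. A minimal sketch of such a class (the distributed reduction is stubbed out here):

import torch


class ConfusionMatrix:
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.mat = None

    def update(self, target, pred):
        n = self.num_classes
        if self.mat is None:
            self.mat = torch.zeros((n, n), dtype=torch.int64, device=target.device)
        with torch.no_grad():
            k = (target >= 0) & (target < n)  # ignore labels outside [0, n)
            inds = n * target[k].to(torch.int64) + pred[k]
            self.mat += torch.bincount(inds, minlength=n ** 2).reshape(n, n)

    def compute(self):
        h = self.mat.float()
        acc_global = torch.diag(h).sum() / h.sum()
        per_class_acc = torch.diag(h) / h.sum(1)
        iou = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h))
        return acc_global, per_class_acc, iou

    def reduce_from_all_processes(self):
        # No-op in this single-process sketch; the real helper all-reduces self.mat.
        pass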
Example #8
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10

    lr_scheduler = None

    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(train_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters,
                                                 warmup_factor)
    i = 1
    for img, box in train_loader:
        imgs = list(image.to(device) for image in img)
        boxs = [{k: v.to(device) for k, v in t.items()} for t in box]
        i += 1
        loss_dict = model(imgs, boxs)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        print(loss_value)
        print('-' * 50)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
        # print(loss)
        if i > 10:
            return
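
Examples #1, #8 and #17 build a per-iteration warm-up schedule with utils.warmup_lr_scheduler. A common implementation is a LambdaLR whose factor ramps linearly from warmup_factor to 1 over warmup_iters steps; a sketch consistent with the torchvision reference scripts:

import torch


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Linearly ramp the learning rate from lr * warmup_factor up to lr."""

    def f(step):
        if step >= warmup_iters:
            return 1.0
        alpha = float(step) / warmup_iters
        # Interpolate between warmup_factor (at step 0) and 1.0 (at warmup_iters).
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, f)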
Example #9
def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}'))
    header = 'Epoch: [{}]'.format(epoch)
    for image, target in metric_logger.log_every(data_loader, print_freq, header):
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        lr_scheduler.step()

        metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"])
Example #10
def evaluate(model, data_loader, device):
    iou_types = ["bbox"]
    coco = get_coco_api_from_dataset(data_loader.dataset)
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = "Test:"
    model.to(device)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)
    to_tensor = torchvision.transforms.ToTensor()
    for image, targets in metric_logger.log_every(data_loader, 100, header):

        image = list(to_tensor(img).to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        torch.cuda.synchronize()
        model_time = time.time()

        outputs = model(image)

        outputs = [{k: v.to(cpu_device)
                    for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs)
        }
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
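
The COCO evaluation examples choose the IoU types with _get_iou_types. A sketch of that helper, consistent with the torchvision detection references: plain detectors are scored on boxes only, Mask R-CNN additionally on masks, and Keypoint R-CNN on keypoints.

import torch
import torchvision


def _get_iou_types(model):
    model_without_ddp = model
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model_without_ddp = model.module
    iou_types = ["bbox"]
    if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
        iou_types.append("segm")
    if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
        iou_types.append("keypoints")
    return iou_types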
Example #11
def train(model, linear_classifier, optimizer, loader, epoch, n, avgpool):
    linear_classifier.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    for (inp, target) in metric_logger.log_every(loader, 20, header):
        # move to gpu
        inp = inp.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # forward
        with torch.no_grad():
            if "vit" in args.arch:
                intermediate_output = model.get_intermediate_layers(inp, n)
                output = torch.cat([x[:, 0] for x in intermediate_output],
                                   dim=-1)
                if avgpool:
                    output = torch.cat(
                        (output.unsqueeze(-1),
                         torch.mean(intermediate_output[-1][:, 1:],
                                    dim=1).unsqueeze(-1)),
                        dim=-1)
                    output = output.reshape(output.shape[0], -1)
            else:
                output = model(inp)
        output = linear_classifier(output)

        # compute cross entropy loss
        loss = nn.CrossEntropyLoss()(output, target)

        # compute the gradients
        optimizer.zero_grad()
        loss.backward()

        # step
        optimizer.step()

        # log
        torch.cuda.synchronize()
        metric_logger.update(loss=loss.item())
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
Example #12
def evaluate(model, criterion, data_loader, device, print_freq=100):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ", device=device)
    header = 'Test:'
    step_count = 0
    with torch.no_grad():
        for image, target in metric_logger.log_every(data_loader, print_freq,
                                                     header):
            image = image.to(device, non_blocking=True)

            if args.channels_last:
                image = image.contiguous(memory_format=torch.channels_last)
                if args.run_lazy_mode:
                    # This mark_step is added so that the lazy kernel can
                    # create and evaluate the graph to infer the resulting tensor
                    # as channels_last
                    import habana_frameworks.torch.core as htcore
                    htcore.mark_step()

            target = target.to(device, non_blocking=True)
            output = model(image)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = image.shape[0]
            loss_cpu = loss.to('cpu').detach()
            metric_logger.update(loss=loss_cpu.item())
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
            step_count = step_count + 1
            if step_count >= args.num_eval_steps:
                break
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()

    # Return from here if evaluation phase does not go through any iterations.(eg, The data set is so small that
    # there is only one eval batch, but that was skipped in data loader due to drop_last=True)
    if len(metric_logger.meters) == 0:
        return

    print(' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}'.format(
        top1=metric_logger.acc1, top5=metric_logger.acc5))
    return metric_logger.acc1.global_avg
Example #13
def train_one_epoch(model,
                    criterion,
                    optimizer,
                    data_loader,
                    device,
                    epoch,
                    print_freq,
                    apex=False):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ", device=device)
    metric_logger.add_meter('lr',
                            utils.SmoothedValue(window_size=1, fmt='{value}'))
    metric_logger.add_meter('img/s',
                            utils.SmoothedValue(window_size=10, fmt='{value}'))

    header = 'Epoch: [{}]'.format(epoch)
    step_count = 0
    for image, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):

        if args.distributed:
            utils.barrier()

        start_time = time.time()

        image, target = image.to(device, non_blocking=True), target.to(
            device, non_blocking=True)

        if args.channels_last:
            image = image.contiguous(memory_format=torch.channels_last)

        loss_cpu, output_cpu = train_model(model, criterion, optimizer, image,
                                           target, apex)

        acc1, acc5 = utils.accuracy(output_cpu, target, topk=(1, 5))
        batch_size = image.shape[0]
        # Bring the loss tensor back to CPU before printing; this is needed when running on Habana.
        metric_logger.update(loss=loss_cpu, lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
        metric_logger.meters['img/s'].update(batch_size /
                                             (time.time() - start_time))
        step_count = step_count + 1
        if step_count >= args.num_train_steps:
            break
Example #14
def evaluate(model, criterion, data_loader, device):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = "Test:"
    num_processed_samples = 0
    with torch.inference_mode():
        for video, target in metric_logger.log_every(data_loader, 100, header):
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            num_processed_samples += batch_size
    # gather the stats from all processes
    num_processed_samples = utils.reduce_across_processes(
        num_processed_samples)
    if isinstance(data_loader.sampler, DistributedSampler):
        # Get the len of UniformClipSampler inside DistributedSampler
        num_data_from_sampler = len(data_loader.sampler.dataset)
    else:
        num_data_from_sampler = len(data_loader.sampler)

    if (hasattr(data_loader.dataset, "__len__")
            and num_data_from_sampler != num_processed_samples
            and torch.distributed.get_rank() == 0):
        # See FIXME above
        warnings.warn(
            f"It looks like the sampler has {num_data_from_sampler} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet.")

    metric_logger.synchronize_between_processes()

    print(
        " * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}".
        format(top1=metric_logger.acc1, top5=metric_logger.acc5))
    return metric_logger.acc1.global_avg
Example #15
def train_one_epoch(model,
                    criterion,
                    optimizer,
                    lr_scheduler,
                    data_loader,
                    device,
                    epoch,
                    print_freq,
                    scaler=None):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter("lr",
                            utils.SmoothedValue(window_size=1, fmt="{value}"))
    metric_logger.add_meter(
        "clips/s", utils.SmoothedValue(window_size=10, fmt="{value:.3f}"))

    header = f"Epoch: [{epoch}]"
    for video, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):
        start_time = time.time()
        video, target = video.to(device), target.to(device)
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            output = model(video)
            loss = criterion(output, target)

        optimizer.zero_grad()

        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
        batch_size = video.shape[0]
        metric_logger.update(loss=loss.item(),
                             lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
        metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
        metric_logger.meters["clips/s"].update(batch_size /
                                               (time.time() - start_time))
        lr_scheduler.step()
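
The scaler argument above enables automatic mixed precision via torch.cuda.amp. A hedged usage sketch of how a caller would typically construct it; use_amp, num_epochs and train_loader are placeholders:

scaler = torch.cuda.amp.GradScaler() if use_amp else None

for epoch in range(num_epochs):
    train_one_epoch(model, criterion, optimizer, lr_scheduler, train_loader,
                    device, epoch, print_freq=10, scaler=scaler)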
Example #16
def train_one_epoch(model,
                    criterion,
                    optimizer,
                    data_loader,
                    device,
                    epoch,
                    print_freq,
                    apex=False,
                    model_ema=None):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr',
                            utils.SmoothedValue(window_size=1, fmt='{value}'))
    metric_logger.add_meter('img/s',
                            utils.SmoothedValue(window_size=10, fmt='{value}'))

    header = 'Epoch: [{}]'.format(epoch)
    for image, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):
        start_time = time.time()
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = criterion(output, target)

        optimizer.zero_grad()
        if apex:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
        batch_size = image.shape[0]
        metric_logger.update(loss=loss.item(),
                             lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
        metric_logger.meters['img/s'].update(batch_size /
                                             (time.time() - start_time))

    if model_ema:
        model_ema.update_parameters(model)
Example #17
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    mse_loss = MSELoss()
    l1_loss = L1Loss()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, distorted_bounding_box, ground_truth_box in metric_logger.log_every(data_loader, print_freq, header):
        images = images.to(device)
        distorted_bounding_box = distorted_bounding_box.to(device)
        ground_truth_box = ground_truth_box.to(device)

        prediction = model(images, distorted_bounding_box)
        directly_predicted_box, guided_predicted_box = prediction
        # We want both heads to produce the same result: the correct bounding box
        difference = l1_loss(directly_predicted_box, guided_predicted_box)
        # We want both the first and the second head to produce accurate results w.r.t. the ground truth
        loss_1 = mse_loss(directly_predicted_box, ground_truth_box)
        loss_2 = mse_loss(guided_predicted_box, ground_truth_box)
        # We optimize for all these criteria
        loss = loss_1 + loss_2 + difference

        if not math.isfinite(loss):
            print("Loss is {}, stopping training".format(loss))
            sys.exit(1)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=loss)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
Example #18
def train_teacher_model(model,
                        labeled_dataset,
                        optimizer,
                        scheduler=None,
                        train_ratio=0.7,
                        batch_size=4,
                        device='cpu',
                        max_epochs=100,
                        print_freq=10,
                        save_path=None,
                        checkpoint=None):
    model.to(device)
    metric_logger = utils.MetricLogger(delimiter=" ")
    last_loss = 1e9

    cur_epoch = 0
    if checkpoint is not None:
        print("loading checkpoint:" + checkpoint)
        model, optimizer, scheduler, cur_epoch = load_checkpoint(
            model, optimizer, scheduler, device, checkpoint)

    train_dataset, vld_dataset = split_dataset(labeled_dataset, train_ratio)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_fn)
    vld_loader = DataLoader(vld_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            collate_fn=collate_fn)
    for epoch in range(cur_epoch, max_epochs):
        print("epoch {} / {}".format(epoch + 1, max_epochs))
        train_one_epoch(model, optimizer, train_loader, device, epoch,
                        print_freq)
        loss = evaluate(model, vld_loader, device, epoch, print_freq)

        if loss < last_loss and save_path is not None:
            save_checkpoint(model, optimizer, scheduler, epoch + 1, device,
                            save_path)
            last_loss = loss
        if scheduler is not None:
            scheduler.step()
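
split_dataset and collate_fn are not shown in this snippet. Plausible implementations for a detection-style dataset (assumptions, not the snippet's own helpers):

import torch


def split_dataset(dataset, train_ratio):
    """Randomly split a dataset into train / validation subsets."""
    n_train = int(len(dataset) * train_ratio)
    n_vld = len(dataset) - n_train
    return torch.utils.data.random_split(dataset, [n_train, n_vld])


def collate_fn(batch):
    """Keep variable-sized images and targets as tuples instead of stacking them."""
    return tuple(zip(*batch))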
Example #19
def evaluate_mobilenet_ssd(model, data_loader, device):

    model.eval()
    predictor = create_mobilenetv1_ssd_predictor(model,
                                                 nms_method='hard',
                                                 device=device)

    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    jdict = []
    imgIds = []
    for images, gt_boxes, gt_labels, image_ids in metric_logger.log_every(
            data_loader, 50, header):

        imgIds.extend(image_ids)

        torch.cuda.synchronize()
        model_time = time.time()
        for image, img_id in zip(images, image_ids):
            boxes, labels, probs = predictor.predict(image)
            for box, label, prob in zip(boxes, labels, probs):
                box = xyxy2xywh_ssd(box)  # xywh
                jdict.append({
                    "image_id": img_id,
                    "category_id": int(label),
                    "bbox": box.cpu().numpy().tolist(),
                    "score": float(prob)
                })

        # Evaluate one batch
        model_time = time.time() - model_time
        evaluator_time = 0
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    # COCO eval
    # annotation_path = 'VOC2012.json'
    cocoEval = coco_eval_json('VOC2012.json', jdict, imgIds)
    return cocoEval
Example #20
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    for images, targets in metric_logger.log_every(data_loader, print_freq,
                                                   header):
        # for images, targets in data_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        # loss in original paper
        # losses = loss_dict['loss_classifier'] + loss_dict['loss_box_reg']
        # losses_reduced = loss_dict_reduced['loss_classifier'] + loss_dict_reduced['loss_box_reg']

        loss_value = losses_reduced.item()

        optimizer.zero_grad()
        losses.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 2)
        optimizer.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

        if device == 'cuda':
            torch.cuda.empty_cache()
            del images
            del targets
            del losses_reduced
            del losses
            del loss_dict
            del loss_dict_reduced
Example #21
def evaluate(model, loss_fn, data_loader):
    """Test dlrm model

    Args:
        model (dlrm):
        loss_fn (torch.nn.Module): Loss function
        data_loader (torch.utils.data.DataLoader):
    """
    # Test batch size could be big; make sure it prints
    default_print_freq = max(524288 * 100 // FLAGS.test_batch_size, 1)
    print_freq = default_print_freq if FLAGS.print_freq is None else FLAGS.print_freq

    steps_per_epoch = len(data_loader)
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter(
        'loss', utils.SmoothedValue(window_size=print_freq, fmt='{avg:.4f}'))
    metric_logger.add_meter(
        'step_time', utils.SmoothedValue(window_size=1, fmt='{avg:.4f}'))
    with torch.no_grad():
        # AUC could be computed per batch and then aggregated, but here we simply collect
        # all the outputs and labels and compute it globally. y_true and y_score naming follows sklearn
        y_true = []
        y_score = []
        stop_time = time()
        for step, (numerical_features, categorical_features,
                   click) in enumerate(data_loader):
            output = model(numerical_features, categorical_features).squeeze()
            loss = loss_fn(output, click)
            y_true.append(click)
            y_score.append(output)

            metric_logger.update(loss=loss.item())
            if step % print_freq == 0:
                metric_logger.update(step_time=(time() - stop_time) /
                                     print_freq)
                stop_time = time()
                metric_logger.print(header=F"Test: [{step}/{steps_per_epoch}]")

        auc = metrics.roc_auc_score(torch.cat(y_true),
                                    torch.sigmoid(torch.cat(y_score)))

    return metric_logger.loss.global_avg, auc
Example #22
def evaluate(model, data_loader, device, epoch,
             print_freq):  # test overfitting
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Validation: [{}]'.format(epoch)
    sum_loss = []

    with torch.no_grad():
        for images, targets in metric_logger.log_every(data_loader, print_freq,
                                                       header):
            # for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device)
                        for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = utils.reduce_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())

            # loss in original paper
            # losses_reduced = loss_dict_reduced['loss_classifier'] + loss_dict_reduced['loss_box_reg']
            # losses = loss_dict['loss_classifier'] + loss_dict['loss_box_reg']
            if math.isfinite(losses.item()):
                sum_loss.append(losses.item())

            loss_value = losses_reduced.item()

            metric_logger.update(loss=losses_reduced, **loss_dict_reduced)

            if device == 'cuda':
                torch.cuda.empty_cache()
                del images
                del targets
                del losses_reduced
                del losses
                del loss_dict
                del loss_dict_reduced
            # break
    sum_loss = np.sum(sum_loss)
    return sum_loss
Example #23
def evaluate_bin_yolo_2014(model, data_loader, device, bin_folder):

    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    jdict = []
    jdict_not_resized = []  # To compare with HW
    imgIds = []
    for imgs, _, paths, shapes in metric_logger.log_every(
            data_loader, 10, header):
        image_ids = [
            int(Path(image_path).stem.split('_')[-1]) for image_path in paths
        ]
        imgIds.extend(image_ids)
        # convert bin files to a tensor
        imgs_tensor = torch.tensor([], dtype=torch.uint8)
        for i, img_id in enumerate(image_ids):
            path = os.path.join(bin_folder, str(img_id) + '.bin')
            with open(path, 'rb') as f:
                img_from_file = np.fromfile(f, np.uint8)
            img_from_file = np.reshape(img_from_file, imgs[i].shape)
            img_T = torch.tensor(img_from_file).unsqueeze(0)
            imgs_tensor = torch.cat((imgs_tensor, img_T), 0)

        # Evaluate one batch
        model_time, evaluator_time = eval_yolo_2014_batch(
            jdict, jdict_not_resized, model, imgs_tensor, image_ids, shapes,
            device)
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

    # imgIds = [int(Path(x).stem.split('_')[-1]) for x in data_loader.dataset.img_files]
    # gather the stats from all processes
    # with open('jdict_not_resized.json', 'w') as f:
    #     json.dump(jdict_not_resized, f)

    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    # COCO eval
    cocoEval = coco_eval_json(annotation_path, jdict, imgIds)
    return cocoEval
Example #24
def preprocess_and_save_bin_yolo_2014(model, data_loader, device):

    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'
    shape_dict = {}
    for imgs, targets, paths, shapes in metric_logger.log_every(
            data_loader, 10, header):

        image_id = int(Path(paths[0]).stem.split('_')[-1])

        filePath = os.path.join(args.output_dir, str(image_id) + '.bin')
        transformed_np_img = imgs[0].cpu().numpy()
        transformed_np_img.tofile(filePath)
        shape_dict[str(image_id)] = [transformed_np_img.shape[1:], shapes]

    # save the recorded image shapes for later evaluation
    jsonPath = os.path.join(args.output_dir, 'images_shape.json')
    with open(jsonPath, 'w') as fp:
        json.dump(shape_dict, fp)
Example #25
def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler,
                    device, epoch, writer, print_freq):
    model.train()

    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr',
                            utils.SmoothedValue(window_size=1, fmt='{value}'))
    header = 'Epoch: [{}]'.format(epoch)

    for image, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):
        image, target = image.to(device), target.to(device)

        output = model(image)

        loss, iou = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        lr_scheduler.step()

        metric_logger.update(loss=loss.item(),
                             lr=optimizer.param_groups[0]["lr"],
                             iou=iou)

        if random.random() < 0.15:
            writer.add_image(
                'input/train',
                torchvision.utils.make_grid([
                    torchvision.utils.make_grid(image),
                    torchvision.utils.make_grid(target),
                    torchvision.utils.make_grid(output['out'].data,
                                                normalize=True)
                ],
                                            nrow=1), epoch)

        writer.add_scalar('loss/train', loss.item(), epoch)
        writer.add_scalar('lr/train', optimizer.param_groups[0]["lr"], epoch)
        writer.add_scalar('iou/train', iou, epoch)
Example #26
def evaluate_unlabeled(model, data_loader, device):
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'
    pred_scores = {}
    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)
        for i in range(len(targets)):
            img_id = targets[i]['image_id'].item()
            pred_scores[img_id] = outputs[i]['vanilla_scores'].cpu().numpy()
        outputs = [{k: v.to(cpu_device)
                    for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time
        metric_logger.update(model_time=model_time)
    return pred_scores
Example #27
def evaluate(model,
             criterion,
             data_loader,
             device,
             print_freq=100,
             dgPruner=None,
             output_dir=''):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'
    with torch.no_grad():
        if dgPruner:
            dgPruner.dump_growth_stat(output_dir, 1000)
            dgPruner.dump_sparsity_stat(model, output_dir, 1000)

        for image, target in metric_logger.log_every(data_loader, print_freq,
                                                     header):
            image = image.to(device)
            target = target.to(device)
            output = model(image)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = image.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()

    print(' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}'.format(
        top1=metric_logger.acc1, top5=metric_logger.acc5))

    metrics = OrderedDict([('loss', metric_logger.loss),
                           ('top1', metric_logger.acc1),
                           ('top5', metric_logger.acc5)])

    return metrics
Example #28
def validate_network(val_loader, model, linear_classifier, n, avgpool):
    linear_classifier.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'
    for inp, target in metric_logger.log_every(val_loader, 20, header):
        # move to gpu
        inp = inp.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # forward
        with torch.no_grad():
            intermediate_output = model.get_intermediate_layers(inp, n)
            output = [x[:, 0] for x in intermediate_output]
            if avgpool:
                output.append(torch.mean(intermediate_output[-1][:, 1:],
                                         dim=1))
            output = torch.cat(output, dim=-1)
        output = linear_classifier(output)
        loss = nn.CrossEntropyLoss()(output, target)

        if linear_classifier.module.num_labels >= 5:
            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
        else:
            acc1, = utils.accuracy(output, target, topk=(1, ))

        batch_size = inp.shape[0]
        metric_logger.update(loss=loss.item())
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        if linear_classifier.module.num_labels >= 5:
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
    if linear_classifier.module.num_labels >= 5:
        print(
            '* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}'
            .format(top1=metric_logger.acc1,
                    top5=metric_logger.acc5,
                    losses=metric_logger.loss))
    else:
        print('* Acc@1 {top1.global_avg:.3f} loss {losses.global_avg:.3f}'.
              format(top1=metric_logger.acc1, losses=metric_logger.loss))
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
Example #29
def evaluate(model, data_loader, device, num_classes):
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter="  ")
    class_iou_image = list()
    img_list = list()
    target_list = list()
    prediction_list = list()

    header = "Evaluate:"
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, 100, header):
            image, target = image.to(device), target.to(device)

            confmat_image = utils.ConfusionMatrix(num_classes)

            output = model(image)
            output = output["out"]

            inv_normalize = transforms.Normalize(mean=(-0.485, -0.456, -0.406),
                                                 std=(1 / 0.229, 1 / 0.224,
                                                      1 / 0.225))
            img_npy = inv_normalize(image[0], target)[0].cpu().detach().numpy()
            target_npy = target.cpu().detach().numpy()
            prediction_npy = output.cpu().detach().numpy()

            img_list.append(img_npy)
            target_list.append(target_npy)
            prediction_list.append(prediction_npy)

            confmat.update(target.flatten(), output.argmax(1).flatten())
            confmat_image.update(target.flatten(), output.argmax(1).flatten())

            class_iou_image.append(confmat_image.get_class_iou())
            confmat_image.reduce_from_all_processes()

        confmat.reduce_from_all_processes()

    return confmat, class_iou_image, img_list, target_list, prediction_list
Example #30
def train_one_epoch(model, criterion, data_loader, optimizer, epoch,
                    max_epoch):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Epoch: [{}]/[{}]'.format(epoch, max_epoch - 1)
    print_freq = 20

    for batch in metric_logger.log_every(data_loader, print_freq, header):

        images = batch['img'].cuda()
        labels = batch['label'].cuda()
        logits = model(images)
        loss = criterion(logits, labels)
        #         print(loss.detach().cpu().numpy())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        metric_logger.update(loss=loss.item())

    print("stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}