Example #1
def train(train_loader, model, criterion, optimizer, scheduler, epoch, args):
    cls_losses, reg_losses, center_ness_losses, losses = [], [], [], []

    # switch to train mode
    model.train()

    iters = len(train_loader.dataset) // (args.per_node_batch_size * gpus_num)
    prefetcher = COCODataPrefetcher(train_loader)
    images, annotations = prefetcher.next()
    iter_index = 1

    while images is not None:
        images, annotations = images.cuda().float(), annotations.cuda()
        cls_heads, reg_heads, center_heads, batch_positions = model(images)
        cls_loss, reg_loss, center_ness_loss = criterion(
            cls_heads, reg_heads, center_heads, batch_positions, annotations)

        loss = 10 * cls_loss + reg_loss + center_ness_loss
        # Skip batches that yield a zero loss (e.g. no positive samples);
        # fetch the next batch first so the loop does not spin on the same data.
        if cls_loss == 0.0 or reg_loss == 0.0:
            optimizer.zero_grad()
            images, annotations = prefetcher.next()
            iter_index += 1
            continue

        # Apex mixed-precision: scale the loss before backward so that fp16
        # gradients do not underflow.
        if args.apex:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        # Clip gradients to a small max norm (0.1) to stabilize training, then
        # step the optimizer and reset gradients for the next batch.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
        optimizer.step()
        optimizer.zero_grad()

        cls_losses.append(cls_loss.item())
        reg_losses.append(reg_loss.item())
        center_ness_losses.append(center_ness_loss.item())
        losses.append(cls_loss.item() + reg_loss.item() +
                      center_ness_loss.item())

        images, annotations = prefetcher.next()

        if local_rank == 0 and iter_index % args.print_interval == 0:
            logger.info(
                f"train: epoch {epoch:0>3d}, iter [{iter_index:0>5d}, {iters:0>5d}], cls_loss: {cls_loss.item():.2f}, reg_loss: {reg_loss.item():.2f}, center_ness_loss: {center_ness_loss.item():.2f}, loss_total: {losses[-1]:.2f}"
            )

        iter_index += 1

    scheduler.step(np.mean(losses))

    return np.mean(cls_losses), np.mean(reg_losses), np.mean(
        center_ness_losses), np.mean(losses)
Example #2
def train(train_loader, model, criterion, optimizer, scheduler, epoch, args):
    heatmap_losses, offset_losses, wh_losses, losses = [], [], [], []

    # switch to train mode
    model.train()

    iters = len(train_loader.dataset) // (args.per_node_batch_size * gpus_num)
    prefetcher = COCODataPrefetcher(train_loader)
    images, annotations = prefetcher.next()
    iter_index = 1

    while images is not None:
        images, annotations = images.cuda().float(), annotations.cuda()

        heatmap_output, offset_output, wh_output = model(images)
        heatmap_loss, offset_loss, wh_loss = criterion(heatmap_output,
                                                       offset_output,
                                                       wh_output, annotations)
        loss = heatmap_loss + offset_loss + wh_loss

        if args.apex:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
        optimizer.step()
        optimizer.zero_grad()

        heatmap_losses.append(heatmap_loss.item())
        offset_losses.append(offset_loss.item())
        wh_losses.append(wh_loss.item())
        losses.append(loss.item())

        images, annotations = prefetcher.next()

        if local_rank == 0 and iter_index % args.print_interval == 0:
            logger.info(
                f"train: epoch {epoch:0>3d}, iter [{iter_index:0>5d}, {iters:0>5d}], heatmap_loss: {heatmap_loss.item():.2f}, offset_loss: {offset_loss.item():.2f}, wh_loss: {wh_loss.item():.2f}, loss_total: {loss.item():.2f}"
            )

        iter_index += 1

    scheduler.step()

    return np.mean(heatmap_losses), np.mean(offset_losses), np.mean(
        wh_losses), np.mean(losses)
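
Both examples assume several module-level names that the snippets do not define: np, torch, amp, COCODataPrefetcher, logger, local_rank, and gpus_num. The sketch below shows one way that surrounding context could be set up; the import paths for COCODataPrefetcher and get_logger are placeholders rather than the project's real modules, and the local_rank / gpus_num initialization assumes a single-node launch via torchrun.

import os

import numpy as np
import torch
from apex import amp  # used only when args.apex is True

# Placeholder imports: the real project defines these in its own modules,
# so these paths are assumptions, not the actual API.
from coco_prefetcher import COCODataPrefetcher
from log_utils import get_logger

logger = get_logger(__name__)                      # per-process logger
local_rank = int(os.environ.get("LOCAL_RANK", 0))  # rank of this process on the node
gpus_num = torch.cuda.device_count()               # GPUs participating on this node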