Example #1
def main(args):

    # Lets cuDNN benchmark conv implementations and choose the fastest.
    # Only good if sizes stay the same within the main loop!
    torch.backends.cudnn.benchmark = True

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #classes = 5

    #valid_set, valid_loader = mkval(args)

    model = models.KNOWN_MODELS[args.model](head_size=args.classes,
                                            zero_head=False)
    model = torch.nn.DataParallel(model)
    # Load the fine-tuned checkpoint to CPU first; the model is moved to the
    # GPU below, which saves a little bit of GPU memory while loading.
    checkpoint = torch.load(args.weight_path, map_location="cpu")
    model.load_state_dict(checkpoint["model"])

    model = model.to(device)

    model.eval()

    val_tx = tv.transforms.Compose([
        tv.transforms.Resize((448, 448)),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    run_predict(model, args.datadir, val_tx, device)
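
This example shows only the main function; the command-line arguments it reads (model, classes, weight_path, datadir) are parsed elsewhere. A minimal, hypothetical parser covering just the attributes this main accesses might look like the sketch below; the flag names, defaults, and help strings are assumptions, not taken from the original script.

import argparse

def parse_args():
    # Hypothetical parser: only the attributes used by main() above are defined.
    parser = argparse.ArgumentParser(
        description="Run inference with a fine-tuned classifier")
    parser.add_argument("--model", required=True,
                        help="Key into models.KNOWN_MODELS (e.g. a BiT variant)")
    parser.add_argument("--classes", type=int, default=5,
                        help="Size of the classification head")
    parser.add_argument("--weight_path", required=True,
                        help="Fine-tuned checkpoint to load")
    parser.add_argument("--datadir", required=True,
                        help="Directory of images passed to run_predict")
    return parser.parse_args()

if __name__ == "__main__":
    main(parse_args())
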
Example #2
def main(args):

    best_acc = -1

    logger = bit_common.setup_logger(args)
    cp, cn = smooth_BCE(eps=0.1)  # label-smoothing targets (unused below)
    # Lets cuDNN benchmark conv implementations and choose the fastest.
    # Only good if sizes stay the same within the main loop!
    torch.backends.cudnn.benchmark = True

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info(f"Going to train on {device}")

    classes = 5

    train_set, valid_set, train_loader, valid_loader = mktrainval(args, logger)
    logger.info(f"Loading model from {args.model}.npz")
    #model = models.KNOWN_MODELS[args.model](head_size=classes, zero_head=True)
    #model.load_from(np.load(f"{args.model}.npz"))

    model = EfficientNet.from_pretrained(args.model, num_classes=classes)
    logger.info("Moving model onto all GPUs")
    model = torch.nn.DataParallel(model)

    # Optionally resume from a checkpoint.
    # Load it to CPU first as we'll move the model to GPU later.
    # This way, we save a little bit of GPU memory when loading.
    start_epoch = 0

    # Note: no weight-decay!
    optim = torch.optim.SGD(model.parameters(), lr=0.003, momentum=0.9)

    # Resume fine-tuning if we find a saved model.
    savename = pjoin(args.logdir, args.name, "bit.pth.tar")
    try:
        logger.info(f"Model will be saved in '{savename}'")
        checkpoint = torch.load(savename, map_location="cpu")
        logger.info(f"Found saved model to resume from at '{savename}'")

        start_epoch = checkpoint["epoch"]
        model.load_state_dict(checkpoint["model"])
        optim.load_state_dict(checkpoint["optim"])
        logger.info(f"Resumed at epoch {start_epoch}")
    except FileNotFoundError:
        logger.info("Fine-tuning from BiT")

    model = model.to(device)
    optim.zero_grad()

    model.train()
    mixup = bit_hyperrule.get_mixup(len(train_set))
    #mixup = -1
    cri = torch.nn.CrossEntropyLoss().to(device)
    #cri = FocalLoss(cri)
    logger.info("Starting training!")
    chrono = lb.Chrono()
    accum_steps = 0
    mixup_l = np.random.beta(mixup, mixup) if mixup > 0 else 1
    end = time.time()

    num_epochs = 10
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optim,
                                                    max_lr=0.01,
                                                    steps_per_epoch=1,
                                                    epochs=num_epochs)

    with lb.Uninterrupt() as u:
        for epoch in range(start_epoch, num_epochs):

            pbar = enumerate(train_loader)
            pbar = tqdm.tqdm(pbar, total=len(train_loader))

            all_top1, all_top5 = [], []
            for param_group in optim.param_groups:
                lr = param_group["lr"]
            #for x, y in recycle(train_loader):
            for batch_id, (x, y) in pbar:
                #for batch_id, (x, y) in enumerate(train_loader):
                # measure data loading time, which is spent in the `for` statement.
                chrono._done("load", time.time() - end)

                if u.interrupted:
                    break

                # Schedule sending to GPU(s)
                x = x.to(device, non_blocking=True)
                y = y.to(device, non_blocking=True)

                # The learning rate is driven by the OneCycleLR schedule,
                # which is stepped once per epoch below.

                if mixup > 0.0:
                    x, y_a, y_b = mixup_data(x, y, mixup_l)

                # compute output
                with chrono.measure("fprop"):
                    logits = model(x)
                    top1, top5 = topk(logits, y, ks=(1, 5))
                    all_top1.extend(top1.cpu())
                    all_top5.extend(top5.cpu())
                    if mixup > 0.0:
                        c = mixup_criterion(cri, logits, y_a, y_b, mixup_l)
                    else:
                        c = cri(logits, y)
                train_loss = c.item()
                train_acc = np.mean(all_top1) * 100.0
                # Accumulate grads
                with chrono.measure("grads"):
                    (c / args.batch_split).backward()
                    accum_steps += 1
                accstep = f"({accum_steps}/{args.batch_split})" if args.batch_split > 1 else ""
                s = f"epoch={epoch} batch {batch_id}{accstep}: loss={train_loss:.5f} train_acc={train_acc:.2f} lr={lr:.1e}"
                #s = f"epoch={epoch} batch {batch_id}{accstep}: loss={c.item():.5f} lr={lr:.1e}"
                pbar.set_description(s)
                #logger.info(f"[batch {batch_id}{accstep}]: loss={c_num:.5f} (lr={lr:.1e})")  # pylint: disable=logging-format-interpolation
                logger.flush()

                # Update params once gradients from `batch_split` micro-batches
                # have been accumulated, matching the loss scaling above.
                if accum_steps == args.batch_split:
                    with chrono.measure("update"):
                        optim.step()
                        optim.zero_grad()
                    accum_steps = 0
                    # Sample new mixup ratio for next batch
                    mixup_l = np.random.beta(mixup, mixup) if mixup > 0 else 1

                # Reset the data-loading timer so `chrono` only measures time
                # spent in the `for` statement.
                end = time.time()

            # Advance the one-cycle LR schedule once per epoch, after this
            # epoch's optimizer updates.
            scheduler.step()

            # Run evaluation and save the model.
            val_loss, val_acc = run_eval(model, valid_loader, device, chrono,
                                         logger, epoch)

            best = val_acc > best_acc
            if best:
                best_acc = val_acc
                torch.save(
                    {
                        "epoch": epoch,
                        "val_loss": val_loss,
                        "val_acc": val_acc,
                        "train_acc": train_acc,
                        "model": model.state_dict(),
                        "optim": optim.state_dict(),
                    }, savename)
            end = time.time()

    logger.info(f"Timings:\n{chrono}")
Example #3
def main(args):
    logger = common.setup_logger(args)

    # Lets cuDNN benchmark conv implementations and choose the fastest.
    # Only good if sizes stay the same within the main loop!
    torch.backends.cudnn.benchmark = True

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logger.info(f"Going to train on {device}")

    train_set, valid_set, train_loader, valid_loader = mktrainval(args, logger)

    logger.info(f"Loading model from {args.model}.npz")
    model = models.KNOWN_MODELS[args.model](head_size=len(valid_set.classes),
                                            zero_head=True)
    model.load_from(
        np.load(os.path.join(args.pretrained_dir, f"{args.model}.npz")))

    logger.info("Moving model onto all GPUs")
    model = torch.nn.DataParallel(model)

    # Optionally resume from a checkpoint.
    # Load it to CPU first as we'll move the model to GPU later.
    # This way, we save a little bit of GPU memory when loading.
    step = 0

    # Note: no weight-decay!
    optim = torch.optim.SGD(model.parameters(), lr=args.base_lr, momentum=0.9)

    writer = SummaryWriter(os.path.join(args.logdir, args.name))

    # Resume fine-tuning if we find a saved model.
    savename = pjoin(args.logdir, args.name, "model.tar")
    try:
        logger.info(f"Model will be saved in '{savename}'")
        checkpoint = torch.load(savename, map_location="cpu")
        logger.info(f"Found saved model to resume from at '{savename}'")

        step = checkpoint["step"]
        model.load_state_dict(checkpoint["model"])
        optim.load_state_dict(checkpoint["optim"])
        logger.info(f"Resumed at step {step}")
    except FileNotFoundError:
        logger.info("Fine-tuning from BiT")

    model = model.to(device)
    optim.zero_grad()

    model.train()
    mixup = hyperrule.get_mixup(len(train_set))
    cri = torch.nn.CrossEntropyLoss().to(device)

    logger.info("Starting training!")
    chrono = lb.Chrono()
    accum_steps = 0
    mixup_l = np.random.beta(mixup, mixup) if mixup > 0 else 1
    end = time.time()

    with lb.Uninterrupt() as u:
        for x, y in recycle(train_loader):
            # measure data loading time, which is spent in the `for` statement.
            chrono._done("load", time.time() - end)

            if u.interrupted:
                break

            # Schedule sending to GPU(s)
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            # Update learning-rate, including stop training if over.
            lr = hyperrule.get_lr(step, len(train_set), args.base_lr)
            if lr is None:
                break
            for param_group in optim.param_groups:
                param_group["lr"] = lr

            if mixup > 0.0:
                x, y_a, y_b = mixup_data(x, y, mixup_l)

            # compute output
            with chrono.measure("fprop"):
                logits = model(x)
                if mixup > 0.0:
                    c = mixup_criterion(cri, logits, y_a, y_b, mixup_l)
                else:
                    c = cri(logits, y)
                c_num = float(
                    c.data.cpu().numpy())  # Also ensures a sync point.

            # Accumulate grads
            with chrono.measure("grads"):
                (c / args.batch_split).backward()
                accum_steps += 1

            accstep = f" ({accum_steps}/{args.batch_split})" if args.batch_split > 1 else ""
            logger.info(
                f"[step {step}{accstep}]: loss={c_num:.5f} (lr={lr:.1e})")  # pylint: disable=logging-format-interpolation
            logger.flush()
            writer.add_scalar('Train/loss', c_num, step)
            writer.add_scalar('Train/lr', lr, step)

            # Update params
            if accum_steps == args.batch_split:
                with chrono.measure("update"):
                    optim.step()
                    optim.zero_grad()
                step += 1
                accum_steps = 0
                # Sample new mixup ratio for next batch
                mixup_l = np.random.beta(mixup, mixup) if mixup > 0 else 1

                # Run evaluation and save the model.
                if args.eval_every and step % args.eval_every == 0:
                    run_eval(model, valid_loader, device, chrono, logger,
                             writer, step)
                if args.save and step % args.save_every == 0:
                    step_savename = pjoin(args.logdir, args.name,
                                          "model_" + str(step) + ".tar")
                    torch.save(
                        {
                            "step": step,
                            "model": model.state_dict(),
                            "optim": optim.state_dict()
                        }, step_savename)

            end = time.time()

        # Final eval at end of training.
        run_eval(model, valid_loader, device, chrono, logger, writer, step)

    logger.info(f"Timings:\n{chrono}")