Example #1
def get_model(opt, pretrained=None, trn=True, weights_FIMs=None, alpha=1.):
    '''Build and initialize the model.

    Args:
        pretrained: None, or a path to pretrained model weights.
        trn: True for training mode, False for evaluation mode.
    '''
    # Model structure
    model = Darknet(opt.model_config_path, opt.img_size, weights_FIMs, alpha)
    print(model)
    # Initialize
    model.apply(weights_init_normal)
    # Load pretrained weights if a path was given
    coco_weights = (pretrained == 'weights/yolov3.weights')
    try:
        model.load_weights(pretrained, use_coco=coco_weights)
    except TypeError:
        # pretrained is None: keep the random initialization from above
        pass
    # Cuda or not
    if opt.cuda:
        model = model.cuda()
        cudnn.benchmark = True
    # Mode = train or eval
    if trn:
        model.train()
    else:
        model.eval()
    return model
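
A minimal usage sketch for the helper above. The `opt` namespace here is hypothetical; its field names simply mirror the attributes `get_model()` reads (`model_config_path`, `img_size`, `cuda`), and the weights path follows the convention checked inside the function body.

from argparse import Namespace

# Hypothetical option namespace (not part of the original snippet)
opt = Namespace(model_config_path="config/yolov3.cfg", img_size=416, cuda=True)

# Training-mode model initialized from the COCO weights path the function recognizes
model = get_model(opt, pretrained='weights/yolov3.weights', trn=True)

# Evaluation-mode model left at its random initialization
# (pretrained=None takes the TypeError branch and skips weight loading)
eval_model = get_model(opt, pretrained=None, trn=False)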
Example #2
    os.makedirs("output", exist_ok=True)
    os.makedirs("checkpoints", exist_ok=True)

    # Get data configuration
    data_config = parse_data_config(opt.data_config)
    if platform in ("linux", "linux2"):
        train_path = data_config["train_Linux"]
        valid_path = data_config["valid_Linux"]
    else:
        train_path = data_config["train"]
        valid_path = data_config["valid"]

    class_names = load_classes(data_config["names"])
    # Initiate model
    model = Darknet(opt.model_def).to(device)
    model.apply(weights_init_normal)

    # If specified we start from checkpoint
    if opt.pretrained_weights:
        if opt.pretrained_weights.endswith(".pth"):
            model.load_state_dict(torch.load(opt.pretrained_weights))
        else:
            model.load_darknet_weights(opt.pretrained_weights)

    # Get dataloader
    dataset = ListDataset(train_path,
                          augment=False,
                          multiscale=opt.multiscale_training)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=opt.batch_size,
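
The DataLoader call above is cut off. The sketch below shows how such a call is typically completed in this style of script; the remaining keyword arguments are assumptions (including the `dataset.collate_fn` hook common to PyTorch-YOLOv3 `ListDataset` implementations and an `opt.n_cpu` flag), not the original continuation.

    # Sketch only: assumed remaining arguments, not the original snippet's code
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.n_cpu,        # assumed command-line flag
        pin_memory=True,
        collate_fn=dataset.collate_fn,  # assumes ListDataset provides collate_fn
    )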
Example #3
def main(opt):
    wandb.init(project="training_loop_tutorial", entity='samiksha')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    os.makedirs("output", exist_ok=True)
    os.makedirs("checkpoints", exist_ok=True)

    # Initiate model
    model = Darknet(opt.model_def).to(device)
    model.apply(utils.weights_init_normal)

    pretrained_weights = opt.pretrained_weights
    if pretrained_weights is not None:
        print(f'\nLoading weights: {pretrained_weights}\n')
        if pretrained_weights.endswith(".pth"):
            # Resume from a checkpoint saved by this PyTorch training script
            checkpoint = torch.load(pretrained_weights)
            model.load_state_dict(checkpoint['model_state_dict'])
        else:
            # Load the original author's darknet-format pretrained YOLO weights
            model.load_darknet_weights(pretrained_weights)

    # dataloader
    root_train = opt.root_train
    root_test = opt.root_test
    img_size = opt.img_size
    # dataset_train = PascalVOC(root_train, transform=get_transforms(img_size=img_size))
    # dataset_test = PascalVOC(root_test, transform=get_transforms(img_size=img_size))
    dataset_train = NipponDataset(root_train,
                                  transform=get_transforms(img_size=img_size))
    dataset_test = NipponDataset(root_test,
                                 transform=get_transforms(img_size=img_size))

    # Take subset of dataset for faster testing
    debug_mode = opt.debug_mode
    if debug_mode:
        num_debug_imgs = 100
        num_images_train = min(num_debug_imgs, len(dataset_train))
        num_images_test = min(num_debug_imgs, len(dataset_test))
        print(
            f'Warning: Debugging mode, only {num_images_train} images from datasets will be used.'
        )
    else:
        num_images_train = len(dataset_train)
        num_images_test = len(dataset_test)
    dataset_train = torch.utils.data.Subset(dataset_train,
                                            list(range(num_images_train)))
    dataset_test = torch.utils.data.Subset(dataset_test,
                                           list(range(num_images_test)))

    batch_size = model.hyperparams['batch']
    n_cpu = opt.n_cpu
    trainloader = torch.utils.data.DataLoader(dataset_train,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              collate_fn=collate_fn,
                                              num_workers=n_cpu)

    sampler = torch.utils.data.SequentialSampler(dataset_test)
    testloader = torch.utils.data.DataLoader(dataset_test,
                                             batch_size=batch_size,
                                             sampler=sampler,
                                             collate_fn=collate_fn,
                                             num_workers=n_cpu)

    # optimizer
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=model.hyperparams['learning_rate'],
        weight_decay=model.hyperparams['decay'],
    )

    epochs = opt.epochs
    evaluation_interval = opt.evaluation_interval
    checkpoint_interval = opt.checkpoint_interval
    log_image_interval = opt.log_image_interval
    for epoch_idx in range(epochs):
        print(f"Epoch {epoch_idx + 1}\n-------------------------------")
        train_loop(trainloader, model, optimizer, device)

        # Run Evaluation
        if (epoch_idx + 1) % evaluation_interval == 0:
            evaluate_metrics(model,
                             testloader,
                             device,
                             iou_thres=0.5,
                             conf_thres=0.1,
                             nms_thres=0.5,
                             mode="Test")
        if (epoch_idx + 1) % log_image_interval == 0:
            log_bbox_predictions(model,
                                 testloader,
                                 device,
                                 conf_thres=0.5,
                                 nms_thres=0.5,
                                 mode="Test",
                                 max_images_to_upload=16)

        # Save checkpoint
        if (epoch_idx + 1) % checkpoint_interval == 0:
            run_id = wandb.run.id
            save_dir = Path(f"checkpoints/{run_id}")
            save_dir.mkdir(parents=True, exist_ok=True)
            checkpoint_path = str(save_dir / f"yolov3_ckpt_{epoch_idx}.pth")

            torch.save(
                {
                    'epoch': epoch_idx,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, checkpoint_path)
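
To resume training from one of the checkpoints this loop writes, the saved dictionary can be loaded back. A minimal sketch, assuming `model`, `optimizer`, and `device` are constructed as above; the checkpoint path is a placeholder.

# Sketch: restore a checkpoint written by the loop above.
# The keys match what torch.save() was given ('epoch', 'model_state_dict',
# 'optimizer_state_dict'); the path below is a placeholder.
checkpoint = torch.load("checkpoints/<run_id>/yolov3_ckpt_9.pth",
                        map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1  # continue from the following epoch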
Example #4
                        help="allow for multi-scale training")
    opt = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    os.makedirs("output", exist_ok=True)
    os.makedirs("checkpoints", exist_ok=True)

    # Load data configuration
    data_config = parse_config.parse_data_config(opt.data_config)
    train_path, val_path = data_config['train'], data_config['valid']
    class_names = parse_config.load_classes(data_config['names'])

    # Initialize model
    model = Darknet(opt.model_def).to(device)
    model.apply(utils.weights_init_normal)
    # load weights from checkpoint
    if opt.pretrained_weights:
        if opt.pretrained_weights.endswith(".pth"):
            model.load_state_dict(
                torch.load(opt.pretrained_weights, map_location=device))
        else:
            model.load_darknet_weights(opt.pretrained_weights)

    # Initialize optimizer
    optimizer = torch.optim.Adam(model.parameters())

    # Initialize data loader
    dataset = datasets.ListDataset(train_path)
    data_loader = torch.utils.data.DataLoader(
        dataset,
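
For reference, `parse_data_config` in PyTorch-YOLOv3-style repositories reads a darknet `.data` file of `key=value` lines, which is where the `train`, `valid`, and `names` entries above come from. Below is a minimal re-implementation sketch under that assumption, with a hypothetical name so it is not confused with the library call.

def parse_data_config_sketch(path):
    """Parse a darknet-style .data file (key=value per line) into a dict.

    Example file contents (illustrative):
        classes=80
        train=data/coco/trainvalno5k.txt
        valid=data/coco/5k.txt
        names=data/coco.names
    """
    options = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            key, value = line.split('=', 1)
            options[key.strip()] = value.strip()
    return options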
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", default="config/runs/config.json")
    parser.add_argument("--output_dir", default='output')
    args = parser.parse_args()

    with open(args.config_file) as config_buffer:
        config = json.loads(config_buffer.read())

    exp_name = get_experiment_name(config)
    print(f"Experiment name: {exp_name}")
    out_dir = os.path.join(args.output_dir, exp_name)
    if os.path.exists(out_dir):
        print("experiment dir already exists! Removing...")
        shutil.rmtree(out_dir)

    os.makedirs(out_dir)

    log_dir = f"{out_dir}/logs"
    checkpoint_dir = f"{out_dir}/checkpoints"
    os.makedirs(checkpoint_dir, exist_ok=True)
    tb_logger = SummaryWriter(log_dir)

    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        handlers=[
            logging.FileHandler(f"{out_dir}/log.log"),
            logging.StreamHandler(sys.stdout)
        ],
        level=logging.INFO)

    logger = logging.getLogger(__name__)
    logger.info("New session")

    seed = config["train"]["seed"]
    if seed > 0:
        np.random.seed(seed)
        torch.manual_seed(seed)

    ###############################
    #   Prepare data loaders
    ###############################
    print("Loading datasets...")
    if config['val']['validate']:
        train_loader, val_concat_loader, val_loader_dict = prepare_dataloaders(
            config)
    else:
        train_loader = prepare_dataloaders(config)
    print("Loaded!")
    if config["train"]["debug"]:
        image_batch, target = next(iter(train_loader))
        draw_image_batch_with_targets(image_batch[:4], target, cols=2)

        if config['val']['validate']:
            val_image_batch, val_target = next(iter(val_concat_loader))
            draw_image_batch_with_targets(val_image_batch[:4],
                                          val_target,
                                          cols=2)

    ###############################
    #   Construct the model
    ###############################

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = Darknet(config["model"]["config"]).to(device)
    model.apply(weights_init_normal)
    print("Model initialized!")

    if config["train"]["freeze_feature_extractor"]:
        model.freeze_feature_extractor()

    print(f"Trainable params: {get_trainable_params_num(model):,}")

    # If specified we start from checkpoint
    if config["model"]["pretrained_weights"]:
        if config["model"]["pretrained_weights"].endswith(".pth"):
            model.load_state_dict(
                torch.load(config["model"]["pretrained_weights"]))
        else:
            model.load_darknet_weights(config["model"]["pretrained_weights"])
            print("Pretrained weights loaded!")

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config["train"]["learning_rate"])

    ###############################
    #   Training
    ###############################
    batches_done = 0
    grad_accumulations = config["train"]["gradient_accumulations"]
    save_every = config["train"]["save_every"]

    if config["val"]["validate"]:
        val_iterator = iter(val_concat_loader)

    for epoch in range(config["train"]["nb_epochs"]):

        effective_loss = 0
        loss_history = torch.zeros(len(train_loader))
        logger.info(f"Epoch {epoch} started!")
        bar = tqdm(train_loader)
        for i, (image_batch, bboxes) in enumerate(bar):
            model.train()
            image_batch = image_batch.to(device)
            bboxes = bboxes.to(device)

            loss, outputs = model(image_batch, bboxes)
            effective_loss += loss.item()
            loss_history[i] = loss.item()

            loss.backward()

            if i % grad_accumulations == 0:
                # Gradients have been accumulated over the last batches; clip
                # and measure them before the optimizer step so the clipped
                # values are the ones actually applied
                if config["train"]["gradient_clipping"]:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
                grad_norm = get_grad_norm(model)

                optimizer.step()
                optimizer.zero_grad()
                if config["val"]["validate"]:
                    model.eval()

                    try:
                        val_image_batch, val_bboxes = next(val_iterator)
                    except StopIteration:
                        val_iterator = iter(val_concat_loader)
                        val_image_batch, val_bboxes = next(val_iterator)
                    val_image_batch = val_image_batch.to(device)
                    val_bboxes = val_bboxes.to(device)
                    with torch.no_grad():
                        val_loss, val_outputs = model(val_image_batch,
                                                      val_bboxes)

                    tb_logger.add_scalar("loss/validation", val_loss,
                                         batches_done)

                bar.set_description(
                    f"Loss: {effective_loss / grad_accumulations:.6f}")

                batches_done += 1

                # Tensorboard logging
                for metric_name in metrics:
                    metric_dict = {}
                    for j, yolo_layer in enumerate(model.yolo_layers):
                        metric_dict[f"yolo_{j}"] = yolo_layer.metrics[
                            metric_name]

                    if metric_name == 'loss':
                        metric_dict["overall"] = loss.item()

                    tb_logger.add_scalars(metric_name, metric_dict,
                                          batches_done)
                tb_logger.add_scalar("grad_norm", grad_norm, batches_done)
                tb_logger.add_scalar("loss/effective_loss", effective_loss,
                                     batches_done)

                effective_loss = 0

                # save model
                if save_every > 0 and batches_done % save_every == 0:
                    torch.save(model.state_dict(),
                               f"{checkpoint_dir}/yolov3_{batches_done}.pth")

        epoch_loss = loss_history.mean()
        print(f"Epoch loss: {epoch_loss}")
        tb_logger.add_scalar("epoch_loss", epoch_loss, epoch)

        if config["val"]["validate"]:
            result_dict = evaluate(model, val_loader_dict, config["val"])
            for name, results in result_dict.items():
                output_str = f"{name} evaluation results:\n" \
                    f"precision-{results['precision']},\n" \
                    f"recall-{results['recall']},\n" \
                    f"AP-{results['AP']},\n" \
                    f"F1-{results['F1']},\n" \
                    f"ap_class-{results['AP_class']}"
                logging.info(output_str)
                print(output_str)

                tb_logger.add_scalar(f"val_precision/{name}",
                                     results['precision'], epoch)
                tb_logger.add_scalar(f"val_recall/{name}", results['recall'],
                                     epoch)
                tb_logger.add_scalar(f"val_F1/{name}", results['F1'], epoch)
                tb_logger.add_scalar(f"val_AP/{name}", results['AP'], epoch)

        # save model
        torch.save(model.state_dict(),
                   f"{checkpoint_dir}/yolov3_epoch_{epoch}.pth")