def get_model(opt, pretrained=None, trn=True, weights_FIMs=None, alpha=1.):
    '''Build and initialize the model.

    Args:
        pretrained: None, or a path to pretrained model weights.
        trn: True for training, False for evaluation.
    '''
    # Model structure
    model = Darknet(opt.model_config_path, opt.img_size, weights_FIMs, alpha)
    print(model)
    # Initialize weights
    model.apply(weights_init_normal)
    # Pretrained or not
    coco_weights = pretrained == 'weights/yolov3.weights'
    try:
        model.load_weights(pretrained, use_coco=coco_weights)
    except TypeError:
        # pretrained is None: keep the random initialization
        pass
    # CUDA or not
    if opt.cuda:
        model = model.cuda()
        cudnn.benchmark = True
    # Mode: train or eval
    if trn:
        model.train()
    else:
        model.eval()
    return model
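# Usage sketch for get_model(). `opt` is a hypothetical namespace: the
# field names (model_config_path, img_size, cuda) are the ones get_model()
# actually reads, but the paths below are placeholders.
from argparse import Namespace

import torch

opt = Namespace(model_config_path='config/yolov3.cfg',
                img_size=416,
                cuda=torch.cuda.is_available())
train_model = get_model(opt, pretrained='weights/yolov3.weights', trn=True)
eval_model = get_model(opt, pretrained=None, trn=False)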
os.makedirs("output", exist_ok=True) os.makedirs("checkpoints", exist_ok=True) # Get data configuration data_config = parse_data_config(opt.data_config) if platform == "linux" or platform == "linux2": train_path = data_config["train_Linux"] valid_path = data_config["valid_Linux"] else: train_path = data_config["train"] valid_path = data_config["valid"] class_names = load_classes(data_config["names"]) # Initiate model model = Darknet(opt.model_def).to(device) model.apply(weights_init_normal) # If specified we start from checkpoint if opt.pretrained_weights: if opt.pretrained_weights.endswith(".pth"): model.load_state_dict(torch.load(opt.pretrained_weights)) else: model.load_darknet_weights(opt.pretrained_weights) # Get dataloader dataset = ListDataset(train_path, augment=False, multiscale=opt.multiscale_training) dataloader = torch.utils.data.DataLoader( dataset, batch_size=opt.batch_size,
def main(opt):
    wandb.init(project="training_loop_tutorial", entity='samiksha')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    os.makedirs("output", exist_ok=True)
    os.makedirs("checkpoints", exist_ok=True)

    # Initiate model
    model = Darknet(opt.model_def).to(device)
    model.apply(utils.weights_init_normal)
    pretrained_weights = opt.pretrained_weights
    if pretrained_weights is not None:
        print(f'\nLoading weights: {pretrained_weights}\n')
        if pretrained_weights.endswith(".pth"):
            # Load our pytorch training's checkpoint
            checkpoint = torch.load(pretrained_weights)
            model.load_state_dict(checkpoint['model_state_dict'])
        else:
            # Load the original author's darknet weights
            model.load_darknet_weights(pretrained_weights)

    # Dataloaders
    root_train = opt.root_train
    root_test = opt.root_test
    img_size = opt.img_size
    # dataset_train = PascalVOC(root_train, transform=get_transforms(img_size=img_size))
    # dataset_test = PascalVOC(root_test, transform=get_transforms(img_size=img_size))
    dataset_train = NipponDataset(root_train, transform=get_transforms(img_size=img_size))
    dataset_test = NipponDataset(root_test, transform=get_transforms(img_size=img_size))

    # Take a subset of each dataset for faster testing
    debug_mode = opt.debug_mode
    if debug_mode:
        num_debug_imgs = 100
        num_images_train = min(num_debug_imgs, len(dataset_train))
        num_images_test = min(num_debug_imgs, len(dataset_test))
        print(f'Warning: Debugging mode, only {num_images_train} images from datasets will be used.')
    else:
        num_images_train = len(dataset_train)
        num_images_test = len(dataset_test)
    dataset_train = torch.utils.data.Subset(dataset_train, list(range(num_images_train)))
    dataset_test = torch.utils.data.Subset(dataset_test, list(range(num_images_test)))

    batch_size = model.hyperparams['batch']
    n_cpu = opt.n_cpu
    trainloader = torch.utils.data.DataLoader(dataset_train,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              collate_fn=collate_fn,
                                              num_workers=n_cpu)
    sampler = torch.utils.data.SequentialSampler(dataset_test)
    testloader = torch.utils.data.DataLoader(dataset_test,
                                             batch_size=batch_size,
                                             sampler=sampler,
                                             collate_fn=collate_fn,
                                             num_workers=n_cpu)

    # Optimizer
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=model.hyperparams['learning_rate'],
        weight_decay=model.hyperparams['decay'],
    )

    epochs = opt.epochs
    evaluation_interval = opt.evaluation_interval
    checkpoint_interval = opt.checkpoint_interval
    log_image_interval = opt.log_image_interval
    for epoch_idx in range(epochs):
        print(f"Epoch {epoch_idx + 1}\n-------------------------------")
        train_loop(trainloader, model, optimizer, device)

        # Run evaluation
        if (epoch_idx + 1) % evaluation_interval == 0:
            evaluate_metrics(model,
                             testloader,
                             device,
                             iou_thres=0.5,
                             conf_thres=0.1,
                             nms_thres=0.5,
                             mode="Test")
        if (epoch_idx + 1) % log_image_interval == 0:
            log_bbox_predictions(model,
                                 testloader,
                                 device,
                                 conf_thres=0.5,
                                 nms_thres=0.5,
                                 mode="Test",
                                 max_images_to_upload=16)

        # Save checkpoint
        if (epoch_idx + 1) % checkpoint_interval == 0:
            run_id = wandb.run.id
            save_dir = Path(f"checkpoints/{run_id}")
            save_dir.mkdir(parents=True, exist_ok=True)
            checkpoint_path = str(save_dir / f"yolov3_ckpt_{epoch_idx}.pth")
            torch.save(
                {
                    'epoch': epoch_idx,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, checkpoint_path)
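# `collate_fn` above is not defined in this snippet. Detection targets
# carry a variable number of boxes per image, so the default collate
# cannot stack them. A minimal sketch, assuming each target is a tensor
# of shape [num_boxes, 6] whose first column holds the sample index:
import torch

def collate_fn(batch):
    imgs, targets = zip(*batch)
    targets = [boxes for boxes in targets if boxes is not None]
    # Tag each box with the index of the image it belongs to
    for i, boxes in enumerate(targets):
        boxes[:, 0] = i
    return torch.stack(imgs, dim=0), torch.cat(targets, dim=0)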
help="allow for multi-scale training") opt = parser.parse_args() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") os.makedirs("output", exist_ok=True) os.makedirs("checkpoints", exist_ok=True) # Load data configuration data_config = parse_config.parse_data_config(opt.data_config) train_path, val_path = data_config['train'], data_config['valid'] class_names = parse_config.load_classes(data_config['names']) # Initialize model model = Darknet(opt.model_def).to(device) model.apply(utils.weights_init_normal) # load weights from checkpoint if opt.pretrained_weights: if opt.pretrained_weights.endswith(".pth"): model.load_state_dict( torch.load(opt.pretrained_weights, map_location=device)) else: model.load_darknet_weights(opt.pretrained_weights) # Initialize optimizer optimizer = torch.optim.Adam(model.parameters()) # Initialize data loader dataset = datasets.ListDataset(train_path) data_loader = torch.utils.data.DataLoader( dataset,
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", default="config/runs/config.json")
    parser.add_argument("--output_dir", default='output')
    args = parser.parse_args()

    with open(args.config_file) as config_buffer:
        config = json.loads(config_buffer.read())

    exp_name = get_experiment_name(config)
    print(f"Experiment name: {exp_name}")
    out_dir = os.path.join(args.output_dir, exp_name)
    if os.path.exists(out_dir):
        print("Experiment dir already exists! Removing...")
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)
    log_dir = f"{out_dir}/logs"
    checkpoint_dir = f"{out_dir}/checkpoints"
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    tb_logger = SummaryWriter(log_dir)

    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        handlers=[
            logging.FileHandler(f"{out_dir}/log.log"),
            logging.StreamHandler(sys.stdout)
        ],
        level=logging.INFO)
    logger = logging.getLogger(__name__)
    logging.info("New session")

    seed = config["train"]["seed"]
    if seed > 0:
        np.random.seed(seed)
        torch.manual_seed(seed)

    ###############################
    #   Prepare data loaders
    ###############################
    print("Loading datasets...")
    if config['val']['validate']:
        train_loader, val_concat_loader, val_loader_dict = prepare_dataloaders(config)
    else:
        train_loader = prepare_dataloaders(config)
    print("Loaded!")

    if config["train"]["debug"]:
        image_batch, target = next(iter(train_loader))
        draw_image_batch_with_targets(image_batch[:4], target, cols=2)
        if config['val']['validate']:
            val_image_batch, val_target = next(iter(val_concat_loader))
            draw_image_batch_with_targets(val_image_batch[:4], val_target, cols=2)

    ###############################
    #   Construct the model
    ###############################
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(config["model"]["config"]).to(device)
    model.apply(weights_init_normal)
    print("Model initialized!")
    if config["train"]["freeze_feature_extractor"]:
        model.freeze_feature_extractor()
    print(f"Trainable params: {get_trainable_params_num(model):,}")

    # If specified, start from a checkpoint
    if config["model"]["pretrained_weights"]:
        if config["model"]["pretrained_weights"].endswith(".pth"):
            model.load_state_dict(
                torch.load(config["model"]["pretrained_weights"]))
        else:
            model.load_darknet_weights(config["model"]["pretrained_weights"])
        print("Pretrained weights loaded!")

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config["train"]["learning_rate"])

    ###############################
    #   Training
    ###############################
    batches_done = 0
    grad_accumulations = config["train"]["gradient_accumulations"]
    save_every = config["train"]["save_every"]
    if config["val"]["validate"]:
        val_iterator = iter(val_concat_loader)

    for epoch in range(config["train"]["nb_epochs"]):
        effective_loss = 0
        loss_history = torch.zeros(len(train_loader))
        logger.info(f"Epoch {epoch} started!")
        bar = tqdm(train_loader)
        for i, (image_batch, bboxes) in enumerate(bar):
            model.train()
            image_batch = image_batch.to(device)
            bboxes = bboxes.to(device)
            loss, outputs = model(image_batch, bboxes)
            effective_loss += loss.item()
            loss_history[i] = loss.item()
            loss.backward()

            if i % grad_accumulations == 0:
                # Gradients have accumulated over `grad_accumulations`
                # batches; clip them *before* stepping, then step.
                if config["train"]["gradient_clipping"]:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
                grad_norm = get_grad_norm(model)
                optimizer.step()
                optimizer.zero_grad()

                if config["val"]["validate"]:
                    model.eval()
                    try:
                        val_image_batch, val_bboxes = next(val_iterator)
                    except StopIteration:
                        # The validation loader is exhausted: restart it
                        val_iterator = iter(val_concat_loader)
                        val_image_batch, val_bboxes = next(val_iterator)
                    val_image_batch = val_image_batch.to(device)
                    val_bboxes = val_bboxes.to(device)
                    with torch.no_grad():
                        val_loss, val_outputs = model(val_image_batch, val_bboxes)
                    tb_logger.add_scalar("loss/validation", val_loss, batches_done)

                bar.set_description(f"Loss: {effective_loss / grad_accumulations:.6f}")
                batches_done += 1

                # Tensorboard logging (`metrics` is the list of per-layer
                # metric names defined elsewhere in the script)
                for metric_name in metrics:
                    metric_dict = {}
                    for j, yolo_layer in enumerate(model.yolo_layers):
                        metric_dict[f"yolo_{j}"] = yolo_layer.metrics[metric_name]
                    if metric_name == 'loss':
                        metric_dict["overall"] = loss.item()
                    tb_logger.add_scalars(metric_name, metric_dict, batches_done)
                tb_logger.add_scalar("grad_norm", grad_norm, batches_done)
                tb_logger.add_scalar("loss/effective_loss", effective_loss, batches_done)
                effective_loss = 0

                # Save model
                if save_every > 0 and batches_done % save_every == 0:
                    torch.save(model.state_dict(),
                               f"{checkpoint_dir}/yolov3_{batches_done}.pth")

        epoch_loss = loss_history.mean()
        print(f"Epoch loss: {epoch_loss}")
        tb_logger.add_scalar("epoch_loss", epoch_loss, epoch)

        if config["val"]["validate"]:
            result_dict = evaluate(model, val_loader_dict, config["val"])
            for name, results in result_dict.items():
                output_str = (f"{name} evaluation results:\n"
                              f"precision-{results['precision']},\n"
                              f"recall-{results['recall']},\n"
                              f"AP-{results['AP']},\n"
                              f"F1-{results['F1']},\n"
                              f"ap_class-{results['AP_class']}")
                logging.info(output_str)
                print(output_str)
                tb_logger.add_scalar(f"val_precision/{name}", results['precision'], epoch)
                tb_logger.add_scalar(f"val_recall/{name}", results['recall'], epoch)
                tb_logger.add_scalar(f"val_F1/{name}", results['F1'], epoch)
                tb_logger.add_scalar(f"val_AP/{name}", results['AP'], epoch)

        # Save model after each epoch
        torch.save(model.state_dict(), f"{checkpoint_dir}/yolov3_epoch_{epoch}.pth")
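# Usage sketch for the checkpoints written above: the training loop saves
# raw state dicts, so reloading is a plain load_state_dict. The config
# path and checkpoint filename below are placeholders.
import torch

model = Darknet("config/yolov3.cfg").to(device)
model.load_state_dict(
    torch.load(f"{checkpoint_dir}/yolov3_epoch_0.pth", map_location=device))
model.eval()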