def main(args):
    torch.cuda.set_device(0)
    random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # Data loading code
    print("Loading data")
    if 'voc2007' in args.dataset:
        dataset, num_classes = get_dataset(args.dataset, "trainval", get_transform(train=True), args.data_path)
        dataset_test, _ = get_dataset(args.dataset, "test", get_transform(train=False), args.data_path)
    else:
        dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True), args.data_path)
        dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False), args.data_path)

    print("Creating data loaders")
    num_images = len(dataset)

    # Size of the initial labeled pool and of the per-cycle labeling budget.
    if 'voc' in args.dataset:
        init_num = 1000
        budget_num = 1000
        if 'retina' in args.model:
            init_num = 1000
            budget_num = 500
    else:
        init_num = 5000
        budget_num = 1000

    indices = list(range(num_images))
    random.shuffle(indices)
    labeled_set = indices[:init_num]
    unlabeled_set = indices[init_num:]

    train_sampler = SubsetRandomSampler(labeled_set)
    test_sampler = torch.utils.data.SequentialSampler(dataset_test)
    data_loader_test = DataLoader(dataset_test, batch_size=1, sampler=test_sampler,
                                  num_workers=args.workers, collate_fn=utils.collate_fn)

    # Each cycle trains on the current labeled pool, then moves budget_num samples
    # from the unlabeled pool into it.
    for cycle in range(args.cycles):
        if args.aspect_ratio_group_factor >= 0:
            group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
            train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
        else:
            train_batch_sampler = torch.utils.data.BatchSampler(
                train_sampler, args.batch_size, drop_last=True)

        data_loader = torch.utils.data.DataLoader(
            dataset, batch_sampler=train_batch_sampler,
            num_workers=args.workers, collate_fn=utils.collate_fn)

        print("Creating model")
        if 'voc' in args.dataset:
            if 'faster' in args.model:
                task_model = fasterrcnn_resnet50_fpn(num_classes=num_classes, min_size=600, max_size=1000)
            elif 'retina' in args.model:
                task_model = retinanet_resnet50_fpn(num_classes=num_classes, min_size=600, max_size=1000)
        else:
            if 'faster' in args.model:
                task_model = fasterrcnn_resnet50_fpn(num_classes=num_classes, min_size=800, max_size=1333)
            elif 'retina' in args.model:
                task_model = retinanet_resnet50_fpn(num_classes=num_classes, min_size=600, max_size=1000)
        task_model.to(device)

        # Optionally skip training the first cycle by loading a saved first-cycle
        # checkpoint, then only update the labeled/unlabeled pools and continue.
        if not args.init and cycle == 0 and args.skip:
            if 'faster' in args.model:
                checkpoint = torch.load(os.path.join(
                    args.first_checkpoint_path, '{}_frcnn_1st.pth'.format(args.dataset)), map_location='cpu')
            elif 'retina' in args.model:
                checkpoint = torch.load(os.path.join(
                    args.first_checkpoint_path, '{}_retinanet_1st.pth'.format(args.dataset)), map_location='cpu')
            task_model.load_state_dict(checkpoint['model'])
            # if 'coco' in args.dataset:
            #     coco_evaluate(task_model, data_loader_test)
            # elif 'voc' in args.dataset:
            #     voc_evaluate(task_model, data_loader_test, args.dataset)
            print("Getting stability")
            random.shuffle(unlabeled_set)
            if 'coco' in args.dataset:
                subset = unlabeled_set[:5000]
            else:
                subset = unlabeled_set
            # Update the labeled dataset and the unlabeled dataset, respectively
            labeled_set += subset[:budget_num]
            labeled_set = list(set(labeled_set))
            # with open("vis/cycle_{}.txt".format(cycle), "rb") as fp:  # Unpickling
            #     labeled_set = pickle.load(fp)
            unlabeled_set = list(set(indices) - set(labeled_set))
            # Create a new dataloader for the updated labeled dataset
            train_sampler = SubsetRandomSampler(labeled_set)
            continue

        params = [p for p in task_model.parameters() if p.requires_grad]
        task_optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum,
                                         weight_decay=args.weight_decay)
        task_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            task_optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

        # Start active learning cycles training
        if args.test_only:
            if 'coco' in args.dataset:
                coco_evaluate(task_model, data_loader_test)
            elif 'voc' in args.dataset:
                voc_evaluate(task_model, data_loader_test, args.dataset)
            return

        print("Start training")
        start_time = time.time()
        for epoch in range(args.start_epoch, args.total_epochs):
            train_one_epoch(task_model, task_optimizer, data_loader, device, cycle, epoch, args.print_freq)
            task_lr_scheduler.step()
            # evaluate after pre-set epoch
            if (epoch + 1) == args.total_epochs:
                if 'coco' in args.dataset:
                    coco_evaluate(task_model, data_loader_test)
                elif 'voc' in args.dataset:
                    voc_evaluate(task_model, data_loader_test, args.dataset, path=args.results_path)

        # Save the first-cycle model so later runs can start from it with --skip.
        if not args.skip and cycle == 0:
            if 'faster' in args.model:
                utils.save_on_master({
                    'model': task_model.state_dict(),
                    'args': args
                }, os.path.join(args.first_checkpoint_path, '{}_frcnn_1st.pth'.format(args.dataset)))
            elif 'retina' in args.model:
                utils.save_on_master({
                    'model': task_model.state_dict(),
                    'args': args
                }, os.path.join(args.first_checkpoint_path, '{}_retinanet_1st.pth'.format(args.dataset)))

        random.shuffle(unlabeled_set)
        # Update the labeled dataset and the unlabeled dataset, respectively
        labeled_set += unlabeled_set[:budget_num]
        labeled_set = list(set(labeled_set))
        unlabeled_set = unlabeled_set[budget_num:]
        # Create a new dataloader for the updated labeled dataset
        train_sampler = SubsetRandomSampler(labeled_set)

        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('Training time {}'.format(total_time_str))
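# --- Hedged sketch, not part of the original script: a minimal argument parser and
# entry point covering the flags the active-learning loop above reads. The flag
# names mirror the args.* attributes used in main(); every default value here is
# an assumption, not the repository's actual configuration.
import argparse


def get_args_parser():
    parser = argparse.ArgumentParser(description='Active-learning detection training (sketch)')
    parser.add_argument('--data-path', default='data', help='dataset root')
    parser.add_argument('--dataset', default='voc2007', help="e.g. 'voc2007', 'voc2012', 'coco'")
    parser.add_argument('--model', default='fasterrcnn', help="'fasterrcnn' or 'retinanet'")
    parser.add_argument('--device', default='cuda')
    parser.add_argument('--batch-size', default=4, type=int)
    parser.add_argument('--workers', default=4, type=int)
    parser.add_argument('--lr', default=0.005, type=float)
    parser.add_argument('--momentum', default=0.9, type=float)
    parser.add_argument('--weight-decay', default=5e-4, type=float)
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int)
    parser.add_argument('--lr-gamma', default=0.1, type=float)
    parser.add_argument('--start-epoch', default=0, type=int)
    parser.add_argument('--total-epochs', default=26, type=int)
    parser.add_argument('--cycles', default=7, type=int, help='number of active-learning cycles')
    parser.add_argument('--print-freq', default=20, type=int)
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    parser.add_argument('--first-checkpoint-path', default='checkpoints')
    parser.add_argument('--results-path', default='results')
    parser.add_argument('--test-only', action='store_true')
    parser.add_argument('--skip', action='store_true',
                        help='load a saved first-cycle checkpoint instead of training it')
    parser.add_argument('--init', action='store_true')
    # Flags typically consumed by utils.init_distributed_mode.
    parser.add_argument('--world-size', default=1, type=int)
    parser.add_argument('--dist-url', default='env://')
    return parser


if __name__ == '__main__':
    main(get_args_parser().parse_args())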
def main(args):
    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # Data loading code
    print("Loading data")
    dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True), args.data_path)
    dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False), args.data_path)

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    if args.aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_sampler=train_batch_sampler,
        num_workers=args.workers, collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, sampler=test_sampler,
        num_workers=args.workers, collate_fn=utils.collate_fn)

    print("Creating model")
    model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes,
                                                              pretrained=args.pretrained)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1

    if args.test_only:
        evaluate(model, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
        lr_scheduler.step()
        if args.output_dir:
            utils.save_on_master({
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args,
                'epoch': epoch},
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

        # evaluate after every epoch
        evaluate(model, data_loader_test, device=device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
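# --- Hedged sketch, assuming helpers in the style of torchvision's
# references/detection scripts: the get_dataset / get_transform functions that
# both training loops above call. The local modules imported here (transforms,
# coco_utils) and the exact transform pipeline are assumptions for illustration;
# the real repository also maps the 'voc2007' / 'voc2012' names used above.
import transforms as T           # repository-local transforms module (assumed)
from coco_utils import get_coco  # assumed helper returning a COCO-style dataset


def get_transform(train):
    # ToTensor always; random horizontal flip only during training.
    transforms = [T.ToTensor()]
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)


def get_dataset(name, image_set, transform, data_path):
    # Map a dataset name to (root, constructor, num_classes); COCO uses 91 class slots.
    paths = {
        "coco": (data_path, get_coco, 91),
    }
    root, ds_fn, num_classes = paths[name]
    ds = ds_fn(root, image_set=image_set, transforms=transform)
    return ds, num_classes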
def main(args):
    utils.init_distributed_mode(args)
    print(args)
    device = torch.device(args.device)

    print("Loading data")
    # pdb.set_trace()
    # Note: the same training-time transform is applied to the test split here.
    transform = build_transforms(cfg, is_train=True)
    train_data = VisualGenomeDataset(args.data_dir, task='detection',
                                     split='train', transforms=transform)
    test_data = VisualGenomeDataset(args.data_dir, task='detection',
                                    split='test', transforms=transform)

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_data)
        test_sampler = torch.utils.data.distributed.DistributedSampler(test_data)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_data)
        test_sampler = torch.utils.data.SequentialSampler(test_data)

    if args.aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(train_data, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    train_data_loader = torch.utils.data.DataLoader(
        train_data, batch_sampler=train_batch_sampler,
        num_workers=args.workers, collate_fn=utils.collate_fn)
    test_data_loader = torch.utils.data.DataLoader(
        test_data, batch_size=1, sampler=test_sampler,
        num_workers=args.workers, collate_fn=utils.collate_fn)

    print("Creating model")
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    # Replace the box predictor so the head matches the Visual Genome class count.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, cfg.NUM_CLASSES)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
    # lr_scheduler = torch.optim.lr_scheduler.StepLR(
    #     optimizer, step_size=8, gamma=0.5)

    last_epoch = 0
    if args.resume:
        print("from checkpoint*************")
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        last_epoch = lr_scheduler.last_epoch

    if args.test_only:
        evaluate(model, test_data_loader, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(last_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, optimizer, train_data_loader, device, epoch, args.print_freq)
        # lr_scheduler.step()
        # if args.output_dir:
        #     utils.save_on_master({
        #         'model': model_without_ddp.state_dict(),
        #         'optimizer': optimizer.state_dict(),
        #         'lr_scheduler': lr_scheduler.state_dict(),
        #         'args': args},
        #         os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
        # evaluate(model, test_data_loader, device=device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
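# --- Hedged sketch, assuming utils follows torchvision's references/detection/utils.py:
# the two helpers every script above relies on. The real module also provides
# save_on_master, MetricLogger, etc.; the simplified init below only handles the
# environment variables set by torchrun / torch.distributed.launch.
import os

import torch
import torch.distributed as dist


def collate_fn(batch):
    # Detection batches contain variable-sized images and per-image target dicts,
    # so keep them as tuples instead of stacking into a single tensor.
    return tuple(zip(*batch))


def init_distributed_mode(args):
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ['RANK'])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True
    torch.cuda.set_device(args.gpu)
    dist.init_process_group(backend='nccl', init_method=args.dist_url,
                            world_size=args.world_size, rank=args.rank)
    dist.barrier()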