def __init__(self, batch_size=256, fixed=False):
    """Load the resized ImageNet train/val splits and prepare meta-splits.

    Args:
        batch_size: number of samples per batch (default 256).
        fixed: stored on the instance; presumably controls fixed batching —
            TODO(review): confirm semantics against users of ``self.fixed``.
    """
    self.fixed = fixed
    self.batch_size = batch_size
    path_data = '/v9/whshin/imagenet_l2s_84_84'
    path_devkit = '/v9/whshin/ILSVRC2012_devkit_t12'
    print(f'Dataset path: {path_data}')
    # Currently a no-op pipeline; add augmentations here when needed.
    composed_transforms = transforms.Compose([
        # transforms.RandomResizedCrop(32),
        # transforms.Resize([32, 32], interpolation=2),
        # transforms.RandomHorizontalFlip(0.5),
        # transforms.ToTensor(),
    ])
    whole_data = ImageNet(root=path_data,
                          root_devkit=path_devkit,
                          splits=['train', 'val'],
                          transform=composed_transforms)
    # NOTE: removed leftover debugging code (`whole_data[0]` probe and
    # `import pdb; pdb.set_trace()`), which halted every construction.
    self.m_train_d, self.m_valid_d, self.m_test_d = \
        self._meta_data_split(whole_data)
    # BUG FIX: the original chained assignment
    # (`self.meta_train = self.meta_valid = self.meta_test = {}`)
    # aliased a single shared dict; each split now owns its own dict.
    self.meta_train = {}
    self.meta_valid = {}
    self.meta_test = {}
def retrieve_dataset(dataset, image_directory, train_transform, test_transform,
                     test_equals_val=True):
    """Build (train, val, test) datasets for the named dataset.

    Args:
        dataset: dataset name; only ``'cifar10'`` is currently supported.
        image_directory: root directory where data is stored/downloaded.
        train_transform: transform applied to training images.
        test_transform: transform applied to val/test images.
        test_equals_val: if True, the official test split doubles as the
            validation set; otherwise a pseudo-validation set is carved out
            of the training split.

    Returns:
        Tuple of ``(train_dataset, val_dataset, test_dataset)``.

    Raises:
        NotImplementedError: for any dataset name other than ``'cifar10'``.
    """
    if dataset == 'cifar10':
        train_dataset = torchvision.datasets.CIFAR10(root=image_directory,
                                                     train=True,
                                                     transform=train_transform,
                                                     download=True)
        test_dataset = torchvision.datasets.CIFAR10(root=image_directory,
                                                    train=False,
                                                    transform=test_transform,
                                                    download=True)
        # Use the test set for both val and test, or carve a
        # pseudo-validation set out of the training split?
        if not test_equals_val:
            train_dataset, val_dataset = CIFAR10.create_validation(
                train_dataset, test_dataset)
            test_dataset = CIFAR10.create_test(test_dataset)
        else:
            train_dataset.targets = torch.tensor(train_dataset.targets)
            test_dataset.targets = torch.tensor(test_dataset.targets)
            train_dataset = CIFAR10.create_train(train_dataset)
            val_dataset = CIFAR10.create_test(test_dataset, phase='val')
            test_dataset = CIFAR10.create_test(test_dataset, phase='test')
    else:
        # BUG FIX: the original `raise ('Not yet implemented')` raised a str,
        # which is itself a TypeError in Python 3 (exceptions must derive
        # from BaseException). The ImageNet code that followed was
        # unreachable and referenced undefined names (`dataframe`,
        # `visualize`), so it has been removed.
        raise NotImplementedError(f"Dataset '{dataset}' is not yet implemented")
    return train_dataset, val_dataset, test_dataset
def load_dataset(self, dataset, training):
    """Instantiate the dataset object selected by ``dataset.name``.

    Args:
        dataset: config node with a ``name`` field plus per-dataset options.
        training: True for the training split, False for evaluation.

    Returns:
        A constructed dataset instance.

    Raises:
        NotImplementedError: for an unrecognized dataset name.
    """
    self.logger.info("Loading datasets: %s" % dataset.name)
    name = dataset.name
    if name == "shapenet":
        subset = dataset.subset_train if training else dataset.subset_eval
        return ShapeNet(config.SHAPENET_ROOT, subset, dataset.mesh_pos,
                        dataset.normalization, dataset.shapenet)
    if name == "shapenet_demo":
        return ShapeNetImageFolder(dataset.predict.folder,
                                   dataset.normalization, dataset.shapenet)
    if name == "imagenet":
        split = "train" if training else "val"
        return ImageNet(config.IMAGENET_ROOT, split)
    raise NotImplementedError("Unsupported dataset")
def main_worker(local_rank: int, config: object):
    """Single process.

    Per-GPU worker: sets up (optional) distributed training, builds the
    MoCo encoder/head/queue, selects the dataset named by ``config.data``,
    then trains and evaluates, logging only from rank 0.
    """
    # Pin this process to its GPU before any other CUDA work.
    torch.cuda.set_device(local_rank)
    if config.distributed:
        dist_rank = config.node_rank * config.num_gpus_per_node + local_rank
        dist.init_process_group(
            backend=config.dist_backend,
            init_method=config.dist_url,
            world_size=config.world_size,
            rank=dist_rank,
        )
        # Split the global batch size / worker count evenly across processes.
        config.batch_size = config.batch_size // config.world_size
        config.num_workers = config.num_workers // config.world_size

    # Logging: only rank 0 writes logs (and optionally wandb); others get None.
    if local_rank == 0:
        logfile = os.path.join(config.checkpoint_dir, 'main.log')
        logger = get_rich_logger(logfile)
        if config.enable_wandb:
            configure_wandb(name='moco:' + config.hash,
                            project=config.data,
                            config=config)
    else:
        logger = None

    # Networks: backbone encoder + projection head.
    encoder = ResNetBackbone(name=config.backbone_type,
                             data=config.data,
                             in_channels=3)
    if config.projector_type == 'linear':
        head = LinearHead(encoder.out_channels, config.projector_dim)
    elif config.projector_type == 'mlp':
        head = MLPHead(encoder.out_channels, config.projector_dim)
    else:
        raise NotImplementedError
    if config.split_bn:
        # Simulate multi-GPU BN statistics within a single process.
        encoder = SplitBatchNorm2d.convert_split_batchnorm(encoder)

    # Data: query/key augmentations for contrastive pairs, plus
    # finetune/test transforms for kNN evaluation.
    trans_kwargs = dict(size=config.input_size, data=config.data,
                        impl=config.augmentation, k=config.rand_k)
    query_trans = AUGMENTS[config.query_augment](**trans_kwargs)
    key_trans = AUGMENTS[config.key_augment](**trans_kwargs)
    finetune_trans = FinetuneAugment(**trans_kwargs)
    test_trans = TestAugment(**trans_kwargs)
    if config.data == 'cifar10':
        data_dir = os.path.join(config.data_root, 'cifar10')
        train_set = CIFAR10ForMoCo(data_dir, train=True,
                                   query_transform=query_trans,
                                   key_transform=key_trans)
        finetune_set = CIFAR10(data_dir, train=True, transform=finetune_trans)
        test_set = CIFAR10(data_dir, train=False, transform=test_trans)
    elif config.data == 'cifar100':
        data_dir = os.path.join(config.data_root, 'cifar100')
        train_set = CIFAR100ForMoCo(data_dir, train=True,
                                    query_transform=query_trans,
                                    key_transform=key_trans)
        finetune_set = CIFAR100(data_dir, train=True, transform=finetune_trans)
        test_set = CIFAR100(data_dir, train=False, transform=test_trans)
    elif config.data == 'svhn':
        data_dir = os.path.join(config.data_root, 'svhn')
        train_set = SVHNForMoCo(data_dir, split='train',
                                query_transform=query_trans,
                                key_transform=key_trans)
        finetune_set = SVHN(data_dir, split='train', transform=finetune_trans)
        test_set = SVHN(data_dir, split='test', transform=test_trans)
    elif config.data == 'stl10':
        data_dir = os.path.join(config.data_root, 'stl10')
        # Unlabeled images join the contrastive training pool.
        train_set = STL10ForMoCo(data_dir, split='train+unlabeled',
                                 query_transform=query_trans,
                                 key_transform=key_trans)
        finetune_set = STL10(data_dir, split='train', transform=finetune_trans)
        test_set = STL10(data_dir, split='test', transform=test_trans)
    elif config.data == 'tinyimagenet':
        data_dir = os.path.join(config.data_root, 'tiny-imagenet-200')
        train_set = TinyImageNetForMoCo(data_dir, split='train',
                                        query_transform=query_trans,
                                        key_transform=key_trans,
                                        in_memory=True)
        finetune_set = TinyImageNet(data_dir, split='train',
                                    transform=finetune_trans, in_memory=True)
        test_set = TinyImageNet(data_dir, split='val',
                                transform=test_trans, in_memory=True)
    elif config.data == 'imagenet':
        data_dir = os.path.join(config.data_root, 'imagenet')
        train_set = ImageNetForMoCo(data_dir, split='train',
                                    query_transform=query_trans,
                                    key_transform=key_trans)
        finetune_set = ImageNet(data_dir, split='train',
                                transform=finetune_trans)
        test_set = ImageNet(data_dir, split='val', transform=test_trans)
    else:
        raise ValueError(f"Invalid data argument: {config.data}")

    # Logging (rank 0 only). NOTE(review): the `else: logger = None` here is
    # redundant — non-zero ranks already have logger = None from above.
    if local_rank == 0:
        logger.info(f'Data: {config.data}')
        logger.info(f'Observations: {len(train_set):,}')
        logger.info(
            f'Backbone ({config.backbone_type}): {encoder.num_parameters:,}')
        logger.info(
            f'Projector ({config.projector_type}): {head.num_parameters:,}')
        logger.info(f'Checkpoint directory: {config.checkpoint_dir}')
    else:
        logger = None

    # Model (Task): MoCo with a (projector_dim x num_negatives) memory queue
    # kept on this worker's GPU.
    model = MoCo(encoder=encoder,
                 head=head,
                 queue=MemoryQueue(size=(config.projector_dim,
                                         config.num_negatives),
                                   device=local_rank),
                 loss_function=MoCoLoss(config.temperature))
    model.prepare(ckpt_dir=config.checkpoint_dir,
                  optimizer=config.optimizer,
                  learning_rate=config.learning_rate,
                  weight_decay=config.weight_decay,
                  cosine_warmup=config.cosine_warmup,
                  cosine_cycles=config.cosine_cycles,
                  cosine_min_lr=config.cosine_min_lr,
                  epochs=config.epochs,
                  batch_size=config.batch_size,
                  num_workers=config.num_workers,
                  key_momentum=config.key_momentum,
                  distributed=config.distributed,
                  local_rank=local_rank,
                  mixed_precision=config.mixed_precision,
                  resume=config.resume)

    # Train & evaluate: finetune_set feeds the kNN memory bank, test_set
    # provides the queries.
    start = time.time()
    model.run(
        train_set,
        memory_set=finetune_set,
        query_set=test_set,
        save_every=config.save_every,
        logger=logger,
        knn_k=config.knn_k,
    )
    elapsed_sec = time.time() - start

    if logger is not None:
        elapsed_mins = elapsed_sec / 60
        elapsed_hours = elapsed_mins / 60
        logger.info(
            f'Total training time: {elapsed_mins:,.2f} minutes ({elapsed_hours:,.2f} hours).'
        )
        # Release file handlers so repeated calls don't duplicate log lines.
        logger.handlers.clear()
def main(unused_argv):
    """Train DenseNet on ImageNet with a step-decayed learning-rate schedule.

    Builds the dataset and model, wraps them in a tf.estimator.Estimator,
    then alternates one training epoch with one evaluation pass per epoch,
    decaying the learning rate per the hard-coded schedule below.
    """
    # Load training and eval data
    imagenet = ImageNet('/backups/work/ILSVRC2017/ILSVRC',
                        shuffle=True, normalize=True, augment=False,
                        one_hot=False, batch_size=128)
    densenet = DenseNet(num_classes=imagenet.num_classes,
                        growth_rate=12,
                        bc_mode=True,
                        block_config=(6, 12, 24, 16),
                        dropout_rate=0.2,
                        reduction=0.5,
                        weight_decay=1e-4,
                        nesterov_momentum=0.9)

    def train_input_fn(learning_rate):
        # One epoch over the training set; skip the ragged remainder so
        # every batch is full-sized.
        dataset = imagenet.train_set
        dataset = dataset.repeat(1)
        dataset = dataset.skip(imagenet.train_set_size % imagenet.batch_size)
        iterator = dataset.make_one_shot_iterator()
        features, labels = iterator.get_next()
        return {'images': features, 'learning_rate': learning_rate}, labels

    def eval_input_fn():
        # One pass over the validation set.
        dataset = imagenet.val_set
        dataset = dataset.repeat(1)
        iterator = dataset.make_one_shot_iterator()
        features, labels = iterator.get_next()
        return {'images': features}, labels

    # Create the Estimator (grow GPU memory on demand instead of grabbing
    # it all up front).
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    config = tf.estimator.RunConfig().replace(session_config=sess_config)
    classifier = tf.estimator.Estimator(
        model_fn=densenet.imagenet_model_fn,
        model_dir="/backups/work/logs/imagenet_model1",
        config=config)

    # Step-decay schedule as (learning_rate, epochs) pairs. Replaces the
    # four copy-pasted train/eval loops of the original.
    schedule = [(0.1, 30), (0.01, 30), (0.001, 30), (0.0001, 10)]
    for learning_rate, epochs in schedule:
        for _ in range(epochs):
            # Bind the rate as a lambda default to sidestep late binding.
            classifier.train(
                input_fn=lambda lr=learning_rate: train_input_fn(
                    learning_rate=lr))
            # Evaluate the model and print results
            eval_results = classifier.evaluate(input_fn=eval_input_fn)
            print(eval_results)
def main():
    """Entry point: parse args, build model/optimizer, optionally resume
    from a checkpoint, then train (or just evaluate) on ImageNet."""
    global args, best_prec1, train_rec, test_rec
    args = parser.parse_args()
    args.root = "work"
    args.folder = osp.join(args.root, args.arch)
    setproctitle.setproctitle(args.arch)
    os.makedirs(args.folder, exist_ok=True)

    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    imagenet = ImageNet(args.data)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](num_classes=imagenet.num_classes,
                                           pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](num_classes=imagenet.num_classes)

    if not args.distributed:
        # AlexNet/VGG: parallelize only the conv features; their large
        # classifier stays on one GPU.
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    from trainers.classification import ClassificationTrainer
    train_loader, valid_loader = imagenet.get_loader(args)
    trainer = ClassificationTrainer(model, criterion, args, optimizer)

    if args.evaluate:
        trainer.evaluate(valid_loader, model, criterion)
        return

    from torch.optim.lr_scheduler import MultiStepLR, StepLR
    # lr_scheduler = MultiStepLR(optimizer, milestones=[100, 150], gamma=0.1)
    lr_scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
    # BUG FIX: honor the epoch restored from the checkpoint. The original
    # loaded `args.start_epoch` above but then always passed start_epoch=0,
    # restarting training from scratch after a resume.
    trainer.fit(train_loader, valid_loader,
                start_epoch=getattr(args, 'start_epoch', 0),
                max_epochs=200,
                lr_scheduler=lr_scheduler)
is_tensor=True).cuda() pred_vectors = pred_vectors.cuda() n = len(train_wnids) m = len(test_wnids) cnn = make_resnet50_base() cnn.load_state_dict(torch.load(args.cnn)) cnn = cnn.cuda() cnn.eval() TEST_TRAIN = args.test_train imagenet_path = 'materials/datasets/imagenet' dataset = ImageNet(imagenet_path) dataset.set_keep_ratio(args.keep_ratio) s_hits = torch.FloatTensor([0, 0, 0, 0, 0]).cuda() # top 1 2 5 10 20 s_tot = 0 results = {} if TEST_TRAIN: for i, wnid in enumerate(train_wnids, 1): subset = dataset.get_subset(wnid) hits, tot = test_on_subset(subset, cnn, n, pred_vectors, i - 1,