def main(_run, _config, experiment, experiment_variant, world_size, rank,
         init_method, data_dir, batch_size, val_batch_size, out_dir,
         momentum, wd, lr, epochs, device):
    """Distributed training entry point for a classification experiment.

    Joins an NCCL process group as `rank` of `world_size`, builds train/val
    loaders from msgpack files under `data_dir`, wraps the network for
    DistributedDataParallel, and trains for `epochs` epochs.  Only the
    master process (rank 0) writes snapshots and prints progress.

    Parameters
    ----------
    _run, _config : sacred bookkeeping objects (unused here, injected by the
        experiment framework — TODO confirm against the sacred setup).
    experiment, experiment_variant : identify which network to build; the
        variant also names the output subdirectory.
    world_size, rank, init_method : torch.distributed process-group setup.
    data_dir : directory containing 'train.msgpack' and 'val.msgpack'.
    batch_size, val_batch_size : per-process batch sizes.
    out_dir : base output directory; results go to out_dir/experiment_variant.
    momentum, wd, lr, epochs : optimizer hyperparameters and duration.
    device : CUDA device index for this process.
    """
    cudnn.benchmark = True
    torch.cuda.set_device(device)
    is_master = rank == 0
    dist.init_process_group('nccl', rank=rank, world_size=world_size,
                            init_method=init_method)
    # One output subdirectory per experiment variant.  exist_ok avoids the
    # check-then-create race when several ranks share a filesystem.
    out_dir = pt.join(out_dir, experiment_variant)
    os.makedirs(out_dir, exist_ok=True)
    train = make_loader(pt.join(data_dir, 'train.msgpack'), batch_size,
                        world_size, rank, image_rng=AUGMENTATION_TRAIN)
    # Validation images are rescaled by 256/224 so a center crop of 224
    # comes from a 256-pixel image — presumably matching standard ImageNet
    # evaluation; confirm against make_loader's image_params semantics.
    val = make_loader(pt.join(data_dir, 'val.msgpack'), val_batch_size,
                      world_size, rank, image_params={'scale': 256 / 224})
    network = get_network(experiment, experiment_variant)
    network = Normalize(module=network).to(device)
    network = nn.parallel.DistributedDataParallel(network, device_ids=[device])
    network = Unpacker(network)
    optimizer, policy = make_policy(epochs, network, lr, momentum, wd)
    loss = CrossEntropyLoss(target_key='label').to(device)
    trainer = Trainer(network, optimizer, loss,
                      AccuracyMetric(output_key='probs'),
                      policy, None, train, val, out_dir,
                      # only the master rank snapshots and prints progress
                      snapshot_interval=5 if is_master else None,
                      quiet=not is_master)
    start = datetime.now()
    with train, val:
        trainer.train(epochs, start_epoch=0)
    print('Total Time taken: ', datetime.now() - start)
def main(_run, _config, world_size, rank, init_method, datadir, batch_size,
         val_batch_size, num_workers, outdir, outdir_prefix, lr, wd,
         bn_momentum, bn_correct, warmup, num_epochs, resume, finetune,
         size, nsamples):
    """Distributed training entry point for the autoencoder experiment.

    Joins an NCCL process group, builds a training loader over `datadir`,
    scales the learning rate linearly to the world batch size, and trains
    `Net` for `num_epochs` epochs under an L1 reconstruction loss.

    NOTE(review): val_batch_size, wd, bn_momentum, bn_correct, warmup,
    resume and finetune are accepted but never used in this function; they
    are kept only so the caller-facing interface (presumably a sacred
    config) stays unchanged — confirm before removing.
    """
    cudnn.benchmark = True
    # The visible device is selected externally via CUDA_VISIBLE_DEVICES,
    # so every process sees its GPU as cuda:0.
    device = torch.device('cuda:0')
    torch.cuda.set_device(device)
    is_master = rank == 0  # rank 0 owns snapshots / experiment observer
    print('rank', rank, 'init_method', init_method)
    dist.init_process_group('nccl', rank=rank, world_size=world_size,
                            init_method=init_method)
    # `size` controls which augmentation is applied inside make_loader.
    train = make_loader(
        pt.join(datadir, '') if datadir else None,
        batch_size, device, world_size, rank, num_workers, size,
        gpu_augmentation=False, image_rng=None, nsamples=nsamples)
    # Linear lr scaling relative to a reference batch size of 256.
    world_batch_size = world_size * batch_size
    lr = lr * world_batch_size / 256
    if outdir is None:
        outdir = pt.join(outdir_prefix, '%dgpu' % (world_size,))
    model = Net(num_classes=1000, batch_size=batch_size).to(device)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device])
    # NOTE(review): momentum is hard-coded to 0.9 and `wd` is not forwarded
    # here, unlike the sibling training script — confirm make_policy's
    # signature before changing.
    optimizer, policy = make_policy(num_epochs, model, lr, 0.9)
    print('\n policy defined')
    # Reconstruction loss for the autoencoder.  (The original also built a
    # CrossEntropyLoss that was never passed to the Trainer; that dead GPU
    # allocation has been removed.)
    loss = L1Loss(output_key='output', target_key='target_image').to(device)
    trainer = Trainer(model, optimizer, loss, None, policy, None, train,
                      None, outdir,
                      snapshot_interval=5 if is_master else None,
                      quiet=not is_master)
    print('\n trainer has been initialized')
    start = datetime.now()
    with train:
        trainer.train(num_epochs, start_epoch=0)
    print("Training complete in: " + str(datetime.now() - start))
def main(_run, _config, world_size, rank, init_method, datadir, batch_size,
         num_workers, outdir_suffix, outdir, lr, wd, warmup, num_epochs,
         nsamples):
    """Distributed training entry point combining an autoencoder L1 loss
    with an auxiliary classifier CrossEntropyLoss over 500 classes.

    Joins an NCCL process group, builds a training loader over `datadir`,
    and trains `Net` for `num_epochs` epochs, snapshotting every 4 epochs
    on the master rank.

    NOTE(review): `warmup` is accepted but never used here; kept for
    interface compatibility with the caller's config — confirm before
    removing.
    """
    cudnn.benchmark = True
    # The visible device is selected externally via CUDA_VISIBLE_DEVICES,
    # so every process sees its GPU as cuda:0.
    device = torch.device('cuda:0')
    torch.cuda.set_device(device)
    is_master = rank == 0  # rank 0 owns snapshots / experiment observer
    print('rank', rank, 'init_method', init_method)
    dist.init_process_group('nccl', rank=rank, world_size=world_size,
                            init_method=init_method)
    train = make_loader(
        pt.join(datadir, '') if datadir else None,
        batch_size, device, world_size, rank, num_workers,
        gpu_augmentation=False, image_rng=None, nsamples=nsamples)
    # NOTE(review): linear lr scaling to a 256 batch-size reference
    # (lr *= world_size*batch_size/256) was deliberately disabled in the
    # original — lr is used exactly as configured.
    if outdir is None:
        outdir = pt.join(ROOT, '../exp/', outdir_suffix)
    model = Net(num_classes=500, batch_size=batch_size)
    print('\n network parameters ', len(list(model.parameters())))
    model = model.to(device)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device])
    optimizer, policy = make_policy(num_epochs, model, lr, 0.9, wd)
    # Reconstruction loss for the autoencoder...
    loss = L1Loss(output_key='output', target_key='target_image').to(device)
    # ...plus the auxiliary classification loss.
    classifier_loss = CrossEntropyLoss(output_key='probs',
                                       target_key='label').to(device)
    trainer = Trainer(model, optimizer, loss, classifier_loss, rank,
                      AccuracyMetric(output_key='softmax_output',
                                     target_key='label'),
                      policy, None, train, None, outdir,
                      snapshot_interval=4 if is_master else None,
                      quiet=not is_master)
    print('\n Number of epochs are: ', num_epochs)
    start = datetime.now()
    with train:
        trainer.train(num_epochs, start_epoch=0)
    print("Training complete in: " + str(datetime.now() - start))
# Freeze only the convolutional feature extractor — the first child module
# of the VGG network; the remaining children (the classifier head) stay
# trainable.  (Replaces the original counter-based loop that only ever
# froze child index 0.)
for param in next(vgg.children()).parameters():
    param.requires_grad = False
# Wrap the VGG network with Normalize, move it to the training device, and
# wrap for distributed training.  (A redundant second .to(device) — already
# performed by Normalize(...).to(device) — has been dropped.)
vgg = Normalize(module=vgg).to(device)
vgg = nn.parallel.DistributedDataParallel(vgg, device_ids=[device])
vgg = Unpacker(vgg)
# Build optimizer/schedule; make_policy is expected to select only the
# parameters that still require gradients — TODO confirm.
optimizer, policy = make_policy(epochs, vgg, lr, momentum, wd)
loss = CrossEntropyLoss(target_key='label').to(device)
trainer = Trainer(vgg, optimizer, loss,
                  AccuracyMetric(output_key='probs'),
                  policy, None, train, val, out_dir,
                  # only the master rank snapshots and prints progress
                  snapshot_interval=5 if is_master else None,
                  quiet=not is_master)
start = datetime.now()
with train, val:
    trainer.train(epochs, start_epoch=0)
print('Total Time taken: ', datetime.now() - start)