def main():
    """Entry point: parse CLI args, initialize BytePS, and launch the worker."""
    args = parser.parse_args()
    bps.init()

    if args.seed is not None:
        # Deterministic mode: seed RNGs and pin cuDNN to deterministic kernels.
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    # World size may come from the launcher environment when using env:// init.
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
    ngpus_per_node = torch.cuda.device_count()

    if args.multiprocessing_distributed:
        # One process per GPU: scale the global world size accordingly and
        # bind this process to its BytePS local rank.
        args.world_size = ngpus_per_node * args.world_size
        args.gpu = bps.local_rank()

    # Both the distributed and the plain path end up in the same worker call.
    main_worker(args.gpu, ngpus_per_node, args)
def main():
    """Initialize BytePS, build the UGATIT model, and run the requested phase."""
    bps.init()

    # Fixed seeds for reproducibility; pin this process to its local GPU.
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    torch.cuda.set_device(bps.local_rank())

    # parse arguments
    args = parse_args()
    if args is None:
        exit()

    # open session and build graph
    model = UGATIT(args)
    model.build_model()

    if args.phase == 'train':
        model.train()
        print(" [*] Training finished!")
    if args.phase == 'test':
        model.test()
        print(" [*] Test finished!")
# NOTE(review): this chunk begins mid-way through a parser.add_argument(...)
# call — the opening of that call is outside this view.
default=1000, help='number of benchmark iterations')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA')
# NOTE(review): argparse `type=bool` is a known pitfall — any non-empty string
# (including "False") parses as True, so a supplied --no-wait value is
# effectively always True; confirm the intended behavior with the callers.
parser.add_argument('--no-wait', type=bool, default=True,
                    help='wait for other worker request first')
parser.add_argument('--gpu', type=int, default=-1,
                    help='use a specified gpu')

args = parser.parse_args()
# Use CUDA only when available and not explicitly disabled on the CLI.
args.cuda = not args.no_cuda and torch.cuda.is_available()

hvd.init()

# Horovod: pin GPU to local rank.
if args.gpu >= 0:
    # An explicit --gpu index overrides the rank-based assignment.
    torch.cuda.set_device(args.gpu)
else:
    torch.cuda.set_device(hvd.local_rank())

cudnn.benchmark = True


def log(s, nl=True):
    """Print *s* on Horovod rank 0 only; nl=False suppresses the newline."""
    if hvd.rank() != 0:
        return
    print(s, end='\n' if nl else '')
# NOTE(review): this chunk begins mid-way through a parser.add_argument(...)
# call — the opening of that call is outside this view.
help='number of warmup epochs')
parser.add_argument('--momentum', type=float, default=0.9,
                    help='SGD momentum')
parser.add_argument('--wd', type=float, default=0.00005,
                    help='weight decay')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=42,
                    help='random seed')

args = parser.parse_args()
# Use CUDA only when available and not explicitly disabled on the CLI.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Effective per-step batch: gradients are averaged over this many samples
# before each allreduce.
allreduce_batch_size = args.batch_size * args.batches_per_allreduce

bps.init()
torch.manual_seed(args.seed)

if args.cuda:
    # BytePS: pin GPU to local rank.
    torch.cuda.set_device(bps.local_rank())
    torch.cuda.manual_seed(args.seed)

cudnn.benchmark = True

# If set > 0, will resume training from a given checkpoint.
# Scan from the last epoch downward and pick the newest checkpoint on disk.
resume_from_epoch = 0
for try_epoch in range(args.epochs, 0, -1):
    if os.path.exists(args.checkpoint_format.format(epoch=try_epoch)):
        resume_from_epoch = try_epoch
        break