gpu_list = []
if args.gpu is None:
    use_gpu = False
else:
    use_gpu = True
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    device_list = args.gpu.split(",")
    for a in range(0, len(device_list)):
        # CUDA_VISIBLE_DEVICES remaps the selected gpus to 0..N-1, so the
        # local indices are stored rather than the raw device ids.
        gpu_list.append(int(a))

os.system("clear")

config = create_config(configFilePath)

cuda = torch.cuda.is_available()
logger.info("CUDA available: %s" % str(cuda))
if not cuda and len(gpu_list) > 0:
    logger.error("CUDA is not available but gpu ids were specified")
    raise NotImplementedError

parameters = init_all(config, gpu_list, args.checkpoint, "train")

do_test = False
if args.do_test:
    do_test = True

save_eval = False
if args.save_eval:
    save_eval = True

train(parameters, config, gpu_list, do_test, save_eval)
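# The entry points in this section assume an argparse namespace providing
# args.gpu, args.checkpoint, args.do_test and args.save_eval, plus a config
# path in configFilePath. A minimal sketch of such a parser follows; the flag
# names "--config"/"-c" and the defaults are assumptions, not taken from the
# source, and the distributed variants below additionally expect
# --local_rank, --comment and --do_eval.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--config", "-c", required=True, help="path to the config file")
parser.add_argument("--gpu", "-g", default=None, help="comma-separated gpu ids, e.g. 0,1")
parser.add_argument("--checkpoint", default=None, help="checkpoint to resume from")
parser.add_argument("--do_test", action="store_true", help="run the test phase after training")
parser.add_argument("--save_eval", action="store_true", help="save evaluation outputs")
args = parser.parse_args()
configFilePath = args.config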
device_list = args.gpu.split(",")
for a in range(0, len(device_list)):
    # CUDA_VISIBLE_DEVICES remaps the selected gpus to 0..N-1, so the
    # local indices are stored rather than the raw device ids.
    gpu_list.append(int(a))

os.system("clear")

config.set('distributed', 'local_rank', args.local_rank)
if config.getboolean("distributed", "use"):
    # Bind this process to its own gpu before joining the process group.
    torch.cuda.set_device(gpu_list[args.local_rank])
    torch.distributed.init_process_group(
        backend=config.get("distributed", "backend"))
    config.set('distributed', 'gpu_num', len(gpu_list))

cuda = torch.cuda.is_available()
logger.info("CUDA available: %s" % str(cuda))
if not cuda and len(gpu_list) > 0:
    logger.error("CUDA is not available but gpu ids were specified")
    raise NotImplementedError

parameters = init_all(config, gpu_list, args.checkpoint, "train",
                      local_rank=args.local_rank)

do_test = False
if args.do_test:
    do_test = True

print(args.comment)
train(parameters, config, gpu_list, do_test, args.local_rank)
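# What init_all does with local_rank is not shown here; below is a minimal
# sketch of the usual pattern, assuming the model is moved onto this
# process's gpu and wrapped in DistributedDataParallel after
# init_process_group has run. The helper name wrap_model_for_ddp is
# illustrative, not from the source.
import torch
import torch.nn as nn


def wrap_model_for_ddp(model: nn.Module, local_rank: int, distributed: bool) -> nn.Module:
    if distributed:
        # Each process owns one gpu; gradients are all-reduced across processes.
        model = model.cuda(local_rank)
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
    elif torch.cuda.is_available():
        model = model.cuda()
    return model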
config = create_config(configFilePath)
if config.getboolean("distributed", "use"):
    torch.distributed.init_process_group(
        backend=config.get("distributed", "backend"))

use_gpu = True
gpu_list = []
if args.gpu:
    use_gpu = True
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    device_list = args.gpu.split(",")
    for a in range(0, len(device_list)):
        # CUDA_VISIBLE_DEVICES remaps the selected gpus to 0..N-1, so the
        # local indices are stored rather than the raw device ids.
        gpu_list.append(int(a))
else:
    use_gpu = False

os.system("clear")

cuda = torch.cuda.is_available()
logger.info("CUDA available: %s" % str(cuda))
if not cuda and len(gpu_list) > 0:
    logger.error("CUDA is not available but gpu ids were specified")
    raise NotImplementedError

parameters = init_all(config, gpu_list, args.checkpoint, "train")

do_test = False
if args.do_test:
    do_test = True

train(parameters, config, gpu_list, do_test)
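# create_config is only used here through get/getboolean/set, which matches
# configparser's interface. A minimal sketch under that assumption follows;
# the source's helper is likely a wrapper that also coerces non-string
# values, since the distributed variants call config.set with ints, which
# the stock ConfigParser.set rejects.
import configparser


def create_config(path: str) -> configparser.ConfigParser:
    config = configparser.ConfigParser()
    config.read(path)
    return config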
os.system("clear")

config.set('distributed', 'local_rank', args.local_rank)
if config.getboolean("distributed", "use"):
    # Bind this process to its own gpu before joining the process group.
    torch.cuda.set_device(gpu_list[args.local_rank])
    torch.distributed.init_process_group(
        backend=config.get("distributed", "backend"))
    config.set('distributed', 'gpu_num', len(gpu_list))

cuda = torch.cuda.is_available()
logger.info("CUDA available: %s" % str(cuda))
if not cuda and len(gpu_list) > 0:
    logger.error("CUDA is not available but gpu ids were specified")
    raise NotImplementedError

parameters = init_all(config, gpu_list, args.checkpoint, "train",
                      local_rank=args.local_rank)

do_test = False
if args.do_test:
    do_test = True

print(args.comment)
train(parameters, config, gpu_list, do_test, args.local_rank, do_eval=args.do_eval)
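# Usage note: the --local_rank argument read above follows the convention of
# the legacy torch.distributed.launch helper, which spawns one process per
# gpu and passes each process its rank on the command line, e.g. (assuming
# the entry script is named train.py and the node has two gpus):
#
#     python -m torch.distributed.launch --nproc_per_node=2 train.py \
#         --config <config file> --gpu 0,1
#
# Newer torchrun-style launchers export LOCAL_RANK as an environment variable
# instead, in which case the script would read os.environ["LOCAL_RANK"]
# rather than an argparse option.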