def check_devices():
    """Print each visible CUDA device, then the current one.

    When no device is visible, a single notice is printed instead of the
    "current device" line.
    """
    total = device_count()
    for index in range(total):
        print("Found device {}:".format(index), get_device_name(index))

    if total == 0:
        print("No GPU device found")
        return
    print("Current cuda device is", get_device_name(current_device()))
def _gpu_descriptor(self, gpu):
    """Turn the ``gpu`` setting into a concrete descriptor.

    Any value other than ``'auto'`` is handed back unchanged.  For ``'auto'``
    the result is ``False`` when no CUDA device is visible, the list of all
    device indices when ``self.supports_multiple_gpus()`` is truthy, and
    ``True`` otherwise.
    """
    if gpu != 'auto':
        return gpu

    visible = cuda.device_count()
    if visible == 0:
        return False
    if self.supports_multiple_gpus():
        return list(range(visible))
    return True
def machine_params(cls, mode="train", **kwargs) -> MachineParams:
    """Build the ``MachineParams`` (process counts and devices) for ``mode``.

    * ``"train"``: ``cls.num_train_processes()`` processes when CUDA is
      available, otherwise one.
    * ``"valid"``: the last GPU with two processes, or the CPU with zero.
    * anything else: twenty processes with GPUs, otherwise one.

    Outside ``"valid"`` the devices are the first ``min(nprocesses, num_gpus)``
    GPU indices, or the CPU when no GPU is visible.  The process count is then
    distributed over the devices by ``split_processes_onto_devices``.
    """
    gpu_total = cuda.device_count()
    gpu_present = gpu_total != 0
    sampler_devices = None

    if mode == "valid":
        if gpu_present:
            devices = [gpu_total - 1]
            nprocesses = 2
        else:
            devices = [torch.device("cpu")]
            nprocesses = 0
    else:
        if mode == "train":
            if torch.cuda.is_available():
                nprocesses = cls.num_train_processes()
            else:
                nprocesses = 1
        else:
            nprocesses = 20 if gpu_present else 1

        if gpu_present:
            devices = list(range(min(nprocesses, gpu_total)))
        else:
            devices = [torch.device("cpu")]

    nprocesses = split_processes_onto_devices(
        nprocesses=nprocesses, ndevices=len(devices))

    # The preprocessor graph is only built for ResNet-based configurations.
    if cls.USE_RESNET_CNN:
        preprocessor_graph = cls.resnet_preprocessor_graph(mode=mode)
    else:
        preprocessor_graph = None

    return MachineParams(
        nprocesses=nprocesses,
        devices=devices,
        sampler_devices=sampler_devices,
        sensor_preprocessor_graph=preprocessor_graph,
    )
def main():
    """Load a pretrained network, attach LSQ modules and run the trainer.

    Steps, in order:

    1. Change the working directory to the directory containing this script.
    2. Parse the arguments and build the weight/activation constraints.
    3. Pick the network and data loader from ``args.dataset`` /
       ``args.network`` (prints a message and returns for an unknown network).
    4. Load ``<model_root>/<model_name>.pth`` on top of the fresh state dict,
       call ``add_lsqmodule`` and wrap the model in ``nn.DataParallel``.
    5. Build the log helper, optimizer and scheduler, then run ``Trainer``.
    """
    # Resolve to an absolute path first: when ``__file__`` is a bare relative
    # filename its dirname is the empty string and ``os.chdir('')`` raises.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    args = get_arguments()
    constr_weight = get_constraint(args.weight_bits, 'weight')
    constr_activation = get_constraint(args.activation_bits, 'activation')

    if args.dataset == 'cifar10':
        network = resnet20
        dataloader = dataloader_cifar
    else:
        if args.network == 'resnet18':
            network = resnet18
        elif args.network == 'resnet50':
            network = resnet50
        else:
            print('Not Support Network Type: %s' % args.network)
            return
        dataloader = dataloader_imagenet

    train_loader = dataloader(args.data_root,
                              split='train',
                              batch_size=args.batch_size)
    test_loader = dataloader(args.data_root,
                             split='test',
                             batch_size=args.batch_size)
    net = network(quan_first_last=args.quan_first_last,
                  constr_activation=constr_activation,
                  preactivation=args.preactivation)

    # Overlay the stored weights on the freshly initialised state dict so that
    # keys missing from the checkpoint keep their initial values.
    model_path = os.path.join(args.model_root, args.model_name + '.pth')
    name_weights_old = torch.load(model_path)
    name_weights_new = net.state_dict()
    name_weights_new.update(name_weights_old)
    net.load_state_dict(name_weights_new)

    # The LSQ modules are added only after the weights have been loaded.
    add_lsqmodule(net, constr_weight)
    print(net)
    net = net.cuda()
    net = nn.DataParallel(net, device_ids=range(cuda.device_count()))

    # An ndarray activation constraint selects the '_a' suffix, else '_w'.
    quan_activation = isinstance(constr_activation, np.ndarray)
    postfix = '_w' if not quan_activation else '_a'
    new_model_name = args.prefix + args.model_name + '_lsq' + postfix
    cache_root = os.path.join('.', 'cache')
    train_loger = LogHelper(new_model_name, cache_root, quan_activation,
                            args.resume)
    optimizer, lr_scheduler = get_optimizer(net=net,
                                            optimizer=args.optimizer,
                                            lr_base=args.learning_rate,
                                            weight_decay=args.weight_decay,
                                            lr_scheduler=args.lr_scheduler,
                                            total_epoch=args.total_epoch,
                                            quan_activation=quan_activation)
    trainer = Trainer(net=net,
                      train_loader=train_loader,
                      test_loader=test_loader,
                      optimizer=optimizer,
                      lr_scheduler=lr_scheduler,
                      model_name=new_model_name,
                      train_loger=train_loger)
    trainer(total_epoch=args.total_epoch,
            save_check_point=True,
            resume=args.resume)
def get_gpu_mem(self):
    """Return ``memory_cached`` summed over every CUDA device (0 without CUDA)."""
    if not cutorch.is_available():
        return 0
    device_indices = range(cutorch.device_count())
    return sum(cutorch.memory_cached(index) for index in device_indices)
def main(config_path):
    """Train and validate a ``DeepLabV3Plus`` model described by a JSON config.

    Args:
        config_path: Path to a ``.json`` file.  Keys read here are
            ``path_to_data``, ``train_model``, ``workers_num``, ``batch_size``,
            ``output_classes``, ``cuda_usage``, ``learning_rate``,
            ``info_paths`` (with ``log_dir``) and ``epochs``.

    Raises:
        ValueError: If the path does not exist, or is not a ``.json`` file.
    """
    path_to_config = Path(config_path)
    if not path_to_config.exists():
        raise ValueError('{} doesn\'t exist'.format(path_to_config))
    if (path_to_config.suffix.lower() != '.json'
            or not path_to_config.is_file()):
        raise ValueError('{} is not .json config file'.format(path_to_config))

    model_configs = load_json(path_to_config)
    path_to_data = Path(model_configs['path_to_data'])
    train_model = model_configs['train_model']
    workers_num = model_configs['workers_num']
    batch_size = model_configs['batch_size']
    data_loaders = get_data_loaders(path_to_data, batch_size, workers_num,
                                    train_model)

    model = DeepLabV3Plus(model_configs['output_classes'])

    device = 'cpu'
    device_count = 0
    if cuda.is_available() and model_configs['cuda_usage']:
        device = 'cuda'
        device_count = cuda.device_count()

    # Compare strings by value: ``is not 'cpu'`` tests object identity, which
    # only works by accident of string interning and triggers a SyntaxWarning.
    if device != 'cpu':
        if device_count > 1:
            model = nn.DataParallel(model).cuda()
        else:
            model = model.cuda()

    # Both are passed through to train()/val() as None.
    criterion = None
    metric = None
    optimizer = optim.SGD(model.parameters(),
                          lr=model_configs['learning_rate'],
                          momentum=0.9)

    info_paths = model_configs['info_paths']
    writer = SummaryWriter(log_dir=info_paths['log_dir'])
    total_epochs = model_configs['epochs']

    for epoch in range(total_epochs):
        model.train()
        train(model,
              data_loaders['train'],
              epoch,
              optimizer,
              criterion,
              metric,
              writer,
              device=device)
        # NOTE(review): ``nn.Module`` and ``nn.DataParallel`` expose ``eval()``,
        # not ``val()``; unless DeepLabV3Plus defines ``val`` this raises
        # AttributeError (always on the multi-GPU path) -- confirm and switch.
        model.val()
        val(model,
            criterion,
            metric,
            data_loaders['val'],
            epoch,
            writer,
            device=device)
def check_for_gpu(params) -> object:
    """Validate ``params['cuda_device']`` against the visible CUDA devices.

    Nothing happens when the entry is ``None`` or smaller than the device
    count; otherwise a ``ConfigurationError`` is raised.
    """
    requested = params['cuda_device']
    if requested is None:
        return
    if requested >= cuda.device_count():
        raise ConfigurationError(
            "Experiment specified a GPU but none is available;"
            " if you want to run on CPU use the override"
            " 'trainer.cuda_device=-1' in the json config file.")
def __init__(self):
    """Record the NumPy version, the platform string and the CUDA device count.

    ``cuda_info`` holds ``cuda.device_count()`` when CUDA is available and
    ``None`` otherwise.
    """
    self.numpy_version = numpy.__version__
    self.platform_version = platform.platform()
    self.cuda_info = cuda.device_count() if cuda.is_available() else None
def __init__(
        self,
        name: str,
        model: Model,
        optimizer: Optimizer,
        cuda_device: int,
        grad_norm: Optional[float] = None,
        scaler: Optional[amp.GradScaler] = None,
        grad_clipping: Optional[float] = None,
        learning_rate_scheduler: Optional[LearningRateScheduler] = None,
        momentum_scheduler: Optional[MomentumScheduler] = None
) -> None:
    """Store one component's model, optimizer and training options.

    Args:
        name: Identifier stored on the instance.
        model: Model stored on the instance.
        optimizer: Optimizer stored as ``_optimizer``.
        cuda_device: Device index.  ``None`` selects device 0 when any CUDA
            device is visible and -1 otherwise.  The value is validated with
            ``check_for_gpu`` and converted with ``int_to_device``.
        grad_norm: Stored as ``_grad_norm``.
        scaler: Stored as ``_scaler``.
        grad_clipping: Stored as ``_grad_clipping``.
        learning_rate_scheduler: Stored as ``_learning_rate_scheduler``.
        momentum_scheduler: Stored as ``_momentum_scheduler``.

    The return annotation is ``None``: ``__init__`` never returns the instance,
    so the previous ``-> "ComponentOptimizer"`` annotation was incorrect.
    """
    self.name = name
    self.model = model
    self._optimizer = optimizer

    if cuda_device is None:
        # Imported lazily, only when a default device has to be chosen.
        from torch import cuda
        cuda_device = 0 if cuda.device_count() > 0 else -1

    check_for_gpu(cuda_device)
    self._cuda_device = int_to_device(cuda_device)

    self._grad_norm = grad_norm
    self._scaler = scaler
    self._grad_clipping = grad_clipping
    self._learning_rate_scheduler = learning_rate_scheduler
    self._momentum_scheduler = momentum_scheduler

    # One loss tracker per phase.
    self._loss = {'train': ComponentLoss(), 'validation': ComponentLoss()}
def system_info(self):
    """Merge a snapshot of host information into ``self`` via ``self.update``.

    Memory and root-filesystem sizes are reported in whole GiB (integer
    division by 2**30, suffixed with ``' GB'``).  The timestamp is formatted
    from microseconds down to the year (``%f-%S-%M-%H-%d-%m-%Y``).
    """
    uname = platform.uname()
    gpu_names = [
        cuda.get_device_name(index) for index in range(cuda.device_count())
    ]
    gib = 2**30

    snapshot = {
        'python': platform.python_version(),
        'machine': uname.machine,
        'processor': uname.processor,
        'os': os.name,
        'os_name': platform.system(),
        'os_ver': platform.release(),
        'memory': str(psutil.virtual_memory().total // gib) + ' GB',
        'storage': str(psutil.disk_usage('/').total // gib) + ' GB',
        'user': pwd.getpwuid(os.getuid())[0],
        'gpus': gpu_names,
        'timestamp': datetime.now().strftime('%f-%S-%M-%H-%d-%m-%Y')
    }
    self.update(snapshot)
def get_info():
    """Return CUDA availability and the names of the visible devices.

    The result has two keys: ``"has_cuda"`` (bool) and ``"devices"`` (list of
    device names, empty when CUDA is unavailable).
    """
    has_cuda = cuda.is_available()
    if cuda.is_available():
        device_names = [
            cuda.get_device_name(index)
            for index in range(cuda.device_count())
        ]
    else:
        device_names = []
    return {"has_cuda": has_cuda, "devices": device_names}
def check_for_gpu(device_id: Union[int, List[int]]):
    """Raise ``ConfigurationError`` if a requested GPU is not available.

    A list is validated element by element.  ``None`` and negative ids are
    accepted without any check.
    """
    if isinstance(device_id, list):
        for single_id in device_id:
            check_for_gpu(single_id)
        return

    if device_id is None or device_id < 0:
        return

    num_devices_available = cuda.device_count()
    if num_devices_available == 0:
        # Torch will give a more informative exception than ours, so we want
        # to include that context as well if it's available. For example, if
        # you try to run torch 1.5 on a machine with CUDA10.1 you'll get:
        #
        #     The NVIDIA driver on your system is too old (found version 10010).
        #
        torch_gpu_error = ""
        try:
            cuda._check_driver()
        except Exception as e:
            torch_gpu_error = "\n{0}".format(e)
        raise ConfigurationError(
            "Experiment specified a GPU but none is available;"
            " if you want to run on CPU use the override"
            " 'trainer.cuda_device=-1' in the json config file." +
            torch_gpu_error)

    if device_id >= num_devices_available:
        raise ConfigurationError(
            f"Experiment specified GPU device {device_id}"
            f" but there are only {num_devices_available} devices "
            f" available.")
def __init__(self,
             module,
             device_ids=None,
             output_device=None,
             dim=0,
             allow_dict=True,
             allow_replication_callback=True,
             user_scattered=False,
             use_scatter_stream=True,
             persistent=False,
             copy_parameters=False,
             copy_buffers=True):
    """Configure the data-parallel wrapper around ``module``.

    ``device_ids`` defaults to every visible CUDA device and ``output_device``
    to the first entry of ``device_ids``.  With exactly one device the wrapped
    module is moved onto it immediately.  The remaining arguments are stored
    unchanged as attributes, and ``replicas`` starts as an empty
    ``nn.ModuleList``.
    """
    super(DataParallel, self).__init__()

    if device_ids is None:
        device_ids = list(range(cuda.device_count()))
    if output_device is None:
        output_device = device_ids[0]

    self.dim = dim
    self.module = module
    self.device_ids = device_ids
    self.output_device = output_device

    # Single-device case: place the module on that device right away.
    if len(self.device_ids) == 1:
        self.module.cuda(device_ids[0])

    # Behaviour switches, stored exactly as passed in.
    self.allow_dict = allow_dict
    self.allow_replication_callback = allow_replication_callback
    self.user_scattered = user_scattered
    self.use_scatter_stream = use_scatter_stream
    self.persistent = persistent
    self.copy_parameters = copy_parameters
    self.copy_buffers = copy_buffers

    self.replicas = nn.ModuleList()
def get_num_nodes() -> int:
    """Return the number of nodes taking part in the run.

    Outside distributed mode this is 1.  Otherwise it is the world size
    divided (integer division) by the local device count, which assumes every
    node runs the same number of processes.
    """
    if is_distributed():
        return get_world_size() // device_count()
    return 1
def control_gpu_count(train_on_gpu):
    """Write the detected GPU count to ``f`` when training on GPU.

    Does nothing when ``train_on_gpu`` is falsy.  The multi-GPU flag computed
    here is a local variable only; it is not returned.
    """
    if not train_on_gpu:
        return

    detected = cuda.device_count()
    f.write(str(detected) + " gpus detected.\n")
    multi_gpu = detected > 1
def _get_stream(device):
    """Return the cached background copy stream for ``device``.

    ``-1`` yields ``None``.  The module-level ``_streams`` list is sized to
    the device count on first use, and each stream is created the first time
    its device is requested.
    """
    global _streams
    if device == -1:
        return None

    if _streams is None:
        _streams = [None] * cuda.device_count()

    stream = _streams[device]
    if stream is None:
        stream = cuda.Stream(device)
        _streams[device] = stream
    return stream
def wrapper(
    self: TorchModelBuilder[TModule, TOptimizer]
) -> Tuple[TModule, TOptimizer]:
    """Build the model and optimiser, moving the model to all GPUs if possible.

    Without CUDA the pair from ``construct_model_function`` is returned as is.
    With CUDA the model is moved to the GPU, wrapped in ``DataParallel`` over
    every visible device, and cuDNN benchmarking is switched on.
    """
    model, optimiser = construct_model_function(self)
    if not cuda.is_available():
        return model, optimiser

    model.cuda()
    gpu_ids = range(cuda.device_count())
    model = DataParallel(model, device_ids=gpu_ids)
    backends.cudnn.benchmark = True
    return model, optimiser
def get_min_used_gpu():
    """Return the index of the CUDA device with the least allocated memory.

    Memory is measured with ``torch.cuda.memory_allocated``.  Ties go to the
    lowest index, and 0 is returned when no device is visible.

    Each device is queried once, and there is no fixed upper bound on the
    comparison: the previous ``1e+10`` sentinel made the function return
    device 0 whenever every device had at least that many bytes allocated.
    """
    import torch.cuda as cutorch

    return min(range(cutorch.device_count()),
               key=cutorch.memory_allocated,
               default=0)
def show_gpu_chooser(default=0, override=None):
    """Pick a device string, interactively unless ``override`` is given.

    Args:
        default: GPU index used when the interactive answer is not ``"cpu"``
            and not a valid device index.
        override: Optional string that bypasses the prompt.  A decimal string
            naming a valid CUDA device becomes ``"cuda:<idx>"``; any other
            value is returned unchanged.

    Returns:
        ``"cpu"``, ``"cuda:<idx>"``, or the ``override`` value itself.
    """
    # Identity test: ``None`` is a singleton, so ``is not`` is the correct
    # (and __ne__-proof) comparison.
    if override is not None:
        if override.isdecimal():
            idx = int(override)
            if cuda.is_available() and idx in range(cuda.device_count()):
                return "cuda:{}".format(idx)
        return override

    if not cuda.is_available():
        return "cpu"

    # Show current GPU utilisation before asking.
    gpustat.new_query().print_formatted(no_color=True)
    idx = input("Choose GPU (default {}):".format(default))
    if idx == "cpu":
        return "cpu"
    if idx.isdecimal():
        idx = int(idx)
        if idx in range(cuda.device_count()):
            return "cuda:{}".format(idx)
    return "cuda:{}".format(default)
def __init__(self, gpu, config, title=None):
    """Store the configuration, title and GPU selection.

    When ``gpu`` is ``None`` it becomes the list of all CUDA device indices as
    strings, or stays ``None`` if CUDA is unavailable.
    """
    self.config = config
    self.title = title

    if gpu is None:
        if cuda.is_available():
            gpu = [str(index) for index in range(cuda.device_count())]
        else:
            gpu = None
    self.gpu = gpu
def mode_allworkers_saveall(out_dir, mode):
    """Run the smdataparallel MNIST job saving from all workers, then verify.

    One worker is launched per visible device (a single worker when there are
    none).  The resulting trial must report that many workers, more than 25
    tensors, and the first weight and loss tensors from every worker at
    step 0.
    """
    path = build_json(out_dir, include_workers="all", save_all=True)
    num_workers = device_count() or 1
    mode_args = list(SMDATAPARALLEL_PYTORCH_TEST_MNIST_ARGS)
    launch_smdataparallel_job(
        script_file_path=SMDATAPARALLEL_PYTORCH_TEST_MNIST_SCRIPT,
        script_args=mode_args,
        num_workers=num_workers,
        config_file_path=path,
        mode=mode,
    )

    tr = create_trial(out_dir)
    assert len(tr.workers()) == num_workers
    assert len(tr.tensor_names()) > 25

    first_weight = tr.tensor_names(collection="weights")[0]
    assert len(tr.tensor(first_weight).workers(0)) == num_workers

    first_loss = tr.tensor_names(collection="losses")[0]
    assert len(tr.tensor(first_loss).workers(0)) == num_workers
def __str__(self):
    """Render the platform, NumPy and CUDA information, one item per line."""
    platform_line = '''Platform: {}\n'''.format(self.platform_version)
    numpy_line = '''NumPy: {}\n'''.format(self.numpy_version)

    buffer = six.StringIO()
    buffer.write(platform_line)
    buffer.write(numpy_line)
    if self.cuda_info is not None:
        # The count is queried again here rather than read from cuda_info.
        buffer.write('''CUDA: {}\n'''.format(cuda.device_count()))
    else:
        buffer.write('''CUDA: Not Available\n''')
    return buffer.getvalue()
def _build_classifier_head(n_inputs, n_classes):
    """Return the trainable head: Linear-ReLU-Dropout-Linear-LogSoftmax."""
    return nn.Sequential(
        nn.Linear(n_inputs, 256), nn.ReLU(), nn.Dropout(0.2),
        nn.Linear(256, n_classes), nn.LogSoftmax(dim=1))


def get_pretrained_model(model_name, n_classes=102):
    """Retrieve a pre-trained model from torchvision

    All pre-trained parameters are frozen and the final layer is replaced
    with a new trainable head.  The model is moved to the GPU when CUDA is
    available and wrapped in ``nn.DataParallel`` when more than one GPU is
    detected.

    Params
    -------
        model_name (str): name of the model (currently only accepts vgg16 and
            resnet50)
        n_classes (int): number of output classes of the new head
            (default 102)

    Return
    --------
        model (PyTorch model): cnn

    Raises
    --------
        ValueError: if ``model_name`` is not one of the supported names.
            (This used to surface as an UnboundLocalError on ``model``.)
    """
    # Fail fast, before any weights are downloaded or loaded.
    if model_name not in ('vgg16', 'resnet50'):
        raise ValueError(
            f"Unsupported model_name {model_name!r}; "
            f"expected 'vgg16' or 'resnet50'.")

    # Whether to train on gpu
    train_on_gpu = cuda.is_available()

    # Number of gpus
    multi_gpu = False
    if train_on_gpu:
        gpu_count = cuda.device_count()
        print(f'{gpu_count} gpus detected.')
        multi_gpu = gpu_count > 1

    if model_name == 'vgg16':
        model = models.vgg16(pretrained=True)
    else:
        model = models.resnet50(pretrained=True)

    # Freeze the pre-trained layers; only the new head is trained.
    for param in model.parameters():
        param.requires_grad = False

    if model_name == 'vgg16':
        n_inputs = model.classifier[6].in_features
        model.classifier[6] = _build_classifier_head(n_inputs, n_classes)
    else:
        n_inputs = model.fc.in_features
        model.fc = _build_classifier_head(n_inputs, n_classes)

    # Move to gpu and parallelize
    if train_on_gpu:
        model = model.to('cuda')
    if multi_gpu:
        model = nn.DataParallel(model)

    return model
def system_info():
    """Print the Python, PyTorch and torchvision versions and the CUDA devices."""
    print(sys.version, "\n")
    print(f"PyTorch {torch.__version__} \n")
    print(f"Torch-vision {torchvision.__version__} \n")
    print("Available devices:")

    if not cuda.is_available():
        print("CPUs only, no GPUs found")
        return

    device_total = cuda.device_count()
    for i in range(device_total):
        print(f"{i}: {cuda.get_device_name(i)}")
def __get_min_used_gpu(k):
    """Return the indices of the ``k`` CUDA devices with the least allocated memory.

    The per-device ``memory_allocated`` figures are printed, then the ``k``
    smallest are selected with ``torch.topk(..., largest=False)``.  The result
    is a Python list of the index tensor's elements.
    """
    import torch.cuda as cutorch

    allocated = [
        torch.cuda.memory_allocated(index)
        for index in range(cutorch.device_count())
    ]
    print(allocated)
    _, lowest = torch.topk(torch.tensor(allocated), k, largest=False)
    return list(lowest)
def system_info():
    """Print interpreter and library versions, then the available devices."""
    print(sys.version, "\n")
    print("PyTorch {}".format(torch.__version__), "\n")
    print("Torch-vision {}".format(torchvision.__version__), "\n")
    print("Available devices:")

    if not cuda.is_available():
        print("CPUs")
        return

    for index in range(cuda.device_count()):
        device_name = cuda.get_device_name(index)
        print("{}: {}".format(index, device_name))
def parallelize(model):
    """Wrap ``model`` in ``nn.DataParallel`` spanning every visible CUDA device."""
    import torch.cuda as cuda
    import torch.nn as nn

    all_devices = list(range(cuda.device_count()))
    return nn.DataParallel(model, device_ids=all_devices)
def get_free_gpus(bytes_needed=0):
    """Return GPU indices with more than ``bytes_needed`` free, most free first.

    Free memory is ``memory.total - memory.used`` from gpustat, multiplied by
    2**20 before it is compared with ``bytes_needed``.
    """
    gpu_stats = gpustat.new_query()

    free_by_index = {}
    for index in range(cuda.device_count()):
        entry_total = gpu_stats[index]["memory.total"]
        entry_used = gpu_stats[index]["memory.used"]
        bytes_free = 2**20 * (entry_total - entry_used)
        if bytes_free > bytes_needed:
            free_by_index[index] = bytes_free

    # Stable sort: devices with equal free memory keep ascending index order.
    return sorted(free_by_index, key=free_by_index.get, reverse=True)
def check_for_gpu(device_id: int):
    """Raise ``ConfigurationError`` when ``device_id`` names an unavailable GPU.

    ``None`` and negative ids are accepted without any check.  Separate
    messages are used for "no GPU at all" and "index out of range".
    """
    if device_id is None or device_id < 0:
        return

    num_devices_available = cuda.device_count()
    if num_devices_available == 0:
        raise ConfigurationError(
            "Experiment specified a GPU but none are available;"
            " if you want to run on CPU use the override"
            " 'trainer.cuda_device=-1' in the json config file.")

    if device_id >= num_devices_available:
        raise ConfigurationError(
            f"Experiment specified GPU device {device_id}"
            f" but there are only {num_devices_available} devices "
            f" available.")
def set_gpu(self, args):
    """Select the CUDA device named by ``args.gpuid[0]`` and record CUDA usage.

    A first id of ``-1`` disables CUDA (``self.use_cuda = False``); any other
    value is passed to ``torch.cuda.set_device`` and enables it.  The number
    of visible GPUs and the chosen id are printed afterwards.
    """
    primary_gpu = args.gpuid[0]
    enable_cuda = primary_gpu != -1
    if enable_cuda:
        torch.cuda.set_device(primary_gpu)
    self.use_cuda = enable_cuda

    print("{0} GPU(s) are available".format(cuda.device_count()))
    print("Using GPU {}".format(args.gpuid[0]))
def check_for_gpu(device_id: int):
    """Raise ``ConfigurationError`` if ``device_id`` is at or past the GPU count.

    ``None`` is accepted without a check.
    """
    if device_id is None:
        return

    available = cuda.device_count()
    if device_id >= available:
        raise ConfigurationError(
            "Experiment specified a GPU but none is available;"
            " if you want to run on CPU use the override"
            " 'trainer.cuda_device=-1' in the json config file.")