def pin_model_to_device(device, model):
    """Put ``model`` on this process's GPU when ``device`` is ``"GPU"``.

    Args:
        device: Device selector. Only the exact string ``"GPU"`` enables the
            CUDA path; any other value leaves ``model`` untouched.
        model: Object exposing ``.cuda()``; it is only called on the GPU path.

    Returns:
        The result of ``device == "GPU"``, i.e. whether the GPU path ran.
    """
    use_gpu = device == "GPU"
    if not use_gpu:
        return use_gpu

    # Bluefog: pin GPU to local rank.
    # With NCCL the local rank is used as the device index as-is; otherwise
    # it is wrapped around the number of visible CUDA devices.
    gpu_index = bf.local_rank()
    if not bf.nccl_built():
        gpu_index = gpu_index % torch.cuda.device_count()

    torch.cuda.set_device(gpu_index)
    model.cuda()
    return use_gpu
def hier_setup():
    """Initialise Bluefog with two nodes per machine and a machine topology.

    ``BLUEFOG_NODES_PER_MACHINE`` is set to ``'2'`` before ``bf.init()`` is
    called, then an exponential graph over ``bf.size() // 2`` machines is
    installed as the machine topology.

    Returns:
        tuple: ``(rank, size, local_rank, local_size)`` as reported by Bluefog.

    Raises:
        AssertionError: If the total number of processes is odd.
    """
    nodes_per_machine = 2
    os.environ['BLUEFOG_NODES_PER_MACHINE'] = str(nodes_per_machine)
    bf.init()

    # The machine grouping below only works if processes pair up evenly.
    world_size = bf.size()
    assert world_size % nodes_per_machine == 0

    machine_count = int(world_size // nodes_per_machine)
    machine_graph = bf.ExponentialGraph(machine_count)
    bf.set_machine_topology(machine_graph)

    return bf.rank(), bf.size(), bf.local_rank(), bf.local_size()
def cast_and_place(tensor, dtype):
    """Convert ``tensor`` to the tensor type ``dtype``, on a GPU if required.

    Args:
        tensor: Tensor to convert.
        dtype: Tensor type object exposing ``is_cuda`` and accepted by
            ``tensor.type(...)`` (e.g. ``torch.FloatTensor``).

    Returns:
        The converted tensor. When ``dtype.is_cuda`` is true the tensor is
        first moved to GPU ``bf.local_rank() % torch.cuda.device_count()``.

    Raises:
        EnvironmentError: If NCCL is built and the local process count
            exceeds the number of CUDA devices.
    """
    # CPU types need no device placement.
    if not dtype.is_cuda:
        return tensor.type(dtype)

    gpu_count = torch.cuda.device_count()
    if bf.nccl_built() and bf.local_size() > gpu_count:
        raise EnvironmentError(
            "Cannot run number of processes in one machine more than GPU device count"
            " in NCCL environment")

    target_gpu = bf.local_rank() % gpu_count
    return tensor.cuda(target_gpu).type(dtype)
# Remaining command-line options, then argument parsing.
parser.add_argument("--save-plot-file", default='average_consensus_plot.png',
                    help="Saving the plot in the file.")
parser.add_argument('--seed', type=int, default=2020,
                    help='Seed for randomness.')
args = parser.parse_args()
# CUDA is used only when it is both available and not disabled via no_cuda.
args.cuda = not args.no_cuda and torch.cuda.is_available()

bf.init()

# The seed is scaled by the rank so processes draw different random data.
# NOTE(review): a process whose rank is 0 seeds with 0 regardless of --seed,
# and --seed 0 gives every rank the same seed -- confirm this is intended.
torch.random.manual_seed(args.seed * bf.rank())

# Local data vector of length args.data_size in double precision.
if args.cuda:
    # Wrap the local rank around the number of visible CUDA devices.
    device = bf.local_rank() % torch.cuda.device_count()
    x = torch.randn(args.data_size, device=device, dtype=torch.double)
else:
    x = torch.randn(args.data_size, dtype=torch.double)

# Select the virtual topology requested on the command line.
if args.virtual_topology == "expo2":
    # No topology is set explicitly here; presumably the topology installed
    # by bf.init() is already the base-2 exponential graph -- TODO confirm.
    pass
elif args.virtual_topology == "expo3":
    bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=3))
elif args.virtual_topology == "expo4":
    bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=4))
elif args.virtual_topology == "ring":
    bf.set_topology(topology_util.RingGraph(bf.size(), connect_style=1))
elif args.virtual_topology == "mesh":
    # NOTE(review): the "mesh" option builds a RingGraph (connect_style=0)
    # with is_weighted=True rather than a mesh graph -- confirm intent.
    bf.set_topology(topology_util.RingGraph(bf.size(), connect_style=0),
                    is_weighted=True)
# Remaining command-line options, then argument parsing.
parser.add_argument('--batch_size', type=int, default=100,
                    help="batch size (default: 100).")
parser.add_argument('--seed', type=int, default=3,
                    help='set seed (default: 3).')
# --save_name has no default; argparse exits with an error if it is missing.
parser.add_argument('--save_name', type=str, required=True,
                    help='The file_postfix to save log')
args = parser.parse_args()

# Turn on cudnn and its benchmark mode.
cudnn.benchmark = True
cudnn.enabled = True

# Seed the torch and NumPy random number generators with the same value.
torch.manual_seed(args.seed)
np.random.seed(args.seed)

bf.init()

# Pin this process to a GPU: with NCCL the local rank is used directly,
# otherwise it is wrapped around the number of visible CUDA devices.
# NOTE(review): CUDA is used unconditionally in this fragment; there is no
# visible check of torch.cuda.is_available() -- confirm a GPU is required.
device_id = bf.local_rank() if bf.nccl_built() else bf.local_rank() % torch.cuda.device_count()
torch.cuda.set_device(device_id)
torch.cuda.manual_seed(args.seed)

# Presumably forwarded to the data loaders created later -- TODO confirm.
kwargs = {"num_workers": 4, "pin_memory": True}

# Load the data: pick the rank-specific train/test split and the matching
# model constructor for the requested dataset.
if args.dataset == "MNIST":
    train_set, test_set = MNIST_dataset_flat_dist(bf.rank())
    NN_model = MNIST_two_layers
elif args.dataset == "MNIST_Conv":
    train_set, test_set = MNIST_dataset_dist(bf.rank())
    NN_model = LeNet
elif args.dataset == "CIFAR10":
    train_set, test_set = CIFAR10_dataset_dist(bf.rank())
    NN_model = vgg11
def log(s, nl=True):
    """Print ``s`` only from the process whose Bluefog local rank is 0.

    Args:
        s: Object to print.
        nl: When true a newline terminates the output; otherwise nothing is
            appended. The stream is flushed either way.
    """
    if bf.local_rank() == 0:
        terminator = '\n' if nl else ''
        print(s, end=terminator, flush=True)
def test_bluefog_local_rank(hier_setup):
    """Check ``bf.local_rank()`` against the rank reported by the MPI env.

    The expected local rank is the MPI rank modulo ``min(2, size)``.
    ``hier_setup`` is requested but its value is not read here.
    """
    mpi_rank, mpi_size = mpi_env_rank_and_size()
    reported_local_rank = bf.local_rank()

    # A single-process run has a group of one, hence the cap at the MPI size.
    group_size = min(2, mpi_size)
    expected_local_rank = mpi_rank % group_size

    assert expected_local_rank == reported_local_rank
# Remaining command-line options, then argument parsing.
parser.add_argument('--data-size', type=int, default=2000,
                    help='input data size')
parser.add_argument('--data-dim', type=int, default=500,
                    help='input data dimension')
args = parser.parse_args()
# CUDA is used only when it is both available and not disabled via no_cuda.
args.cuda = not args.no_cuda and torch.cuda.is_available()

bf.init()
if args.cuda:
    # NOTE(review): the local rank is used as the device index without a
    # modulo over torch.cuda.device_count() -- confirm that the number of
    # local processes never exceeds the number of GPUs.
    torch.cuda.set_device(bf.local_rank())
    cudnn.benchmark = True


def logistic_loss_step(x_, rho, X, y, tensor_name, calculate_by_hand=True):
    """Calculate gradient of logistic loss via pytorch autograd.

    When ``calculate_by_hand`` is true the gradient is instead evaluated in
    closed form, stored on ``x_.grad`` as a column, and ``None`` is returned.
    """
    if calculate_by_hand:
        # Closed form: prob = exp(-y * (X @ x)), alpha = prob / (1 + prob),
        # grad = rho * x - mean(alpha * y * X, dim=0) reshaped to a column.
        # Assumes X is (n_samples, dim), y is (n_samples, 1) and x_ is
        # (dim, 1) -- TODO confirm against the caller.
        # Earlier variant of the next line: torch.exp(-y * X.mm(x_.data))
        prob = torch.exp(-y * torch.matmul(X, x_.data))
        alpha = prob / (1 + prob)
        x_.grad = rho * x_.data - torch.mean(alpha * y * X, dim=0).reshape(
            -1, 1)
        return
    else: