def diffusion(X, y, w_opt, loss, maxite=2000, alpha=1e-1, **kwargs):
    """Run ``maxite`` diffusion iterations and record the distance to ``w_opt``.

    Each iteration takes a local gradient step on ``w`` and then replaces
    ``w`` with the result of ``bf.neighbor_allreduce`` over the stepped value,
    using the receive weights of the currently loaded topology.

    Args:
        X, y: Local data, forwarded unchanged to ``loss_step``.
        w_opt: Reference solution; only ``w_opt.data`` is read, to measure
            the error of the current iterate.
        loss: Either ``'logistic_regression'`` or ``'linear_regression'``.
        maxite: Number of iterations to run.
        alpha: Step size applied to the local gradient.
        **kwargs: ``rho`` (default ``1e-1``) is read for logistic regression
            only; linear regression always uses ``rho = 0``.

    Returns:
        A tuple ``(w, mse)``. ``mse`` holds one 2-norm of ``w - w_opt`` per
        iteration on rank 0 and stays empty on every other rank.

    Raises:
        NotImplementedError: If ``loss`` is not one of the two supported tasks.

    Note:
        The variable dimension ``n`` is read from module scope; it is not
        defined inside this function.
    """
    if loss not in ('logistic_regression', 'linear_regression'):
        raise NotImplementedError(
            'Task not supported. This example only supports'
            ' linear_regression and logistic_regression')
    rho = kwargs.get('rho', 1e-1) if loss == 'logistic_regression' else 0

    # Weights this rank applies to itself and to each in-neighbor.
    own_weight, recv_weights = topology_util.GetRecvWeights(
        bf.load_topology(), bf.rank())

    w = torch.zeros(n, 1, dtype=torch.double, requires_grad=True)
    phi = w.clone()
    mse = []

    for _ in range(maxite):
        # Local gradient via autograd; loss_step is expected to leave the
        # gradient in w.grad, which is read right below.
        loss_step(
            X, y, w,
            tensor_name='neighbor.allreduce.local_variable',
            loss=loss,
            rho=rho,
        )

        # Diffusion update: local gradient step, then neighbor combination.
        # Done under no_grad so the update itself is not tracked by autograd.
        with torch.no_grad():
            phi = w - alpha * w.grad.data
            w.data = bf.neighbor_allreduce(
                phi,
                self_weight=own_weight,
                src_weights=recv_weights,
                name='local variable',
            )
            w.grad.data.zero_()

        # Only rank 0 keeps the convergence history.
        if bf.rank() == 0:
            mse.append(torch.norm(w.data - w_opt.data, p=2))

    return w, mse
def exact_diffusion(X, y, w_opt, loss, maxite=2000, alpha=1e-1, use_Abar=True, **kwargs):
    """Run ``maxite`` exact-diffusion iterations and record the distance to ``w_opt``.

    Each iteration performs three steps on the local variable ``w``:

    1. ``psi = w - alpha * grad``            (local gradient step)
    2. ``phi = psi + w - psi_prev``          (correction with the previous psi)
    3. ``w = neighbor_allreduce(phi)``       (weighted neighbor combination)

    Args:
        X, y: Local data, forwarded unchanged to ``loss_step``.
        w_opt: Reference solution; only ``w_opt.data`` is read, to measure
            the error of the current iterate.
        loss: Either ``'logistic_regression'`` or ``'linear_regression'``.
        maxite: Number of iterations to run.
        alpha: Step size applied to the local gradient.
        use_Abar: When true, the combination weights are replaced by
            ``(self_weight + 1) / 2`` for this rank and ``v / 2`` for every
            neighbor weight ``v`` before iterating.
        **kwargs: ``rho`` (default ``1e-1``) is read for logistic regression
            only; linear regression always uses ``rho = 0``.

    Returns:
        A tuple ``(w, mse)``. ``mse`` holds one 2-norm of ``w - w_opt`` per
        iteration on rank 0 and stays empty on every other rank.

    Raises:
        NotImplementedError: If ``loss`` is not one of the two supported tasks.

    Note:
        The variable dimension ``n`` is read from module scope; it is not
        defined inside this function.
    """
    if loss == 'logistic_regression':
        rho = kwargs.get('rho', 1e-1)
    elif loss == 'linear_regression':
        rho = 0
    else:
        raise NotImplementedError(
            'Task not supported. This example only supports' +
            ' linear_regression and logistic_regression')

    topology = bf.load_topology()
    self_weight, neighbor_weights = topology_util.GetRecvWeights(
        topology, bf.rank())

    if bf.rank() == 0:
        print('self weights with A: {}\n'.format(self_weight))
        print('neighbor weights with A:\n')
        for k, v in neighbor_weights.items():
            print(k, v)

    w = torch.zeros(n, 1, dtype=torch.double, requires_grad=True)
    # Detached so the correction term never drags an autograd graph along.
    psi_prev = w.detach().clone()
    mse = []

    # Construct A_bar: average the combination weights with the identity.
    if use_Abar:
        self_weight = (self_weight + 1) / 2
        # Build a new dict instead of rewriting values while iterating.
        neighbor_weights = {k: v / 2 for k, v in neighbor_weights.items()}

    for _ in range(maxite):
        # Local gradient via autograd; loss_step is expected to leave the
        # gradient in w.grad, which is read right below.
        loss_step(X, y, w, tensor_name='neighbor.allreduce.local_variable',
                  loss=loss, rho=rho)

        # Exact diffusion update. Run under no_grad (as diffusion() does) so
        # psi/phi are plain tensors: they are never back-propagated through,
        # and tracking them only costs memory and time.
        with torch.no_grad():
            psi = w - alpha * w.grad.data
            phi = psi + w.data - psi_prev
            # Keyword arguments match the call in diffusion() and do not
            # depend on the positional order of neighbor_allreduce.
            w.data = bf.neighbor_allreduce(phi,
                                           self_weight=self_weight,
                                           src_weights=neighbor_weights,
                                           name='local variable')
            psi_prev = psi.clone()
            w.grad.data.zero_()

        # Only rank 0 keeps the convergence history.
        if bf.rank() == 0:
            mse.append(torch.norm(w.data - w_opt.data, p=2))

    return w, mse
def set_topology(self, topology: Optional[networkx.DiGraph] = None,
                 is_weighted: bool = False) -> bool:
    """Set the virtual topology used by the MPI backend.

    Args:
        topology: A networkx.DiGraph object describing the topology. If not
            provided, the graph returned by
            ``topology_util.ExponentialGraph(size=self.size())`` is used.
        is_weighted: If set to true, the win_update and neighbor_allreduce
            will execute the weighted average instead, where the weights are
            the values used in the topology matrix (including self weight).
            Note win_get/win_put/win_accumulate do not use this weight since
            win_update already uses these weights.

    Returns:
        True if the topology was set (or already equals the current one),
        False if the backend call did not report success.

    Raises:
        TypeError: If ``topology`` is not a networkx.DiGraph.

    Example:
        >>> import bluefog.torch as bf
        >>> from bluefog.common import topology_util
        >>> bf.init()
        >>> bf.set_topology(topology_util.RingGraph(bf.size()))
    """
    if topology is None:
        topology = topology_util.ExponentialGraph(size=self.size())
        if self.local_rank() == 0:
            logger.info(
                "Topology is not specified. Default Exponential Two topology is used."
            )

    if not isinstance(topology, networkx.DiGraph):
        raise TypeError("topology must be a networkx.DiGraph obejct.")

    if topology_util.IsTopologyEquivalent(topology, self._topology):
        if self.local_rank() == 0:
            logger.debug(
                "Topology to set is the same as old one. Skip the setting."
            )
        return True

    # The MPI graph communicator does not list a rank as its own neighbor,
    # so self-loops are dropped from both neighbor lists.
    my_rank = self.rank()
    out_neighbors = sorted(
        r for r in topology.successors(my_rank) if r != my_rank)
    in_neighbors = sorted(
        r for r in topology.predecessors(my_rank) if r != my_rank)
    in_count = len(in_neighbors)
    out_count = len(out_neighbors)

    int_ptr = ctypes.POINTER(ctypes.c_int)
    in_array_type = ctypes.c_int * in_count
    out_array_type = ctypes.c_int * out_count

    if is_weighted:
        # Flatten the neighbor_weights dictionary into a vector of weights
        # from the sources, i.e. the (in-)neighbors, ordered by rank.
        self_weight, neighbor_weights = topology_util.GetRecvWeights(
            topology, my_rank)
        weight_values = [neighbor_weights[r] for r in sorted(neighbor_weights)]
        weight_array_type = ctypes.c_float * in_count

        backend_call = self._MPI_LIB_CTYPES.bluefog_set_topology_with_weights
        backend_call.argtypes = [
            ctypes.c_int, int_ptr,
            ctypes.c_int, int_ptr,
            ctypes.c_float, ctypes.POINTER(ctypes.c_float),
        ]
        ret = backend_call(
            in_count, in_array_type(*in_neighbors),
            out_count, out_array_type(*out_neighbors),
            self_weight, weight_array_type(*weight_values))
    else:
        backend_call = self._MPI_LIB_CTYPES.bluefog_set_topology
        backend_call.argtypes = [
            ctypes.c_int, int_ptr,
            ctypes.c_int, int_ptr,
        ]
        ret = backend_call(
            in_count, in_array_type(*in_neighbors),
            out_count, out_array_type(*out_neighbors))

    # The backend reports success with a return value of 1.
    if ret == 1:
        self._topology = topology
        self._is_topo_weighted = is_weighted
        return True

    if self.local_rank() == 0:
        logger.error(
            "Cannot set topology correctly. Three common reasons caused this. \n"
            "1. Has Bluefog been initialized? use bf.init(). \n"
            "2. The win_create has been called. It is not allowed to change\n"
            " the topology after that. You can call win_free() to unregister\n"
            " all window object first, then set the topology. \n"
            "3. Make sure all previous MPI ops are done. It is not allowed to \n"
            " change the topology while there is undone MPI ops.")
    return False