def __init__(self, size: int, input_size: int, context_size: int,
             context_map_size: int = 4, learning_rate: float = 0.01,
             pred_clipping: float = 0.01, weight_clipping: float = 5,
             bias: bool = True):
    """Build a layer of gated neurons, optionally reserving one slot for a bias.

    Args:
        size: Number of outputs of this layer.
        input_size: Number of logits coming in from the previous layer.
        context_size: Dimensionality of the side (context) input.
        context_map_size: Number of context hyperplanes per neuron.
        learning_rate: Online update step size passed to each neuron.
        pred_clipping: Probability clipping bound for predictions.
        weight_clipping: Absolute clipping bound for neuron weights.
        bias: Whether to replace one output with a fixed random bias logit.
    """
    # A single-output layer cannot give up a slot to a bias.
    if size == 1:
        bias = False

    # With a bias, one output slot is the constant bias logit, so one fewer
    # neuron is created (but never fewer than one).
    neuron_count = max(1, size - 1) if bias else size
    self._neurons = [
        Neuron(input_size, context_size, context_map_size, pred_clipping,
               weight_clipping, learning_rate)
        for _ in range(neuron_count)
    ]

    if bias:
        # Fixed random logit drawn from the clipped-probability logit range.
        self._bias = np.random.uniform(slogit(pred_clipping),
                                       slogit(1 - pred_clipping))
    else:
        self._bias = None
def predict(self, logits, context_input, targets=None):
    """Gated prediction for a single neuron, with optional online update.

    Args:
        logits: Input logits from the previous layer.
        context_input: Side-information vector used for halfspace gating.
        targets: Optional targets; when given, the selected weight rows are
            updated in place with a clipped gradient step.

    Returns:
        Clipped output logits.
    """
    # Halfspace gating: project the side info onto the context hyperplanes.
    distances = self._context_maps.dot(context_input)
    if distances.ndim == 1:
        distances = distances.reshape(-1, 1)
    # BUG FIX: `np.int` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `int` is the supported equivalent.
    mapped_context_binary = (distances > self._context_bias).astype(int)
    # Fold the binary halfspace pattern into a single context index.
    current_context_indices = np.sum(mapped_context_binary * self._boolean_converter,
                                     axis=0)
    current_selected_weights = self._weights[current_context_indices, :]
    output_logits = current_selected_weights.dot(logits)
    if output_logits.ndim > 1:
        # Per-sample weights times per-sample logits: keep matching pairs only.
        output_logits = output_logits.diagonal()
    output_logits = np.clip(output_logits,
                            slogit(self._output_clipping),
                            slogit(1 - self._output_clipping))
    if targets is not None:
        sigmoids = sigmoid(output_logits)
        # Online gradient of log-loss w.r.t. the selected weights.
        update_value = self.learning_rate * (sigmoids - targets) * logits
        for idx, ci in enumerate(current_context_indices):
            self._weights[ci, :] = np.clip(
                self._weights[ci, :] - update_value[:, idx],
                -self._weight_clipping, self._weight_clipping)
    return output_logits
def __init__(self, size: int, input_size: int, context_size: int,
             context_map_size: int, num_classes: int,
             learning_rate: DynamicParameter, pred_clipping: float,
             weight_clipping: float, bias: bool = True,
             context_bias: bool = True):
    """Gated linear layer (NumPy backend).

    Args:
        size: Number of outputs of this layer.
        input_size: Number of logits from the previous layer.
        context_size: Dimensionality of the side (context) input.
        context_map_size: Number of context hyperplanes per neuron.
        num_classes: Number of classes of the overall model.
        learning_rate: Dynamic learning-rate parameter for online updates.
        pred_clipping: Probability clipping bound for predictions.
        weight_clipping: Absolute clipping bound for layer weights.
        bias: Whether to reserve one output slot for a fixed bias logit.
        context_bias: Whether context hyperplanes get a random offset.
    """
    super().__init__()
    assert size > 0 and input_size > 0 and context_size > 0
    assert context_map_size >= 2
    assert num_classes >= 2

    # Binary classification collapses to a single one-vs-all output.
    self.num_classes = 1 if num_classes == 2 else num_classes
    self.learning_rate = learning_rate
    # clipping value for predictions
    self.pred_clipping = pred_clipping
    # clipping value for weights of layer
    self.weight_clipping = weight_clipping

    if bias and size > 1:
        # One output slot becomes a fixed random logit in the clipped range.
        self.bias = np.random.uniform(low=slogit(self.pred_clipping),
                                      high=slogit(1 - self.pred_clipping),
                                      size=(1, 1, self.num_classes))
        self.size = size - 1
    else:
        self.bias = None
        self.size = size

    # Random context hyperplanes used for halfspace gating.
    self._context_maps = np.random.normal(
        size=(self.num_classes, self.size, context_map_size, context_size))
    if context_bias:
        self._context_bias = np.random.normal(
            size=(self.num_classes, self.size, context_map_size, 1))
        # Hyperplane normals are unit-normalised only in this branch.
        self._context_maps /= np.linalg.norm(self._context_maps,
                                             axis=-1, keepdims=True)
    else:
        self._context_bias = 0.0

    # Column of powers of two: converts a binary halfspace pattern to an index.
    self._boolean_converter = np.array([[1 << i] for i in range(context_map_size)])

    # One weight vector per (class, neuron, context); uniform initialisation.
    self._weights = np.full(
        shape=(self.num_classes, self.size, 2 ** context_map_size, input_size),
        fill_value=1 / input_size)
def predict(self, logit, context, target=None):
    """Gated prediction for the layer, with optional online weight update.

    Args:
        logit: Input logits; 2-D (batch, input) or 3-D with a class axis.
        context: Side-information batch used for halfspace gating.
        target: Optional targets; when given, the selected weights are
            updated in place with a clipped gradient step.

    Returns:
        Output logits, with the fixed bias column prepended when present.
    """
    if self.context_map_size > 0:
        # project side information and determine context index
        distances = torch.matmul(self._context_maps, context.T)
        mapped_context_binary = (distances > self._context_bias).int()
        current_context_indices = torch.sum(
            mapped_context_binary * self._boolean_converter, dim=-2)
    else:
        # Single degenerate context per neuron. BUG FIX: allocate the index
        # tensor on the same device as the weights so the module still works
        # after being moved to CUDA (the default would be CPU).
        current_context_indices = torch.zeros(
            self.num_classes, self.size, 1, dtype=torch.int64,
            device=self._weights.device)

    # select all context across all neurons in layer
    current_selected_weights = self._weights[
        torch.arange(self.num_classes).reshape(-1, 1, 1),
        torch.arange(self.size).reshape(1, -1, 1),
        current_context_indices, :]

    if logit.ndim == 2:
        logit = torch.unsqueeze(logit, dim=-1)

    # Per-sample weighted sum of input logits; diagonal keeps matching pairs.
    output_logits = torch.clamp(
        torch.matmul(current_selected_weights,
                     torch.unsqueeze(logit.T, dim=-3)).diagonal(dim1=-2, dim2=-1),
        min=slogit(self.pred_clipping),
        max=slogit(1 - self.pred_clipping)).T

    if target is not None:
        sigmoids = torch.sigmoid(output_logits)
        # compute update: log-loss gradient scaled by the learning rate
        diff = sigmoids - torch.unsqueeze(target, dim=1)
        update_values = self.learning_rate.value * torch.unsqueeze(
            diff, dim=-1) * torch.unsqueeze(logit.permute(0, 2, 1), dim=1)
        self._weights[torch.arange(self.num_classes).reshape(-1, 1, 1),
                      torch.arange(self.size).reshape(1, -1, 1),
                      current_context_indices, :] = torch.clamp(
            current_selected_weights - update_values.permute(2, 1, 0, 3),
            -self.weight_clipping, self.weight_clipping)

    if self.bias is not None:
        # Prepend the fixed bias logit, repeated for every sample in the batch.
        bias_append = torch.cat([self.bias] * output_logits.shape[0], dim=0)
        output_logits = torch.cat([bias_append, output_logits], dim=1)

    return output_logits
def predict(self, input: np.ndarray, target: Optional[np.ndarray] = None,
            return_probs: bool = False) -> np.ndarray:
    """
    Predict the class for the given inputs, and optionally update the weights.

    Args:
        input (np.array[B, N]): Batch of B N-dim float input vectors.
        target (np.array[B]): Optional batch of B target class labels (bool, or
            int if num_classes given) which, if given, triggers an online
            update if given.
        return_probs (bool): Whether to return the classification probability
            (for each one-vs-all classifier if num_classes given) instead of
            the class.

    Returns:
        Predicted class per input instance (bool, or int if num_classes
        given), or classification probabilities if return_probs set.
    """
    # Promote a single instance to a batch of one.
    if input.ndim == 1:
        input = np.expand_dims(input, axis=0)

    # Base predictions
    base_preds = self.base_predictor(input)
    base_preds = np.asarray(base_preds, dtype=float)

    # Context: the raw input doubles as the gating side information.
    context = np.asarray(input, dtype=float)

    # Target: one-hot / one-vs-all encoding for the per-class updates.
    if target is not None:
        target = label_binarize(target, classes=list(range(self.num_classes)))

    # Base logits: clip probabilities away from 0/1 before the logit transform.
    base_preds = np.clip(base_preds, a_min=self.pred_clipping,
                         a_max=(1.0 - self.pred_clipping))
    logits = slogit(base_preds)
    if self.bias:
        # introduce layer bias: the first base logit is replaced by a constant
        logits[:, 0] = self.base_bias

    # Layers: each layer predicts from the previous layer's logits and, when a
    # target is given, performs its online update as a side effect.
    for layer in self.layers:
        logits = layer.predict(logit=logits, context=context, target=target)
    # Final layer has a single neuron; drop that axis.
    logits = np.squeeze(logits, axis=1)
    if self.num_classes == 2:
        # Binary case uses one one-vs-all output; drop the class axis too.
        logits = np.squeeze(logits, axis=1)

    if return_probs:
        return sigmoid(logits)
    elif self.num_classes == 2:
        # Positive logit corresponds to probability > 0.5.
        return logits > 0
    else:
        return np.argmax(logits, axis=1)
def predict(self, logit, context, target=None):
    """Gated prediction for the layer (NumPy), with optional online update.

    Args:
        logit: Input logits; 2-D (batch, input) or 3-D with a class axis.
        context: Side-information batch used for halfspace gating.
        target: Optional targets; when given, the selected weights are
            updated in place with a clipped gradient step.

    Returns:
        Output logits, with the fixed bias column prepended when present.
    """
    # Halfspace gating: project side information onto the context hyperplanes.
    distances = np.matmul(self._context_maps, context.T)
    # BUG FIX: `np.int` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `int` is the supported equivalent.
    mapped_context_binary = (distances > self._context_bias).astype(int)
    current_context_indices = np.sum(mapped_context_binary * self._boolean_converter,
                                     axis=-2)
    # Select the active weight vector for every (class, neuron, sample).
    current_selected_weights = np.take_along_axis(
        self._weights,
        indices=np.expand_dims(current_context_indices, axis=-1),
        axis=2)
    if logit.ndim == 2:
        logit = np.expand_dims(logit, axis=-1)
    # Per-sample weighted sum of input logits; diagonal keeps matching pairs.
    output_logits = np.clip(
        np.matmul(current_selected_weights,
                  np.expand_dims(logit.T, axis=-3)).diagonal(axis1=-2, axis2=-1),
        slogit(self.pred_clipping), slogit(1 - self.pred_clipping)).T
    if target is not None:
        sigmoids = sigmoid(output_logits)
        # Log-loss gradient scaled by the (dynamic) learning rate.
        diff = sigmoids - np.expand_dims(target, axis=1)
        updates = self.learning_rate.value * np.expand_dims(
            diff, axis=-1) * np.expand_dims(np.swapaxes(logit, -1, -2), axis=1)
        # Scatter-subtract into the selected weight slots (unbuffered).
        np.add.at(
            self._weights,
            (np.arange(self.num_classes).reshape(-1, 1, 1, 1),
             np.arange(self.size).reshape(1, -1, 1, 1),
             np.expand_dims(current_context_indices, axis=-1)),
            -np.expand_dims(np.transpose(updates, np.array([2, 1, 0, 3])), axis=-2))
        self._weights = np.clip(self._weights, -self.weight_clipping,
                                self.weight_clipping)
    if self.bias is not None:
        # Prepend the fixed bias logit, repeated for every sample in the batch.
        output_logits = np.concatenate([
            np.vstack([self.bias] * output_logits.shape[0]), output_logits
        ], axis=1)
    return output_logits
def __init__(self, layer_sizes: Sequence[int], input_size: int,
             num_classes: int = 2, context_map_size: int = 4,
             bias: bool = True, context_bias: bool = False,
             base_predictor: Optional[
                 Callable[[np.ndarray], np.ndarray]] = None,
             learning_rate: Union[float, DynamicParameter] = 1e-3,
             pred_clipping: float = 1e-3, weight_clipping: float = 5.0):
    """Gated linear network (PyTorch backend).

    Args:
        layer_sizes: Number of neurons per hidden layer.
        input_size: Dimensionality of the raw input vectors.
        num_classes: Number of target classes.
        context_map_size: Number of context hyperplanes per neuron.
        bias: Whether layers carry a fixed bias logit.
        context_bias: Whether context hyperplanes get a random offset.
        base_predictor: Optional callable mapping inputs to base probabilities.
        learning_rate: Constant float or DynamicParameter schedule.
        pred_clipping: Probability clipping bound for predictions.
        weight_clipping: Absolute clipping bound for layer weights.
    """
    nn.Module.__init__(self)
    GLNBase.__init__(self, layer_sizes, input_size, num_classes,
                     context_map_size, bias, context_bias, base_predictor,
                     learning_rate, pred_clipping, weight_clipping)

    # Resolve the learning-rate schedule.
    if isinstance(learning_rate, float):
        self.learning_rate = ConstantParameter(learning_rate, 'learning_rate')
    elif isinstance(learning_rate, DynamicParameter):
        self.learning_rate = learning_rate
    else:
        raise ValueError('Invalid learning rate')

    if bias:
        # Constant base logit injected before the first layer.
        self.base_bias = np.random.uniform(low=slogit(pred_clipping),
                                           high=slogit(1 - pred_clipping))

    # Build the layer stack; a final single-neuron layer produces the output.
    self.layers = nn.ModuleList()
    previous_size = self.base_pred_size
    for size in (self.layer_sizes + (1,)):
        self.layers.append(
            Linear(size, previous_size, self.input_size,
                   self.context_map_size, self.num_classes,
                   self.learning_rate, self.pred_clipping,
                   self.weight_clipping, self.bias, self.context_bias))
        previous_size = size

    if torch.cuda.is_available():
        self.cuda()
def __init__(self, layer_sizes: Sequence[int], input_size: int,
             context_map_size: int = 4, num_classes: int = 2,
             base_predictor: Optional[
                 Callable[[np.ndarray], np.ndarray]] = None,
             learning_rate: Union[DynamicParameter, float] = 1e-4,
             pred_clipping: float = 1e-3, weight_clipping: float = 5.0,
             bias: bool = True, context_bias: bool = True):
    """Gated linear network (NumPy backend).

    Args:
        layer_sizes: Number of neurons per layer.
        input_size: Dimensionality of the raw input vectors.
        context_map_size: Number of context hyperplanes per neuron.
        num_classes: Number of target classes.
        base_predictor: Optional callable mapping inputs to base probabilities.
        learning_rate: Constant float or DynamicParameter schedule.
        pred_clipping: Probability clipping bound for predictions.
        weight_clipping: Absolute clipping bound for layer weights.
        bias: Whether layers carry a fixed bias logit.
        context_bias: Whether context hyperplanes get a random offset.
    """
    super().__init__(layer_sizes, input_size, context_map_size, num_classes,
                     base_predictor, learning_rate, pred_clipping,
                     weight_clipping, bias, context_bias)

    if bias:
        # Constant base logit injected before the first layer.
        self.base_bias = np.random.uniform(low=slogit(pred_clipping),
                                           high=slogit(1 - pred_clipping))

    # Resolve the learning-rate schedule.
    if isinstance(learning_rate, float):
        self.learning_rate = ConstantParameter(learning_rate, 'learning_rate')
    elif isinstance(learning_rate, DynamicParameter):
        self.learning_rate = learning_rate
    else:
        raise ValueError('Invalid learning rate')

    # Build the layer stack.
    self.layers = list()
    previous_size = self.base_pred_size
    for size in self.layer_sizes:
        self.layers.append(
            Linear(size, previous_size, self.input_size,
                   self.context_map_size, self.num_classes,
                   self.learning_rate, self.pred_clipping,
                   self.weight_clipping, self.bias, self.context_bias))
        previous_size = size
def __init__(self, size: int, input_size: int, context_size: int,
             context_map_size: int, num_classes: int,
             learning_rate: DynamicParameter, pred_clipping: float,
             weight_clipping: float, bias: bool, context_bias: bool):
    """Gated linear layer (PyTorch backend).

    Args:
        size: Number of outputs of this layer.
        input_size: Number of logits from the previous layer.
        context_size: Dimensionality of the side (context) input.
        context_map_size: Number of context hyperplanes per neuron
            (0 disables halfspace gating entirely).
        num_classes: Number of classes of the overall model.
        learning_rate: Dynamic learning-rate parameter for online updates.
        pred_clipping: Probability clipping bound for predictions.
        weight_clipping: Absolute clipping bound for layer weights.
        bias: Whether to reserve one output slot for a fixed bias logit.
        context_bias: Whether context hyperplanes get a random offset.
    """
    super().__init__()
    assert size > 0 and input_size > 0 and context_size > 0
    assert context_map_size >= 0
    assert num_classes >= 2

    self.context_map_size = context_map_size
    # Binary classification collapses to a single one-vs-all output.
    self.num_classes = num_classes if num_classes > 2 else 1
    self.learning_rate = learning_rate
    # clipping value for predictions
    self.pred_clipping = pred_clipping
    # clipping value for weights of layer
    self.weight_clipping = weight_clipping

    if bias and size > 1:
        # One output slot becomes a fixed random logit in the clipped range.
        self.bias = torch.empty((1, 1, self.num_classes)).uniform_(
            slogit(self.pred_clipping), slogit(1 - self.pred_clipping))
        self.size = size - 1
    else:
        self.bias = None
        self.size = size

    if context_map_size > 0:
        # Random context hyperplanes used for halfspace gating.
        self._context_maps = torch.as_tensor(
            np.random.normal(size=(self.num_classes, self.size,
                                   context_map_size, context_size)),
            dtype=torch.float32)

    # constant values for halfspace gating
    if context_map_size == 0:
        # Gating disabled: no context bias is ever consulted.
        pass
    elif context_bias:
        context_bias_shape = (self.num_classes, self.size, context_map_size, 1)
        self._context_bias = torch.tensor(
            np.random.normal(size=context_bias_shape), dtype=torch.float32)
        # Hyperplane normals are unit-normalised only in this branch.
        self._context_maps /= torch.norm(self._context_maps,
                                         dim=-1, keepdim=True)
    else:
        self._context_bias = torch.tensor(0.0)

    # BUG FIX: only wrap the bias when it exists. nn.Parameter(None) silently
    # creates an empty tensor, which would defeat the `self.bias is not None`
    # check in predict() and crash the subsequent torch.cat.
    if self.bias is not None:
        self.bias = nn.Parameter(self.bias, requires_grad=False)
    if context_map_size > 0:
        self._context_maps = nn.Parameter(self._context_maps,
                                          requires_grad=False)
        self._context_bias = nn.Parameter(self._context_bias,
                                          requires_grad=False)

    # array to convert mapped_context_binary context to index
    self._boolean_converter = nn.Parameter(
        torch.as_tensor(np.array([[2**i] for i in range(context_map_size)])),
        requires_grad=False)

    # weights for the whole layer: one vector per (class, neuron, context)
    weights_shape = (self.num_classes, self.size, 2**context_map_size,
                     input_size)
    self._weights = nn.Parameter(
        torch.full(size=weights_shape, fill_value=1 / input_size,
                   dtype=torch.float32),
        requires_grad=False)