Example #1
    def __init__(self,
                 size: int,
                 input_size: int,
                 context_size: int,
                 context_map_size: int = 4,
                 learning_rate: float = 0.01,
                 pred_clipping: float = 0.01,
                 weight_clipping: float = 5,
                 bias: bool = True):

        if size == 1:
            bias = False

        if bias:
            self._neurons = [
                Neuron(input_size, context_size, context_map_size,
                       pred_clipping, weight_clipping, learning_rate)
                for _ in range(max(1, size - 1))
            ]
            self._bias = np.random.uniform(slogit(pred_clipping),
                                           slogit(1 - pred_clipping))
        else:
            self._neurons = [
                Neuron(input_size, context_size, context_map_size,
                       pred_clipping, weight_clipping, learning_rate)
                for _ in range(size)
            ]
            self._bias = None
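
The examples rely on slogit and sigmoid helpers that are not shown. A minimal sketch of what they presumably compute (the logit, i.e. inverse sigmoid, applied to probabilities that callers have already clipped away from 0 and 1):

    import numpy as np

    def sigmoid(x):
        # standard logistic function
        return 1.0 / (1.0 + np.exp(-x))

    def slogit(p):
        # inverse sigmoid; assumed definition. Callers clip p to
        # [pred_clipping, 1 - pred_clipping] first, so log() stays finite.
        return np.log(p / (1.0 - p))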
Example #2
    def predict(self, logits, context_input, targets=None):
        distances = self._context_maps.dot(context_input)
        if distances.ndim == 1:
            distances = distances.reshape(-1, 1)

        mapped_context_binary = (distances > self._context_bias).astype(int)
        current_context_indices = np.sum(mapped_context_binary *
                                         self._boolean_converter,
                                         axis=0)
        current_selected_weights = self._weights[current_context_indices, :]

        output_logits = current_selected_weights.dot(logits)
        if output_logits.ndim > 1:
            output_logits = output_logits.diagonal()

        output_logits = np.clip(output_logits, slogit(self._output_clipping),
                                slogit(1 - self._output_clipping))

        if targets is not None:
            sigmoids = sigmoid(output_logits)
            update_value = self.learning_rate * (sigmoids - targets) * logits

            for idx, ci in enumerate(current_context_indices):
                self._weights[ci, :] = np.clip(
                    self._weights[ci, :] - update_value[:, idx],
                    -self._weight_clipping, self._weight_clipping)

        return output_logits
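
For intuition, the gating in Example #2 assigns each context vector to one of 2**context_map_size regions by thresholding against context_map_size random hyperplanes and reading the resulting bits as a binary number. A small self-contained sketch (all names here are illustrative, not from the source):

    import numpy as np

    context_map_size, context_size = 4, 8
    context_maps = np.random.normal(size=(context_map_size, context_size))
    context_bias = np.random.normal(size=(context_map_size, 1))
    boolean_converter = np.array([[2**i] for i in range(context_map_size)])

    context_input = np.random.normal(size=(context_size, 1))
    bits = (context_maps.dot(context_input) > context_bias).astype(int)
    # bits weighted by powers of two yield the weight-vector index
    index = np.sum(bits * boolean_converter, axis=0)[0]
    assert 0 <= index < 2**context_map_size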
Example #3
    def __init__(self,
                 size: int,
                 input_size: int,
                 context_size: int,
                 context_map_size: int,
                 num_classes: int,
                 learning_rate: DynamicParameter,
                 pred_clipping: float,
                 weight_clipping: float,
                 bias: bool = True,
                 context_bias: bool = True):
        super().__init__()

        assert size > 0 and input_size > 0 and context_size > 0
        assert context_map_size >= 2
        assert num_classes >= 2

        self.num_classes = num_classes if num_classes > 2 else 1
        self.learning_rate = learning_rate
        # clipping value for predictions
        self.pred_clipping = pred_clipping
        # clipping value for weights of layer
        self.weight_clipping = weight_clipping

        if bias and size > 1:
            self.bias = np.random.uniform(low=slogit(self.pred_clipping),
                                          high=slogit(1 - self.pred_clipping),
                                          size=(1, 1, self.num_classes))
            self.size = size - 1
        else:
            self.bias = None
            self.size = size

        self._context_maps = np.random.normal(size=(self.num_classes,
                                                    self.size,
                                                    context_map_size,
                                                    context_size))
        if context_bias:
            self._context_bias = np.random.normal(size=(self.num_classes,
                                                        self.size,
                                                        context_map_size, 1))
            self._context_maps /= np.linalg.norm(self._context_maps,
                                                 axis=-1,
                                                 keepdims=True)
        else:
            self._context_bias = 0.0
        self._boolean_converter = np.array([[2**i]
                                            for i in range(context_map_size)])
        self._weights = np.full(shape=(self.num_classes, self.size,
                                       2**context_map_size, input_size),
                                fill_value=1 / input_size)
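
When context_bias is enabled, Example #3 normalizes each hyperplane so that the random biases correspond to comparable offsets. A quick check (illustrative shapes) that the normalization leaves unit-norm hyperplane rows:

    import numpy as np

    maps = np.random.normal(size=(1, 4, 4, 8))
    maps /= np.linalg.norm(maps, axis=-1, keepdims=True)
    assert np.allclose(np.linalg.norm(maps, axis=-1), 1.0)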
Example #4
    def predict(self, logit, context, target=None):
        if self.context_map_size > 0:
            # project side information and determine context index
            distances = torch.matmul(self._context_maps, context.T)
            mapped_context_binary = (distances > self._context_bias).int()
            current_context_indices = torch.sum(mapped_context_binary *
                                                self._boolean_converter,
                                                dim=-2)
        else:
            current_context_indices = torch.zeros(
                self.num_classes, self.size, 1, dtype=torch.int64
            )

        # select all context across all neurons in layer
        current_selected_weights = self._weights[
            torch.arange(self.num_classes).reshape(-1, 1, 1),
            torch.arange(self.size).reshape(1, -1, 1),
            current_context_indices, :]

        if logit.ndim == 2:
            logit = torch.unsqueeze(logit, dim=-1)

        # permute replaces Tensor.T, which is deprecated for ndim != 2
        output_logits = torch.clamp(
            torch.matmul(current_selected_weights,
                         torch.unsqueeze(logit.permute(2, 1, 0),
                                         dim=-3)).diagonal(dim1=-2, dim2=-1),
            min=slogit(self.pred_clipping),
            max=slogit(1 - self.pred_clipping)).permute(2, 1, 0)

        if target is not None:
            sigmoids = torch.sigmoid(output_logits)
            # compute update
            diff = sigmoids - torch.unsqueeze(target, dim=1)
            update_values = self.learning_rate.value * torch.unsqueeze(
                diff, dim=-1) * torch.unsqueeze(logit.permute(0, 2, 1), dim=1)
            self._weights[torch.arange(self.num_classes).reshape(-1, 1, 1),
                          torch.arange(self.size).reshape(1, -1, 1),
                          current_context_indices, :] = torch.clamp(
                              current_selected_weights -
                              update_values.permute(2, 1, 0, 3),
                              -self.weight_clipping, self.weight_clipping)

        if self.bias is not None:
            bias_append = torch.cat([self.bias] * output_logits.shape[0],
                                    dim=0)
            output_logits = torch.cat([bias_append, output_logits], dim=1)

        return output_logits
Example #5
    def predict(self,
                input: np.ndarray,
                target: Optional[np.ndarray] = None,
                return_probs: bool = False) -> np.ndarray:
        """
        Predict the class for the given inputs, and optionally update the weights.

        Args:
            input (np.array[B, N]): Batch of B N-dim float input vectors.
            target (np.array[B]): Optional batch of B target class labels (bool, or int if
                num_classes given) which, if given, triggers an online update.
            return_probs (bool): Whether to return the classification probability (for each
                one-vs-all classifier if num_classes given) instead of the class.

        Returns:
            Predicted class per input instance (bool, or int if num_classes given),
            or classification probabilities if return_probs set.
        """
        if input.ndim == 1:
            input = np.expand_dims(input, axis=0)

        # Base predictions
        base_preds = self.base_predictor(input)
        base_preds = np.asarray(base_preds, dtype=float)

        # Context
        context = np.asarray(input, dtype=float)

        # Target
        if target is not None:
            target = label_binarize(target,
                                    classes=list(range(self.num_classes)))

        # Base logits
        base_preds = np.clip(base_preds,
                             a_min=self.pred_clipping,
                             a_max=(1.0 - self.pred_clipping))
        logits = slogit(base_preds)
        if self.bias:
            # introduce layer bias
            logits[:, 0] = self.base_bias

        # Layers
        for layer in self.layers:
            logits = layer.predict(logit=logits,
                                   context=context,
                                   target=target)

        # drop the final layer's singleton neuron axis; in the binary case
        # there is a single one-vs-all classifier, so drop that axis too
        logits = np.squeeze(logits, axis=1)
        if self.num_classes == 2:
            logits = np.squeeze(logits, axis=1)

        if return_probs:
            return sigmoid(logits)
        elif self.num_classes == 2:
            return logits > 0
        else:
            return np.argmax(logits, axis=1)
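
A hedged usage sketch for the predict interface documented in Example #5, assuming the surrounding class is named GLN and takes the constructor arguments shown in Example #8:

    import numpy as np

    # hypothetical construction; argument names follow Example #8
    model = GLN(layer_sizes=[16, 16], input_size=10, num_classes=2)

    X = np.random.normal(size=(4, 10))
    y = np.array([0, 1, 1, 0])

    model.predict(X, target=y)                   # online update step
    probs = model.predict(X, return_probs=True)  # probabilities
    preds = model.predict(X)                     # hard class predictions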
Example #6
    def predict(self, logit, context, target=None):
        distances = np.matmul(self._context_maps, context.T)
        mapped_context_binary = (distances > self._context_bias).astype(int)
        current_context_indices = np.sum(mapped_context_binary *
                                         self._boolean_converter,
                                         axis=-2)
        current_selected_weights = np.take_along_axis(
            self._weights,
            indices=np.expand_dims(current_context_indices, axis=-1),
            axis=2)

        if logit.ndim == 2:
            logit = np.expand_dims(logit, axis=-1)

        output_logits = np.clip(
            np.matmul(current_selected_weights,
                      np.expand_dims(logit.T, axis=-3)).diagonal(axis1=-2,
                                                                 axis2=-1),
            slogit(self.pred_clipping), slogit(1 - self.pred_clipping)).T

        if target is not None:
            sigmoids = sigmoid(output_logits)
            diff = sigmoids - np.expand_dims(target, axis=1)
            updates = self.learning_rate.value * np.expand_dims(
                diff, axis=-1) * np.expand_dims(np.swapaxes(logit, -1, -2),
                                                axis=1)

            np.add.at(
                self._weights,
                (np.arange(self.num_classes).reshape(-1, 1, 1, 1),
                 np.arange(self.size).reshape(1, -1, 1, 1),
                 np.expand_dims(current_context_indices, axis=-1)),
                -np.expand_dims(np.transpose(updates, (2, 1, 0, 3)), axis=-2))
            self._weights = np.clip(self._weights, -self.weight_clipping,
                                    self.weight_clipping)

        if self.bias is not None:
            output_logits = np.concatenate(
                [np.vstack([self.bias] * output_logits.shape[0]),
                 output_logits], axis=1)

        return output_logits
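
Example #6 updates the weight table with np.add.at rather than fancy-index assignment so that duplicate context indices within a batch each contribute an update. The difference in miniature:

    import numpy as np

    idx = np.array([1, 1, 3])

    w = np.zeros(4)
    w[idx] += 1.0           # buffered: the duplicate index is applied once
    print(w)                # [0. 1. 0. 1.]

    w = np.zeros(4)
    np.add.at(w, idx, 1.0)  # unbuffered: duplicates accumulate
    print(w)                # [0. 2. 0. 1.]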
Example #7
    def __init__(self,
                 layer_sizes: Sequence[int],
                 input_size: int,
                 num_classes: int = 2,
                 context_map_size: int = 4,
                 bias: bool = True,
                 context_bias: bool = False,
                 base_predictor: Optional[
                     Callable[[np.ndarray], np.ndarray]] = None,
                 learning_rate: Union[float, DynamicParameter] = 1e-3,
                 pred_clipping: float = 1e-3,
                 weight_clipping: float = 5.0):

        nn.Module.__init__(self)
        GLNBase.__init__(self, layer_sizes, input_size, num_classes,
                         context_map_size, bias, context_bias, base_predictor,
                         learning_rate, pred_clipping, weight_clipping)

        # Initialize layers
        self.layers = nn.ModuleList()
        previous_size = self.base_pred_size

        if isinstance(learning_rate, float):
            self.learning_rate = ConstantParameter(learning_rate,
                                                   'learning_rate')
        elif isinstance(learning_rate, DynamicParameter):
            self.learning_rate = learning_rate
        else:
            raise ValueError('Invalid learning rate')

        if bias:
            self.base_bias = np.random.uniform(low=slogit(pred_clipping),
                                               high=slogit(1 - pred_clipping))
        for size in (self.layer_sizes + (1,)):
            layer = Linear(size, previous_size, self.input_size,
                           self.context_map_size, self.num_classes,
                           self.learning_rate, self.pred_clipping,
                           self.weight_clipping, self.bias, self.context_bias)
            self.layers.append(layer)
            previous_size = size

        if torch.cuda.is_available():
            self.cuda()
Example #8
    def __init__(self,
                 layer_sizes: Sequence[int],
                 input_size: int,
                 context_map_size: int = 4,
                 num_classes: int = 2,
                 base_predictor: Optional[
                     Callable[[np.ndarray], np.ndarray]] = None,
                 learning_rate: Union[DynamicParameter, float] = 1e-4,
                 pred_clipping: float = 1e-3,
                 weight_clipping: float = 5.0,
                 bias: bool = True,
                 context_bias: bool = True):

        super().__init__(layer_sizes, input_size, context_map_size,
                         num_classes, base_predictor, learning_rate,
                         pred_clipping, weight_clipping, bias, context_bias)

        # Initialize layers
        self.layers = list()
        previous_size = self.base_pred_size
        if bias:
            self.base_bias = np.random.uniform(low=slogit(pred_clipping),
                                               high=slogit(1 - pred_clipping))

        if isinstance(learning_rate, float):
            self.learning_rate = ConstantParameter(learning_rate,
                                                   'learning_rate')
        elif isinstance(learning_rate, DynamicParameter):
            self.learning_rate = learning_rate
        else:
            raise ValueError('Invalid learning rate')

        for size in self.layer_sizes:
            layer = Linear(size, previous_size, self.input_size,
                           self.context_map_size, self.num_classes,
                           self.learning_rate, self.pred_clipping,
                           self.weight_clipping, self.bias, self.context_bias)
            self.layers.append(layer)
            previous_size = size
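
Several examples consume the learning rate through a .value attribute, but DynamicParameter and ConstantParameter are not shown. A minimal sketch of the assumed interface:

    class DynamicParameter:
        # assumed base: exposes the current value of a (possibly
        # scheduled) scalar hyperparameter
        def __init__(self, name: str):
            self.name = name

        @property
        def value(self) -> float:
            raise NotImplementedError

    class ConstantParameter(DynamicParameter):
        # fixed value; matches the `.value` reads in Examples #4 and #6
        def __init__(self, constant_value: float, name: str):
            super().__init__(name)
            self.constant_value = constant_value

        @property
        def value(self) -> float:
            return self.constant_value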
Example #9
    def __init__(self,
                 size: int,
                 input_size: int,
                 context_size: int,
                 context_map_size: int,
                 num_classes: int,
                 learning_rate: DynamicParameter,
                 pred_clipping: float,
                 weight_clipping: float,
                 bias: bool,
                 context_bias: bool):
        super().__init__()

        assert size > 0 and input_size > 0 and context_size > 0
        assert context_map_size >= 0
        assert num_classes >= 2

        self.context_map_size = context_map_size
        self.num_classes = num_classes if num_classes > 2 else 1
        self.learning_rate = learning_rate
        # clipping value for predictions
        self.pred_clipping = pred_clipping
        # clipping value for weights of layer
        self.weight_clipping = weight_clipping

        if bias and size > 1:
            self.bias = torch.empty(
                (1, 1,
                 self.num_classes)).uniform_(slogit(self.pred_clipping),
                                             slogit(1 - self.pred_clipping))
            self.size = size - 1
        else:
            self.bias = None
            self.size = size

        if context_map_size > 0:
            self._context_maps = torch.as_tensor(
                np.random.normal(size=(self.num_classes, self.size,
                                       context_map_size, context_size)),
                dtype=torch.float32)

            # constant values for halfspace gating
            if context_bias:
                context_bias_shape = (self.num_classes, self.size,
                                      context_map_size, 1)
                self._context_bias = torch.tensor(
                    np.random.normal(size=context_bias_shape),
                    dtype=torch.float32)
                self._context_maps /= torch.norm(self._context_maps,
                                                 dim=-1,
                                                 keepdim=True)
            else:
                self._context_bias = torch.tensor(0.0)

        if self.bias is not None:
            self.bias = nn.Parameter(self.bias, requires_grad=False)

        if context_map_size > 0:
            self._context_maps = nn.Parameter(self._context_maps,
                                              requires_grad=False)
            self._context_bias = nn.Parameter(self._context_bias,
                                              requires_grad=False)

            # array to convert mapped_context_binary context to index
            self._boolean_converter = nn.Parameter(
                torch.as_tensor(
                    np.array([[2**i] for i in range(context_map_size)])),
                requires_grad=False)

        # weights for the whole layer
        weights_shape = (self.num_classes, self.size, 2**context_map_size,
                         input_size)
        self._weights = nn.Parameter(torch.full(size=weights_shape,
                                                fill_value=1 / input_size,
                                                dtype=torch.float32),
                                     requires_grad=False)
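
Note that the weight table in Example #9 grows exponentially in context_map_size. A quick parameter count under illustrative sizes:

    num_classes, size, context_map_size, input_size = 1, 32, 4, 32
    num_weights = num_classes * size * (2**context_map_size) * input_size
    print(num_weights)  # 16384 weights for one 32-neuron binary layer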