Example #1
    def compute_output_projection_per_sequence(
        self,
        # [flock_size, n_frequent_seqs, seq_length]
        frequent_seqs: torch.Tensor,
        outputs: torch.Tensor,
    ):
        """Compute output projection for multiple sequences

        Returns:
            Tensor [flock_size, n_frequent_seqs, n_cluster_centers]
        """
        # Convert each cluster center id to a set of one-hot vectors corresponding to the cluster in
        # the cluster center space.
        # [flock_size, n_frequent_seqs, seq_length, n_cluster_centers]
        frequent_seqs_unrolled = safe_id_to_one_hot(frequent_seqs,
                                                    self.n_cluster_centers,
                                                    self._float_dtype)

        # Scale the one hot vectors and sum the individual parts of a cluster probability vector.
        # [flock_size, n_frequent_seqs, n_cluster_centers]
        torch.sum(frequent_seqs_unrolled * self._output_prob_scaling,
                  dim=2,
                  out=outputs)

        # Normalize.
        normalize_probs_(outputs, 2)
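
The shape flow of this method can be reproduced with plain PyTorch. In the sketch below, safe_id_to_one_hot and normalize_probs_ are replaced by stand-ins built on torch.nn.functional.one_hot and a sum-division (ignoring whatever invalid-id handling the real helpers provide), and _output_prob_scaling is an assumed per-position weight:

import torch
import torch.nn.functional as F

flock_size, n_frequent_seqs, seq_length, n_cluster_centers = 2, 3, 4, 5

# Frequent sequences as cluster-center ids.
frequent_seqs = torch.randint(0, n_cluster_centers,
                              (flock_size, n_frequent_seqs, seq_length))

# Stand-in for safe_id_to_one_hot.
# [flock_size, n_frequent_seqs, seq_length, n_cluster_centers]
unrolled = F.one_hot(frequent_seqs, n_cluster_centers).float()

# Assumed stand-in for self._output_prob_scaling: one positive weight per
# sequence position, shaped for broadcasting.
scaling = torch.linspace(0.1, 1.0, seq_length).view(1, 1, seq_length, 1)

# Scale the one-hot vectors and sum over the sequence dimension.
# [flock_size, n_frequent_seqs, n_cluster_centers]
outputs = (unrolled * scaling).sum(dim=2)

# Stand-in for normalize_probs_(outputs, 2).
outputs /= outputs.sum(dim=2, keepdim=True)
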
Example #2

    def _compute_predicted_clusters(self,
                                    frequent_seqs: torch.Tensor,
                                    seq_likelihoods: torch.Tensor,
                                    predicted_clusters: torch.Tensor,
                                    cluster_rewards: torch.Tensor = None):
        """Calculate the past, current and predicted cluster probabilities based on sequence likelihoods."""
        # Convert each cluster center id to a set of one-hot vectors corresponding to the cluster in
        # the cluster center space.
        # [flock_size, n_frequent_seqs, seq_length, n_cluster_centers]
        frequent_seqs_unrolled = safe_id_to_one_hot(frequent_seqs,
                                                    self.n_cluster_centers,
                                                    self._float_dtype)

        # Expand the sequence likelihoods so we can multiply the unrolled
        # frequent_seqs with them.
        seq_likelihoods_expanded = seq_likelihoods.view(
            self._flock_size, self.n_frequent_seqs, 1, 1)
        seq_likelihoods_expanded = seq_likelihoods_expanded.expand(
            self._flock_size, self.n_frequent_seqs, self.seq_length, 1)

        # Obtain the cluster likelihoods by multiplying by the expanded
        # sequence likelihoods and summing over the sequences.
        cluster_likelihoods = (frequent_seqs_unrolled *
                               seq_likelihoods_expanded).sum(1)
        if cluster_rewards is not None:
            cluster_rewards.copy_(cluster_likelihoods[:,
                                                      self.seq_lookbehind, :])

        # Normalize.
        normalize_probs_(cluster_likelihoods, 2)

        # Copy the normalized likelihoods into predicted_clusters.
        predicted_clusters.copy_(cluster_likelihoods)
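
How the sequence likelihoods turn into per-position cluster distributions can be illustrated with made-up numbers; the helpers are again replaced by plain-PyTorch stand-ins:

import torch
import torch.nn.functional as F

flock_size, n_frequent_seqs, seq_length, n_cluster_centers = 1, 2, 3, 4

# Two frequent sequences of cluster ids with likelihoods 0.75 and 0.25.
frequent_seqs = torch.tensor([[[0, 1, 2], [1, 2, 3]]])
seq_likelihoods = torch.tensor([[0.75, 0.25]])

unrolled = F.one_hot(frequent_seqs, n_cluster_centers).float()
expanded = seq_likelihoods.view(flock_size, n_frequent_seqs, 1, 1)

# [flock_size, seq_length, n_cluster_centers]: one cluster distribution per
# sequence position (the past, current and predicted steps).
cluster_likelihoods = (unrolled * expanded).sum(dim=1)
cluster_likelihoods /= cluster_likelihoods.sum(dim=2, keepdim=True)

# Position 0 mixes cluster 0 (weight 0.75) and cluster 1 (weight 0.25).
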
Example #3
    def inverse_projection(self,
                           data: torch.Tensor,
                           n_top_sequences: int = 1) -> torch.Tensor:
        """Calculates the inverse projection for the given output tensor.

        Output projection is computed for all frequent_seq, top n_top_sequences best matching are aggregated and
        projected to SP input space.

        Args:
            data: Tensor matching the shape of projection_output (flock_size, n_cluster_centers).
            n_top_sequences: Number of top sequences to aggregate
        """
        if data.shape != self.projection_outputs.shape:
            raise IllegalArgumentException(
                f"The provided tensor {list(data.shape)} doesn't match "
                f"the shape of projection_outputs {list(self.projection_outputs.shape)}"
            )

        # Compute the output projection of each sequence from frequent_seqs.
        # [flock_size, n_frequent_seqs, n_cluster_centers]
        projection_outputs = torch.empty(
            (self.flock_size, self.n_frequent_seqs, self.n_cluster_centers),
            dtype=data.dtype,
            device=self._device)
        tp_output_projection = TPOutputProjection(
            self.flock_size, self.n_frequent_seqs, self.n_cluster_centers,
            self.seq_length, self.seq_lookahead, self._device)
        tp_output_projection.compute_output_projection_per_sequence(
            self.frequent_seqs, projection_outputs)

        # Compute similarities with input data
        # [flock_size, n_frequent_seqs]
        similarities = tp_output_projection.compute_similarity(
            data, projection_outputs)

        # Scale similarities by seq likelihood
        similarities.mul_(
            self.frequent_seq_likelihoods_priors_clusters_context)

        # Keep only the indices of the n_top_sequences best-matching sequences.
        # [flock_size, n_top_sequences]
        sorted_idxs = similarities.sort(dim=1, descending=True)[1][:, :n_top_sequences]
        # [flock_size, n_top_sequences, seq_length]
        indices = sorted_idxs.unsqueeze(-1).expand(
            (self.flock_size, n_top_sequences, self.seq_length))
        # [flock_size, n_top_sequences, seq_length]
        matched_sequences = torch.gather(self.frequent_seqs, 1, indices)

        # Convert the sequences to the SP output space (one-hot representation).
        # [flock_size, n_top_sequences * seq_length, n_cluster_centers]
        one_hots_per_flock = safe_id_to_one_hot(
            matched_sequences.view((self.flock_size, -1)),
            self.n_cluster_centers)

        # Final aggregation of sequences - just sum and normalize
        # [flock_size, n_cluster_centers]
        summed = one_hots_per_flock.sum(dim=1)
        normalize_probs_(summed, dim=1)
        return summed
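
The sort/expand/gather idiom used above to pull out whole sequences by index can be isolated into a self-contained sketch (all sizes made up):

import torch

flock_size, n_frequent_seqs, seq_length, n_top_sequences = 2, 4, 3, 2

similarities = torch.rand(flock_size, n_frequent_seqs)
frequent_seqs = torch.randint(0, 5, (flock_size, n_frequent_seqs, seq_length))

# Indices of the n_top_sequences best-matching sequences per flock member.
sorted_idxs = similarities.sort(dim=1, descending=True)[1][:, :n_top_sequences]

# Expand the indices over the sequence dimension so that gather selects
# whole [seq_length] rows rather than single elements.
indices = sorted_idxs.unsqueeze(-1).expand(flock_size, n_top_sequences,
                                           seq_length)
matched_sequences = torch.gather(frequent_seqs, 1, indices)

Note that torch.topk(similarities, n_top_sequences, dim=1) would yield the same indices more directly; the sort-then-slice form used in inverse_projection keeps the full ordering around.
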
Example #4

    @staticmethod
    def _compute_seq_probs_without_priors(
            seq_likelihoods_priors_clusters_context: torch.Tensor,
            frequent_seq_occurrences: torch.Tensor,
            seq_probs_clusters_context: torch.Tensor):
        # Compute the sequence probabilities without the priors
        torch.div(input=seq_likelihoods_priors_clusters_context,
                  other=frequent_seq_occurrences,
                  out=seq_probs_clusters_context)

        # ... and get rid of any nans (invalid freq_seqs have 0 occurrences)
        seq_probs_clusters_context.masked_fill_(
            torch.isnan(seq_probs_clusters_context), 0)

        # Normalize to get probabilities.
        normalize_probs_(seq_probs_clusters_context, dim=1)
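
The divide-then-mask pattern is easy to verify in isolation (normalize_probs_ is approximated by a plain sum-division):

import torch

# Dividing likelihoods by occurrence counts yields NaN wherever a frequent
# sequence slot is unused (0 occurrences); masking those entries keeps the
# subsequent normalization well defined.
likelihoods = torch.tensor([[3.0, 1.0, 0.0]])
occurrences = torch.tensor([[2.0, 1.0, 0.0]])

probs = likelihoods / occurrences          # [[1.5, 1.0, nan]]
probs.masked_fill_(torch.isnan(probs), 0)  # [[1.5, 1.0, 0.0]]
probs /= probs.sum(dim=1, keepdim=True)    # [[0.6, 0.4, 0.0]]
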
Example #5

    def _compute_best_matching_context(
            self, cluster_data: torch.Tensor,
            predicted_clusters_by_context: torch.Tensor,
            best_matching_context: torch.Tensor):
        """
        Args:
            cluster_data: [flock_size, n_cluster_centers]
            predicted_clusters_by_context: [flock_size, n_providers, context_size, n_cluster_centers]
            best_matching_context: [flock_size, n_providers, context_size]
        """
        input_data_expanded = multi_unsqueeze(cluster_data, [1, 2]).expand(
            predicted_clusters_by_context.shape)
        multiplied = input_data_expanded * predicted_clusters_by_context
        summed = multiplied.sum(dim=3)
        normalize_probs_(summed, dim=2, add_constant=True)
        best_matching_context.copy_(summed)
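
The same computation with multi_unsqueeze replaced by two plain unsqueezes, and normalize_probs_ approximated by an epsilon-guarded sum-division (the epsilon stands in for whatever add_constant=True does in the real helper):

import torch

flock_size, n_providers, context_size, n_cluster_centers = 1, 1, 2, 3

cluster_data = torch.tensor([[0.2, 0.8, 0.0]])
predicted = torch.rand(flock_size, n_providers, context_size,
                       n_cluster_centers)

# Stand-in for multi_unsqueeze(cluster_data, [1, 2]).
expanded = cluster_data.unsqueeze(1).unsqueeze(2).expand(predicted.shape)

# Dot product of the observed cluster distribution with each context's
# prediction, then normalization across the contexts of each provider.
summed = (expanded * predicted).sum(dim=3)
summed = summed + 1e-8
summed /= summed.sum(dim=2, keepdim=True)
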
Example #6
    def compute_output_projection(
        self,
        # [flock_size, n_frequent_seqs, seq_length]
        frequent_seqs: torch.Tensor,
        seq_likelihoods: torch.Tensor,
        outputs: torch.Tensor,
    ):
        """Transfer the sequence likelihoods into the cluster space with more weight on clusters near in time.

        Compute output projection for each sequence and aggregate (sum and normalize) all sequences per flock.

        Returns:
            Tensor [flock_size, n_cluster_centers]

        """

        # Convert each cluster center id to a set of one-hot vectors corresponding to the cluster in
        # the cluster center space.
        # [flock_size, n_frequent_seqs, seq_length, n_cluster_centers]
        frequent_seqs_unrolled = safe_id_to_one_hot(frequent_seqs,
                                                    self.n_cluster_centers,
                                                    self._float_dtype)

        # Scale the one hot vectors and sum the individual parts of a cluster probability vector.
        # [flock_size, n_frequent_seqs, n_cluster_centers]
        torch.sum(frequent_seqs_unrolled * self._output_prob_scaling,
                  dim=2,
                  out=self.frequent_seqs_scaled)

        seq_likelihoods_expanded = seq_likelihoods.unsqueeze(-1).expand(
            self._flock_size, self.n_frequent_seqs, self.n_cluster_centers)

        # Scale the individual sequence cluster probabilities by their likelihoods.
        # [flock_size, n_frequent_seqs, n_cluster_centers]
        self.frequent_seqs_scaled *= seq_likelihoods_expanded

        # Sum the cluster probabilities across all sequences in each flock.
        # [flock_size, n_cluster_centers]
        torch.sum(self.frequent_seqs_scaled, dim=1, out=outputs)

        # Normalize.
        normalize_probs_(outputs, 1)
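
The docstring's "weighting clusters nearer in time more heavily" suggests that _output_prob_scaling decays with distance from the current sequence position. Its actual definition is not shown in this excerpt; a purely hypothetical construction with exponential decay:

import torch

seq_length, seq_lookbehind = 4, 2  # assumed: current position = seq_lookbehind - 1

positions = torch.arange(seq_length)
distance = (positions - (seq_lookbehind - 1)).abs().float()

# Hypothetical exponential decay with distance from the current step,
# shaped for broadcasting against
# [flock_size, n_frequent_seqs, seq_length, n_cluster_centers].
output_prob_scaling = torch.pow(0.5, distance).view(1, 1, seq_length, 1)
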
Example #7
def test_forward_passive():
    device = 'cuda'
    float_dtype = get_float(device)
    flock_size = 2
    buffer_size = 6
    context_size = 2
    n_cluster_centers = 3
    n_frequent_seqs = 3
    n_providers = 1

    buffer = create_tp_buffer(flock_size=flock_size,
                              buffer_size=buffer_size,
                              n_cluster_centers=n_cluster_centers,
                              n_frequent_seqs=n_frequent_seqs,
                              context_size=context_size,
                              n_providers=n_providers,
                              device=device)

    # region setup tensors

    nan = FLOAT_NAN
    small_const = SMALL_CONSTANT

    buffer.clusters.stored_data = torch.tensor(
        [
            [
                [0, 1, 0],
                [1, 0, 0],  # current_ptr
                [0, 1, 0],
                [1, 0, 0],
                [0, 0, 1],
                [1, 0, 0]
            ],
            [
                [0, 1, 0],
                [0, 0, 1],
                [1, 0, 0],
                [0, 0, 1],  # current_ptr
                [nan, nan, nan],
                [nan, nan, nan]
            ]
        ],
        dtype=float_dtype,
        device=device)
    normalize_probs_(buffer.clusters.stored_data, dim=2, add_constant=True)

    buffer.contexts.stored_data = torch.tensor(
        [
            [
                [[1, 1]],
                [[0.1, 0.1]],  # current_ptr
                [[0, 1]],
                [[1, 0.5]],
                [[1, 0]],
                [[0, 0]]
            ],
            [
                [[1, 1]],
                [[1, 1]],
                [[0.9, 0]],
                [[1, 0]],  # current_ptr
                [[nan, nan]],
                [[nan, nan]]
            ]
        ],
        dtype=float_dtype,
        device=device)
    move_probs_towards_50_(buffer.contexts.stored_data)

    # 2 is used just for checking that nothing else changed.
    buffer.seq_probs.stored_data.fill_(2)
    buffer.outputs.stored_data.fill_(2)

    buffer.current_ptr = torch.tensor([1, 3], dtype=torch.int64, device=device)
    buffer.total_data_written = torch.tensor([8, 3],
                                             dtype=torch.int64,
                                             device=device)

    frequent_context_likelihoods = torch.tensor(
        [[[[4.5, 2.5], [1, 2.5], [3, 2.5]], [[4, 2], [0.1, 2], [3, 2]],
          [[0, 0], [0, 0], [0, 0]]],
         [[[0, 1.5], [1, 1.5], [2.9, 1.5]], [[0, 0], [0, 0], [0, 0]],
          [[0, 0], [0, 0], [0, 0]]]],
        dtype=float_dtype,
        device=device)
    add_small_constant_(frequent_context_likelihoods, small_const)

    cluster_data = torch.tensor([[0, 0, 1], [0.5, 0.5, 0]],
                                dtype=float_dtype,
                                device=device)

    # sequences:
    # [1, 0, 2],
    # [0, 2, 0 or 1]

    context_data = torch.tensor(
        [[[[0, 1], [0, 0], [0, 0]]], [[[1, 0.5], [0, 0], [0, 0]]]],
        dtype=float_dtype,
        device=device)

    # contexts:
    # [[1.0, 1.0], [0.1, 0.1], [0.0, 1.0]],
    # [[0.9, 0.0], [1.0, 0.0], [1.0, 0.5]]

    # Pre-fill the output tensors so that we can check that they were written to.
    projection_output = torch.full((flock_size, n_cluster_centers),
                                   fill_value=-2,
                                   dtype=float_dtype,
                                   device=device)
    action_output = torch.full((flock_size, n_cluster_centers),
                               fill_value=-2,
                               dtype=float_dtype,
                               device=device)

    # endregion

    process = create_tp_flock_untrained_forward_process(
        flock_size=flock_size,
        n_frequent_seqs=n_frequent_seqs,
        n_cluster_centers=n_cluster_centers,
        context_size=context_size,
        buffer=buffer,
        cluster_data=cluster_data,
        context_data=context_data,
        projection_outputs=projection_output,
        action_outputs=action_output,
        do_subflocking=True,
        device=device)

    process.run_and_integrate()

    # Expected values for the temporal pooler (TP) forward process.

    # region expected_values
    expected_buffer_current_ptr = torch.tensor([2, 4],
                                               dtype=torch.int64,
                                               device=device)
    expected_buffer_total_data_written = torch.tensor([9, 4],
                                                      dtype=torch.int64,
                                                      device=device)

    expected_buffer_clusters = torch.tensor(
        [
            [
                [0, 1, 0],
                [1, 0, 0],
                [0, 0, 1],  # current_ptr
                [1, 0, 0],
                [0, 0, 1],
                [1, 0, 0]
            ],
            [
                [0, 1, 0],
                [0, 0, 1],
                [1, 0, 0],
                [0, 0, 1],
                [0.5, 0.5, 0],  # current_ptr
                [nan, nan, nan]
            ]
        ],
        dtype=float_dtype,
        device=device)
    normalize_probs_(expected_buffer_clusters, dim=2, add_constant=True)

    expected_buffer_contexts = torch.tensor(
        [
            [
                [[1, 1]],
                [[0.1, 0.1]],
                [[0, 1]],  # current_ptr
                [[1, 0.5]],
                [[1, 0]],
                [[0, 0]]
            ],
            [
                [[1, 1]],
                [[1, 1]],
                [[0.9, 0]],
                [[1, 0]],
                [[1, 0.5]],  # current_ptr
                [[nan, nan]]
            ]
        ],
        dtype=float_dtype,
        device=device)
    move_probs_towards_50_(expected_buffer_contexts)

    # There are 3 frequent sequences; the untrained process writes zero probabilities for all of them at the current pointer.
    expected_buffer_seq_probs = torch.tensor(
        [
            [
                [2, 2, 2],
                [2, 2, 2],
                [0, 0, 0],  # current_ptr
                [2, 2, 2],
                [2, 2, 2],
                [2, 2, 2]
            ],
            [
                [2, 2, 2],
                [2, 2, 2],
                [2, 2, 2],
                [2, 2, 2],
                [0, 0, 0],  # current_ptr
                [2, 2, 2]
            ]
        ],
        dtype=float_dtype,
        device=device)

    # The untrained flock outputs a uniform distribution over the 3 cluster centers.
    fill_value = 1.0 / n_cluster_centers
    expected_buffer_outputs = torch.tensor(
        [
            [
                [2, 2, 2],
                [2, 2, 2],
                [fill_value, fill_value, fill_value],  # current_ptr
                [2, 2, 2],
                [2, 2, 2],
                [2, 2, 2]
            ],
            [
                [2, 2, 2],
                [2, 2, 2],
                [2, 2, 2],
                [2, 2, 2],
                [fill_value, fill_value, fill_value],  # current_ptr
                [2, 2, 2]
            ]
        ],
        dtype=float_dtype,
        device=device)

    expected_projection_output = torch.full((2, 3),
                                            fill_value=fill_value,
                                            dtype=float_dtype,
                                            device=device)
    expected_action_output = torch.full((2, 3),
                                        fill_value=fill_value,
                                        dtype=float_dtype,
                                        device=device)

    # endregion

    assert same(expected_projection_output, projection_output, eps=1e-4)
    assert same(expected_action_output, action_output, eps=1e-4)
    # Also verify what was stored into the buffer.
    assert same(expected_buffer_current_ptr, buffer.current_ptr)
    assert same(expected_buffer_total_data_written, buffer.total_data_written)
    assert same(expected_buffer_outputs, buffer.outputs.stored_data, eps=1e-4)
    assert same(expected_buffer_seq_probs,
                buffer.seq_probs.stored_data,
                eps=1e-4)
    assert same(expected_buffer_clusters,
                buffer.clusters.stored_data,
                eps=1e-4)
    assert same(expected_buffer_contexts,
                buffer.contexts.stored_data,
                eps=1e-4)