Example #1
import numpy as np


def L2_approx(data):
    N = data.shape[0]
    # Approximate L2 normalization: sum of per-slice squared norms,
    # scaled by 1 / N ** 2.
    L2_norm_slice = compute_L2_normalization(data) / N ** 2
    L2_norm_approx = np.sum(L2_norm_slice)
    # True L2 normalization of the mean descriptor.
    L2_norm_true = compute_L2_normalization(np.atleast_2d(np.mean(data, 0)))
    return L2_norm_true, L2_norm_approx
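A minimal sketch of how the two quantities compare, assuming compute_L2_normalization returns the squared L2 norm of each row (the helper below is a stand-in for illustration, not the repository's implementation). The approximation replaces the exact quantity ||mean(data)||^2 = ||sum_i x_i||^2 / N^2 with sum_i ||x_i||^2 / N^2, i.e. it drops the cross terms between slices.

def compute_L2_normalization(xx):
    # Stand-in helper: squared L2 norm of each row.
    return np.sum(np.atleast_2d(xx) ** 2, axis=1)

data = np.random.randn(100, 16)
norm_true, norm_approx = L2_approx(data)
print(norm_true, norm_approx)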
def get_tr_kernel(self, sstats_list):
    self.N_tr = sstats_list[0].reshape((-1, self.D)).shape[0]
    # Initialise train kernel.
    tr_kernel = np.zeros((self.N_tr, self.N_tr))
    # Initialise normalization constants.
    self.Zx = np.zeros(self.N_tr)
    for ii, sstats in enumerate(sstats_list):
        self._append_data(
            *standardize(FVModel.sstats_to_features(sstats, self.gmm)))
        self.xx[ii] = power_normalize(self.xx[ii], 0.5)
        self.Zx += compute_L2_normalization(self.xx[ii])
        tr_kernel += np.dot(self.xx[ii], self.xx[ii].T)
    # Normalize kernel.
    tr_kernel /= np.sqrt(
        self.Zx[:, np.newaxis] * self.Zx[np.newaxis])
    return tr_kernel
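The final division implements K[i, j] = <x_i, x_j> / sqrt(Zx[i] * Zx[j]), which is the same as L2-normalizing every Fisher vector before taking dot products. A quick self-contained check of that identity on random stand-in data (hypothetical shapes, just to illustrate):

xx = np.random.randn(5, 8)
Zx = np.sum(xx ** 2, axis=1)               # squared L2 norm per row
kernel = np.dot(xx, xx.T) / np.sqrt(Zx[:, np.newaxis] * Zx[np.newaxis])
xx_unit = xx / np.sqrt(Zx)[:, np.newaxis]  # explicit L2 normalization
assert np.allclose(kernel, np.dot(xx_unit, xx_unit.T))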
def get_te_kernel(self, sstats_list):
    self.N_te = sstats_list[0].reshape((-1, self.D)).shape[0]
    # Initialise test kernel.
    te_kernel = np.zeros((self.N_te, self.N_tr))
    # Initialise normalization constants.
    self.Zy = np.zeros(self.N_te)
    for ii, sstats in enumerate(sstats_list):
        yy = standardize(
            FVModel.sstats_to_features(sstats, self.gmm), self.mu[ii],
            self.sigma[ii])[0]
        yy = power_normalize(yy, 0.5)
        self.Zy += compute_L2_normalization(yy)
        te_kernel += np.dot(yy, self.xx[ii].T)
    # Normalize kernel.
    te_kernel /= np.sqrt(
        self.Zy[:, np.newaxis] * self.Zx[np.newaxis])
    return te_kernel
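get_te_kernel mirrors the train version: rows index test samples, columns index train samples, and each entry is rescaled by both sets of norms, K[i, j] = <y_i, x_j> / sqrt(Zy[i] * Zx[j]). A shape sketch on random stand-in data:

yy = np.random.randn(3, 8)   # test rows
xx = np.random.randn(5, 8)   # train rows
Zy = np.sum(yy ** 2, axis=1)
Zx = np.sum(xx ** 2, axis=1)
te_kernel = np.dot(yy, xx.T) / np.sqrt(Zy[:, np.newaxis] * Zx[np.newaxis])
assert te_kernel.shape == (3, 5)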
def exact_sliding_window(
    slice_data, clf, deltas, selector, scalers, sqrt_type='', l2_norm_type=''):

    results = []
    weights, bias = clf

    nr_descriptors_T = slice_data.nr_descriptors[:, np.newaxis]

    # Multiply by the number of descriptors.
    fisher_vectors = slice_data.fisher_vectors * nr_descriptors_T
    counts = slice_data.counts * nr_descriptors_T

    begin_frames, end_frames = slice_data.begin_frames, slice_data.end_frames
    N = fisher_vectors.shape[0]

    if selector.integral:
        fisher_vectors = integral(fisher_vectors)
        nr_descriptors_T = integral(nr_descriptors_T)

    if selector.integral and sqrt_type == 'approx':
        counts = integral(counts)

    for delta in deltas:

        # Build mask.
        mask = selector.get_mask(N, delta)
        begin_frame_idxs, end_frame_idxs = selector.get_frame_idxs(N, delta)

        # Aggregate data into bigger slices.
        agg_fisher_vectors = (
            sum_by(fisher_vectors, mask) /
            sum_by(nr_descriptors_T, mask))
        agg_fisher_vectors[np.isnan(agg_fisher_vectors)] = 0

        agg_begin_frames = begin_frames[begin_frame_idxs]
        agg_end_frames = end_frames[end_frame_idxs]

        assert len(agg_fisher_vectors) == len(agg_begin_frames) == len(agg_end_frames)

        # Normalize aggregated data.
        if scalers[0] is not None:
            agg_fisher_vectors = scalers[0].transform(agg_fisher_vectors)
        if sqrt_type == 'exact':
            agg_fisher_vectors = power_normalize(agg_fisher_vectors, 0.5)
        if sqrt_type == 'approx':
            agg_counts = (
                sum_by(counts, mask) /
                sum_by(nr_descriptors_T, mask))
            agg_fisher_vectors = approximate_signed_sqrt(
                agg_fisher_vectors, agg_counts, pi_derivatives=False)
        if scalers[1] is not None:
            agg_fisher_vectors = scalers[1].transform(agg_fisher_vectors)

        # It is more efficient to apply the L2 normalization to the
        # scores than to the Fisher vectors themselves.
        l2_norms = (
            compute_L2_normalization(agg_fisher_vectors)
            if l2_norm_type != 'none'
            else np.ones(len(agg_fisher_vectors)))

        # Predict with the linear classifier.
        scores = (
            - np.dot(agg_fisher_vectors, weights.T).squeeze()
            / np.sqrt(l2_norms)
            + bias)

        nan_idxs = np.isnan(scores)
        results += zip(
            agg_begin_frames[~nan_idxs],
            agg_end_frames[~nan_idxs],
            scores[~nan_idxs])

    return results
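The comment about applying L2 on the scores refers to the identity (w . x) / ||x|| = w . (x / ||x||): dividing the raw scores by the norms avoids materializing a second, normalized copy of the aggregated Fisher vectors. A small check of the identity on random data (hypothetical shapes):

fv = np.random.randn(4, 6)    # aggregated Fisher vectors
w = np.random.randn(1, 6)     # linear classifier weights
l2 = np.sum(fv ** 2, axis=1)  # squared L2 norms
scores_fast = np.dot(fv, w.T).squeeze() / np.sqrt(l2)
scores_slow = np.dot(fv / np.sqrt(l2)[:, np.newaxis], w.T).squeeze()
assert np.allclose(scores_fast, scores_slow)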