import numpy as np

# NOTE: helpers such as Scaler, power_normalize and L2_normalize are assumed
# to be provided by the project's own modules; numpy is the only external
# import added here.


def data_to_kernels(tr_data, te_data):
    # Standardize, signed-square-root and L2-normalize the train data.
    scaler = Scaler(copy=False)
    scaler.fit_transform(tr_data)
    tr_data = power_normalize(tr_data, 0.5)
    tr_data = L2_normalize(tr_data)

    # Apply the same normalizations to the test data, reusing the scaler
    # fitted on the train data.
    scaler.transform(te_data)
    te_data = power_normalize(te_data, 0.5)
    te_data = L2_normalize(te_data)

    # Linear kernels between the normalized Fisher vectors.
    tr_kernel = np.dot(tr_data, tr_data.T)
    te_kernel = np.dot(te_data, tr_data.T)

    return tr_kernel, te_kernel
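# Usage sketch (an illustration, not part of the original code): the kernels
# returned by data_to_kernels can be fed to a precomputed-kernel SVM. The
# label arrays `tr_labels` and `te_labels` are hypothetical placeholders.
from sklearn.svm import SVC


def _example_train_and_score(tr_kernel, tr_labels, te_kernel, te_labels):
    clf = SVC(kernel='precomputed')
    clf.fit(tr_kernel, tr_labels)            # tr_kernel is N_tr x N_tr.
    return clf.score(te_kernel, te_labels)   # te_kernel is N_te x N_tr.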
def evaluate_worker((cls, weight, bias, tr_scalers, slice_data, video_mask,
                     visual_word_mask, prediction_type, verbose)):
    if prediction_type == 'approx':
        slice_vw_counts = (
            slice_data.counts * slice_data.nr_descriptors[:, np.newaxis])
        slice_vw_l2_norms = visual_word_l2_norm(
            slice_data.fisher_vectors, visual_word_mask)
        slice_vw_scores = visual_word_scores(
            slice_data.fisher_vectors, weight, bias, visual_word_mask)
        predictions = approximate_video_scores(
            slice_vw_scores, slice_vw_counts, slice_vw_l2_norms,
            slice_data.nr_descriptors[:, np.newaxis], video_mask)
    elif prediction_type == 'exact':
        # Aggregate slice data into video data.
        video_data = (
            sum_by(slice_data.fisher_vectors, video_mask) /
            sum_by(slice_data.nr_descriptors, video_mask)[:, np.newaxis])
        # Apply exact normalization on the test video data.
        if tr_scalers[0] is not None:
            video_data = tr_scalers[0].transform(video_data)
        video_data = power_normalize(video_data, 0.5)
        if tr_scalers[1] is not None:
            video_data = tr_scalers[1].transform(video_data)
        video_data = exact_l2_normalize(video_data)
        # Apply linear classifier.
        predictions = np.sum(- video_data * weight, axis=1)
        predictions += bias
    if verbose > 1:
        print cls,
    return cls, predictions
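# The doubled parentheses in evaluate_worker's signature are Python 2 tuple
# parameter unpacking, which lets the function be mapped directly over
# argument tuples. A hedged usage sketch (not original code; the names are
# illustrative):
from multiprocessing import Pool


def _example_parallel_evaluate(args_list, nr_processes=4):
    # Each element of args_list is a 9-tuple matching evaluate_worker's
    # signature; the result is a mapping from class to predictions.
    pool = Pool(processes=nr_processes)
    results = pool.map(evaluate_worker, args_list)
    pool.close()
    pool.join()
    return dict(results)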
def square_root(data):
    # Nested helper: `sqrt_type`, `tr_video_counts` and `verbose` are free
    # variables taken from the enclosing scope.
    if sqrt_type == 'exact':
        return power_normalize(data, 0.5)
    elif sqrt_type == 'approx':
        return approximate_signed_sqrt(
            data, tr_video_counts, pi_derivatives=False, verbose=verbose)
    elif sqrt_type == 'none':
        return data
    else:
        assert False
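# For reference, a minimal sketch of the power_normalize helper used
# throughout, assuming the standard signed power normalization of Fisher
# vectors (the project's actual implementation may differ):
import numpy as np


def _power_normalize_sketch(data, alpha):
    # Signed power normalization: sign(x) * |x| ** alpha; alpha = 0.5 gives
    # the signed square root.
    return np.sign(data) * np.abs(data) ** alpha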
def double_normalization(filenames, sstats_in, sstats_out, N, len_sstats,
                         gmm):
    """The slices in each sample are converted to Fisher vectors,
    square-rooted, L2-normalized, and then aggregated together.

    Inputs
    ------
    filenames: list of str
        The names of the files to be aggregated. Usually, this should be
        the entire dataset, i.e. dataset.get_data('train')[0] +
        dataset.get_data('test')[0].

    sstats_in: SstatsMap instance
        The sufficient statistics that we operate on.

    sstats_out: SstatsMap instance
        The resulting sufficient statistics.

    N: int
        The number of slices that are aggregated together. If N is -1,
        all the slices in the clip are aggregated together.

    len_sstats: int
        The length of a typical sufficient statistics vector.

    gmm: yael.gmm object
        The Gaussian mixture model for the current sufficient statistics.

    """
    assert len_sstats == gmm.k + 2 * gmm.k * gmm.d, (
        "GMM and len_sstats don't match")

    for filename in filenames:
        if sstats_out.exists(filename):
            continue
        if not sstats_in.exists(filename):
            print 'Not found ' + filename
            continue
        if sstats_in.getsize(filename) == 0:
            print 'Not computed ' + filename
            continue

        sstats = sstats_in.read(filename).reshape((-1, len_sstats))
        info = sstats_in.read_info(filename)

        fv = FVModel.sstats_to_features(sstats, gmm)
        fv = power_normalize(fv, 0.5)
        fv = L2_normalize(fv)

        agg_sstats, agg_info = _aggregate(fv, info, N)
        sstats_out.write(filename, agg_sstats, info=agg_info)
def get_tr_kernel(self, sstats_list):
    self.N_tr = sstats_list[0].reshape((-1, self.D)).shape[0]

    # Initialise train kernel.
    tr_kernel = np.zeros((self.N_tr, self.N_tr))
    # Initialise normalization constants.
    self.Zx = np.zeros(self.N_tr)

    for ii, sstats in enumerate(sstats_list):
        self._append_data(
            *standardize(FVModel.sstats_to_features(sstats, self.gmm)))
        self.xx[ii] = power_normalize(self.xx[ii], 0.5)
        self.Zx += compute_L2_normalization(self.xx[ii])
        tr_kernel += np.dot(self.xx[ii], self.xx[ii].T)

    # Normalize kernel: the L2 normalization is applied once, at the end,
    # on the accumulated kernel, rather than on each chunk of data.
    tr_kernel /= np.sqrt(self.Zx[:, np.newaxis] * self.Zx[np.newaxis])

    return tr_kernel
def get_te_kernel(self, sstats_list):
    self.N_te = sstats_list[0].reshape((-1, self.D)).shape[0]

    # Initialise test kernel.
    te_kernel = np.zeros((self.N_te, self.N_tr))
    # Initialise normalization constants.
    self.Zy = np.zeros(self.N_te)

    for ii, sstats in enumerate(sstats_list):
        # Standardize the test data with the statistics of the train data.
        yy = standardize(
            FVModel.sstats_to_features(sstats, self.gmm),
            self.mu[ii], self.sigma[ii])[0]
        yy = power_normalize(yy, 0.5)
        self.Zy += compute_L2_normalization(yy)
        te_kernel += np.dot(yy, self.xx[ii].T)

    # Normalize kernel.
    te_kernel /= np.sqrt(self.Zy[:, np.newaxis] * self.Zx[np.newaxis])

    return te_kernel
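# Sanity check (illustrative, not original code) of the late-normalization
# identity used by get_tr_kernel and get_te_kernel: dividing the raw Gram
# matrix by sqrt(Zx_i * Zx_j), where Z holds the squared L2 norms, equals
# the Gram matrix of the explicitly L2-normalized vectors.
import numpy as np


def _check_late_l2_normalization(seed=0):
    rng = np.random.RandomState(seed)
    xx = rng.randn(5, 8)
    Zx = np.sum(xx ** 2, axis=1)  # Squared L2 norms.
    kernel = np.dot(xx, xx.T)
    kernel /= np.sqrt(Zx[:, np.newaxis] * Zx[np.newaxis])
    xx_normalized = xx / np.sqrt(Zx)[:, np.newaxis]
    assert np.allclose(kernel, np.dot(xx_normalized, xx_normalized.T))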
def exact_sliding_window(
        slice_data, clf, deltas, selector, scalers, sqrt_type='',
        l2_norm_type=''):

    results = []
    weights, bias = clf

    nr_descriptors_T = slice_data.nr_descriptors[:, np.newaxis]

    # Multiply by the number of descriptors.
    fisher_vectors = slice_data.fisher_vectors * nr_descriptors_T
    counts = slice_data.counts * nr_descriptors_T

    begin_frames, end_frames = slice_data.begin_frames, slice_data.end_frames
    N = fisher_vectors.shape[0]

    if selector.integral:
        fisher_vectors = integral(fisher_vectors)
        nr_descriptors_T = integral(nr_descriptors_T)

    if selector.integral and sqrt_type == 'approx':
        counts = integral(counts)

    for delta in deltas:
        # Build mask.
        mask = selector.get_mask(N, delta)
        begin_frame_idxs, end_frame_idxs = selector.get_frame_idxs(N, delta)

        # Aggregate data into bigger slices.
        agg_fisher_vectors = (
            sum_by(fisher_vectors, mask) / sum_by(nr_descriptors_T, mask))
        agg_fisher_vectors[np.isnan(agg_fisher_vectors)] = 0

        agg_begin_frames = begin_frames[begin_frame_idxs]
        agg_end_frames = end_frames[end_frame_idxs]

        assert len(agg_fisher_vectors) == len(agg_begin_frames) == len(
            agg_end_frames)

        # Normalize aggregated data.
        if scalers[0] is not None:
            agg_fisher_vectors = scalers[0].transform(agg_fisher_vectors)
        if sqrt_type == 'exact':
            agg_fisher_vectors = power_normalize(agg_fisher_vectors, 0.5)
        if sqrt_type == 'approx':
            agg_counts = (
                sum_by(counts, mask) / sum_by(nr_descriptors_T, mask))
            agg_fisher_vectors = approximate_signed_sqrt(
                agg_fisher_vectors, agg_counts, pi_derivatives=False)
        if scalers[1] is not None:
            agg_fisher_vectors = scalers[1].transform(agg_fisher_vectors)

        # It is more efficient to apply the L2 normalization on the scores
        # than on the Fisher vectors.
        l2_norms = (
            compute_L2_normalization(agg_fisher_vectors)
            if l2_norm_type != 'none'
            else np.ones(len(agg_fisher_vectors)))

        # Predict with the linear classifier.
        scores = (
            - np.dot(agg_fisher_vectors, weights.T).squeeze()
            / np.sqrt(l2_norms) + bias)

        nan_idxs = np.isnan(scores)
        results += zip(
            agg_begin_frames[~nan_idxs], agg_end_frames[~nan_idxs],
            scores[~nan_idxs])

    return results
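# Hedged sketch of the sum_by helper assumed above (the project's real
# implementation may differ, e.g. it may use np.bincount for speed): it sums
# the rows of `data` that share the same group id in `mask`.
import numpy as np


def _sum_by_sketch(data, mask):
    groups = np.unique(mask)  # Groups come out sorted by their ids.
    return np.array([data[mask == g].sum(axis=0) for g in groups])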
def load_kernels(
        dataset, tr_norms=['std', 'sqrt', 'L2'],
        te_norms=['std', 'sqrt', 'L2'], analytical_fim=False,
        pi_derivatives=False, sqrt_nr_descs=False, only_train=False,
        verbose=0, do_plot=False, outfile=None):

    tr_outfile = outfile % "train" if outfile is not None else outfile

    # Load sufficient statistics.
    samples, _ = dataset.get_data('train')
    tr_data, tr_counts, tr_labels = load_video_data(
        dataset, samples, outfile=tr_outfile, analytical_fim=analytical_fim,
        pi_derivatives=pi_derivatives, sqrt_nr_descs=sqrt_nr_descs,
        verbose=verbose)

    if verbose > 0:
        print "Train data: %dx%d" % tr_data.shape

    if do_plot:
        plot_fisher_vector(tr_data[0], 'before')

    # Normalize the train data, remembering the fitted scalers so the same
    # transformation can be applied to the test data.
    scalers = []
    for norm in tr_norms:
        if norm == 'std':
            scaler = Scaler()
            tr_data = scaler.fit_transform(tr_data)
            scalers.append(scaler)
        elif norm == 'sqrt':
            tr_data = power_normalize(tr_data, 0.5)
        elif norm == 'sqrt_cnt':
            tr_data = approximate_signed_sqrt(
                tr_data, tr_counts, pi_derivatives=pi_derivatives)
        elif norm == 'L2':
            tr_data = L2_normalize(tr_data)
        if do_plot:
            plot_fisher_vector(tr_data[0], 'after_%s' % norm)

    tr_kernel = np.dot(tr_data, tr_data.T)

    if only_train:
        return tr_kernel, tr_labels, scalers, tr_data

    te_outfile = outfile % "test" if outfile is not None else outfile

    # Load sufficient statistics.
    samples, _ = dataset.get_data('test')
    te_data, te_counts, te_labels = load_video_data(
        dataset, samples, outfile=te_outfile, analytical_fim=analytical_fim,
        pi_derivatives=pi_derivatives, sqrt_nr_descs=sqrt_nr_descs,
        verbose=verbose)

    if verbose > 0:
        print "Test data: %dx%d" % te_data.shape

    # Normalize the test data with the scalers fitted on the train data.
    ii = 0
    for norm in te_norms:
        if norm == 'std':
            te_data = scalers[ii].transform(te_data)
            ii += 1
        elif norm == 'sqrt':
            te_data = power_normalize(te_data, 0.5)
        elif norm == 'sqrt_cnt':
            te_data = approximate_signed_sqrt(
                te_data, te_counts, pi_derivatives=pi_derivatives)
        elif norm == 'L2':
            te_data = L2_normalize(te_data)

    te_kernel = np.dot(te_data, tr_data.T)

    return tr_kernel, tr_labels, te_kernel, te_labels
def load_kernels_all(
        src_cfg, e_std_1, sqrt, e_std_2, l2_norm, afim,
        nr_slices_to_aggregate=None, verbose=0):

    dataset = Dataset(
        CFG[src_cfg]['dataset_name'], **CFG[src_cfg]['dataset_params'])
    spms = CFG[src_cfg].get('spms', [(1, -1, -1)])  # FIXME Hack.
    encodings = CFG[src_cfg].get('encodings', ['fv'])

    class DummyScaler(object):
        """No-op stand-in used when standardization is disabled."""
        def fit_transform(self, X):
            return X

        def transform(self, X):
            return X

    def get_slice(bin, spm, D):
        N_bins = np.prod(spm)
        return slice(D / N_bins * bin, D / N_bins * (bin + 1))

    def loader(split, spm=None, encoding=None, bin=None):
        """Loads sufficient statistics."""
        samples, _ = dataset.get_data(split)
        N = len(set(map(str, samples)))
        outfile = OUTFILE % (
            src_cfg, split, afim, '_' + encoding, ''.join(map(str, spm)))
        data, counts, labels = load_video_data(
            dataset, samples, outfile=outfile, analytical_fim=afim,
            pi_derivatives=PI_DERIVATIVES, sqrt_nr_descs=SQRT_NR_DESCS,
            encoding=encoding, spm=spm, verbose=verbose)

        _, D_data = data.shape
        _, D_counts = counts.shape

        I_data = get_slice(bin, spm, D_data)
        I_counts = get_slice(bin, spm, D_counts)

        return data[:, I_data], counts[:, I_counts], labels

    def sample_counter(split):
        samples, _ = dataset.get_data(split)
        return len(set(map(str, samples)))

    def compute_approx_l2_normalization(
            data, split, scalers, counts, spm=None, encoding=None, bin=None):

        if sqrt == 'none':
            counts = np.ones(counts.shape)

        # Prepare cached filename.
        outfile = OUTFILE % (
            src_cfg, split, afim, '_' + encoding, ''.join(map(str, spm)))
        suffix = "norm_slices_%d_scalers_%s_%s" % (
            nr_slices_to_aggregate, e_std_1, e_std_2)
        norm_filename = outfile + '.' + suffix

        samples, _ = dataset.get_data(split)
        video_l2_norms = load_corrected_norms(
            dataset, samples, nr_slices_to_aggregate, analytical_fim=afim,
            scalers=scalers, spm=spm, encoding=encoding, verbose=verbose,
            outfile=norm_filename)[0]

        I = get_slice(bin, spm, video_l2_norms.shape[1])
        return compute_approx_l2_normalization_(video_l2_norms[:, I], counts)

    # Dispatch tables mapping the 'exact' / 'approx' / 'none' options to the
    # corresponding normalization routines.
    SQUARE_ROOT_TABLE = {
        'exact': lambda data, **kwargs: power_normalize(data, 0.5),
        'approx': lambda data, counts: approximate_signed_sqrt(
            data, counts, pi_derivatives=PI_DERIVATIVES),
        'none': lambda data, **kwargs: data,
    }

    COMPUTE_L2_NORM_TABLE = {
        'exact': lambda data, **kwargs: compute_exact_l2_normalization(data),
        'approx': compute_approx_l2_normalization,
        'none': lambda data, **kwargs: np.ones(
            data.shape[0], dtype=np.float32),
    }

    def get_scaler(flag):
        return StandardScaler(with_mean=False) if flag else DummyScaler()

    normalizations = {
        'scaler_1': get_scaler(e_std_1),
        'square_root': SQUARE_ROOT_TABLE[sqrt],
        'scaler_2': get_scaler(e_std_2),
        'compute_l2_norm': COMPUTE_L2_NORM_TABLE[l2_norm],
    }

    return load_kernels_l2_norm_enc(
        sample_counter, loader, normalizations, spms, encodings)