Exemple #1
0
def load_subsample_descriptors(dataset):
    """ Returns a NxD dimensional matrix representing a subsample of the
    descriptors. The subset files are either subset.siftgeo or subset.dat.
    Their location is derived from dataset.FEAT_DIR; the .siftgeo file takes
    precedence when both exist.

    Note: the temporal information is discarded.

    Parameters
    ----------
    dataset : object
        Must provide FEAT_DIR (directory holding the subset file) and, for
        the .dat format, FTYPE (feature type name).

    Returns
    -------
    descriptors : ndarray of float32, shape (N, D)

    Raises
    ------
    IOError
        If neither subset.siftgeo nor subset.dat exists in FEAT_DIR.

    """
    filename = os.path.join(dataset.FEAT_DIR, 'subset')
    siftgeo_path = filename + '.siftgeo'
    dat_path = filename + '.dat'
    if os.path.exists(siftgeo_path):
        from video_vwgeo import read_video_points_from_siftgeo
        siftgeos = read_video_points_from_siftgeo(siftgeo_path)
        # NOTE(review): assumes the file holds at least one point; the
        # descriptor length is taken from the first entry.
        N = len(siftgeos)
        D = len(siftgeos[0][1])
        # Put the descriptors in a numpy matrix.
        descriptors = np.zeros((N, D), dtype=np.float32)
        for ii, siftgeo in enumerate(siftgeos):
            descriptors[ii] = siftgeo[1]
    elif os.path.exists(dat_path):
        # TODO Make a function that given the FEAT_TYPE returns the descriptor
        # length.
        # Every row starts with `dims` extra columns, except for MFCC
        # features, which have none.
        dims = 0 if 'mfcc' in dataset.FTYPE else 3
        descs_len = get_descs_len(dataset.FTYPE) + dims
        descriptors = np.fromfile(
            dat_path, dtype=np.float32).reshape(-1, descs_len)
        # Discard temporal information.
        descriptors = descriptors[:, dims:]
    else:
        # Name both probed paths so the failure can be diagnosed.
        raise IOError(
            'No subsample descriptors found: neither %s nor %s exists.'
            % (siftgeo_path, dat_path))
    return descriptors
Exemple #2
0
def load_subsample_descriptors(dataset):
    """ Returns a NxD dimensional matrix representing a subsample of the
    descriptors. The subset files are either subset.siftgeo or subset.dat.
    Their location is derived from dataset.FEAT_DIR; the .siftgeo file takes
    precedence when both exist.

    Note: the temporal information is discarded.

    Parameters
    ----------
    dataset : object
        Must provide FEAT_DIR (directory holding the subset file) and, for
        the .dat format, FTYPE (feature type name).

    Returns
    -------
    descriptors : ndarray of float32, shape (N, D)

    Raises
    ------
    IOError
        If neither subset.siftgeo nor subset.dat exists in FEAT_DIR.

    """
    filename = os.path.join(dataset.FEAT_DIR, 'subset')
    siftgeo_path = filename + '.siftgeo'
    dat_path = filename + '.dat'
    if os.path.exists(siftgeo_path):
        from video_vwgeo import read_video_points_from_siftgeo
        siftgeos = read_video_points_from_siftgeo(siftgeo_path)
        # NOTE(review): assumes the file holds at least one point; the
        # descriptor length is taken from the first entry.
        N = len(siftgeos)
        D = len(siftgeos[0][1])
        # Put the descriptors in a numpy matrix.
        descriptors = np.zeros((N, D), dtype=np.float32)
        for ii, siftgeo in enumerate(siftgeos):
            descriptors[ii] = siftgeo[1]
    elif os.path.exists(dat_path):
        # TODO Make a function that given the FEAT_TYPE returns the descriptor
        # length.
        # Every row starts with `dims` extra columns, except for MFCC
        # features, which have none.
        dims = 0 if 'mfcc' in dataset.FTYPE else 3
        descs_len = get_descs_len(dataset.FTYPE) + dims
        descriptors = np.fromfile(
            dat_path, dtype=np.float32).reshape(-1, descs_len)
        # Discard temporal information.
        descriptors = descriptors[:, dims:]
    else:
        # Name both probed paths so the failure can be diagnosed.
        raise IOError(
            'No subsample descriptors found: neither %s nor %s exists.'
            % (siftgeo_path, dat_path))
    return descriptors
Exemple #3
0
    def compute_statistics_worker(self, samples, grid, pca, gmm):
        """ Worker function for computing the sufficient statistics.

        For each sample the points are read from its siftgeo file, split
        into bins by `self._bin_descriptors` according to `grid`, and the
        statistics of every bin are written to a separate file under
        `self.temp_path`. Files that already exist are left untouched.

        Parameters
        ----------
        samples : iterable
            Samples to process; each one is wrapped in `SampID` and
            substituted into `self.descriptor_filename`.
        grid : sequence of three ints
            Number of bins along x, y and t.
        pca, gmm :
            Passed through to `self._bin_descriptors` and to the model's
            statistics functions, respectively.

        """
        def store_if_missing(path, compute, nr_fallback):
            """ Writes the result of `compute()` to `path`, unless the file
            already exists.

            """
            if os.path.exists(path):
                return
            # Compute before opening the file: opening first would leave an
            # empty file behind whenever the computation fails, and that file
            # would be mistaken for a finished result on the next run.
            try:
                stats = compute()
            except ValueError:
                # The current window cell contains no descriptors.
                stats = np.zeros(nr_fallback, dtype=np.float32)
            # Binary mode, because raw array bytes are written.
            with open(path, 'wb') as ff:
                stats.tofile(ff)

        for sample in samples:
            sample_id = SampID(sample)
            # Prepare descriptors: select according to the grid and apply PCA.
            siftgeo = read_video_points_from_siftgeo(
                self.descriptor_filename % sample)

            # TODO Use function get_video_resolution from Dataset
            video_infos = get_video_infos(
                os.path.join(self.dataset.PREFIX, 'videos',
                             sample_id.movie + '.avi'))
            W, H = video_infos['img_size']
            # Filter descriptors into multiple bags according to their location.
            bag_xx, bag_ll = self._bin_descriptors(
                siftgeo, pca, grid, (W, H), (sample_id.bf, sample_id.ef))
            all_x = range(1, grid[0] + 1)
            all_y = range(1, grid[1] + 1)
            all_t = range(1, grid[2] + 1)
            for ii, cell in enumerate(product(all_x, all_y, all_t)):
                fn_args = (sample, grid[0], grid[1], grid[2], ii)
                # The lambdas below are invoked right away by the helper, so
                # they always see the current `cell`.
                if not self.exist_statistics:
                    store_if_missing(
                        os.path.join(self.temp_path, self.bare_fn % fn_args),
                        lambda: self.model._compute_statistics(
                            vstack(bag_xx[cell]), gmm),
                        self.K + 2 * self.nr_pca_comps * self.K)
                if (self.model.is_spatial_model and
                        not self.exist_spatial_statistics):
                    # Compute spatial descriptors.
                    store_if_missing(
                        os.path.join(
                            self.temp_path, self.spatial_bare_fn % fn_args),
                        lambda: self.model._compute_spatial_statistics(
                            vstack(bag_xx[cell]), vstack(bag_ll[cell]), gmm),
                        self.K + 2 * 3 * self.K)
    def test_read_descriptors_from_video(self):
        for params in self.parameters:
            infile, ip_type, nr_descriptors, begin_frames, end_frames, result_file = params
            siftgeo = []
            for chunk in read_descriptors_from_video(
                infile, nr_descriptors=nr_descriptors, ip_type=ip_type,
                begin_frames=begin_frames, end_frames=end_frames):

                if nr_descriptors == 1:
                    assert chunk.shape[0] == 1
                else:
                    assert chunk.shape[0] > 1

                siftgeo.append(chunk)

            result = np.vstack(siftgeo)
            expected_result = siftgeo_to_matrix(
                read_video_points_from_siftgeo(result_file))
            assert_allclose(result[:, 3:], expected_result[:, 3:], rtol=1e-05)
    def test_read_descriptors_from_video(self):
        for params in self.parameters:
            infile, ip_type, nr_descriptors, begin_frames, end_frames, result_file = params
            siftgeo = []
            for chunk in read_descriptors_from_video(
                    infile,
                    nr_descriptors=nr_descriptors,
                    ip_type=ip_type,
                    begin_frames=begin_frames,
                    end_frames=end_frames):

                if nr_descriptors == 1:
                    assert chunk.shape[0] == 1
                else:
                    assert chunk.shape[0] > 1

                siftgeo.append(chunk)

            result = np.vstack(siftgeo)
            expected_result = siftgeo_to_matrix(
                read_video_points_from_siftgeo(result_file))
            assert_allclose(result[:, 3:], expected_result[:, 3:], rtol=1e-05)
Exemple #6
0
    def compute_statistics_worker(self, samples, grid, pca, gmm):
        """ Worker function for computing the sufficient statistics.

        For each sample the points are read from its siftgeo file, split
        into bins by `self._bin_descriptors` according to `grid`, and the
        statistics of every bin are written to a separate file under
        `self.temp_path`. Files that already exist are left untouched.

        Parameters
        ----------
        samples : iterable
            Samples to process; each one is wrapped in `SampID` and
            substituted into `self.descriptor_filename`.
        grid : sequence of three ints
            Number of bins along x, y and t.
        pca, gmm :
            Passed through to `self._bin_descriptors` and to the model's
            statistics functions, respectively.

        """
        def store_if_missing(path, compute, nr_fallback):
            """ Writes the result of `compute()` to `path`, unless the file
            already exists.

            """
            if os.path.exists(path):
                return
            # Compute before opening the file: opening first would leave an
            # empty file behind whenever the computation fails, and that file
            # would be mistaken for a finished result on the next run.
            try:
                stats = compute()
            except ValueError:
                # The current window cell contains no descriptors.
                stats = np.zeros(nr_fallback, dtype=np.float32)
            # Binary mode, because raw array bytes are written.
            with open(path, 'wb') as ff:
                stats.tofile(ff)

        for sample in samples:
            sample_id = SampID(sample)
            # Prepare descriptors: select according to the grid and apply PCA.
            siftgeo = read_video_points_from_siftgeo(
                self.descriptor_filename % sample)

            # TODO Use function get_video_resolution from Dataset
            video_infos = get_video_infos(
                os.path.join(self.dataset.PREFIX, 'videos',
                             sample_id.movie + '.avi'))
            W, H = video_infos['img_size']
            # Filter descriptors into multiple bags according to their location.
            bag_xx, bag_ll = self._bin_descriptors(
                siftgeo, pca, grid, (W, H), (sample_id.bf, sample_id.ef))
            all_x = range(1, grid[0] + 1)
            all_y = range(1, grid[1] + 1)
            all_t = range(1, grid[2] + 1)
            for ii, cell in enumerate(product(all_x, all_y, all_t)):
                fn_args = (sample, grid[0], grid[1], grid[2], ii)
                # The lambdas below are invoked right away by the helper, so
                # they always see the current `cell`.
                if not self.exist_statistics:
                    store_if_missing(
                        os.path.join(self.temp_path, self.bare_fn % fn_args),
                        lambda: self.model._compute_statistics(
                            vstack(bag_xx[cell]), gmm),
                        self.K + 2 * self.nr_pca_comps * self.K)
                if (self.model.is_spatial_model and
                        not self.exist_spatial_statistics):
                    # Compute spatial descriptors.
                    store_if_missing(
                        os.path.join(
                            self.temp_path, self.spatial_bare_fn % fn_args),
                        lambda: self.model._compute_spatial_statistics(
                            vstack(bag_xx[cell]), vstack(bag_ll[cell]), gmm),
                        self.K + 2 * 3 * self.K)