Example #1
def unsup_images(data_view, trn, N):
    """
    Return a block of 
    """
    if trn == 'DevTrain':
        # -- extract training images, and put them into channel-major format
        imgs = larray.reindex(data_view.image_pixels,
                data_view.dev_train['lpathidx'][0, :N])[:]
        imgs = np.asarray(imgs)
        assert 'int' in str(imgs.dtype)
        foobar.append_ndarray_signature(imgs, 'unsup_images')
        foobar.append_trace('unsup_images N', N)
        return imgs.transpose(0, 3, 1, 2).copy()
    else:
        raise NotImplementedError()
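
The final transpose(0, 3, 1, 2) is what converts the loaded images from the
loader's channel-last (N, H, W, C) layout into the channel-major (N, C, H, W)
layout the comment refers to. A minimal standalone sketch of that conversion,
using plain NumPy and made-up dimensions:

import numpy as np

# four 32x32 RGB images in channel-last (N, H, W, C) layout
imgs = np.zeros((4, 32, 32, 3), dtype='uint8')

# reorder axes to channel-major (N, C, H, W); .copy() makes the
# result contiguous instead of a strided view
chan_major = imgs.transpose(0, 3, 1, 2).copy()
assert chan_major.shape == (4, 3, 32, 32)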
Example #2
def test_usage():
    np.random.seed(123)

    def load_rgb(pth):
        return pth + '_rgb'
    def load_grey(pth):
        return pth + '_grey'
    def to_64x64(img):
        return img + '_64x64'

    paths = ['a', 'b', 'c', 'd']  # imagine some huge list of image paths
    rgb_imgs = larray.lmap(load_rgb, paths)

    train_set = larray.reindex(rgb_imgs, np.random.permutation(len(paths))
                              ).loop()

    l10 = list(train_set[range(10)])
    print(l10)
    assert ['d', 'a', 'b', 'c'] == [l[0] for l in l10[:4]]
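
For readers new to larray: lmap defers load_rgb until an element is actually
indexed, reindex composes a permutation on top of that, and loop() wraps
out-of-range indices around the underlying length. A toy eager equivalent of
what train_set[range(10)] evaluates to in this test (an illustration of the
semantics, not how larray implements them):

import numpy as np

np.random.seed(123)
paths = ['a', 'b', 'c', 'd']
perm = np.random.permutation(len(paths))

# apply the permutation, wrap indices modulo len(paths) as loop() does,
# and only then pay the cost of "loading" each element
looped = [paths[perm[i % len(paths)]] + '_rgb' for i in range(10)]
print(looped)  # starts with 'd_rgb', 'a_rgb', 'b_rgb', 'c_rgb', per the assert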
Example #3
def test_using_precompute():
    np.random.seed(123)

    # example library code starts here
    def load_rgb(pth):
        return pth + '_rgb'
    def load_grey(pth):
        return pth + '_grey'
    def to_64x64(img):
        return img + '_64x64'

    paths = ['a', 'b', 'c', 'd']  # imagine some huge list of image paths
    rgb_imgs = larray.lmap(load_rgb, paths)
    grey_imgs = larray.lmap(load_grey, paths)
    paths_64x64 = larray.lmap(to_64x64, grey_imgs)

    train_set = larray.reindex(paths_64x64, np.random.permutation(len(paths))).loop()

    # example user code starts here.
    # It is easy to memmap the __array__ of paths_64x64, but
    # it is more difficult to compute derived things using that
    # memmap.
    
    # pretend this is a memmap of a precomputed quantity, for example.
    use_paths_64x64 = ['stuff', 'i', 'saved', 'from', 'disk']

    # the rest of the original graph (e.g. train_set)
    # doesn't know about our new memmap
    # or mongo-backed proxy, or whatever we're doing.

    new_train_set = larray.clone(train_set, given={paths_64x64: use_paths_64x64})

    l10 = list(new_train_set[range(10)])
    print(l10)
    assert l10 == [
            'from', 'stuff', 'i', 'saved',
            'from', 'stuff', 'i', 'saved',
            'from', 'stuff']
Example #4
def test_using_precompute():
    np.random.seed(123)

    # example library code starts here
    def load_rgb(pth):
        return pth + '_rgb'
    def load_grey(pth):
        return pth + '_grey'
    def to_64x64(img):
        return img + '_64x64'

    paths = ['a', 'b', 'c', 'd']  # imagine some huge list of image paths
    grey_imgs = larray.lmap(load_grey, paths)
    paths_64x64 = larray.lmap(to_64x64, grey_imgs)

    train_set = larray.reindex(paths_64x64, np.random.permutation(len(paths))
                              ).loop()

    # example user code starts here.
    # It is easy to memmap the __array__ of paths_64x64, but
    # it is more difficult to compute derived things using that
    # memmap.
    
    # pretend this is a memmap of a precomputed quantity, for example.
    use_paths_64x64 = ['stuff', 'i', 'saved', 'from', 'disk']

    # the rest of the original graph (e.g. train_set)
    # doesn't know about our new memmap
    # or mongo-backed proxy, or whatever we're doing.

    new_train_set = larray.clone(train_set, given={paths_64x64: use_paths_64x64})

    l10 = list(new_train_set[range(10)])
    print(l10)
    assert l10 == [
            'from', 'stuff', 'i', 'saved',
            'from', 'stuff', 'i', 'saved',
            'from', 'stuff']
Example #5
    def normalized_image_match_features(self,
                                        task,
                                        svm_dct,
                                        role,
                                        batched_lmap_speed_thresh=None):
        assert role in ('train', 'test')
        if batched_lmap_speed_thresh is None:
            batched_lmap_speed_thresh = self.batched_lmap_speed_thresh
        image_features, cdict = self.get_image_features(
            task, batched_lmap_speed_thresh=batched_lmap_speed_thresh)
        del cdict  # -- no longer used (waste of memory)
        pipeline = self.pipeline
        info('Indexing into image_features of shape %s' %
             str(image_features.shape))

        comps = [getattr(comparisons, cc) for cc in self.comparison_names]
        n_features = np.prod(image_features.shape[1:])
        n_trn = len(task.lidx)

        x_trn_shp = (n_trn, len(comps), n_features)
        info('Allocating training ndarray of shape %s' % str(x_trn_shp))
        x_trn = np.empty(x_trn_shp, dtype='float32')

        # -- pre-compute all of the image_features we will need
        all_l_features = reindex(image_features, task.lidx)[:]
        all_r_features = reindex(image_features, task.ridx)[:]

        all_l_features = all_l_features.reshape(len(all_l_features), -1)
        all_r_features = all_r_features.reshape(len(all_r_features), -1)

        foobar.append_ndarray_signature(all_l_features,
                                        'normalized_image_match l_features',
                                        task.name)
        foobar.append_ndarray_signature(all_r_features,
                                        'normalized_image_match r_features',
                                        task.name)

        if role == 'train':
            if np.allclose(all_l_features.var(axis=0), 0.0):
                raise ValueError('Homogeneous features (non-finite features)')

            xmean_l, xstd_l = mean_and_std(all_l_features,
                                           remove_std0=pipeline['remove_std0'])
            xmean_r, xstd_r = mean_and_std(all_r_features,
                                           remove_std0=pipeline['remove_std0'])
            xmean = (xmean_l + xmean_r) / 2.0
            # -- this is an ad-hoc way of blending the variances.
            xstd = np.sqrt(
                np.maximum(xstd_l, xstd_r)**2 + pipeline['varthresh'])

            foobar.append_ndarray_signature(xmean,
                                            'normalized_image_match xmean',
                                            task.name)
            foobar.append_ndarray_signature(xstd,
                                            'normalized_image_match xstd',
                                            task.name)

            svm_dct['xmean'] = xmean
            svm_dct['xstd'] = xstd
        else:
            xmean = svm_dct['xmean']
            xstd = svm_dct['xstd']

        info('Computing comparison features')

        # -- now compute the "comparison functions" into x_trn
        for jj, (lfeat, rfeat) in enumerate(zip(all_l_features,
                                                all_r_features)):
            lfeat_z = (lfeat - xmean) / xstd
            rfeat_z = (rfeat - xmean) / xstd
            for ci, comp in enumerate(comps):
                x_trn[jj, ci, :] = comp(lfeat_z, rfeat_z)

        if pipeline['divrowl2']:
            info('Dividing by feature norms')
            # -- now normalize by average feature norm because some
            #    comparison functions come out smaller than others
            if role == 'train':
                svm_dct['divrowl2_avg_nrm'] = {}
                for ci, cname in enumerate(self.comparison_names):
                    avg_nrm = average_row_l2norm(x_trn[:, ci, :]) + 1e-7
                    svm_dct['divrowl2_avg_nrm'][cname] = avg_nrm

            avg_nrm_vec = [
                svm_dct['divrowl2_avg_nrm'][cname]
                for cname in self.comparison_names
            ]
            x_trn /= np.asarray(avg_nrm_vec)[None, :, None]
            foobar.append_trace('get_normalized_features avg_nrm', avg_nrm_vec)

        # -- collapse comparison and feature dimensions
        x_trn.shape = (x_trn.shape[0], x_trn.shape[1] * x_trn.shape[2])

        foobar.append_ndarray_signature(x_trn, 'normalized_image_match x_trn',
                                        task.name)
        info('normalized_image_match_features complete')
        return x_trn
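
The train-time branch above is the heart of the method: per-feature statistics
from the left and right images are blended (means averaged, stds combined via
an elementwise max plus the 'varthresh' variance floor), every feature pair is
z-scored with the blended statistics, and each comparison function fills one
channel of x_trn. A toy NumPy sketch of that flow, with plain mean/std in
place of the repo's mean_and_std helper and hypothetical absdiff/mult
comparisons standing in for the comparisons module:

import numpy as np

rng = np.random.RandomState(0)
all_l = rng.randn(100, 8).astype('float32')  # 100 pairs, 8 features per side
all_r = rng.randn(100, 8).astype('float32')
varthresh = 1e-4

# blend left/right statistics as the method does
xmean = (all_l.mean(axis=0) + all_r.mean(axis=0)) / 2.0
xstd = np.sqrt(np.maximum(all_l.std(axis=0), all_r.std(axis=0)) ** 2
               + varthresh)

# hypothetical comparison functions (the real ones live in `comparisons`)
comps = [lambda l, r: np.abs(l - r), lambda l, r: l * r]

x_trn = np.empty((100, len(comps), 8), dtype='float32')
for jj in range(100):
    lfeat_z = (all_l[jj] - xmean) / xstd
    rfeat_z = (all_r[jj] - xmean) / xstd
    for ci, comp in enumerate(comps):
        x_trn[jj, ci, :] = comp(lfeat_z, rfeat_z)

# collapse comparison and feature dimensions, as the method does at the end
x_trn = x_trn.reshape(x_trn.shape[0], -1)
assert x_trn.shape == (100, 2 * 8)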
Example #6
    def normalized_image_match_features(self, task, svm_dct, role,
            batched_lmap_speed_thresh=None):
        assert role in ('train', 'test')
        if batched_lmap_speed_thresh is None:
            batched_lmap_speed_thresh = self.batched_lmap_speed_thresh
        image_features, cdict = self.get_image_features(task,
                batched_lmap_speed_thresh=batched_lmap_speed_thresh)
        del cdict # -- no longer used (waste of memory)
        pipeline = self.pipeline
        info('Indexing into image_features of shape %s' %
                str(image_features.shape))

        comps = [getattr(comparisons, cc)
                for cc in self.comparison_names]
        n_features = np.prod(image_features.shape[1:])
        n_trn = len(task.lidx)

        x_trn_shp = (n_trn, len(comps), n_features)
        info('Allocating training ndarray of shape %s' % str(x_trn_shp))
        x_trn = np.empty(x_trn_shp, dtype='float32')

        # -- pre-compute all of the image_features we will need
        all_l_features = reindex(image_features, task.lidx)[:]
        all_r_features = reindex(image_features, task.ridx)[:]

        all_l_features = all_l_features.reshape(len(all_l_features), -1)
        all_r_features = all_r_features.reshape(len(all_r_features), -1)

        foobar.append_ndarray_signature(all_l_features,
            'normalized_image_match l_features', task.name)
        foobar.append_ndarray_signature(all_r_features,
            'normalized_image_match r_features', task.name)

        if role == 'train':
            if np.allclose(all_l_features.var(axis=0), 0.0):
                raise ValueError(
                    'Homogeneous features (non-finite features)')

            xmean_l, xstd_l = mean_and_std(all_l_features,
                    remove_std0=pipeline['remove_std0'])
            xmean_r, xstd_r = mean_and_std(all_r_features,
                    remove_std0=pipeline['remove_std0'])
            xmean = (xmean_l + xmean_r) / 2.0
            # -- this is an ad-hoc way of blending the variances.
            xstd = np.sqrt(np.maximum(xstd_l, xstd_r) ** 2
                           + pipeline['varthresh'])

            foobar.append_ndarray_signature(
                xmean, 'normalized_image_match xmean', task.name)
            foobar.append_ndarray_signature(
                xstd, 'normalized_image_match xstd', task.name)


            svm_dct['xmean'] = xmean
            svm_dct['xstd'] = xstd
        else:
            xmean = svm_dct['xmean']
            xstd = svm_dct['xstd']

        info('Computing comparison features')

        # -- now compute the "comparison functions" into x_trn
        for jj, (lfeat, rfeat) in enumerate(
                zip(all_l_features, all_r_features)):
            lfeat_z = (lfeat - xmean) / xstd
            rfeat_z = (rfeat - xmean) / xstd
            for ci, comp in enumerate(comps):
                x_trn[jj, ci, :] = comp(lfeat_z, rfeat_z)

        if pipeline['divrowl2']:
            info('Dividing by feature norms')
            # -- now normalize by average feature norm because some
            #    comparison functions come out smaller than others
            if role == 'train':
                svm_dct['divrowl2_avg_nrm'] = {}
                for ci, cname in enumerate(self.comparison_names):
                    avg_nrm = average_row_l2norm(x_trn[:, ci, :]) + 1e-7
                    svm_dct['divrowl2_avg_nrm'][cname] = avg_nrm

            avg_nrm_vec = [svm_dct['divrowl2_avg_nrm'][cname]
                           for cname in self.comparison_names]
            x_trn /= np.asarray(avg_nrm_vec)[None, :, None]
            foobar.append_trace('get_normalized_features avg_nrm', avg_nrm_vec)

        # -- collapse comparison and feature dimensions
        x_trn.shape = (x_trn.shape[0], x_trn.shape[1] * x_trn.shape[2])

        foobar.append_ndarray_signature(
            x_trn, 'normalized_image_match x_trn', task.name)
        info('normalized_image_match_features complete')
        return x_trn
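
When pipeline['divrowl2'] is enabled, each comparison channel is rescaled by
its average row L2 norm (computed on the training role and reused at test time
via svm_dct) so that no single comparison function dominates the concatenated
feature vector. A minimal sketch of that rescaling, assuming
average_row_l2norm returns the mean Euclidean norm of the rows:

import numpy as np

def average_row_l2norm(x):
    # assumed behavior: mean Euclidean norm over the rows of x
    return np.sqrt((x ** 2).sum(axis=1)).mean()

rng = np.random.RandomState(0)
x_trn = rng.randn(100, 2, 8).astype('float32')  # (pairs, comparisons, features)

avg_nrm_vec = [average_row_l2norm(x_trn[:, ci, :]) + 1e-7
               for ci in range(x_trn.shape[1])]
x_trn /= np.asarray(avg_nrm_vec)[None, :, None]

# after rescaling, each channel's average row norm is ~1
for ci in range(x_trn.shape[1]):
    assert abs(average_row_l2norm(x_trn[:, ci, :]) - 1.0) < 1e-3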