def unsup_images(data_view, trn, N):
    """
    Return a block of dev-train images as a freshly copied integer ndarray.

    The images are selected from `data_view.image_pixels` using the first
    `N` entries of row 0 of `data_view.dev_train['lpathidx']`.  The result
    has its axes reordered with `transpose(0, 3, 1, 2)` and is copied, so
    the caller owns the returned memory.

    Parameters
    ----------
    data_view : object exposing `image_pixels` and `dev_train['lpathidx']`
    trn : only the string 'DevTrain' is supported
    N : number of index entries to take

    Raises
    ------
    NotImplementedError
        If `trn` is anything other than 'DevTrain'.
    AssertionError
        If the materialized images do not have an integer dtype.
    """
    if trn != 'DevTrain':
        raise NotImplementedError()

    # -- pick the requested images out of the lazy pixel array
    pixel_source = data_view.image_pixels
    wanted = data_view.dev_train['lpathidx'][0, :N]
    pixels = np.asarray(larray.reindex(pixel_source, wanted)[:])
    assert 'int' in str(pixels.dtype)

    # -- record what was extracted in the foobar trace
    foobar.append_ndarray_signature(pixels, 'unsup_images')
    foobar.append_trace('unsup_images N', N)

    # -- channel-major layout: presumably (n, rows, cols, channels)
    #    becomes (n, channels, rows, cols) -- TODO confirm input layout
    return pixels.transpose(0, 3, 1, 2).copy()
def test_usage():
    """
    Check that a lazily mapped list can be reindexed by a permutation.

    A seeded permutation of the path indices is applied to the lazy
    `load_rgb` map, the result is looped, and ten items are requested from
    it.  The first four items must come from paths 'd', 'a', 'b', 'c', in
    that order.
    """
    np.random.seed(123)

    def load_rgb(pth):
        return pth + '_rgb'

    paths = ['a', 'b', 'c', 'd']  # imagine some huge list of image paths
    rgb_imgs = larray.lmap(load_rgb, paths)
    shuffled = np.random.permutation(len(paths))
    train_set = larray.reindex(rgb_imgs, shuffled).loop()

    l10 = list(train_set[range(10)])
    # -- parenthesized form prints the same text on Python 2 and is also
    #    valid syntax on Python 3
    print(l10)
    assert ['d', 'a', 'b', 'c'] == [item[0] for item in l10[:4]]
def test_using_precompute():
    """
    Check that `larray.clone` can swap a precomputed value into a graph.

    A small lazy pipeline (grey load -> 64x64 -> permuted, looping
    train_set) is built, then cloned with `given` mapping the intermediate
    `paths_64x64` node to a plain list.  Indexing the clone must return
    elements of the substitute list in the permuted, repeating order.
    """
    np.random.seed(123)

    # example library code starts here
    def load_grey(pth):
        return pth + '_grey'

    def to_64x64(img):
        return img + '_64x64'

    paths = ['a', 'b', 'c', 'd']  # imagine some huge list of image paths
    grey_imgs = larray.lmap(load_grey, paths)
    paths_64x64 = larray.lmap(to_64x64, grey_imgs)
    train_set = larray.reindex(
        paths_64x64, np.random.permutation(len(paths))).loop()

    # example user code starts here.
    # It is easy to memmap the __array__ of paths_64x64, but
    # it is more difficult to compute derived things using that
    # memmap.

    # pretend this is a memmap of a precomputed quantity, for example.
    use_paths_64x64 = ['stuff', 'i', 'saved', 'from', 'disk']

    # the rest of the original graph (e.g. train_set)
    # doesn't know about our new memmap
    # or mongo-backed proxy, or whatever we're doing.
    new_train_set = larray.clone(
        train_set, given={paths_64x64: use_paths_64x64})

    l10 = list(new_train_set[range(10)])
    # -- parenthesized form prints the same text on Python 2 and is also
    #    valid syntax on Python 3
    print(l10)
    assert l10 == [
        'from', 'stuff', 'i', 'saved',
        'from', 'stuff', 'i', 'saved',
        'from', 'stuff']
def test_using_precompute():
    """
    Substitute a precomputed list for an intermediate lazy node.

    Builds grey -> 64x64 -> reindex(permutation).loop(), clones the final
    node with `given={paths_64x64: use_paths_64x64}`, and asserts that ten
    items drawn from the clone are taken from the substitute list in the
    seeded permutation order, repeating every four items.
    """
    np.random.seed(123)

    # example library code starts here
    def load_grey(pth):
        return pth + '_grey'

    def to_64x64(img):
        return img + '_64x64'

    paths = ['a', 'b', 'c', 'd']  # imagine some huge list of image paths
    grey_imgs = larray.lmap(load_grey, paths)
    paths_64x64 = larray.lmap(to_64x64, grey_imgs)
    order = np.random.permutation(len(paths))
    train_set = larray.reindex(paths_64x64, order).loop()

    # example user code starts here.
    # It is easy to memmap the __array__ of paths_64x64, but
    # it is more difficult to compute derived things using that
    # memmap.

    # pretend this is a memmap of a precomputed quantity, for example.
    use_paths_64x64 = ['stuff', 'i', 'saved', 'from', 'disk']

    # the rest of the original graph (e.g. train_set)
    # doesn't know about our new memmap
    # or mongo-backed proxy, or whatever we're doing.
    substitutions = {paths_64x64: use_paths_64x64}
    new_train_set = larray.clone(train_set, given=substitutions)

    l10 = list(new_train_set[range(10)])
    # -- print(...) with one argument behaves the same on Python 2 and
    #    does not break parsing on Python 3
    print(l10)
    assert l10 == [
        'from', 'stuff', 'i', 'saved',
        'from', 'stuff', 'i', 'saved',
        'from', 'stuff']
def normalized_image_match_features(self, task, svm_dct, role,
                                    batched_lmap_speed_thresh=None):
    """
    Build the normalized comparison-feature matrix for the pairs of `task`.

    For every (left, right) pair given by `task.lidx` / `task.ridx`, the
    two flattened feature vectors are standardized with a shared mean and
    std, and each function named in `self.comparison_names` (looked up on
    `comparisons`) is applied to them.

    Parameters
    ----------
    task : object providing `lidx`, `ridx` and `name`
    svm_dct : dict holding normalization statistics.  With role 'train'
        the keys 'xmean', 'xstd' (and 'divrowl2_avg_nrm' when
        `self.pipeline['divrowl2']` is set) are written; otherwise they
        are read.
    role : 'train' or 'test'
    batched_lmap_speed_thresh : forwarded to `self.get_image_features`;
        falls back to `self.batched_lmap_speed_thresh` when None.

    Returns
    -------
    float32 ndarray of shape
    (len(task.lidx), len(self.comparison_names) * n_features)

    Raises
    ------
    ValueError
        With role 'train', when the per-column variance of the left
        features is everywhere close to zero.
    """
    assert role in ('train', 'test')
    if batched_lmap_speed_thresh is None:
        batched_lmap_speed_thresh = self.batched_lmap_speed_thresh

    image_features, cdict = self.get_image_features(
        task, batched_lmap_speed_thresh=batched_lmap_speed_thresh)
    del cdict  # -- not used below; release the reference to save memory
    pipeline = self.pipeline
    info('Indexing into image_features of shape %s'
         % str(image_features.shape))

    comparison_fns = [getattr(comparisons, fn_name)
                      for fn_name in self.comparison_names]
    n_features = np.prod(image_features.shape[1:])
    n_pairs = len(task.lidx)

    out_shape = (n_pairs, len(comparison_fns), n_features)
    info('Allocating training ndarray of shape %s' % str(out_shape))
    x_trn = np.empty(out_shape, dtype='float32')

    # -- materialize both sides of every pair, one flat row per image
    left = reindex(image_features, task.lidx)[:]
    right = reindex(image_features, task.ridx)[:]
    left = left.reshape(len(left), -1)
    right = right.reshape(len(right), -1)
    foobar.append_ndarray_signature(
        left, 'normalized_image_match l_features', task.name)
    foobar.append_ndarray_signature(
        right, 'normalized_image_match r_features', task.name)

    if role == 'train':
        if np.allclose(left.var(axis=0), 0.0):
            raise ValueError('Homogeneous features (non-finite features)')

        remove_std0 = pipeline['remove_std0']
        mean_l, std_l = mean_and_std(left, remove_std0=remove_std0)
        mean_r, std_r = mean_and_std(right, remove_std0=remove_std0)
        xmean = (mean_l + mean_r) / 2.0
        # -- ad-hoc blend of the two sides: take the larger std per
        #    feature and add the variance threshold before the sqrt
        larger_std = np.maximum(std_l, std_r)
        xstd = np.sqrt(larger_std ** 2 + pipeline['varthresh'])

        foobar.append_ndarray_signature(
            xmean, 'normalized_image_match xmean', task.name)
        foobar.append_ndarray_signature(
            xstd, 'normalized_image_match xstd', task.name)

        # -- remember the statistics so the 'test' role can reuse them
        svm_dct['xmean'] = xmean
        svm_dct['xstd'] = xstd
    else:
        xmean = svm_dct['xmean']
        xstd = svm_dct['xstd']

    info('Computing comparison features')

    # -- fill x_trn one pair at a time with every comparison function
    for row, (l_vec, r_vec) in enumerate(zip(left, right)):
        l_z = (l_vec - xmean) / xstd
        r_z = (r_vec - xmean) / xstd
        for slot, compare in enumerate(comparison_fns):
            x_trn[row, slot, :] = compare(l_z, r_z)

    if pipeline['divrowl2']:
        info('Dividing by feature norms')
        # -- rescale each comparison by its average row norm, because
        #    some comparison functions come out smaller than others
        if role == 'train':
            norms = svm_dct['divrowl2_avg_nrm'] = {}
            for slot, cname in enumerate(self.comparison_names):
                norms[cname] = average_row_l2norm(x_trn[:, slot, :]) + 1e-7

        avg_nrm_vec = [svm_dct['divrowl2_avg_nrm'][cname]
                       for cname in self.comparison_names]
        x_trn /= np.asarray(avg_nrm_vec)[None, :, None]
        foobar.append_trace('get_normlized_features avg_nrm', avg_nrm_vec)

    # -- merge the comparison and feature axes into one (in place)
    n_rows, n_comps, n_cols = x_trn.shape
    x_trn.shape = (n_rows, n_comps * n_cols)

    foobar.append_ndarray_signature(
        x_trn, 'normalized_image_match x_trn', task.name)
    info('normalized_image_match_features complete')
    return x_trn
def normalized_image_match_features(self, task, svm_dct, role,
                                    batched_lmap_speed_thresh=None):
    """
    Compute standardized pairwise comparison features for `task`.

    Image features come from `self.get_image_features`.  Rows selected by
    `task.lidx` and `task.ridx` are flattened, z-scored with a common
    mean/std, and passed pairwise through every function listed in
    `self.comparison_names` (attributes of `comparisons`).

    Role 'train' estimates the mean/std (and, if
    `self.pipeline['divrowl2']` is set, the per-comparison average row
    norms) and stores them in `svm_dct` under 'xmean', 'xstd' and
    'divrowl2_avg_nrm'.  Any other accepted role reads those entries
    back from `svm_dct`.

    Parameters
    ----------
    task : object providing `lidx`, `ridx` and `name`
    svm_dct : dict of normalization statistics (written or read, see above)
    role : 'train' or 'test'
    batched_lmap_speed_thresh : passed to `self.get_image_features`;
        None means use `self.batched_lmap_speed_thresh`.

    Returns
    -------
    float32 ndarray with one row per pair and
    len(self.comparison_names) * n_features columns.

    Raises
    ------
    ValueError
        In the 'train' role if the left features have (near-)zero
        variance in every column.
    """
    assert role in ('train', 'test')
    if batched_lmap_speed_thresh is None:
        batched_lmap_speed_thresh = self.batched_lmap_speed_thresh

    image_features, cdict = self.get_image_features(
        task, batched_lmap_speed_thresh=batched_lmap_speed_thresh)
    del cdict  # -- unused from here on (waste of memory)
    pipeline = self.pipeline
    feature_shape = image_features.shape
    info('Indexing into image_features of shape %s' % str(feature_shape))

    names = self.comparison_names
    comps = [getattr(comparisons, cc) for cc in names]
    n_features = np.prod(image_features.shape[1:])
    n_trn = len(task.lidx)

    x_trn_shp = (n_trn, len(comps), n_features)
    info('Allocating training ndarray of shape %s' % str(x_trn_shp))
    x_trn = np.empty(x_trn_shp, dtype='float32')

    # -- gather every feature row needed, then flatten each one
    feats_l = reindex(image_features, task.lidx)[:]
    feats_r = reindex(image_features, task.ridx)[:]
    feats_l = feats_l.reshape(len(feats_l), -1)
    feats_r = feats_r.reshape(len(feats_r), -1)
    foobar.append_ndarray_signature(
        feats_l, 'normalized_image_match l_features', task.name)
    foobar.append_ndarray_signature(
        feats_r, 'normalized_image_match r_features', task.name)

    if role != 'train':
        # -- reuse the statistics stored by an earlier 'train' call
        xmean = svm_dct['xmean']
        xstd = svm_dct['xstd']
    else:
        if np.allclose(feats_l.var(axis=0), 0.0):
            raise ValueError(
                'Homogeneous features (non-finite features)')

        xmean_l, xstd_l = mean_and_std(
            feats_l, remove_std0=pipeline['remove_std0'])
        xmean_r, xstd_r = mean_and_std(
            feats_r, remove_std0=pipeline['remove_std0'])
        xmean = (xmean_l + xmean_r) / 2.0
        # -- this is an ad-hoc way of blending the variances.
        blended_var = np.maximum(xstd_l, xstd_r) ** 2 + pipeline['varthresh']
        xstd = np.sqrt(blended_var)

        foobar.append_ndarray_signature(
            xmean, 'normalized_image_match xmean', task.name)
        foobar.append_ndarray_signature(
            xstd, 'normalized_image_match xstd', task.name)

        svm_dct['xmean'] = xmean
        svm_dct['xstd'] = xstd

    info('Computing comparison features')

    # -- evaluate each comparison function on each standardized pair
    pairs = zip(feats_l, feats_r)
    for jj, (lfeat, rfeat) in enumerate(pairs):
        lfeat_z = (lfeat - xmean) / xstd
        rfeat_z = (rfeat - xmean) / xstd
        for ci, comp in enumerate(comps):
            x_trn[jj, ci, :] = comp(lfeat_z, rfeat_z)

    if pipeline['divrowl2']:
        info('Dividing by feature norms')
        # -- divide by the average feature norm of each comparison,
        #    since some comparison functions come out smaller than others
        if role == 'train':
            svm_dct['divrowl2_avg_nrm'] = {}
            stored = svm_dct['divrowl2_avg_nrm']
            for ci, cname in enumerate(names):
                stored[cname] = average_row_l2norm(x_trn[:, ci, :]) + 1e-7

        avg_nrm_vec = [svm_dct['divrowl2_avg_nrm'][cname]
                       for cname in names]
        scale = np.asarray(avg_nrm_vec)[None, :, None]
        x_trn /= scale
        foobar.append_trace('get_normlized_features avg_nrm', avg_nrm_vec)

    # -- collapse comparison and feature dimensions (in-place reshape)
    rows, n_comp, n_feat = x_trn.shape
    x_trn.shape = (rows, n_comp * n_feat)

    foobar.append_ndarray_signature(
        x_trn, 'normalized_image_match x_trn', task.name)
    info('normalized_image_match_features complete')
    return x_trn