def _get_distance(model, data, load_from_cache=False, save_to_cache=False):
    """Compute (or load) the distance matrix between two sets of images.

    The images of ``data[0]`` are pre-processed and passed through ``model``
    one row at a time; the images of ``data[1]`` are only pre-processed.
    The distance matrix is ``cdist(model_outputs, preprocessed_data1,
    'euclidean')``.

    Args:
        model: object providing ``get_output(symbolic_matrix)``; the result is
            compiled into a Theano function here.
        data: indexable whose items ``0`` and ``1`` are iterables of
            ``(pid, image)`` pairs.
        load_from_cache: when true, unpickle ``(D, G, P)`` from
            ``_cached_distmat`` and return it without computing anything.
        save_to_cache: when true and the result was freshly computed, pickle
            ``(D, G, P)`` to ``_cached_distmat`` and the intermediate
            ``(X, Y, R)`` matrices to ``_cached_output``.

    Returns:
        Tuple ``(D, G, P)``: the distance matrix, the array of ids taken from
        ``data[0]`` and the array of ids taken from ``data[1]``.
    """
    if load_from_cache:
        with open(_cached_distmat, 'rb') as fin:
            D, G, P = cPickle.load(fin)
        return (D, G, P)

    print("Computing distance matrix ...")

    # Compile the model's forward pass.
    sym_input = T.matrix('x')
    sym_output = model.get_output(sym_input)
    forward = theano.function(inputs=[sym_input], outputs=sym_output)

    # Flatten both image sets into float matrices.
    X = imageproc.images2mat(
        [_preproc(image) for _, image in data[0]]
    ).astype(theano.config.floatX)
    Y = imageproc.images2mat(
        [_preproc(image) for _, image in data[1]]
    ).astype(theano.config.floatX)

    # The model is evaluated on a single-row slice per call.
    # NOTE(review): presumably to bound memory use -- confirm before batching.
    R = numpy.asarray(
        [forward(X[row:row + 1, :]).ravel() for row in xrange(X.shape[0])])

    D = cdist(R, Y, 'euclidean')
    G = numpy.asarray([pid for pid, _ in data[0]])
    P = numpy.asarray([pid for pid, _ in data[1]])

    if save_to_cache:
        with open(_cached_distmat, 'wb') as fout:
            cPickle.dump((D, G, P), fout, protocol=cPickle.HIGHEST_PROTOCOL)
        with open(_cached_output, 'wb') as fout:
            cPickle.dump((X, Y, R), fout, protocol=cPickle.HIGHEST_PROTOCOL)

    return (D, G, P)
def _get_distance(model, data, load_from_cache=False, save_to_cache=False):
    """Return ``(D, G, P)`` for the two image sets held in ``data``.

    Args:
        model: object whose ``get_output`` maps a symbolic Theano matrix to
            the symbolic model output.
        data: indexable; ``data[0]`` and ``data[1]`` are each iterated as
            ``(pid, image)`` pairs.
        load_from_cache: if true, the triple is unpickled from
            ``_cached_distmat`` instead of being computed.
        save_to_cache: if true, a freshly computed triple is pickled to
            ``_cached_distmat`` and ``(X, Y, R)`` to ``_cached_output``.
            It has no effect when the triple was loaded from the cache.

    Returns:
        ``(D, G, P)`` where ``D`` is the euclidean ``cdist`` between the model
        outputs for ``data[0]`` and the pre-processed images of ``data[1]``,
        ``G`` holds the ids of ``data[0]`` and ``P`` the ids of ``data[1]``.
    """
    if load_from_cache:
        with open(_cached_distmat, 'rb') as cache_file:
            D, G, P = cPickle.load(cache_file)
    else:
        print("Computing distance matrix ...")

        x = T.matrix('x')
        predict = theano.function(inputs=[x], outputs=model.get_output(x))

        float_type = theano.config.floatX
        left_images = [_preproc(img) for _pid, img in data[0]]
        right_images = [_preproc(img) for _pid, img in data[1]]
        X = imageproc.images2mat(left_images).astype(float_type)
        Y = imageproc.images2mat(right_images).astype(float_type)

        # Evaluate the model on one single-row slice at a time and collect
        # the flattened outputs.
        outputs = []
        for idx in xrange(X.shape[0]):
            single_row = X[idx:idx + 1, :]
            outputs.append(predict(single_row).ravel())
        R = numpy.asarray(outputs)

        D = cdist(R, Y, 'euclidean')
        G = numpy.asarray([pid for pid, _img in data[0]])
        P = numpy.asarray([pid for pid, _img in data[1]])

        if save_to_cache:
            protocol = cPickle.HIGHEST_PROTOCOL
            with open(_cached_distmat, 'wb') as cache_file:
                cPickle.dump((D, G, P), cache_file, protocol=protocol)
            with open(_cached_output, 'wb') as cache_file:
                cPickle.dump((X, Y, R), cache_file, protocol=protocol)

    return (D, G, P)
def _select_group(images, attributes, group, gtype,
                  load_from_cache=False, save_to_cache=False):
    """Keep the samples whose attributes in ``group`` pass a validity check.

    Args:
        images: sequence of images, iterated in lockstep with ``attributes``.
        attributes: sequence of per-sample attribute vectors, indexable by a
            list of column positions (presumably numpy arrays -- confirm).
        group: attribute names; each is looked up in the module-level
            ``attribute_names`` to find its column.
        gtype: ``'unique'`` keeps samples with group sum equal to 1 and
            stores the index of the entry equal to 1; any other value keeps
            samples with a non-zero group sum and stores the group
            sub-vector unchanged.
        load_from_cache: when true, unpickle the result from
            ``_cached_group`` instead of computing it.
        save_to_cache: when true and the result was freshly computed, pickle
            it to ``_cached_group``.

    Returns:
        Tuple ``(selected_images, selected_attributes)``, both produced by
        ``imageproc.images2mat`` when computed.
    """
    print("Selecting Group ...")
    print("===================")

    if load_from_cache:
        with open(_cached_group, 'rb') as fin:
            selected_images, selected_attributes = cPickle.load(fin)
        return (selected_images, selected_attributes)

    # Restrict every attribute vector to the columns of this group.
    columns = [attribute_names.index(name) for name in group]
    group_attributes = [vector[columns] for vector in attributes]

    if gtype == 'unique':
        def is_valid(vector):
            return vector.sum() == 1

        def encode(vector):
            return numpy.where(vector == 1)[0]
    else:
        def is_valid(vector):
            return vector.sum() != 0

        def encode(vector):
            return vector

    kept_images = []
    kept_attributes = []
    for image, vector in zip(images, group_attributes):
        # Some data are mis-labeled, so samples failing the check are skipped.
        if not is_valid(vector):
            continue
        kept_images.append(image)
        kept_attributes.append(encode(vector))

    selected_images = imageproc.images2mat(kept_images)
    selected_attributes = imageproc.images2mat(kept_attributes)

    if save_to_cache:
        with open(_cached_group, 'wb') as fout:
            cPickle.dump((selected_images, selected_attributes), fout,
                         protocol=cPickle.HIGHEST_PROTOCOL)

    return (selected_images, selected_attributes)
def _mask_dataset():
    """Apply the cached foreground model to every image and save the result.

    Steps:
      1. Unpickle ``(model, threshold)`` from
         ``'../cache/foreground_model.pkl'`` and compile a Theano function
         returning ``model_output >= threshold``.
      2. Load ``'../data/cuhk_small.mat'``, pre-process all images with
         ``_input_preproc`` and compute one mask row per image.
      3. For each image, reshape its mask row to ``(160, 80, 1)``, multiply
         it with the original image resized to ``(160, 80, 3)``, cast to
         ``uint8`` and store it at the same group/pedestrian/view position.
      4. Save everything to ``'../data/cuhk_small_masked.mat'``.

    Takes no arguments and returns ``None``.
    """
    # Load model and compile the thresholded forward pass.
    with open('../cache/foreground_model.pkl', 'rb') as fin:
        model, threshold = cPickle.load(fin)
    sym_input = T.matrix('x')
    sym_output = model.get_output(sym_input)
    compute_masks = theano.function(inputs=[sym_input],
                                    outputs=(sym_output >= threshold))

    # Load data.
    image_data = DataLoader('../data/cuhk_small.mat', verbose=True)

    # Pre-processing.
    print("Pre-processing ...")
    prepared = [_input_preproc(img) for img in image_data.get_all_images()]
    prepared = imageproc.images2mat(prepared).astype(theano.config.floatX)

    # Compute masks for all images in a single call.
    print("Computing masks ...")
    masks = compute_masks(prepared)

    # Save masks.
    print("Saving data ...")
    mask_data = DataSaver()
    # Running row index into ``masks``.
    # NOTE(review): assumes get_all_images() yields images in the same
    # group -> pedestrian -> view -> image order as the loops below; confirm.
    row = 0
    for gid in xrange(image_data.get_n_groups()):
        n_pedes, n_views = image_data.get_n_pedes_views(gid)
        mask_data.add_group(n_pedes, n_views)
        for pid in xrange(n_pedes):
            images_per_view = image_data.get_n_images(gid, pid)
            for vid, count in enumerate(images_per_view):
                masked_images = []
                for k in xrange(count):
                    mask = masks[row, :].reshape(160, 80, 1)
                    original = imageproc.imresize(
                        image_data.get_image(gid, pid, vid, k),
                        (160, 80, 3))
                    masked_images.append(
                        (mask * original).astype(numpy.uint8))
                    row += 1
                mask_data.set_images(gid, pid, vid, masked_images)

    mask_data.save('../data/cuhk_small_masked.mat')
def _prepare_data(load_from_cache=False, save_to_cache=False):
    """Build (or load) the input/target datasets for the parsing data.

    Args:
        load_from_cache: when true, unpickle the datasets from
            ``_cached_datasets`` and return them without touching the data
            files.
        save_to_cache: when true and the datasets were freshly built, pickle
            them to ``_cached_datasets``.

    Returns:
        A ``Datasets`` object built from the pre-processed input and target
        image matrices and split with ``train_ratio=0.5`` and
        ``valid_ratio=0.3`` (or whatever object the cache file contains).
    """
    if load_from_cache:
        with open(_cached_datasets, 'rb') as fin:
            datasets = cPickle.load(fin)
        return datasets

    # Setup data files: raw images and their target counterparts.
    input_data = DataLoader(
        '../data/parse/cuhk_large_labeled_subsampled.mat',
        verbose=True)
    target_data = DataLoader(
        '../data/parse/cuhk_large_labeled_subsampled_parse.mat',
        verbose=True)
    input_images = input_data.get_all_images()
    target_images = target_data.get_all_images()

    # Pre-processing.
    print("Pre-processing ...")
    inputs = imageproc.images2mat(
        [_input_preproc(img) for img in input_images])
    targets = imageproc.images2mat(
        [_target_preproc(img) for img in target_images])

    # Prepare the datasets.
    print("Prepare the datasets ...")
    datasets = Datasets(inputs, targets)
    datasets.split(train_ratio=0.5, valid_ratio=0.3)

    if save_to_cache:
        with open(_cached_datasets, 'wb') as fout:
            cPickle.dump(datasets, fout, protocol=cPickle.HIGHEST_PROTOCOL)

    return datasets
def _prepare_data(load_from_cache=False, save_to_cache=False):
    """Build (or load) the view-first data and the cross-view pair datasets.

    Args:
        load_from_cache: when true, unpickle ``(views_data, datasets)`` from
            ``_cached_datasets`` and return it directly.
        save_to_cache: when true and the result was freshly built, pickle
            ``(views_data, datasets)`` to ``_cached_datasets``.

    Returns:
        Tuple ``(views_data, datasets)``:
          * ``views_data`` -- one list per view holding ``(pid, image)``
            pairs, where ``pid`` is shifted by the number of pedestrians in
            all preceding groups.
          * ``datasets`` -- a ``Datasets`` object over paired matrices
            ``X``/``Y`` containing, for every pedestrian and every view pair
            ``vi < vj``, all image combinations; split with
            ``train_ratio=0.8`` and ``valid_ratio=0.1``.

    Raises:
        AssertionError: if the groups do not all have the same number of
            views.
    """
    if load_from_cache:
        with open(_cached_datasets, 'rb') as fin:
            views_data, datasets = cPickle.load(fin)
        return (views_data, datasets)

    image_data = DataLoader('../data/cuhk_small_masked.mat', verbose=True)

    # Prepare the view-first order data representation.
    print("Preparing the view-first order data ...")
    n_pedes = []
    n_views = []
    for gid in xrange(image_data.get_n_groups()):
        pedes_count, views_count = image_data.get_n_pedes_views(gid)
        n_pedes.append(pedes_count)
        n_views.append(views_count)
    assert min(n_views) == max(n_views), \
        "The number of views in each group should be equal"

    total_views = n_views[0]
    views_data = [[] for _ in xrange(total_views)]
    for gid in xrange(image_data.get_n_groups()):
        # Shift ids by the pedestrians in all earlier groups.
        offset = sum(n_pedes[:gid])
        per_view = data_manager.view_repr(image_data.get_pedes(gid))
        for vid in xrange(total_views):
            shifted = [(pid + offset, image) for pid, image in per_view[vid]]
            views_data[vid].extend(shifted)

    # Prepare the datasets: every image pair across two distinct views of the
    # same pedestrian becomes one (X, Y) sample.
    print("Prepare the datasets ...")
    X = []
    Y = []
    for gid in xrange(image_data.get_n_groups()):
        pedes_count, views_count = image_data.get_n_pedes_views(gid)
        for pid in xrange(pedes_count):
            counts = image_data.get_n_images(gid, pid)
            for vi in xrange(views_count):
                for vj in xrange(vi + 1, views_count):
                    for i in xrange(counts[vi]):
                        for j in xrange(counts[vj]):
                            first = image_data.get_image(gid, pid, vi, i)
                            X.append(_preproc(first))
                            second = image_data.get_image(gid, pid, vj, j)
                            Y.append(_preproc(second))

    X = imageproc.images2mat(X).astype(theano.config.floatX)
    Y = imageproc.images2mat(Y).astype(theano.config.floatX)
    datasets = Datasets(X, Y)
    datasets.split(train_ratio=0.8, valid_ratio=0.1)

    if save_to_cache:
        with open(_cached_datasets, 'wb') as fout:
            cPickle.dump((views_data, datasets), fout,
                         protocol=cPickle.HIGHEST_PROTOCOL)

    return (views_data, datasets)
def _prepare_data(load_from_cache=False, save_to_cache=False):
    """Return ``(views_data, datasets)`` for the masked image data.

    With ``load_from_cache`` set, the pair is unpickled from
    ``_cached_datasets``. Otherwise it is built from
    ``'../data/cuhk_small_masked.mat'``:

    * ``views_data[vid]`` collects ``(pid, image)`` pairs of view ``vid``
      over all groups; ``pid`` is offset by the pedestrian count of the
      preceding groups.
    * ``datasets`` wraps matrices ``X`` and ``Y`` whose rows pair every image
      of view ``vi`` with every image of view ``vj`` (``vi < vj``) of the
      same pedestrian, and is split with ``train_ratio=0.8`` and
      ``valid_ratio=0.1``.

    Args:
        load_from_cache: read the result from the cache instead of building.
        save_to_cache: pickle a freshly built result to ``_cached_datasets``;
            ignored when the result came from the cache.

    Returns:
        Tuple ``(views_data, datasets)``.

    Raises:
        AssertionError: when groups report differing numbers of views.
    """
    if load_from_cache:
        with open(_cached_datasets, 'rb') as cache_file:
            views_data, datasets = cPickle.load(cache_file)
    else:
        image_data = DataLoader('../data/cuhk_small_masked.mat', verbose=True)

        # Prepare the view-first order data representation.
        print("Preparing the view-first order data ...")
        shapes = [image_data.get_n_pedes_views(gid)
                  for gid in xrange(image_data.get_n_groups())]
        n_pedes = [m for m, _v in shapes]
        n_views = [v for _m, v in shapes]
        assert min(n_views) == max(n_views), \
            "The number of views in each group should be equal"
        v = n_views[0]

        views_data = [[] for _unused in xrange(v)]
        for gid in xrange(image_data.get_n_groups()):
            bias = sum(n_pedes[0:gid])
            group_views = data_manager.view_repr(image_data.get_pedes(gid))
            for vid in xrange(v):
                views_data[vid].extend(
                    [(pid + bias, image) for pid, image in group_views[vid]])

        # Prepare the datasets.
        print("Prepare the datasets ...")
        get_image = image_data.get_image
        left, right = [], []
        for gid in xrange(image_data.get_n_groups()):
            m, v = image_data.get_n_pedes_views(gid)
            for pid in xrange(m):
                n_images = image_data.get_n_images(gid, pid)
                for vi in xrange(v):
                    for vj in xrange(vi + 1, v):
                        # Cartesian product of the images of the two views.
                        for i in xrange(n_images[vi]):
                            for j in xrange(n_images[vj]):
                                left.append(
                                    _preproc(get_image(gid, pid, vi, i)))
                                right.append(
                                    _preproc(get_image(gid, pid, vj, j)))

        float_type = theano.config.floatX
        X = imageproc.images2mat(left).astype(float_type)
        Y = imageproc.images2mat(right).astype(float_type)
        datasets = Datasets(X, Y)
        datasets.split(train_ratio=0.8, valid_ratio=0.1)

        if save_to_cache:
            with open(_cached_datasets, 'wb') as cache_file:
                cPickle.dump((views_data, datasets), cache_file,
                             protocol=cPickle.HIGHEST_PROTOCOL)

    return (views_data, datasets)