def get_positives(mixcomp, settings, indices, files, crop=False):
    im_size = settings['detector']['image_size']

    # Use the same seed for all mixture components! That will make them easier
    # to compare, without having to sample to infinity.

    # HERE: Make it possible to input data directly!
    descriptor = gv.load_descriptor(settings)

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    rotspread = settings['detector'].get('rotation_spreading_radius', 0)
    cb = settings['detector'].get('crop_border')

    all_feats = []

    for index in indices:
        ag.info("Fetching positives from image of index {0} and mixture component {1}".format(index, mixcomp))
        gray_im = gv.img.asgray(gv.img.load_image(files[index]))
        if crop:
            gray_im = gv.img.crop(gray_im, im_size)
        else:
            gray_im = gv.img.resize(gray_im, im_size)

        feats = descriptor.extract_features(gray_im, settings=dict(
            spread_radii=radii,
            subsample_size=psize,
            rotation_spreading_radius=rotspread,
            crop_border=cb))
        all_feats.append(feats)

    return mixcomp, np.asarray(all_feats)
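# Usage sketch (not in the original excerpt): get_positives is driven once per
# mixture component. Assumes `comps` is the per-file component assignment
# produced by clustering (see superimposed_model below).
def _demo_get_positives(settings, files, comps, num_mixtures):
    for m in xrange(num_mixtures):
        indices = list(np.where(comps == m)[0])
        _, feats = get_positives(m, settings, indices, files)
        ag.info("component {0}: positives stack shape {1}".format(m, feats.shape))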
def get_pos_and_neg(mixcomp, settings, bb, indices, files, neg_files, duplicates_mult=1):
    im_size = settings['detector']['image_size']
    size = gv.bb.size(bb)

    # Use the same seed for all mixture components! That will make them easier
    # to compare, without having to sample to infinity.

    # HERE: Make it possible to input data directly!
    descriptor = gv.load_descriptor(settings)

    all_pos_feats = []
    all_neg_feats = []

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    rotspread = settings['detector'].get('rotation_spreading_radius', 0)
    duplicates = settings['detector'].get('duplicates', 1) * duplicates_mult
    cb = settings['detector'].get('crop_border')
    crop_image = settings['detector'].get('crop_image')

    sett = dict(spread_radii=radii, subsample_size=psize,
                rotation_spreading_radius=rotspread, crop_border=cb,
                crop_image=crop_image)

    extra = {}
    if settings['detector'].get('selective_bkg'):
        ag.info("SELECTIVE!")
        extra['selective'] = True
        extra['concentrations'] = _get_avg_positives(mixcomp, settings, bb, indices,
                                                     files, neg_files, descriptor, sett)

    alpha_maps = []

    args = [(index, mixcomp, files, im_size, bb, duplicates, neg_files, descriptor, sett, extra)
            for index in indices]
    for pos_feats, neg_feats, alpha in gv.parallel.starmap_unordered(__process_one, args):
        all_pos_feats.extend(pos_feats)
        all_neg_feats.extend(neg_feats)
        alpha_maps.append(alpha)

    all_neg_feats = np.asarray(all_neg_feats)
    all_pos_feats = np.asarray(all_pos_feats)
    #alpha_maps = np.asarray(alpha_maps)
    #support = alpha_maps.mean(axis=0)

    return mixcomp, all_neg_feats, all_pos_feats, alpha_maps, extra
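# Sketch of the __process_one worker referenced above; its body is not part of
# this excerpt. The shape of the sketch is an assumption, modeled on the
# superimposition loop in _create_kernel_for_mixcomp below: each CAD image is
# composited onto `duplicates` random negative crops, and the positive
# (superimposed) and negative feature maps are returned together with alpha.
def _sketch_process_one(index, mixcomp, files, im_size, bb, duplicates, neg_files,
                        descriptor, sett, extra):
    gen = generate_random_patches(neg_files, gv.bb.size(bb), seed=index)
    gray_im, alpha = _load_cad_image(files[index], im_size, bb, crop=sett.get('crop_image'))

    pos_feats = []
    neg_feats = []
    for dup in xrange(duplicates):
        neg_im = gen.next()
        superimposed_im = neg_im * (1 - alpha) + gray_im * alpha
        neg_feats.append(descriptor.extract_features(neg_im, settings=sett))
        pos_feats.append(descriptor.extract_features(superimposed_im, settings=sett))
    return pos_feats, neg_feats, alpha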
def _get_background_model(settings, neg_files):
    descriptor = gv.load_descriptor(settings)
    neg_count = settings['detector'].get('train_neg_limit', 50)

    rs = np.random.RandomState(0)

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    rotspread = settings['detector'].get('rotation_spreading_radius', 0)
    cb = settings['detector'].get('crop_border')

    bkg_counts = np.zeros(descriptor.num_features, dtype=int)
    count = 0

    sett = dict(spread_radii=radii, subsample_size=psize,
                rotation_spreading_radius=rotspread, crop_border=cb)

    factors = rs.uniform(0.2, 1.0, size=neg_count)
    argses = [(neg_files[i], descriptor, sett, factors[i]) for i in xrange(neg_count)]
    for feats, c in gv.parallel.starmap_unordered(__process_bkg, argses):
        # Old serial version, kept for reference:
        #for fn in itr.islice(neg_files, neg_count):
        if 0:
            im = gv.img.asgray(gv.img.load_image(fn))

            # Randomly resize
            factor = rs.uniform(0.2, 1.0)
            im = gv.img.resize_with_factor_new(im, factor)
            ag.info(im.shape)

            feats = descriptor.extract_features(im, settings=sett)
            count += np.prod(feats.shape[:2])
            bkg_counts += np.apply_over_axes(np.sum, feats, [0, 1]).ravel()

        count += c
        bkg_counts += feats

    assert count > 0, "Did not find any background images!"

    bkg = bkg_counts.astype(np.float64) / count
    return bkg
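# Sketch of the __process_bkg worker (referenced above, not included in this
# excerpt). It follows the disabled serial branch inside the loop: load a
# negative image, resize it by the given factor, extract features, and return
# the per-feature counts together with the number of feature locations.
def _sketch_process_bkg(fn, descriptor, sett, factor):
    im = gv.img.asgray(gv.img.load_image(fn))
    im = gv.img.resize_with_factor_new(im, factor)
    feats = descriptor.extract_features(im, settings=sett)
    c = np.prod(feats.shape[:2])
    counts = np.apply_over_axes(np.sum, feats, [0, 1]).ravel()
    return counts, c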
def _create_kernel_for_mixcomp(mixcomp, settings, bb, indices, files, neg_files):
    im_size = settings['detector']['image_size']
    size = gv.bb.size(bb)
    orig_size = size

    gen = generate_random_patches(neg_files, size, seed=0)
    descriptor = gv.load_descriptor(settings)

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    rotspread = settings['detector'].get('rotation_spreading_radius', 0)
    duplicates = settings['detector'].get('duplicates', 1)
    cb = settings['detector'].get('crop_border')
    crop_image = settings['detector'].get('crop_image')

    totals = 0
    bkg = None
    kern = None
    alpha_cum = None

    setts = dict(spread_radii=radii, subsample_size=psize,
                 rotation_spreading_radius=rotspread, crop_border=cb)

    counts = 0

    all_b = []
    all_X = []
    all_s = []

    for index in indices:
        ag.info("Processing image of index {0} and mixture component {1}".format(index, mixcomp))
        gray_im, alpha = _load_cad_image(files[index], im_size, bb, crop=crop_image)

        bin_alpha = (alpha > 0.05).astype(np.uint32)

        if alpha_cum is None:
            alpha_cum = bin_alpha
        else:
            alpha_cum += bin_alpha

        for dup in xrange(duplicates):
            neg_im = gen.next()
            neg_feats = descriptor.extract_features(neg_im, settings=setts)

            superimposed_im = neg_im * (1 - alpha) + gray_im * alpha
            feats = descriptor.extract_features(superimposed_im, settings=setts)

            counts += 1

            if bkg is None:
                bkg = neg_feats.astype(np.uint32)
            else:
                bkg += neg_feats

            if kern is None:
                kern = feats.astype(np.uint32)
            else:
                kern += feats

            # NEW TODO: This throws out low-activity negatives
            #if abs(neg_feats.mean() - 0.2) < 0.05:
            #if neg_feats.mean() < 0.05:
            if True:
                all_b.append(neg_feats)
                all_X.append(feats)
                all_s.append(bin_alpha)
                totals += 1

    ag.info('COUNTS', counts)

    np.seterr(divide='raise')

    kern = kern.astype(np.float64) / totals
    bkg = bkg.astype(np.float64) / totals

    support = alpha_cum.astype(np.float64) / len(indices)

    return kern, bkg, orig_size, support
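# Sketch of _load_cad_image, used above and in _process_file_kernel_basis but
# not part of this excerpt. Inferred from its call sites: load an RGBA CAD
# rendering, split off the alpha channel, and return (grayscale image, alpha).
# How `bb` is applied is an assumption; this sketch only crops or resizes to
# the model image size.
def _sketch_load_cad_image(fn, im_size, bb, crop=False):
    color_im = gv.img.load_image(fn)
    alpha = color_im[..., 3]            # assumes an RGBA image, as in _process_file
    gray_im = gv.img.asgray(color_im)
    if crop:
        gray_im = gv.img.crop(gray_im, im_size)
        alpha = gv.img.crop(alpha, im_size)
    else:
        gray_im = gv.img.resize(gray_im, im_size)
        alpha = gv.img.resize(alpha, im_size)
    return gray_im, alpha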
def superimposed_model(settings, threading=True):
    num_mixtures = settings['detector']['num_mixtures']

    # Train a mixture model to get a clustering of the angles of the object
    descriptor = gv.load_descriptor(settings)
    detector = gv.BernoulliDetector(num_mixtures, descriptor, settings['detector'])

    files = get_training_files(detector)
    neg_files = sorted(glob.glob(settings['detector']['neg_dir']))

    ag.info("Checkpoint 1")

    testing_type = detector.settings.get('testing_type')

    # Extract clusters (manual or through EM)
    ##############################################################################
    detector, comps = cluster(detector, files)
    each_mix_N = np.bincount(comps, minlength=num_mixtures)
    ##############################################################################

    ag.info("Checkpoint 3")
    ag.info("Checkpoint 4")

    support = detector.support

    kernels = []

    # Determine bounding boxes
    ##############################################################################
    psize = settings['detector']['subsample_size']

    bbs = calc_bbs(detector)

    ag.info("Checkpoint 6")
    ag.info("Checkpoint 7")

    bkgs = []
    orig_sizes = []
    new_support = []
    im_size = settings['detector']['image_size']

    ag.info("Checkpoint 8")

    all_negs = []

    ag.info("Checkpoint 9")

    # Retrieve features and support
    ##############################################################################
    ag.info('Fetching positives again...')
    all_pos_feats = []
    all_neg_feats = []
    alphas = []
    all_alphas = []
    all_binarized_alphas = []

    if settings['detector'].get('superimpose'):
        detector.extra['concentrations'] = []
        argses = [(m, settings, bbs[m], list(np.where(comps == m)[0]), files, neg_files,
                   settings['detector'].get('stand_multiples', 1))
                  for m in range(detector.num_mixtures)]
        for mixcomp, neg_feats, pos_feats, alpha_maps, extra in itr.starmap(get_pos_and_neg, argses):
            alpha = np.mean(alpha_maps, axis=0)
            alpha_maps = np.asarray(alpha_maps)
            all_alphas.append(alpha_maps)
            all_binarized_alphas.append(alpha_maps > 0.05)

            alphas.append(alpha)
            all_neg_feats.append(neg_feats)
            all_pos_feats.append(pos_feats)
            detector.extra['concentrations'].append(extra.get('concentrations', {}))
        ag.info('Done.')

        # Setup some places to store things
        if 'weights' not in detector.extra:
            detector.extra['weights'] = [None] * detector.num_mixtures
        if 'sturf' not in detector.extra:
            detector.extra['sturf'] = [{} for _ in xrange(detector.num_mixtures)]

        for m in xrange(detector.num_mixtures):
            detector.extra['sturf'].append(dict())

            obj = all_pos_feats[m].mean(axis=0)
            bkg = all_neg_feats[m].mean(axis=0)
            size = gv.bb.size(bbs[m])

            kernels.append(obj)
            bkgs.append(bkg)
            orig_sizes.append(size)
            new_support.append(alphas[m])

        if 0:
            # Older weight construction, kept disabled (`avg` is never bound here).
            for m in xrange(detector.num_mixtures):
                obj = all_pos_feats[m].mean(axis=0)
                bkg = all_neg_feats[m].mean(axis=0)
                size = gv.bb.size(bbs[m])

                eps = 0.025
                obj = np.clip(obj, eps, 1 - eps)
                avg = np.clip(avg, eps, 1 - eps)
                #w = np.clip(np.log(obj / avg), -1, 1)
                w = np.log(obj / (1 - obj) * ((1 - avg) / avg))
                #w_avg = np.apply_over_axes(np.sum, w * support[...,np.newaxis], [0, 1]) / support.sum()
                #w -= w_avg * support[...,np.newaxis]

                if 'weights' not in detector.extra:
                    detector.extra['weights'] = []
                detector.extra['weights'].append(w)

                if 'sturf' not in detector.extra:
                    detector.extra['sturf'] = []
                detector.extra['sturf'].append(dict())

                kernels.append(obj)
                bkgs.append(bkg)
                orig_sizes.append(size)
                new_support.append(alphas[m])

        detector.settings['per_mixcomp_bkg'] = True
    else:
        # Get a single background model for this one
        bkg = _get_background_model(settings, neg_files)

        crop_image = detector.settings.get('crop_image')

        argses = [(m, settings, list(np.where(comps == m)[0]), files, crop_image)
                  for m in range(detector.num_mixtures)]
        for m, pos_feats in gv.parallel.starmap(get_positives, argses):
            obj = pos_feats.mean(axis=0)
            all_pos_feats.append(pos_feats)

            kernels.append(obj)
            bkgs.append(bkg)
            size = gv.bb.size(bbs[m])
            orig_sizes.append(size)
            support = np.ones(settings['detector']['image_size'])
            new_support.append(support)

        detector.settings['per_mixcomp_bkg'] = True  # False

    # Get weights and support
    for m in xrange(detector.num_mixtures):
        obj = all_pos_feats[m].mean(axis=0)
        bkg = all_neg_feats[m].mean(axis=0)

        if detector.eps is None:
            detector.prepare_eps(bkg)

        weights = detector.build_clipped_weights(obj, bkg, detector.eps)
        detector.extra['weights'][m] = weights

        detector.extra['sturf'][m]['support'] = arrange_support(alphas[m], weights.shape, psize)

    # Modify weights
    if not detector.settings.get('plain'):
        for m in xrange(detector.num_mixtures):
            weights = detector.extra['weights'][m]

            F = detector.num_features

            indices = get_key_points(weights,
                                     suppress_radius=detector.settings.get('indices_suppress_radius', 4),
                                     even=True)

            L0 = indices.shape[0] // F

            kp_weights = np.zeros((L0, F))

            M = np.zeros(weights.shape, dtype=np.uint8)
            counts = np.zeros(F, dtype=int)  # int: used as fill indices below
            for index in indices:
                f = index[2]
                M[tuple(index)] = 1
                kp_weights[counts[f], f] = weights[tuple(index)]
                counts[f] += 1

            support = detector.extra['sturf'][m]['support']

            pos, neg = all_pos_feats[m].astype(bool), all_neg_feats[m].astype(bool)
            diff = pos ^ neg
            appeared = pos & ~neg
            disappeared = ~pos & neg

            A = appeared.mean(0) / (0.00001 + (1 - neg).mean(0))
            D = disappeared.mean(0) / (0.00001 + neg.mean(0))

            ss = support[..., np.newaxis]
            B = np.apply_over_axes(np.mean, A * ss, [0, 1]).squeeze() / ss.mean()

            def clogit(x):
                return gv.logit(gv.bclip(x, 0.025))

            def find_zero(fun, l, u, depth=30):
                m = np.mean([l, u])
                if depth == 0:
                    return m
                v = fun(m)
                if v > 0:
                    return find_zero(fun, l, m, depth - 1)
                else:
                    return find_zero(fun, m, u, depth - 1)

            # Now construct weights from these deltas
            avg = np.apply_over_axes(np.mean, pos * M * ss, [1, 2]) / (ss * M).mean()

            if 0:
                # Per-location zero-crossing fit (disabled)
                for l0, l1, f in gv.multirange(*weights.shape):
                    def fun(w):
                        return -(np.clip(pos[:, l0, l1, f].mean(), 0.005, 0.995) -
                                 np.mean(expit(w + logit(avg[..., f]))))
                    weights[l0, l1, f] = find_zero(fun, -10, 10)

            if 1:
                # Print these to file
                from matplotlib.pylab import cm
                grid = gv.plot.ImageGrid(detector.num_features, 1, weights.shape[:2],
                                         border_color=(0.5, 0.5, 0.5))
                mm = np.fabs(weights).max()
                for f in xrange(detector.num_features):
                    grid.set_image(weights[..., f], f, 0, vmin=-mm, vmax=mm, cmap=cm.RdBu_r)
                fn = os.path.join(os.path.expandvars('$HOME'), 'html', 'plots', 'plot2.png')
                grid.save(fn, scale=10)
                os.chmod(fn, 0644)

            weights1 = ss * (weights - np.apply_over_axes(np.mean, weights * ss, [0, 1]) / ss.mean())
            detector.extra['sturf'][m]['weights1'] = weights1

            eps = 0.025
            avg_pos = (np.apply_over_axes(np.mean, pos * ss, [0, 1, 2]) / ss.mean()).squeeze().clip(eps, 1 - eps)
            avg_neg = (np.apply_over_axes(np.mean, neg * ss, [0, 1, 2]) / ss.mean()).squeeze().clip(eps, 1 - eps)

            w_avg = logit(avg_pos) - logit(avg_neg)
            detector.extra['sturf'][m]['wavg'] = w_avg
            detector.extra['sturf'][m]['reweighted'] = (w_avg * support[..., np.newaxis]).squeeze()

            if 0:
                weights *= support[..., np.newaxis]
                avg_weights = np.apply_over_axes(np.mean, weights, [0, 1]) / M.mean(0).mean(0)
                avg_w = kp_weights.mean(0)
                weights -= avg_w - (-kp_weights.var(0) / 2)
                weights *= support[..., np.newaxis]
                print((weights * M).mean(0))

            weights -= w_avg * support[..., np.newaxis]

            F = detector.num_features

            if 0:
                # Per-feature beta correction (disabled)
                for f in xrange(F):
                    zz = np.random.normal(-1.5, size=(1, 1, 50)).ravel()
                    betas = np.zeros(len(zz))
                    for i, z in enumerate(zz):
                        def fun(beta):
                            w = weights[..., f] - beta * support
                            return np.log(1 - expit(w[..., np.newaxis] + z)).mean() - np.log(1 - expit(z))
                        betas[i] = find_zero(fun, -10, 10)

                    if f == 0:
                        np.save('betas.npy', betas)

                    beta0 = betas.mean()
                    print(f, beta0, betas.std())
                    weights[..., f] -= beta0 * support

            if 1:
                # Print these to file
                from matplotlib.pylab import cm
                grid = gv.plot.ImageGrid(detector.num_features, 2, weights.shape[:2],
                                         border_color=(0.5, 0.5, 0.5))
                mm = np.fabs(weights).max()
                for f in xrange(detector.num_features):
                    grid.set_image(weights[..., f], f, 0, vmin=-mm, vmax=mm, cmap=cm.RdBu_r)
                    grid.set_image(M[..., f], f, 1, vmin=0, vmax=1, cmap=cm.RdBu_r)
                fn = os.path.join(os.path.expandvars('$HOME'), 'html', 'plots', 'plot.png')
                grid.save(fn, scale=10)
                os.chmod(fn, 0644)

            ag.info('sum', np.fabs(np.apply_over_axes(np.sum, weights, [0, 1])).sum())

            # Instead, train model rigorously!!
            detector.extra['sturf'][m]['pos'] = all_pos_feats[m]
            detector.extra['sturf'][m]['neg'] = all_neg_feats[m]

            # Averages of all positives
            ff = all_pos_feats[m]
            posavg = np.apply_over_axes(np.sum, all_pos_feats[m] * support[..., np.newaxis], [1, 2]).squeeze() / support.sum()
            negavg = np.apply_over_axes(np.sum, all_neg_feats[m] * support[..., np.newaxis], [1, 2]).squeeze() / support.sum()

            S = np.cov(posavg.T)
            Sneg = np.cov(negavg.T)

            detector.extra['sturf'][m]['pavg'] = avg_pos
            detector.extra['sturf'][m]['pos-samples'] = posavg
            detector.extra['sturf'][m]['S'] = S
            detector.extra['sturf'][m]['Sneg'] = Sneg
            detector.extra['sturf'][m]['navg'] = avg_neg

            Spos = S
            rs = np.random.RandomState(0)
            detector.extra['sturf'][m]['Zs'] = rs.multivariate_normal(avg_neg, Sneg, size=1000).clip(min=0.005, max=0.995)
            detector.extra['sturf'][m]['Zs_pos'] = rs.multivariate_normal(avg_pos, Spos, size=1000).clip(min=0.005, max=0.995)
            detector.extra['sturf'][m]['Zs_pos2'] = rs.multivariate_normal(avg_pos, Spos * 2, size=1000).clip(min=0.005, max=0.995)
            detector.extra['sturf'][m]['Zs_pos10'] = rs.multivariate_normal(avg_pos, Spos * 10, size=1000).clip(min=0.005, max=0.995)
            detector.extra['sturf'][m]['Zs_pos50'] = rs.multivariate_normal(avg_pos, Spos * 50, size=1000).clip(min=0.005, max=0.995)

    #{{{
    if 0:
        argses = [(m, settings, bbs[m], np.where(comps == m)[0], files, neg_files)
                  for m in xrange(detector.num_mixtures)]
        for kern, bkg, orig_size, sup in gv.parallel.starmap(_create_kernel_for_mixcomp, argses):
            kernels.append(kern)
            bkgs.append(bkg)
            orig_sizes.append(orig_size)
            new_support.append(sup)
            ag.info("Checkpoint 10")

        detector.settings['per_mixcomp_bkg'] = True
    #}}}

    detector.kernel_templates = kernels
    detector.kernel_sizes = orig_sizes
    detector.settings['kernel_ready'] = True
    detector.use_alpha = False
    detector.support = new_support

    # Determine the background
    ag.info("Determining background")
    detector.fixed_bkg = None
    detector.fixed_spread_bkg = bkgs

    detector.settings['bkg_type'] = 'from-file'
    detector._preprocess()
    detector.prepare_eps(detector.fixed_spread_bkg[0])

    # Determine the standardization values
    ag.info("Determining standardization values")

    # Determine indices for coarse detection sweep
    if INDICES:
        detector.indices = []

        for m in xrange(detector.num_mixtures):
            these_indices = []
            weights = detector.extra['weights'][m]
            ag.info('Indices:', np.prod(weights.shape))

            # If not plain, we need even keypoints
            even = not detector.settings.get('plain')
            indices = get_key_points(weights,
                                     suppress_radius=detector.settings.get('indices_suppress_radius', 4),
                                     even=even)

            if not detector.settings.get('plain'):
                detector.extra['weights'][m] = weights

            assert len(indices) > 0, "No indices were extracted when keypointing"

            detector.indices.append(indices)
    else:
        detector.indices = None

    if testing_type in ('fixed', 'non-parametric'):
        detector.standardization_info = []
        if testing_type == 'fixed':
            if detector.settings.get('standardize_with_samples'):
                # Placeholder standardization while farming negatives; replaced below
                detector.standardization_info = [dict(mean=0, std=1)] * detector.num_mixtures
                info = []
                source = detector.settings.get('standardize_negative_source', 'neg-dir')
                N = detector.settings.get('standardize_num_images', 50)

                if source.startswith('voc-train-non-'):
                    obj_class = source.split('-')[-1]
                    print('Taking negatives from voc train, without class', obj_class)
                    gen = gv.voc.gen_negative_files(obj_class, 'train')
                else:
                    print('Taking negatives from neg_dir')
                    gen = itr.cycle(gv.datasets.ImgFile(path=fn, img_id=os.path.basename(fn))
                                    for fn in neg_files)

                gen = itr.cycle(gen)
                gen = itr.islice(gen, N)
                gens = itr.tee(gen, detector.num_mixtures)

                th = -np.inf
                for m in xrange(detector.num_mixtures):
                    neg_files_segment = gens[m]
                    argses = [(detector, i, fileobj, th, m)
                              for i, fileobj in enumerate(neg_files_segment)]
                    topsy = list(gv.parallel.starmap_unordered(get_strong_fps_single, argses))
                    confs = np.asarray([bbobj.confidence
                                        for topsy_m in topsy for bbobj in topsy_m])
                    info.append(dict(mean=confs.mean(), std=confs.std()))

                detector.standardization_info = info
            else:
                argses = [(m, settings, detector.eps, bbs[m], kernels[m], bkgs[m],
                           None, None, None,
                           detector.indices[m] if INDICES else None, 3)
                          for m in xrange(detector.num_mixtures)]
                detector.standardization_info = list(
                    gv.parallel.starmap(_calc_standardization_for_mixcomp, argses))
    else:
        raise Exception("Unknown testing type")

    detector.settings['testing_type'] = testing_type
    #detector.settings['testing_type'] = 'NEW'

    #
    # Data mine stronger negatives
    #
    # TODO: Object class must be input
    if 1:
        contest = 'voc'
        obj_class = 'car'
        gen = gv.voc.gen_negative_files(obj_class, 'train')
    else:
        contest = 'custom-tmp-frontbacks'
        obj_class = 'bicycle'
        gen, tot = gv.datasets.load_files(contest, obj_class)

    import heapq
    top_bbs = [[] for k in xrange(detector.num_mixtures)]
    TOP_N = 10000

    if detector.settings.get('cascade'):  # New SVM attempt
        detector.extra['cascade_threshold'] = detector.settings.get('cascade_threshold', 8)
        COUNT = detector.settings.get('cascade_farming_count', 500)

        args = itr.izip(
            itr.repeat(detector),
            xrange(COUNT),
            itr.islice(gen, COUNT))

        for res in gv.parallel.starmap_unordered(get_strong_fps, args):
            for m in xrange(detector.num_mixtures):
                top_bbs[m].extend(res[m])

        ag.info('- TOPS ------')
        ag.info(map(np.shape, top_bbs))
        detector.extra['top_bbs_shape'] = map(np.shape, top_bbs)

        # Save the strong negatives
        detector.extra['negs'] = top_bbs

        def phi(X, mixcomp):
            if SVM_INDICES and 0:
                indices = detector.indices2[mixcomp][0]
                return X.ravel()[np.ravel_multi_index(indices.T, X.shape)]
            else:
                return X.ravel()

        all_neg_X0 = []
        for k in xrange(detector.num_mixtures):
            all_neg_X0.append(np.asarray(map(lambda bbobj: phi(bbobj.X, k), top_bbs[k])))
        del top_bbs

        all_pos_X0 = []
        for mixcomp, pos_feats in enumerate(all_pos_feats):
            all_pos_X0.append(np.asarray(map(lambda X: phi(X, mixcomp), pos_feats)))

        ag.info('Done.')
        detector.extra['poss'] = all_pos_feats

        ag.info('Training SVMs...')
        # Train SVMs
        from sklearn.svm import LinearSVC, SVC
        clfs = []
        detector.indices2 = None  # not [] for now

        detector.extra['svms'] = []
        for m in xrange(detector.num_mixtures):
            X = np.concatenate([all_pos_X0[m], all_neg_X0[m]])
            ag.info(m, ':', X.shape)
            y = np.concatenate([np.ones(len(all_pos_feats[m])), np.zeros(len(all_neg_X0[m]))])

            C = 1.0
            clf = SVC(C=C, kernel='linear')
            clf.fit(X, y)

            svm_info = dict(intercept=float(clf.intercept_), weights=clf.coef_)
            detector.extra['svms'].append(svm_info)

        ag.info('Done.')

        # Remove negatives and positives from extra, since it takes space
        if 1:
            del detector.extra['poss']
            del detector.extra['negs']

    ag.info('extra')
    ag.info(detector.extra.keys())
    ag.info('eps', detector.eps)

    if 'weights' in detector.extra:
        ag.info(detector.standardization_info)
        ag.info('corner2', detector.extra['weights'][0][0, 0, :5])

    return detector
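# Usage sketch: superimposed_model is the main training entry point in this
# excerpt. A driver might look like the following; the YAML settings file and
# its loading are assumptions (only the dict keys read above are required).
def _demo_superimposed_model(settings_file):
    import yaml
    with open(settings_file) as f:
        settings = yaml.safe_load(f)
    # Produces a trained gv.BernoulliDetector with kernels, background models,
    # standardization info and (optionally) cascade SVMs attached.
    return superimposed_model(settings, threading=True)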
def _process_file_kernel_basis(seed, mixcomp, settings, bb, filename, bkg_stack, bkg_stack_num):
    ag.info("Processing file", filename)

    im_size = settings['detector']['image_size']
    size = gv.bb.size(bb)

    # Use the same seed for all mixture components! That will make them easier
    # to compare, without having to sample to infinity.

    # HERE: Make it possible to input data directly!
    descriptor = gv.load_descriptor(settings)
    part_size = descriptor.settings['part_size']

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    rotspread = settings['detector'].get('rotation_spreading_radius', 0)
    crop_image = settings['detector'].get('crop_image')
    cb = settings['detector'].get('crop_border')

    #sh = (size[0] // psize[0], size[1] // psize[1])
    sh = gv.sub.subsample_size_new((size[0] - 4, size[1] - 4), psize)

    all_pos_feats = []

    F = descriptor.num_features
    # No coding is also included
    counts = np.zeros(sh + (F + 1, F), dtype=np.int64)
    empty_counts = np.zeros((F + 1, F), dtype=np.int64)
    totals = 0

    sett = dict(spread_radii=radii, subsample_size=psize,
                rotation_spreading_radius=rotspread, crop_border=cb)

    alpha_maps = []

    gray_im, alpha = _load_cad_image(filename, im_size, bb, crop=crop_image)

    pad = (radii[0] + 2, radii[1] + 2)
    padded_gray_im = ag.util.zeropad(gray_im, pad)
    padded_alpha = ag.util.zeropad(alpha, pad)

    dups = 5
    X_pad_size = (part_size[0] + pad[0] * 2, part_size[1] + pad[1] * 2)

    bkgs = np.empty(((F + 1) * dups,) + X_pad_size)

    rs = np.random.RandomState(seed)

    for f in xrange(F + 1):
        num = bkg_stack_num[f]
        for d in xrange(dups):
            bkg_i = rs.randint(num)
            bkgs[f * dups + d] = bkg_stack[f, bkg_i]

    # Do it with no superimposed image, to see what happens to pure background
    img_with_bkgs = bkgs
    parts = np.asarray([descriptor.extract_features(im, settings=sett)[0, 0]
                        for im in img_with_bkgs])

    for f in xrange(F + 1):
        hist = parts[f * dups:(f + 1) * dups].sum(0)
        empty_counts[f] += hist

    for i, j in itr.product(xrange(sh[0]), xrange(sh[1])):
        selection = [slice(i * psize[0], i * psize[0] + X_pad_size[0]),
                     slice(j * psize[1], j * psize[1] + X_pad_size[1])]

        patch = padded_gray_im[selection]
        alpha_patch = padded_alpha[selection]

        patch = patch[np.newaxis]
        alpha_patch = alpha_patch[np.newaxis]

        img_with_bkgs = patch * alpha_patch + bkgs * (1 - alpha_patch)

        parts = np.asarray([descriptor.extract_features(im, settings=sett)[0, 0]
                            for im in img_with_bkgs])

        for f in xrange(F + 1):
            hist = parts[f * dups:(f + 1) * dups].sum(0)
            counts[i, j, f] += hist

        totals += dups

    #support = alpha_maps.mean(axis=0)

    return counts, empty_counts, totals
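# Sketch of how the returned histograms could be normalized into a conditional
# part-distribution model; the original normalization step is not part of this
# excerpt. Each (i, j, f) cell accumulates `dups` samples per file, so after
# summing the per-file outputs:
def _sketch_normalize_kernel_basis(counts, empty_counts, totals, num_locations):
    samples_per_cell = totals // num_locations   # equals dups * num_files
    theta = counts.astype(np.float64) / samples_per_cell
    empty_theta = empty_counts.astype(np.float64) / samples_per_cell
    return theta, empty_theta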
def background_adjust_model(settings, bkg_stack, bkg_stack_num, seed=0, threading=True):
    offset = settings['detector'].get('train_offset', 0)
    limit = settings['detector'].get('train_limit')
    num_mixtures = settings['detector']['num_mixtures']
    assert limit is not None, "Must specify limit in the settings file"
    duplicates = settings['detector'].get('duplicates', 1)
    files = sorted(glob.glob(settings['detector']['train_dir']))[offset:offset + limit]

    # Train a mixture model to get a clustering of the angles of the object
    descriptor = gv.load_descriptor(settings)

    if 0:
        detector = gv.BernoulliDetector(num_mixtures, descriptor, settings['detector'])
        detector.train_from_images(files)

        plt.clf()
        ag.plot.images(detector.support)
        plt.savefig('output/components.png')

        comps = detector.mixture.mixture_components()
        each_mix_N = np.bincount(comps, minlength=num_mixtures)

    comps = np.zeros(len(files))

    argses = [(settings, bkg_stack, bkg_stack_num, files[i], comps[i])
              for i in xrange(len(files))]

    # Iterate images
    all_counts = gv.parallel.imap(_process_file_star, argses)
    counts = sum(all_counts)

    # NOTE: `detector` and `each_mix_N` are only bound inside the disabled
    # block above, so this return raises NameError as written.
    return counts, each_mix_N * duplicates, detector.support, detector.mixture
def get_bkg_stack(settings, X_pad_size, M=20):
    descriptor = gv.load_descriptor(settings)
    bsettings = settings['edges'].copy()
    radius = bsettings['radius']
    bsettings['radius'] = 0

    descriptor_name = settings['detector']['descriptor']
    #neg_filenames = sorted(glob.glob(os.path.join(os.environ['UIUC_DIR'], 'TrainImages', 'neg-*.pgm')))
    neg_filenames = sorted(glob.glob(os.path.expandvars(settings[descriptor_name]['image_dir']))) * 100

    gen_raw = generate_random_patches(neg_filenames, X_pad_size, 0, per_image=25)

    print descriptor.num_parts

    # int dtype: these are used as per-part fill indices below
    bkg_stack_num = np.zeros(descriptor.num_parts + 1, dtype=int)
    bkg_stack = np.zeros((descriptor.num_parts + 1, M,) + X_pad_size)

    psize = settings['detector']['subsample_size']
    radii = settings['detector']['spread_radii']
    sett = dict(subsample_size=psize, spread_radii=radii)

    i = 0
    N = 100000
    for patch in gen_raw:
        # Older edge-coding path, disabled (`pad` was never defined here):
        #edges = ag.features.bedges(patch, **bsettings)
        #X_pad_spread = ag.features.bspread(edges, spread=bsettings['spread'], radius=radius)
        #padding = pad - 2
        #X_spread = X_pad_spread[padding:-padding, padding:-padding]
        #parts = descriptor.extract_parts(X_spread.astype(np.uint8), edges, settings=sett)
        parts = descriptor.extract_features(patch, settings=sett)

        # Accumulate and return
        if parts[0, 0].sum() == 0:
            f = 0
        else:
            f = np.argmax(parts[0, 0]) + 1

        # The i % 10 is to avoid all background images for f=0 being from the
        # same image (and thus likely overlapping patches)
        if bkg_stack_num[f] < M and (f != 0 or i % 10 == 0):
            bkg_stack[f, bkg_stack_num[f]] = patch
            bkg_stack_num[f] += 1

        if i % 10000 == 0:
            print i, bkg_stack_num

        if bkg_stack_num.min() == M:
            break

        i += 1
        if i == N:
            break

    assert i != 0, "No images found"

    return bkg_stack, bkg_stack_num
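# Usage sketch tying get_bkg_stack and background_adjust_model together: build
# the per-part background-patch stack, then run the background adjustment over
# the training images. The pad arithmetic mirrors _process_file below
# (pad = 5 around the part size); the settings layout is assumed.
def _demo_background_adjust(settings):
    descriptor_name = settings['detector']['descriptor']
    part_size = settings[descriptor_name]['part_size']
    pad = 5
    X_pad_size = (part_size[0] + pad * 2, part_size[1] + pad * 2)

    bkg_stack, bkg_stack_num = get_bkg_stack(settings, X_pad_size, M=20)
    return background_adjust_model(settings, bkg_stack, bkg_stack_num, seed=0)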
def background_adjust_model(settings, bkg, seed=0):
    offset = settings['detector'].get('train_offset', 0)
    limit = settings['detector'].get('train_limit')
    files = sorted(glob.glob(settings['detector']['train_dir']))[offset:limit]
    # * settings['detector'].get('duplicate', 1)

    try:
        detector = gv.Detector.load(settings['detector']['file'])
    except KeyError:
        raise Exception("Need to train the model first")

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    sh = (28, 88)

    # Create accumulates for each mixture component
    # TODO: Temporary until multicomp
    #counts = np.zeros_like(detector.kernel_templates)
    counts = np.zeros((1, sh[0], sh[1], descriptor.num_parts))

    num_files = len(files)
    num_duplicates = settings['detector'].get('duplicate', 1)

    # Create several random states, so it's easier to measure
    # the influence of certain features
    prnds = [np.random.RandomState(seed + i) for i in xrange(10)]

    # Setup unspread bedges settings
    bsettings = settings['edges'].copy()
    radius = bsettings['radius']
    bsettings['radius'] = 0

    padding = radius

    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])

    padded_theta = descriptor.unspread_parts_padded
    X_pad_size = padded_theta.shape[1:3]

    for fn in files:
        ag.info("Processing file", fn)

        # Which mixture component does this image belong to?
        # TODO: Temporary until multicomp
        mixcomp = 0  #np.argmax(detector.affinities

        # Binarize support and extract alpha
        color_img, alpha = gv.img.load_image_binarized_alpha(fn)
        img = gv.img.asgray(color_img)

        alpha_pad = ag.util.zeropad(alpha, padding)
        inv_alpha_pad_expanded = np.expand_dims(~alpha_pad, -1)

        # Iterate every duplicate
        for loop in xrange(num_duplicates):
            ag.info("Iteration {0}/{1}".format(loop + 1, num_duplicates))

            # Superimpose onto gray background
            graymap = create_graymap(img.shape, loop / (num_duplicates - 1), prnds[0])

            # Composite
            img_with_gray = composite(img, graymap, alpha)

            # Retrieve unspread edges (with a given background gray level)
            edges = ag.features.bedges(img_with_gray, **bsettings)

            # Pad the edges
            edges_pad = ag.util.zeropad(edges, (padding, padding, 0))

            # Iterate over all locations
            for i, j in product(locations0, locations1):
                selection = [slice(i, i + X_pad_size[0]), slice(j, j + X_pad_size[1])]
                X_pad = edges_pad[selection].copy()
                nA_pad = inv_alpha_pad_expanded[selection]

                # Draw background part from categorical distribution
                f_bkg = weighted_choice_unit(bkg, prnds[1])
                probs_bkg = get_probs(padded_theta, f_bkg)
                probs = nA_pad * probs_bkg

                # Draw from background edge probability over ~alpha
                X_pad |= (prnds[2].rand(*probs.shape) < probs)

                # Do spreading
                X_pad_spread = ag.features.bspread(X_pad, spread=bsettings['spread'], radius=radius)

                # De-pad
                X_spread = X_pad_spread[padding:-padding, padding:-padding]

                # Code parts
                parts = descriptor.extract_parts(X_spread.astype(np.uint8))

                # Accumulate and return
                counts[mixcomp, i, j] += parts[0, 0]

    """
    if 0:
        from multiprocessing import Pool
        p = Pool(7)
        mapf = p.map
    else:
        mapf = map

    def _process_file(fn):
        return _process_file_full(fn, sh, descriptor, detector)

    # Iterate images
    all_counts = mapf(_process_file, files)

    for counti in all_counts:
        counts += counti
    """

    # Divide accumulate to get new distribution
    counts /= num_files * num_duplicates

    # Create a new model, with this distribution
    new_detector = detector.copy()
    new_detector.kernel_templates = counts
    new_detector.support = None
    new_detector.use_alpha = False

    # Return model
    return new_detector
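# Sketches of small helpers used above and in correct_model but not included
# in this excerpt. The semantics are inferred from the call sites; treat these
# as assumptions, not the original implementations.
def _sketch_weighted_choice_unit(p, randgen):
    """Draw an index from sub-normalized categorical weights p (sum <= 1);
    return -1 with the leftover probability mass (callers test for -1)."""
    u = randgen.rand()
    cum = np.cumsum(np.asarray(p).ravel())
    k = int(np.searchsorted(cum, u, side='right'))
    return k if k < len(cum) else -1

def _sketch_get_probs(theta, f):
    """Per-pixel edge probabilities for part f; zeros when f == -1 (no part)."""
    if f == -1:
        return np.zeros(theta.shape[1:])
    return theta[f]

def _sketch_composite(fg, bg, alpha):
    """Alpha-blend foreground over background, mirroring the superimposition
    expressions used elsewhere in this file."""
    return fg * alpha + bg * (1 - alpha)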
def arrange_model(pos, settings, config, offset=None, mods=None):
    if offset is None:
        offset = settings['detector'].get('train_offset', 0)

    limit = settings['detector'].get('train_limit')
    if limit is not None:
        limit += offset

    nospreading = config.startswith('cor')

    files = sorted(glob.glob(settings['detector']['train_dir']))[offset:limit] * settings['detector'].get('duplicate', 1)

    def _load(fn):
        return load_and_crop(fn, pos)

    alpha_and_images = map(_load, files)

    if alpha_and_images[0][0] is None:
        alpha = None
        all_alphas = None
        # Defined here so the return dict below does not hit a NameError
        alpha_padded = None
        alphas_padded = None
    else:
        all_alphas = np.asarray(map(itemgetter(0), alpha_and_images))
        side = 9 + PAD * 2
        alpha_padded = all_alphas[:, 2:-2, 2:-2].mean(axis=0)
        alphas_padded = all_alphas[:, 2:-2, 2:-2]
        alpha = all_alphas[:, side//2 - 4:side//2 + 1 + 4, side//2 - 4:side//2 + 1 + 4].mean(axis=0)

    images = np.asarray(map(itemgetter(1), alpha_and_images))

    size = (9 + PAD * 2,) * 2

    if config.startswith('bkg'):
        seed = int(config[3:])
        neg_gen = generate_random_patches(neg_filenames, size, seed=seed)
        for i in xrange(len(images)):
            # Replace the image with a pure negative patch
            images[i] = neg_gen.next()
    elif config.startswith('sup'):
        seed = int(config[3:])
        neg_gen = generate_random_patches(neg_filenames, size, seed=seed)
        for i in xrange(len(images)):
            # Superimpose it onto the negative patch
            images[i] = composite(images[i], neg_gen.next(), all_alphas[i])
    elif config == 'none' or config.startswith('cor'):
        # Add gray background
        D = settings['detector'].get('duplicate', 1)
        c = 0
        for i in xrange(len(images) // D):
            for j in xrange(D):
                gray = np.ones_like(images[c]) * j / (D - 1)
                gray = np.clip(gray + np.random.randn(*gray.shape) * 0.0001, 0, 1)
                images[c] = composite(images[c], gray, all_alphas[c])
                c += 1
    else:
        raise ValueError("Unknown config: {0}".format(config))

    setts = settings['edges'].copy()
    if nospreading:
        setts['radius'] = 0
        all_edges_unspread = ag.features.bedges(images, **setts)
        edge_patch_unspread = all_edges_unspread[:, BUF:-BUF, BUF:-BUF].astype(np.bool)
    else:
        edge_patch_unspread = None

    all_edges = ag.features.bedges(images, **settings['edges'])

    descriptor = gv.load_descriptor(settings)

    edge_patch = all_edges[:, BUF:-BUF, BUF:-BUF]

    feats = np.asarray(map(descriptor.extract_parts, edge_patch))

    return {
        'settings': settings,
        'theta': feats[:, 0, 0].mean(axis=0),
        'alpha': alpha,
        'alpha_padded': alpha_padded,
        'alphas_padded': alphas_padded,
        'edges': edge_patch.astype(np.bool),
        'edges_unspread': edge_patch_unspread,
    }
def correct_model(model, bkg=None, model_bkg=None, seed=0, mods=None):
    settings = model['settings']
    feats = model['theta']
    alpha = model['alpha']
    alpha_padded = model['alpha_padded']

    descriptor = gv.load_descriptor(settings)

    N = settings['detector']['train_limit'] * settings['detector']['duplicate']

    num_features = feats.size
    part_counts = np.zeros(num_features)
    num_edges = 4

    USE_UNSPREAD = True

    if USE_UNSPREAD:
        #edges = model['edges_unspread']
        edges = model['edges']
    else:
        edges = model['edges']

    if alpha is None:
        alpha = np.ones((9, 9))

    p_alpha = alpha
    p_alpha_padded = alpha_padded
    p_kernel = feats

    if bkg is not None:
        good_back = p_back = bkg
    else:
        good_back = p_back = np.load('bkg2_nospread.npy')

    Xs = []

    neg_gen = generate_random_patches(neg_filenames, (9 + PAD * 2, 9 + PAD * 2), seed=seed)

    if USE_UNSPREAD:
        padding = settings['edges']['radius']
        #padded_theta = ag.util.border_value_pad(descriptor.unspread_parts, (0, padding, padding, 0))
        padded_theta = descriptor.unspread_parts_padded
    else:
        theta = descriptor.parts

    cumX = None
    IN = 10  # Inner loop

    FIXED_OBJ = True

    for loop in xrange(N):
        randgen = np.random.RandomState(seed + loop)
        randgen2 = np.random.RandomState(seed + loop + 4)
        randgen3 = np.random.RandomState(seed + loop + 23)
        randgen4 = np.random.RandomState(seed + loop + 100)
        randgen5 = np.random.RandomState(seed + loop + 231)
        randgen6 = np.random.RandomState(seed + loop + 232)
        randgen7 = np.random.RandomState(seed + loop + 232)

        for inner_loop in xrange(IN):
            if not FIXED_OBJ:
                f_obj = weighted_choice_unit(p_kernel, randgen)
                probs_obj = get_probs(theta, f_obj)

            parts = descriptor.extract_parts(edges[loop].astype(np.uint8))[0, 0]
            if parts.sum() > 0:
                f_obj = np.argmax(parts)
            else:
                f_obj = -1

            f_bkg = weighted_choice_unit(good_back, randgen)
            if USE_UNSPREAD:
                probs_bkg = get_probs(padded_theta, f_bkg)
            else:
                probs_bkg = get_probs(theta, f_bkg)

            # Draw from the alpha
            if USE_UNSPREAD:
                A = model['alphas_padded'][loop]
            else:
                A = (randgen2.rand() < p_alpha)

            if FIXED_OBJ:
                if not USE_UNSPREAD:
                    Ab = np.tile(np.expand_dims(A, -1), 4)
                    Ab = ~ag.features.bspread(~Ab, spread=settings['edges']['spread'],
                                              radius=settings['edges']['radius']).astype(np.bool)
                    AA = Ab
                else:
                    AA = np.expand_dims(A, -1).astype(np.bool)

            if USE_UNSPREAD:
                #AApad = ag.util.border_value_pad(AA, (padding, padding, 0))
                AApad = AA

            if FIXED_OBJ:
                if USE_UNSPREAD:
                    probs_mixed = ~AApad * probs_bkg
                else:
                    probs_mixed = ~AA * probs_bkg
            else:
                probs_mixed = AA * probs_obj + ~AA * probs_bkg

            if not FIXED_OBJ:
                X = (randgen3.rand(*probs_mixed.shape) < probs_mixed)
            else:
                if USE_UNSPREAD:
                    X = np.zeros((9 + padding * 2, 9 + padding * 2, 4), dtype=np.bool)
                else:
                    X = np.zeros(edges.shape[1:], dtype=np.bool)
                X0 = X.copy()

                if 0:
                    Y = model_bkg['edges'][loop]
                    X |= ~AA & Y

                    # What f_bkg is this?
                    f_bkg = np.argmax(descriptor.extract_parts(Y.astype(np.uint8))[0, 0])
                elif f_bkg != -1:
                    # Draw samples from the mixture components
                    X |= (randgen3.rand(*X.shape) < probs_mixed)

                X0 = X.copy()

            # Draw which blackout/in component
            if 0:
                f_comp = weighted_choice_unit(bm.weights, randgen6)
                assert f_comp >= 0
                blackout = bm.templates[f_comp, 0]
                blackin = bm.templates[f_comp, 1]
            elif 0 and f_bkg != -1:
                blackout = mods[f_bkg, 0]
                blackin = mods[f_bkg, 1]

                mask = ~(randgen4.rand(*X.shape) < blackout)
                mask2 = (randgen5.rand(*X.shape) < blackin)

                Xmask2 = ~X & mask2
                X &= mask
                X |= Xmask2

            if loop == 0:
                plt.clf()
                ag.plot.images(np.rollaxis(X, 2))
                plt.savefig('outs/pre-{0}.png'.format(inner_loop))

            # Now, do edge spreading!
            if USE_UNSPREAD:
                X = ag.features.bspread(X, spread=settings['edges']['spread'],
                                        radius=settings['edges']['radius'])
                # Now, take the window
                X = X[padding:-padding, padding:-padding]

            X |= edges[loop]

            if loop == 0:
                plt.clf()
                ag.plot.images(np.rollaxis(X, 2))
                plt.savefig('outs/post-{0}.png'.format(inner_loop))

            if cumX is None:
                cumX = X.astype(int)
            else:
                cumX += X

            parts = descriptor.extract_parts(X.astype(np.uint8))[0, 0]
            if parts.sum() > 0:
                f_res = np.argmax(parts)
            else:
                f_res = -1
            #print 'bkg: {0}, obj: {1}, res: {2}'.format(f_bkg, f_obj, f_res)

            part_counts += parts

            if 0:
                if X.sum() >= settings['parts']['threshold']:
                    # Check which part this is most similar to
                    scores = np.apply_over_axes(np.sum,
                                                X * np.log(descriptor.parts) +
                                                (1 - X) * np.log(1 - descriptor.parts),
                                                [1, 2, 3]).ravel()
                    f_best = np.argmax(scores)
                    part_counts[f_best] += 1

    new_feats = part_counts / (N * IN)

    if PLOT:
        plt.clf()
        ag.plot.images(np.rollaxis(cumX, 2) / (N * IN))
        plt.savefig('outs/mean-cor.png')

    if PLOT and 0:
        plt.clf()
        Xs = np.asarray(Xs)
        Xs_ = np.rollaxis(Xs, 3, start=1)
        plXs = Xs_.reshape((np.prod(Xs_.shape[:2]),) + Xs_.shape[2:])
        ag.plot.images(plXs, subplots=Xs_.shape[:2], show=False)
        plt.savefig('outs/corrected.png')

    new_model = model.copy()
    new_model['theta'] = new_feats
    new_model['alpha'] = None
    new_model['alpha_padded'] = None
    return new_model
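# Usage sketch of the arrange/correct pair above: build a small-patch model
# with gray backgrounds, then apply the background correction. The 'cor0'
# config string follows the startswith('cor') convention in arrange_model;
# `pos` is the crop position expected by load_and_crop (not shown here).
def _demo_correct_model(settings, pos, bkg):
    model = arrange_model(pos, settings, 'cor0')
    corrected = correct_model(model, bkg=bkg, seed=0)
    return corrected['theta']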
def _process_file(settings, bkg_stack, bkg_stack_num, fn, mixcomp):
    ag.info("Processing file", fn)
    seed = np.abs(hash(fn) % 123124)
    descriptor_name = settings['detector']['descriptor']
    img_size = settings['detector']['image_size']
    part_size = settings[descriptor_name]['part_size']
    psize = settings['detector']['subsample_size']

    # The 4 is for the edge border that falls off
    #orig_sh = (img_size[0] - part_size[0] - 4 + 1, img_size[1] - part_size[1] - 4 + 1)
    orig_sh = img_size

    sh = gv.sub.subsample_size(np.ones(orig_sh), psize)

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    counts = np.zeros((settings['detector']['num_mixtures'], sh[0], sh[1],
                       descriptor.num_parts + 1, descriptor.num_parts),
                      dtype=np.uint16)

    prnds = [np.random.RandomState(seed + i) for i in xrange(5)]

    # Load the image and extract alpha
    #color_img, alpha = gv.img.load_image_binarized_alpha(fn)
    color_img = gv.img.load_image(fn)

    from skimage.transform import pyramid_reduce, pyramid_expand
    f = color_img.shape[0] / settings['detector']['image_size'][0]
    if f > 1:
        color_img = pyramid_reduce(color_img, downscale=f)
    elif f < 1:
        color_img = pyramid_expand(color_img, upscale=1 / f)

    alpha = color_img[..., 3]
    img = gv.img.asgray(color_img)

    # Resize it
    # TODO: This only looks at the first axis
    assert img.shape == settings['detector']['image_size'], \
        "Target size not achieved: {0} != {1}".format(img.shape, settings['detector']['image_size'])

    # Settings
    bsettings = settings['edges'].copy()
    radius = bsettings['radius']
    bsettings['radius'] = 0

    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])

    pad = 5
    size = settings[descriptor_name]['part_size']
    X_pad_size = (size[0] + pad * 2, size[1] + pad * 2)

    img_pad = ag.util.zeropad(img, pad)
    alpha_pad = ag.util.zeropad(alpha, pad)

    # Iterate every duplicate
    dups = settings['detector'].get('duplicates', 1)

    bkgs = np.empty(((descriptor.num_parts + 1) * dups,) + X_pad_size)

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    cb = settings['detector'].get('crop_border')
    sett = dict(spread_radii=radii, subsample_size=psize, crop_border=cb)

    plt.clf()
    plt.imshow(img)
    plt.savefig('output/img.png')

    if 0:
        # NEW{
        totfeats = np.zeros(sh + (descriptor.num_parts,) * 2)
        for f in xrange(descriptor.num_parts):
            num = bkg_stack_num[f]
            for d in xrange(dups):
                feats = np.zeros(sh + (descriptor.num_parts,), dtype=np.uint8)

                for i, j in itr.product(locations0, locations1):
                    x = i * psize[0]
                    y = j * psize[1]  # was `i * psize[1]`; presumably a typo

                    bkg_i = prnds[4].randint(num)
                    bkg = bkg_stack[f, bkg_i]

                    selection = [slice(x, x + X_pad_size[0]), slice(y, y + X_pad_size[1])]

                    patch = img_pad[selection]
                    alpha_patch = alpha_pad[selection]

                    # TODO: Which one?
                    #img_with_bkg = patch + bkg * (1 - alpha_patch)
                    img_with_bkg = patch * alpha_patch + bkg * (1 - alpha_patch)

                    edges_pads = ag.features.bedges(img_with_bkg, **bsettings)
                    X_pad_spreads = ag.features.bspread(edges_pads,
                                                        spread=bsettings['spread'],
                                                        radius=radius)

                    padding = pad - 2
                    X_spreads = X_pad_spreads[padding:-padding, padding:-padding]

                    partprobs = ag.features.code_parts(X_spreads,
                                                       descriptor._log_parts,
                                                       descriptor._log_invparts,
                                                       descriptor.settings['threshold'],
                                                       descriptor.settings['patch_frame'])
                    part = partprobs.argmax()
                    if part > 0:
                        feats[i, j, part - 1] = 1

                # Now spread the parts
                feats = ag.features.bspread(feats, spread='box', radius=2)
                totfeats[:, :, f] += feats
        # }

        kernels = totfeats[:, :, 0].astype(np.float32) / (descriptor.num_parts * dups)

        # Subsample kernels
        sub_kernels = gv.sub.subsample(kernels, psize, skip_first_axis=False)

        np.save('tmp2.npy', sub_kernels)
        print 'saved tmp2.npy'
        import sys
        sys.exit(0)

    for i, j in itr.product(locations0, locations1):
        x = i * psize[0]
        y = j * psize[1]  # was `i * psize[1]`; presumably a typo
        print 'processing', i, j

        selection = [slice(x, x + X_pad_size[0]), slice(y, y + X_pad_size[1])]

        patch = img_pad[selection]
        alpha_patch = alpha_pad[selection]

        patch = np.expand_dims(patch, 0)
        alpha_patch = np.expand_dims(alpha_patch, 0)

        for f in xrange(descriptor.num_parts + 1):
            num = bkg_stack_num[f]
            for d in xrange(dups):
                bkg_i = prnds[4].randint(num)
                bkgs[f * dups + d] = bkg_stack[f, bkg_i]

        img_with_bkgs = patch * alpha_patch + bkgs * (1 - alpha_patch)

        if 0:
            edges_pads = ag.features.bedges(img_with_bkgs, **bsettings)
            X_pad_spreads = ag.features.bspread(edges_pads,
                                                spread=bsettings['spread'],
                                                radius=radius)
            padding = pad - 2
            X_spreads = X_pad_spreads[:, padding:-padding, padding:-padding]

        parts = np.asarray([descriptor.extract_features(im, settings=sett)[0, 0]
                            for im in img_with_bkgs])

        for f in xrange(descriptor.num_parts + 1):
            hist = np.bincount(parts[f * dups:(f + 1) * dups].ravel(),
                               minlength=descriptor.num_parts + 1)
            counts[mixcomp, i, j, f] += hist[1:]

    if 0:
        kernels = counts[:, :, :, 0].astype(np.float32) / (descriptor.num_parts * dups)

        radii = (2, 2)
        aa_log = np.log(1 - kernels)
        aa_log = ag.util.zeropad(aa_log, (0, radii[0], radii[1], 0))
        integral_aa_log = aa_log.cumsum(1).cumsum(2)

        offsets = gv.sub.subsample_offset(kernels[0], psize)

        # Fix kernels
        istep = 2 * radii[0]
        jstep = 2 * radii[1]
        sh = kernels.shape[1:3]
        for mixcomp in xrange(1):
            # Note, we are going in strides of psize, given a certain offset,
            # since we will be subsampling anyway, so we don't need the rest.
            for i in xrange(offsets[0], sh[0], psize[0]):
                for j in xrange(offsets[1], sh[1], psize[1]):
                    p = gv.img.integrate(integral_aa_log[mixcomp], i, j, i + istep, j + jstep)
                    kernels[mixcomp, i, j] = 1 - np.exp(p)

        # Subsample kernels
        sub_kernels = gv.sub.subsample(kernels, psize, skip_first_axis=True)

        np.save('tmp.npy', sub_kernels)
        print 'saved tmp.npy'
        import sys
        sys.exit(0)

    if 0:
        for f in xrange(descriptor.num_parts):
            # Pick only one background for this part and file
            num = bkg_stack_num[f]  # assumes num > 0
            bkg_i = prnds[4].randint(num)
            bkgmap = bkg_stack[f, bkg_i]

            # Composite
            img_with_bkg = gv.img.composite(patch, bkgmap, alpha_patch)

            # Retrieve unspread edges (with a given background gray level)
            edges_pad = ag.features.bedges(img_with_bkg, **bsettings)

            # Do spreading
            X_pad_spread = ag.features.bspread(edges_pad, spread=bsettings['spread'], radius=radius)

            # De-pad
            padding = pad - 2
            X_spread = X_pad_spread[padding:-padding, padding:-padding]

            # Code parts
            parts = descriptor.extract_parts(X_spread.astype(np.uint8))

            # Accumulate and return
            counts[mixcomp, i, j, f] += parts[0, 0]

    # Translate counts to spread counts (since we're assuming independence of
    # samples within one CAD image)
    return counts
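# Sketch of _process_file_star, the wrapper handed to gv.parallel.imap in
# background_adjust_model above; it is referenced but not defined in this
# excerpt. The standard tuple-unpacking shim is assumed:
def _sketch_process_file_star(args):
    return _process_file(*args)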
def _process_file(settings, bkg_stack, bkg_stack_num, fn, mixcomp):
    ag.info("Processing file", fn)
    seed = np.abs(hash(fn) % 123124)
    descriptor_name = settings["detector"]["descriptor"]
    img_size = settings["detector"]["image_size"]
    part_size = settings[descriptor_name]["part_size"]
    psize = settings["detector"]["subsample_size"]

    # The 4 is for the edge border that falls off
    # orig_sh = (img_size[0] - part_size[0] - 4 + 1, img_size[1] - part_size[1] - 4 + 1)
    orig_sh = img_size

    sh = gv.sub.subsample_size(np.ones(orig_sh), psize)

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    counts = np.zeros(
        (settings["detector"]["num_mixtures"], sh[0], sh[1], descriptor.num_parts + 1, descriptor.num_parts),
        dtype=np.uint16,
    )

    prnds = [np.random.RandomState(seed + i) for i in xrange(5)]

    # Load the image and extract the alpha channel
    color_img = gv.img.load_image(fn)

    from skimage.transform import pyramid_reduce, pyramid_expand

    # Float division, so that non-integer scale factors also trigger a resize
    factor = color_img.shape[0] / float(settings["detector"]["image_size"][0])
    if factor > 1:
        color_img = pyramid_reduce(color_img, downscale=factor)
    elif factor < 1:
        color_img = pyramid_expand(color_img, upscale=1 / factor)

    alpha = color_img[..., 3]
    img = gv.img.asgray(color_img)

    # TODO: This only looks at the first axis
    assert img.shape == tuple(settings["detector"]["image_size"]), "Target size not achieved: {0} != {1}".format(
        img.shape, settings["detector"]["image_size"]
    )

    # Setup unspread bedges settings
    bsettings = settings["edges"].copy()
    radius = bsettings["radius"]
    bsettings["radius"] = 0

    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])

    pad = 5
    X_pad_size = (part_size[0] + pad * 2, part_size[1] + pad * 2)

    img_pad = ag.util.zeropad(img, pad)
    alpha_pad = ag.util.zeropad(alpha, pad)

    # Number of background duplicates to superimpose at every location
    dups = settings["detector"].get("duplicates", 1)

    bkgs = np.empty(((descriptor.num_parts + 1) * dups,) + X_pad_size)

    radii = settings["detector"]["spread_radii"]
    cb = settings["detector"].get("crop_border")
    sett = dict(spread_radii=radii, subsample_size=psize, crop_border=cb)

    plt.clf()
    plt.imshow(img)
    plt.savefig("output/img.png")

    for i, j in itr.product(locations0, locations1):
        # Map subsampled grid coordinates back to pixel coordinates
        x = i * psize[0]
        y = j * psize[1]
        print "processing", i, j

        selection = [slice(x, x + X_pad_size[0]), slice(y, y + X_pad_size[1])]

        patch = img_pad[selection]
        alpha_patch = alpha_pad[selection]

        patch = np.expand_dims(patch, 0)
        alpha_patch = np.expand_dims(alpha_patch, 0)

        # Draw `dups` random backgrounds for every background part
        for f in xrange(descriptor.num_parts + 1):
            num = bkg_stack_num[f]
            for d in xrange(dups):
                bkg_i = prnds[4].randint(num)
                bkgs[f * dups + d] = bkg_stack[f, bkg_i]

        # Composite the object patch over each background (alpha compositing)
        img_with_bkgs = patch * alpha_patch + bkgs * (1 - alpha_patch)

        parts = np.asarray([descriptor.extract_features(im, settings=sett)[0, 0] for im in img_with_bkgs])

        # Histogram the coded parts for each background part
        for f in xrange(descriptor.num_parts + 1):
            hist = np.bincount(parts[f * dups : (f + 1) * dups].ravel(), minlength=descriptor.num_parts + 1)
            counts[mixcomp, i, j, f] += hist[1:]

    # Translate counts to spread counts (since we're assuming independence of samples within one CAD image)
    return counts
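# background_adjust_model above maps _process_file over the training files via
# gv.parallel.imap, which needs a star-unpacking wrapper. The wrapper is not
# shown in this fragment; the assumed trivial definition is:
def _process_file_star(args):
    return _process_file(*args)


# The superimposition step in _process_file is plain alpha compositing. A
# self-contained sketch of the same arithmetic on toy arrays (illustration
# only, not part of the pipeline):
def _composite_example():
    rng = np.random.RandomState(0)
    patch = rng.rand(19, 19)                             # grayscale object patch
    alpha = (rng.rand(19, 19) > 0.5).astype(np.float64)  # binary support mask
    bkg = rng.rand(19, 19)                               # background patch
    # Where alpha is 1 the object shows through; where it is 0, the background does
    return patch * alpha + bkg * (1 - alpha)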
def background_adjust_model(settings, bkg, seed=0):
    offset = settings["detector"].get("train_offset", 0)
    limit = settings["detector"].get("train_limit")
    files = sorted(glob.glob(settings["detector"]["train_dir"]))[offset:limit]

    try:
        detector = gv.Detector.load(settings["detector"]["file"])
    except KeyError:
        raise Exception("Need to train the model first")

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    sh = (28, 88)

    # Create accumulators for each mixture component
    # TODO: Temporary until multicomp
    counts = np.zeros((1, sh[0], sh[1], descriptor.num_parts))

    num_files = len(files)
    num_duplicates = settings["detector"].get("duplicate", 1)

    # Setup unspread bedges settings
    bsettings = settings["edges"].copy()
    radius = bsettings["radius"]
    bsettings["radius"] = 0

    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])

    pad = 5
    X_pad_size = (9 + pad * 2,) * 2

    neg_filenames = sorted(glob.glob(os.path.join(os.environ["UIUC_DIR"], "TrainImages", "neg-*.pgm")))
    gen_raw = generate_random_patches(neg_filenames, X_pad_size, seed)

    # Pre-generate a bunch of background patches and cycle through them
    num_cached = 2000
    bkgs = [gen_raw.next() for i in xrange(num_cached)]

    def new_gen():
        i = 0
        while True:
            yield bkgs[i]
            i = (i + 1) % num_cached

    gen = new_gen()

    for fn in files:
        ag.info("Processing file", fn)

        # Which mixture component does this image belong to?
        # TODO: Temporary until multicomp (np.argmax over detector affinities)
        mixcomp = 0

        # Binarize support and extract alpha
        color_img, alpha = gv.img.load_image_binarized_alpha(fn)
        img = gv.img.asgray(color_img)

        img_pad = ag.util.zeropad(img, pad)
        alpha_pad = ag.util.zeropad(alpha, pad)

        for i, j in product(locations0, locations1):
            selection = [slice(i, i + X_pad_size[0]), slice(j, j + X_pad_size[1])]
            patch = img_pad[selection]
            alpha_patch = alpha_pad[selection]

            # Iterate every duplicate
            for loop in xrange(num_duplicates):
                bkgmap = gen.next()

                # Composite the patch over the background
                img_with_bkg = composite(patch, bkgmap, alpha_patch)

                # Retrieve unspread edges (with a given background gray level)
                edges_pad = ag.features.bedges(img_with_bkg, **bsettings)

                # Do spreading
                X_pad_spread = ag.features.bspread(edges_pad, spread=bsettings["spread"], radius=radius)

                # De-pad
                padding = pad - 2
                X_spread = X_pad_spread[padding:-padding, padding:-padding]

                # Code parts
                parts = descriptor.extract_parts(X_spread.astype(np.uint8))

                # Accumulate
                counts[mixcomp, i, j] += parts[0, 0]

    # Divide the accumulated counts to get a distribution
    counts /= num_files * num_duplicates

    # Create a new model with this distribution
    new_detector = detector.copy()
    new_detector.kernel_templates = counts
    new_detector.support = None
    new_detector.use_alpha = False

    return new_detector
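# Design note: new_gen above just loops over the cached patches; with
# itertools in scope the equivalent would be (a sketch, not a tested
# drop-in replacement):
#
#     import itertools
#     gen = itertools.cycle(bkgs)
#     bkgmap = gen.next()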
import glob
import os
import os.path
from random import shuffle

import amitgroup as ag
import gv

ag.set_verbose(True)

# `sett` and `dsettings` are assumed to have been loaded earlier in this script
descriptor = gv.load_descriptor(sett)

detector = gv.BernoulliDetector(dsettings['num_mixtures'], descriptor, dsettings)

if dsettings['use_voc']:
    files = gv.voc.load_object_images_of_size(sett['voc'], 'bicycle', dsettings['image_size'], dataset='train')
else:
    base_path = ''
    if 'base_path' in dsettings:
        base_path = os.environ[dsettings['base_path']]
    path = os.path.join(base_path, dsettings['train_dir'])
    files = sorted(glob.glob(path))

# TEMP!
shuffle(files)

limit = dsettings.get('train_limit')
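# Note: the TEMP shuffle above is unseeded, so each run trains on a different
# subset once the limit is applied. If reproducibility matters, a seeded
# Random instance does the same job deterministically (a sketch):
#
#     from random import Random
#     Random(0).shuffle(files)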
import glob
import os
import sys
from operator import itemgetter

import amitgroup as ag
import gv
import numpy as np

#import matplotlib
#matplotlib.use('Agg')
import matplotlib.pylab as plt

# load_settings and load_and_crop are assumed to come from this starred import
from superimpose_experiment import *

settings = load_settings(settings_file)

descriptor = gv.load_descriptor(settings)

def get_edges(settings, config):
    offset = settings['detector'].get('train_offset', 0)
    limit = settings['detector'].get('train_limit')
    if limit is not None:
        limit += offset
    files = sorted(glob.glob(settings['detector']['train_dir']))[offset:limit] * settings['detector'].get('duplicate', 1)

    alpha_and_images = map(load_and_crop, files)
    if alpha_and_images[0][0] is None:
        alpha = None
        all_alphas = None
    else:
        all_alphas = np.asarray(map(itemgetter(0), alpha_and_images))
        # Mean alpha over a 9x9 window around the center (rows/cols 3..11)
        alpha = all_alphas[:, 7-4:8+4, 7-4:8+4].mean(axis=0)
def background_adjust_model(settings, bkg, seed=0):
    offset = settings['detector'].get('train_offset', 0)
    limit = settings['detector'].get('train_limit')
    files = sorted(glob.glob(settings['detector']['train_dir']))[offset:limit]

    try:
        detector = gv.Detector.load(settings['detector']['file'])
    except KeyError:
        raise Exception("Need to train the model first")

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    sh = (28, 88)

    # Create accumulators for each mixture component
    # TODO: Temporary until multicomp
    counts = np.zeros((1, sh[0], sh[1], descriptor.num_parts))

    num_files = len(files)
    num_duplicates = settings['detector'].get('duplicate', 1)

    # Create several random states, so it's easier to measure
    # the influence of certain features
    prnds = [np.random.RandomState(seed + i) for i in xrange(10)]

    # Setup unspread bedges settings
    bsettings = settings['edges'].copy()
    radius = bsettings['radius']
    bsettings['radius'] = 0
    padding = radius

    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])

    padded_theta = descriptor.unspread_parts_padded
    X_pad_size = padded_theta.shape[1:3]

    for fn in files:
        ag.info("Processing file", fn)

        # Which mixture component does this image belong to?
        # TODO: Temporary until multicomp (np.argmax over detector affinities)
        mixcomp = 0

        # Binarize support and extract alpha
        color_img, alpha = gv.img.load_image_binarized_alpha(fn)
        img = gv.img.asgray(color_img)

        alpha_pad = ag.util.zeropad(alpha, padding)
        inv_alpha_pad_expanded = np.expand_dims(~alpha_pad, -1)

        # Iterate every duplicate
        for loop in xrange(num_duplicates):
            ag.info("Iteration {0}/{1}".format(loop + 1, num_duplicates))

            # Superimpose onto a gray background, with the gray levels spaced
            # evenly in [0, 1] (float division, so intermediate levels survive)
            level = loop / max(1.0, float(num_duplicates - 1))
            graymap = create_graymap(img.shape, level, prnds[0])

            # Composite
            img_with_gray = composite(img, graymap, alpha)

            # Retrieve unspread edges (with a given background gray level)
            edges = ag.features.bedges(img_with_gray, **bsettings)

            # Pad the edges
            edges_pad = ag.util.zeropad(edges, (padding, padding, 0))

            # Iterate over all locations
            for i, j in product(locations0, locations1):
                selection = [slice(i, i + X_pad_size[0]), slice(j, j + X_pad_size[1])]
                X_pad = edges_pad[selection].copy()
                nA_pad = inv_alpha_pad_expanded[selection]

                # Draw background part from categorical distribution
                f_bkg = weighted_choice_unit(bkg, prnds[1])
                probs_bkg = get_probs(padded_theta, f_bkg)
                probs = nA_pad * probs_bkg

                # Draw from background edge probability over ~alpha
                X_pad |= (prnds[2].rand(*probs.shape) < probs)

                # Do spreading
                X_pad_spread = ag.features.bspread(X_pad, spread=bsettings['spread'], radius=radius)

                # De-pad
                X_spread = X_pad_spread[padding:-padding, padding:-padding]

                # Code parts
                parts = descriptor.extract_parts(X_spread.astype(np.uint8))

                # Accumulate
                counts[mixcomp, i, j] += parts[0, 0]

    # Divide the accumulated counts to get a distribution
    counts /= num_files * num_duplicates

    # Create a new model with this distribution
    new_detector = detector.copy()
    new_detector.kernel_templates = counts
    new_detector.support = None
    new_detector.use_alpha = False

    return new_detector
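# weighted_choice_unit and create_graymap are imported from
# superimpose_experiment. For reference, drawing an index from a categorical
# distribution -- which is what the background-part draw above does -- can be
# sketched as follows (a hypothetical stand-in, not the actual implementation):
def _weighted_choice_sketch(weights, rnd):
    # Normalize to a CDF, then invert it at a uniform sample
    cdf = np.cumsum(weights / float(np.sum(weights)))
    return int(np.searchsorted(cdf, rnd.uniform()))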