def get_positives(mixcomp, settings, indices, files, crop=False):
    im_size = settings['detector']['image_size']

    # Use the same seed for all mixture components! That will make them easier to compare,
    # without having to sample to infinity.


    # HERE: Make it possible to input data directly!
    descriptor = gv.load_descriptor(settings)

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    rotspread = settings['detector'].get('rotation_spreading_radius', 0)
    cb = settings['detector'].get('crop_border')

    all_feats = []

    for index in indices: 
        ag.info("Fetching positives from image of index {0} and mixture component {1}".format(index, mixcomp))
        gray_im = gv.img.asgray(gv.img.load_image(files[index]))
        if crop:
            gray_im = gv.img.crop(gray_im, im_size)
        else:
            gray_im = gv.img.resize(gray_im, im_size)
        #gray_im = gv.img.resize(gray_im, im_size)

        feats = descriptor.extract_features(gray_im, settings=dict(spread_radii=radii, subsample_size=psize, rotation_spreading_radius=rotspread, crop_border=cb))
        all_feats.append(feats)

    return mixcomp, np.asarray(all_feats)
def get_pos_and_neg(mixcomp, settings, bb, indices, files, neg_files, duplicates_mult=1):
    im_size = settings['detector']['image_size']
    size = gv.bb.size(bb)

    # Use the same seed for all mixture components! That will make them easier to compare,
    # without having to sample to infinity.


    # HERE: Make it possible to input data directly!
    descriptor = gv.load_descriptor(settings)

    all_pos_feats = []
    all_neg_feats = []

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    rotspread = settings['detector'].get('rotation_spreading_radius', 0)
    duplicates = settings['detector'].get('duplicates', 1) * duplicates_mult
    cb = settings['detector'].get('crop_border')
    crop_image = settings['detector'].get('crop_image')

    sett = dict(spread_radii=radii, subsample_size=psize, rotation_spreading_radius=rotspread, crop_border=cb, crop_image=crop_image)

    extra = {}
    if settings['detector'].get('selective_bkg'):
        ag.info("SELECTIVE!")
        extra['selective'] = True
        extra['concentrations'] = _get_avg_positives(mixcomp, settings, bb, indices, files, neg_files, descriptor, sett)

    alpha_maps = []

    args = [(index, mixcomp, files, im_size, bb, duplicates, neg_files, descriptor, sett, extra) for index in indices]
#index, mixcomp, files, im_size, bb, duplicates):
    for pos_feats, neg_feats, alpha in gv.parallel.starmap_unordered(__process_one, args):
        all_pos_feats.extend(pos_feats)
        all_neg_feats.extend(neg_feats)
        alpha_maps.append(alpha)

    all_neg_feats = np.asarray(all_neg_feats)
    all_pos_feats = np.asarray(all_pos_feats)
    #alpha_maps = np.asarray(alpha_maps)
    #support = alpha_maps.mean(axis=0)

    return mixcomp, all_neg_feats, all_pos_feats, alpha_maps, extra 
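def __process_one_sketch(index, mixcomp, files, im_size, bb, duplicates,
                         neg_files, descriptor, sett, extra):
    # Minimal sketch (hypothetical name) of the per-image worker dispatched by
    # gv.parallel.starmap_unordered in get_pos_and_neg above; the real
    # __process_one is not part of this section. It mirrors the superimposition
    # loop in _create_kernel_for_mixcomp below and ignores the
    # selective-background data passed in `extra`.
    gray_im, alpha = _load_cad_image(files[index], im_size, bb,
                                     crop=sett.get('crop_image'))
    gen = generate_random_patches(neg_files, gv.bb.size(bb), seed=index)
    pos_feats = []
    neg_feats = []
    for _ in xrange(duplicates):
        # Paste the object over a random negative patch and extract features
        # from both the plain background and the superimposed image.
        neg_im = gen.next()
        superimposed_im = neg_im * (1 - alpha) + gray_im * alpha
        neg_feats.append(descriptor.extract_features(neg_im, settings=sett))
        pos_feats.append(descriptor.extract_features(superimposed_im, settings=sett))
    return pos_feats, neg_feats, alpha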
def _get_background_model(settings, neg_files):
    descriptor = gv.load_descriptor(settings)
    neg_count = settings['detector'].get('train_neg_limit', 50)

    rs = np.random.RandomState(0)

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    rotspread = settings['detector'].get('rotation_spreading_radius', 0)
    cb = settings['detector'].get('crop_border')

    bkg_counts = np.zeros(descriptor.num_features, dtype=int)
    count = 0

    sett = dict(spread_radii=radii, subsample_size=psize, rotation_spreading_radius=rotspread, crop_border=cb)
    factors = rs.uniform(0.2, 1.0, size=neg_count)
    argses = [(neg_files[i], descriptor, sett, factors[i]) for i in xrange(neg_count)]

    for feats, c in gv.parallel.starmap_unordered(__process_bkg, argses):
    #for fn in itr.islice(neg_files, neg_count):
        if 0:
            im = gv.img.asgray(gv.img.load_image(fn))
            # Randomly resize
            factor = rs.uniform(0.2, 1.0)
            im = gv.img.resize_with_factor_new(im, factor)

            ag.info(im.shape)

            feats = descriptor.extract_features(im, settings=sett)

            count += np.prod(feats.shape[:2])
            bkg_counts += np.apply_over_axes(np.sum, feats, [0, 1]).ravel()

        count += c 
        bkg_counts += feats 

    assert count > 0, "Did not find any background images!"
    
    bkg = bkg_counts.astype(np.float64) / count
    return bkg
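def __process_bkg_sketch(fn, descriptor, sett, factor):
    # Minimal sketch (hypothetical name) of the __process_bkg worker used by
    # _get_background_model above; the real helper is not shown in this
    # section. It follows the disabled in-line code in that function: load a
    # negative image, rescale it by the given random factor, extract features
    # and return the per-feature counts plus the number of feature locations.
    im = gv.img.asgray(gv.img.load_image(fn))
    im = gv.img.resize_with_factor_new(im, factor)
    feats = descriptor.extract_features(im, settings=sett)
    count = np.prod(feats.shape[:2])
    feat_counts = np.apply_over_axes(np.sum, feats, [0, 1]).ravel()
    return feat_counts, count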
def _create_kernel_for_mixcomp(mixcomp, settings, bb, indices, files, neg_files):
    im_size = settings['detector']['image_size']
    size = gv.bb.size(bb)
    orig_size = size
    
    gen = generate_random_patches(neg_files, size, seed=0)
    
    descriptor = gv.load_descriptor(settings)

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    rotspread = settings['detector'].get('rotation_spreading_radius', 0)
    duplicates = settings['detector'].get('duplicates', 1)
    cb = settings['detector'].get('crop_border')
    crop_image = settings['detector'].get('crop_image')

    totals = 0
    bkg = None
    kern = None
    alpha_cum = None

    setts = dict(spread_radii=radii, subsample_size=psize, rotation_spreading_radius=rotspread, crop_border=cb)
    counts = 0 

    all_b = []
    all_X = []
    all_s = []

    for index in indices: 
        ag.info("Processing image of index {0} and mixture component {1}".format(index, mixcomp))
        gray_im, alpha = _load_cad_image(files[index], im_size, bb, crop=crop_image)

        bin_alpha = (alpha > 0.05).astype(np.uint32)

        if alpha_cum is None:
            alpha_cum = bin_alpha
        else:
            alpha_cum += bin_alpha 

        for dup in xrange(duplicates):
            neg_im = gen.next()
            neg_feats = descriptor.extract_features(neg_im, settings=setts)
            superimposed_im = neg_im * (1 - alpha) + gray_im * alpha

            feats = descriptor.extract_features(superimposed_im, settings=setts)

            counts += 1

            #bkg_feats = gv.sub.subsample(bkg_feats, psize)
        
            if bkg is None:
                bkg = neg_feats.astype(np.uint32)
            else:
                bkg += neg_feats

            #feats = gv.sub.subsample(feats, psize)

            if kern is None:
                kern = feats.astype(np.uint32)
            else:
                kern += feats

            # NEW TODO: This throws out low-activity negatives
            #if abs(neg_feats.mean() - 0.2) < 0.05:
            #if neg_feats.mean() < 0.05:
            if True:
                all_b.append(neg_feats)
                all_X.append(feats)
                all_s.append(bin_alpha)

                totals += 1

    ag.info('COUNTS', counts)

    np.seterr(divide='raise')

    kern = kern.astype(np.float64) / totals
    bkg = bkg.astype(np.float64) / totals
    
    #kern = kern.astype(np.float64) / total 
    #kern = np.clip(kern, eps, 1-eps)

    #bkg = bkg.astype(np.float64) / total

    support = alpha_cum.astype(np.float64) / len(indices)

    return kern, bkg, orig_size, support 
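def _load_cad_image_sketch(fn, im_size, bb, crop=False):
    # Hedged sketch (hypothetical name) of the _load_cad_image helper called
    # throughout this file; the real implementation is not shown in this
    # section. Judging from the call sites, it loads a CAD rendering together
    # with its alpha channel, converts to grayscale and brings both to the
    # detector's image size. How the bounding box bb is applied is an
    # assumption and is omitted here.
    color_im, alpha = gv.img.load_image_binarized_alpha(fn)
    gray_im = gv.img.asgray(color_im)
    alpha = alpha.astype(np.float64)
    if crop:
        gray_im = gv.img.crop(gray_im, im_size)
        alpha = gv.img.crop(alpha, im_size)
    else:
        gray_im = gv.img.resize(gray_im, im_size)
        alpha = gv.img.resize(alpha, im_size)
    return gray_im, alpha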
def superimposed_model(settings, threading=True):
    num_mixtures = settings['detector']['num_mixtures']

    # Train a mixture model to get a clustering of the angles of the object
    descriptor = gv.load_descriptor(settings)
    detector = gv.BernoulliDetector(num_mixtures, descriptor, settings['detector'])

    files = get_training_files(detector)
    neg_files = sorted(glob.glob(settings['detector']['neg_dir']))

    ag.info("Checkpoint 1")

    testing_type = detector.settings.get('testing_type')

    # Extract clusters (manual or through EM)
    ##############################################################################
    detector, comps = cluster(detector, files)
    each_mix_N = np.bincount(comps, minlength=num_mixtures)

    ##############################################################################

    ag.info("Checkpoint 3")

    ag.info("Checkpoint 4")

    support = detector.support 

    kernels = []

    #ag.info("TODO, quitting")
    #return detector

    # Determine bounding boxes
    ##############################################################################

    psize = settings['detector']['subsample_size']

    bbs = calc_bbs(detector)

    ag.info("Checkpoint 6")

    ag.info("Checkpoint 7")

    bkgs = []
    orig_sizes = []
    new_support = []
    im_size = settings['detector']['image_size']

    ag.info("Checkpoint 8")
    all_negs = []

    ag.info("Checkpoint 9")

    # Retrieve features and support 
    ##############################################################################

    ag.info('Fetching positives again...')
    all_pos_feats = []
    all_neg_feats = []
    alphas = []
    all_alphas = []
    all_binarized_alphas = []


    if settings['detector'].get('superimpose'):
        detector.extra['concentrations'] = []

        argses = [(m, settings, bbs[m], list(np.where(comps == m)[0]), files, neg_files, settings['detector'].get('stand_multiples', 1)) for m in range(detector.num_mixtures)]        
        for mixcomp, neg_feats, pos_feats, alpha_maps, extra in itr.starmap(get_pos_and_neg, argses):
            alpha = np.mean(alpha_maps, axis=0)
            alpha_maps = np.asarray(alpha_maps)
            all_alphas.append(alpha_maps)
            all_binarized_alphas.append(alpha_maps > 0.05)

            alphas.append(alpha)
            all_neg_feats.append(neg_feats)
            all_pos_feats.append(pos_feats)

            detector.extra['concentrations'].append(extra.get('concentrations', {}))

        ag.info('Done.')

        # Setup some places to store things
        if 'weights' not in detector.extra:
            detector.extra['weights'] = [None] * detector.num_mixtures
        if 'sturf' not in detector.extra:
            detector.extra['sturf'] = [{} for _ in xrange(detector.num_mixtures)]

        for m in xrange(detector.num_mixtures):
            obj = all_pos_feats[m].mean(axis=0)
            bkg = all_neg_feats[m].mean(axis=0)
            size = gv.bb.size(bbs[m])

            kernels.append(obj)
            bkgs.append(bkg)
            orig_sizes.append(size)
            new_support.append(alphas[m])

        if 0:
            for m in xrange(detector.num_mixtures):
                obj = all_pos_feats[m].mean(axis=0)
                bkg = all_neg_feats[m].mean(axis=0)
                size = gv.bb.size(bbs[m])

                eps = 0.025
                obj = np.clip(obj, eps, 1 - eps)
                avg = np.clip(bkg, eps, 1 - eps)
                #lmb = obj / avg
                #w = np.clip(np.log(obj / avg), -1, 1)
                w = np.log(obj / (1 - obj) * ((1 - avg) / avg))
                #w = np.log(

                #w_avg = np.apply_over_axes(np.sum, w * support[...,np.newaxis], [0, 1]) / support.sum()

                #w -= w_avg * support[...,np.newaxis]

                if 'weights' not in detector.extra:
                    detector.extra['weights'] = []
                detector.extra['weights'].append(w)

                if 'sturf' not in detector.extra:
                    detector.extra['sturf'] = []

                detector.extra['sturf'].append(dict())
                        
                kernels.append(obj)
                bkgs.append(bkg)
                orig_sizes.append(size)
                new_support.append(alphas[m])

        detector.settings['per_mixcomp_bkg'] = True
    else:
        # Get a single background model for this one
        bkg = _get_background_model(settings, neg_files)

        crop_image = detector.settings.get('crop_image')
        argses = [(m, settings, list(np.where(comps == m)[0]), files, crop_image) for m in range(detector.num_mixtures)]        
        for m, pos_feats in gv.parallel.starmap(get_positives, argses):
            obj = pos_feats.mean(axis=0)
            all_pos_feats.append(pos_feats)

            kernels.append(obj)
            bkgs.append(bkg)
            size = gv.bb.size(bbs[m])

            orig_sizes.append(size)
            support = np.ones(settings['detector']['image_size'])
            new_support.append(support)

        detector.settings['per_mixcomp_bkg'] = True # False 


    # Get weights and support

    for m in xrange(detector.num_mixtures):
        #kern = detector.kernel_templates[m]
        #bkg = detector.fixed_spread_bkg[m]
        obj = all_pos_feats[m].mean(axis=0)
        bkg = all_neg_feats[m].mean(axis=0)

        if detector.eps is None:
            detector.prepare_eps(bkg)

        weights = detector.build_clipped_weights(obj, bkg, detector.eps)

        detector.extra['weights'][m] = weights

        detector.extra['sturf'][m]['support'] = arrange_support(alphas[m], weights.shape, psize)

    # Modify weights

    if not detector.settings.get('plain'):
        for m in xrange(detector.num_mixtures):
            weights = detector.extra['weights'][m] 

            F = detector.num_features
            indices = get_key_points(weights, suppress_radius=detector.settings.get('indices_suppress_radius', 4), even=True)

            L0 = indices.shape[0] // F 
            
            kp_weights = np.zeros((L0, F))

            M = np.zeros(weights.shape, dtype=np.uint8)
            counts = np.zeros(F, dtype=int)
            for index in indices:
                f = index[2]
                M[tuple(index)] = 1
                kp_weights[counts[f],f] = weights[tuple(index)]
                counts[f] += 1

            #theta = np.load('theta3.npy')[1:-1,1:-1]
            #th = theta
            #eth = np.load('empty_theta.npy')

            #support = 1-th[:,:,np.arange(1,F+1),np.arange(F)].mean(-1)
            #offset = gv.sub.subsample_offset_shape(alphas[m].shape, psize)

            support = detector.extra['sturf'][m]['support'] 

            #    def subsample_offset_shape(shape, size):


            pos, neg = all_pos_feats[m].astype(bool), all_neg_feats[m].astype(bool)
            #avg = np.apply_over_axes(

            diff = pos ^ neg
            appeared = pos & ~neg
            disappeared = ~pos & neg

            #bs = (support > 0.5)[np.newaxis,...,np.newaxis]
             

            A = appeared.mean(0) / (0.00001+((1-neg).mean(0)))
            D = disappeared.mean(0) / (0.00001+neg.mean(0))
            #ss = D.mean(-1)[...,np.newaxis]
            ss = support[...,np.newaxis]

            B = (np.apply_over_axes(np.mean, A*ss, [0, 1])).squeeze() / ss.mean()

            def clogit(x):
                return gv.logit(gv.bclip(x, 0.025))

            def find_zero(fun, l, u, depth=30):
                m = np.mean([l, u])
                if depth == 0:
                    return m
                v = fun(m)
                if v > 0:
                    return find_zero(fun, l, m, depth-1)
                else:
                    return find_zero(fun, m, u, depth-1)
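            # Usage sketch: find_zero bisects [l, u] looking for a sign change
            # of fun, assuming fun is increasing on the interval. For example
            # (hypothetical values), find_zero(lambda w: w - 0.3, -10, 10)
            # returns approximately 0.3 after 30 halvings of the interval.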

            # Find zero-crossing
            #for f in xrange(F):
                

            # Now construct weights from these deltas
            #weights = ((clogit(ss * deltas + A) - clogit(B)))
            #weights = (ss * (clogit(deltas + pos.mean(0)) - clogit(neg.mean(0))))

            
            avg = np.apply_over_axes(np.mean, pos * M * ss, [1, 2]) / (ss * M).mean()

            if 0:
                for l0, l1, f in gv.multirange(*weights.shape):

                    def fun(w):
                        return -(np.clip(pos[:,l0,l1,f].mean(), 0.005, 0.995) - np.mean(expit(w + logit(avg[...,f]))))

                    weights[l0,l1,f] = find_zero(fun, -10, 10)



            if 1:
                # Print these to file
                from matplotlib.pylab import cm
                grid = gv.plot.ImageGrid(detector.num_features, 1, weights.shape[:2], border_color=(0.5, 0.5, 0.5))
                mm = np.fabs(weights).max()
                for f in xrange(detector.num_features):
                    grid.set_image(weights[...,f], f, 0, vmin=-mm, vmax=mm, cmap=cm.RdBu_r)
                fn = os.path.join(os.path.expandvars('$HOME'), 'html', 'plots', 'plot2.png')
                grid.save(fn, scale=10)
                os.chmod(fn, 0644)
                



            #A = appeared.mean(0) / (0.00001+((1-neg).mean(0)))
            #mm = (A * ss).mean() / ss.mean()


            #xx = (bs & pos) | (~bs & appeared)

            #avg = xx.mean(0)
            weights1 = ss*(weights - np.apply_over_axes(np.mean, weights * ss, [0, 1])/ss.mean())
            detector.extra['sturf'][m]['weights1'] = weights1

            eps = 0.025

            avg_pos = (np.apply_over_axes(np.mean, pos * ss, [0, 1, 2]) / ss.mean()).squeeze().clip(eps, 1-eps)
            avg_neg = (np.apply_over_axes(np.mean, neg * ss, [0, 1, 2]) / ss.mean()).squeeze().clip(eps, 1-eps)

            #w_avg = np.apply_over_axes(np.sum, weights * support[...,np.newaxis], [0, 1]) / support.sum()
            #
            #w_avg = (logit(np.apply_over_axes(np.mean, pos, [0, 1, 2])) - \
             #        logit(np.apply_over_axes(np.mean, neg, [0, 1, 2]))).squeeze()
            w_avg = logit(avg_pos) - logit(avg_neg)
            detector.extra['sturf'][m]['wavg'] = w_avg
            detector.extra['sturf'][m]['reweighted'] = (w_avg * support[...,np.newaxis]).squeeze()

            #weights -= w_avg * support[...,np.newaxis]
            #weights *= support[...,np.newaxis] * M
            if 0:
                weights *= support[...,np.newaxis]

                avg_weights = np.apply_over_axes(np.mean, weights, [0, 1]) / M.mean(0).mean(0)

                avg_w = kp_weights.mean(0)

                weights -= avg_w - (-kp_weights.var(0) / 2)

                weights *= support[...,np.newaxis]

                print((weights * M).mean(0))


            #weights = (weights - w_avg) * support[...,np.newaxis]
            #weights -= (w_avg + 0.0) * support[...,np.newaxis]

            weights -= w_avg * support[...,np.newaxis]

            F = detector.num_features

            if 0:
                for f in xrange(F):
                    #zz = np.random.normal(-1.5, size=(1, 1, 50))
                    zz = np.random.normal(-1.5, size=(1, 1, 50)).ravel()

                    betas = np.zeros(len(zz))
                    for i, z in enumerate(zz):
                        def fun(beta):
                            w = weights[...,f] - beta * support 
                            return np.log(1 - expit(w[...,np.newaxis] + z)).mean() - np.log(1 - expit(z))

                        betas[i] = find_zero(fun, -10, 10)

                    
                    if f == 0:
                        np.save('betas.npy', betas)
                    beta0 = betas.mean()
                    print(f, beta0, betas.std())
                    weights[...,f] -= beta0 * support 


            if 1:
                # Print these to file
                from matplotlib.pylab import cm
                grid = gv.plot.ImageGrid(detector.num_features, 2, weights.shape[:2], border_color=(0.5, 0.5, 0.5))
                mm = np.fabs(weights).max()
                for f in xrange(detector.num_features):
                    grid.set_image(weights[...,f], f, 0, vmin=-mm, vmax=mm, cmap=cm.RdBu_r)
                    grid.set_image(M[...,f], f, 1, vmin=0, vmax=1, cmap=cm.RdBu_r)
                fn = os.path.join(os.path.expandvars('$HOME'), 'html', 'plots', 'plot.png')
                grid.save(fn, scale=10)
                os.chmod(fn, 0644)

            ag.info('sum', np.fabs(np.apply_over_axes(np.sum, weights, [0, 1])).sum())

            # Instead, train model rigorously!!
            detector.extra['sturf'][m]['pos'] = all_pos_feats[m]
            detector.extra['sturf'][m]['neg'] = all_neg_feats[m]


            # Averages of all positives
            ff = all_pos_feats[m]
            posavg = np.apply_over_axes(np.sum, all_pos_feats[m] * support[...,np.newaxis], [1, 2]).squeeze() / support.sum() 
            negavg = np.apply_over_axes(np.sum, all_neg_feats[m] * support[...,np.newaxis], [1, 2]).squeeze() / support.sum() 

            S = np.cov(posavg.T)
            Sneg = np.cov(negavg.T)

            detector.extra['sturf'][m]['pavg'] = avg_pos
            detector.extra['sturf'][m]['pos-samples'] = posavg 
            detector.extra['sturf'][m]['S'] = S
            detector.extra['sturf'][m]['Sneg'] = Sneg
            detector.extra['sturf'][m]['navg'] = avg_neg

            Spos = S
            rs = np.random.RandomState(0)
            detector.extra['sturf'][m]['Zs'] = rs.multivariate_normal(avg_neg, Sneg, size=1000).clip(min=0.005, max=0.995)
            detector.extra['sturf'][m]['Zs_pos'] = rs.multivariate_normal(avg_pos, Spos, size=1000).clip(min=0.005, max=0.995)
            detector.extra['sturf'][m]['Zs_pos2'] = rs.multivariate_normal(avg_pos, Spos * 2, size=1000).clip(min=0.005, max=0.995)
            detector.extra['sturf'][m]['Zs_pos10'] = rs.multivariate_normal(avg_pos, Spos * 10, size=1000).clip(min=0.005, max=0.995)
            detector.extra['sturf'][m]['Zs_pos50'] = rs.multivariate_normal(avg_pos, Spos * 50, size=1000).clip(min=0.005, max=0.995)

    #{{{
    if 0:
        argses = [(m, settings, bbs[m], np.where(comps == m)[0], files, neg_files) for m in xrange(detector.num_mixtures)]
        for kern, bkg, orig_size, sup in gv.parallel.starmap(_create_kernel_for_mixcomp, argses):
            kernels.append(kern) 
            bkgs.append(bkg)
            orig_sizes.append(orig_size)
            new_support.append(sup)
                    
            ag.info("Checkpoint 10")

            detector.settings['per_mixcomp_bkg'] = True
    #}}}

    detector.kernel_templates = kernels
    detector.kernel_sizes = orig_sizes
    detector.settings['kernel_ready'] = True
    detector.use_alpha = False
    detector.support = new_support

    # Determine the background
    ag.info("Determining background")

    detector.fixed_bkg = None
    detector.fixed_spread_bkg = bkgs

    detector.settings['bkg_type'] = 'from-file'

    detector._preprocess()
    detector.prepare_eps(detector.fixed_spread_bkg[0])

    # Determine the standardization values
    ag.info("Determining standardization values")

    #fixed_train_mean = np.zeros(detector.num_mixtures)
    #detector.fixed_train_mean = []
    #fixed_train_std = np.ones(detector.num_mixtures)

    # Determine indices for coarse detection sweep
    if INDICES:
        detector.indices = []

        for m in xrange(detector.num_mixtures):
            these_indices = []
            weights = detector.extra['weights'][m]

            ag.info('Indices:', np.prod(weights.shape))

            # If not plain, we need even keypoints
            even = not detector.settings.get('plain')
            indices = get_key_points(weights, suppress_radius=detector.settings.get('indices_suppress_radius', 4), even=even)

            if not detector.settings.get('plain'):
                detector.extra['weights'][m] = weights

            assert len(indices) > 0, "No indices were extracted when keypointing"

            detector.indices.append(indices)
    else:
        detector.indices = None

    if testing_type in ('fixed', 'non-parametric'):
        detector.standardization_info = []
        if testing_type == 'fixed':
            if detector.settings.get('standardize_with_samples'):
                detector.standardization_info = [dict(mean=0, std=1)] * detector.num_mixtures
                info = []
                source = detector.settings.get('standardize_negative_source', 'neg-dir')
                N = detector.settings.get('standardize_num_images', 50)
                if source.startswith('voc-train-non-'):
                    obj_class = source.split('-')[-1] 
                    print('Taking negatives from voc train, without class', obj_class)
                    gen = gv.voc.gen_negative_files(obj_class, 'train')
                    #print('negatives', len([im for im in gen]))
                else:
                    print('Taking negatives from neg_dir')
                    gen = itr.cycle(gv.datasets.ImgFile(path=fn, img_id=os.path.basename(fn)) for fn in neg_files)
                    
                gen = itr.cycle(gen)
                gen = itr.islice(gen, N)
                gens = itr.tee(gen, detector.num_mixtures)

                th = -np.inf
                for m in xrange(detector.num_mixtures):
                    neg_files_segment = gens[m]
                    argses = [(detector, i, fileobj, th, m) for i, fileobj in enumerate(neg_files_segment)] 
                    topsy = list(gv.parallel.starmap_unordered(get_strong_fps_single, argses))
                    confs = np.asarray([bbobj.confidence for topsy_m in topsy for bbobj in topsy_m])

                    info.append(dict(mean=confs.mean(), std=confs.std())) 
                    #for m in xrange(detector.num_mixtures):
                    
                detector.standardization_info = info      

            else:

                argses = [(m, settings, detector.eps, bbs[m], kernels[m], bkgs[m], None, None, None, detector.indices[m] if INDICES else None, 3) for m in xrange(detector.num_mixtures)]

                detector.standardization_info = list(gv.parallel.starmap(_calc_standardization_for_mixcomp, argses))
        else:
            raise Exception("Unknown testing type")


    detector.settings['testing_type'] = testing_type 
    #detector.settings['testing_type'] = 'NEW'

    #detector.

    #
    # Data mine stronger negatives 
    #
    # TODO: Object class must be input
    if 1:
        contest = 'voc'
        obj_class = 'car'
        gen = gv.voc.gen_negative_files(obj_class, 'train')
    else:
        contest = 'custom-tmp-frontbacks'
        obj_class = 'bicycle'
        gen, tot = gv.datasets.load_files(contest, obj_class)

    import heapq
    top_bbs = [[] for k in xrange(detector.num_mixtures)]
    TOP_N = 10000


    if detector.settings.get('cascade'): # New SVM attempt 
        detector.extra['cascade_threshold'] = detector.settings.get('cascade_threshold', 8) 
        COUNT = detector.settings.get('cascade_farming_count', 500)

        args = itr.izip( \
            itr.repeat(detector), 
            xrange(COUNT), 
            itr.islice(gen, COUNT)
        )

        for res in gv.parallel.starmap_unordered(get_strong_fps, args):
            for m in xrange(detector.num_mixtures):
                top_bbs[m].extend(res[m])

        ag.info('- TOPS ------')
        ag.info(map(np.shape, top_bbs) )
        detector.extra['top_bbs_shape'] = map(np.shape, top_bbs) 

        # Save the strong negatives
        detector.extra['negs'] = top_bbs
        
        def phi(X, mixcomp):
            if SVM_INDICES and 0:
                indices = detector.indices2[mixcomp][0]
                return X.ravel()[np.ravel_multi_index(indices.T, X.shape)]
            else:
                #return gv.sub.subsample(X, (2, 2)).ravel()
                return X.ravel()

        all_neg_X0 = []
        for k in xrange(detector.num_mixtures):
            all_neg_X0.append(np.asarray(map(lambda bbobj: phi(bbobj.X, k), top_bbs[k])))

        del top_bbs

        all_pos_X0 = []
        for mixcomp, pos_feats in enumerate(all_pos_feats):
            all_pos_X0.append(np.asarray(map(lambda X: phi(X, mixcomp), pos_feats))) 
        ag.info('Done.')

        detector.extra['poss'] = all_pos_feats

        ag.info('Training SVMs...')
        # Train SVMs
        #from sklearn.svm import LinearSVC
        from sklearn.svm import LinearSVC, SVC
        clfs = []
        detector.indices2 = None # not [] for now 

        #all_neg_X0 = [[bbobj.X for bbobj in top_bbs[m]] for m in xrange(detector.num_mixtures)]

        detector.extra['svms'] = []
        for m in xrange(detector.num_mixtures):
            X = np.concatenate([all_pos_X0[m], all_neg_X0[m]])  
    
            # Flatten
            ag.info(m, ':', X.shape)
            #X = phi(X, k)
            ag.info(m, '>', X.shape)
            y = np.concatenate([np.ones(len(all_pos_feats[m])), np.zeros(len(all_neg_X0[m]))])

            #detector.extra['data_x'].append(X)
            #detector.extra['data_y'].append(y)


            #C = 5e-8
            C = 1.0

            #clf = LinearSVC(C=C)
            #clf = LinearSVC(C=C)
            clf = SVC(C=C, kernel='linear')
            clf.fit(X, y)

            svm_info = dict(intercept=float(clf.intercept_), weights=clf.coef_)
            detector.extra['svms'].append(svm_info)

            #sh = all_pos_feats[m][0].shape

            # Get most significant coefficients

            #th = smallest_th[k] 
            #th = 0
            #detector.extra['svms'].append(dict(svm=clf, th=th, uses_indices=SVM_INDICES))
        ag.info('Done.')

        # Remove negatives and positives from extra, since it takes space
        if 1:
            del detector.extra['poss']
            del detector.extra['negs']

    ag.info('extra')
    ag.info(detector.extra.keys())
    ag.info('eps', detector.eps)

    #ag.info("THIS IS SO TEMPORARY!!!!!")
    if 'weights' in detector.extra:
        #detector.indices = None

        ag.info(detector.standardization_info)
        #try:
        #    detector.standardization_info[0]['std'] = 1.0
        #except TypeError:
        #    detector.standardization_info = [dict(std=1.0, mean=0.0)]
        ag.info('corner2', detector.extra['weights'][0][0,0,:5])

    return detector 
def _process_file_kernel_basis(seed, mixcomp, settings, bb, filename, bkg_stack, bkg_stack_num):
    ag.info("Processing file ", filename)
    im_size = settings['detector']['image_size']
    size = gv.bb.size(bb)

    # Use the same seed for all mixture components! That will make them easier to compare,
    # without having to sample to infinity.


    # HERE: Make it possible to input data directly!
    descriptor = gv.load_descriptor(settings)

    part_size = descriptor.settings['part_size']
    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    rotspread = settings['detector'].get('rotation_spreading_radius', 0)
    crop_image = settings['detector'].get('crop_image')
    cb = settings['detector'].get('crop_border')

    #sh = (size[0] // psize[0], size[1] // psize[1])
    sh = gv.sub.subsample_size_new((size[0]-4, size[1]-4), psize)

    all_pos_feats = []

    F = descriptor.num_features

    # No coding is also included
    counts = np.zeros(sh + (F + 1, F), dtype=np.int64)
    empty_counts = np.zeros((F + 1, F), dtype=np.int64)
    totals = 0

    sett = dict(spread_radii=radii, subsample_size=psize, rotation_spreading_radius=rotspread, crop_border=cb)


    alpha_maps = []

    gray_im, alpha = _load_cad_image(filename, im_size, bb, crop=crop_image)

    pad = (radii[0] + 2, radii[1] + 2)

    padded_gray_im = ag.util.zeropad(gray_im, pad)
    padded_alpha = ag.util.zeropad(alpha, pad)

    dups = 5
    X_pad_size = (part_size[0] + pad[0] * 2, part_size[1] + pad[1] * 2)

    bkgs = np.empty(((F + 1) * dups,) + X_pad_size) 

    rs = np.random.RandomState(seed)

    for f in xrange(F + 1):
    #for f in xrange(1):
        num = bkg_stack_num[f]

        for d in xrange(dups):
            bkg_i = rs.randint(num)
            bkgs[f*dups+d] = bkg_stack[f,bkg_i]

    # Do it with no superimposed image, to see what happens to pure background
    img_with_bkgs = bkgs
    #ex = descriptor.extract_features(img_with_bkgs[0], settings=sett)
    parts = np.asarray([descriptor.extract_features(im, settings=sett)[0,0] for im in img_with_bkgs])

    for f in xrange(F + 1):
        hist = parts[f*dups:(f+1)*dups].sum(0)
        empty_counts[f] += hist

    if 1:
        for i, j in itr.product(xrange(sh[0]), xrange(sh[1])):
            selection = [slice(i * psize[0], i * psize[0] + X_pad_size[0]), slice(j * psize[1], j * psize[1] + X_pad_size[1])]

            patch = padded_gray_im[selection]
            alpha_patch = padded_alpha[selection]

            patch = patch[np.newaxis]
            alpha_patch = alpha_patch[np.newaxis]

            img_with_bkgs = patch * alpha_patch + bkgs * (1 - alpha_patch)
            
            #ex = descriptor.extract_features(img_with_bkgs[0], settings=sett)
            parts = np.asarray([descriptor.extract_features(im, settings=sett)[0,0] for im in img_with_bkgs])

            #counts[i,j] += parts
            for f in xrange(F + 1):
            #for f in xrange(1):
                #hist = np.bincount(parts[f*dups:(f+1)*dups].ravel(), minlength=F + 1)
                hist = parts[f*dups:(f+1)*dups].sum(0)
                counts[i,j,f] += hist

    totals += dups 

    #support = alpha_maps.mean(axis=0)

    return counts, empty_counts, totals
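# Aggregation sketch (assumed; the reduction over files is not part of this
# section): summing the per-file results from _process_file_kernel_basis and
# dividing by the accumulated totals, e.g.
#   theta = counts_sum.astype(np.float64) / totals_sum
# would give, for each subsampled location and superimposed background part,
# the probability that each descriptor feature fires.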
def background_adjust_model(settings, bkg_stack, bkg_stack_num, seed=0, threading=True):
    offset = settings["detector"].get("train_offset", 0)
    limit = settings["detector"].get("train_limit")
    num_mixtures = settings["detector"]["num_mixtures"]
    assert limit is not None, "Must specify limit in the settings file"
    duplicates = settings["detector"].get("duplicates", 1)
    files = sorted(glob.glob(settings["detector"]["train_dir"]))[offset : offset + limit]

    # try:
    #    detector = gv.Detector.load(settings['detector']['file'])
    # except KeyError:
    #    raise Exception("Need to train the model first")

    # Create accumulators for each mixture component
    # TODO: Temporary until multicomp
    # counts = np.zeros_like(detector.kernel_templates)

    # num_files = len(files)
    # num_duplicates = settings['detector'].get('duplicate', 1)

    # Create several random states, so it's easier to measure
    # the influence of certain features

    # Setup unspread bedges settings
    # X_pad_size = padded_theta.shape[1:3]

    # for fn in files:
    # counts += _process_file(settings, bkg_stack, bkg_stack_num, fn)

    # Train a mixture model to get a clustering of the angles of the object

    descriptor = gv.load_descriptor(settings)

    if 0:
        detector = gv.BernoulliDetector(num_mixtures, descriptor, settings["detector"])
        detector.train_from_images(files)

        plt.clf()
        ag.plot.images(detector.support)
        plt.savefig("output/components.png")

        comps = detector.mixture.mixture_components()
        each_mix_N = np.bincount(comps, minlength=num_mixtures)

    comps = np.zeros(len(files))

    argses = [(settings, bkg_stack, bkg_stack_num, files[i], comps[i]) for i in xrange(len(files))]

    # Iterate images
    all_counts = gv.parallel.imap(_process_file_star, argses)

    # Can do this instead:
    counts = sum(all_counts)

    # Divide the accumulator to get the new distribution
    # counts /= num_files

    # Create a new model, with this distribution
    # new_detector = detector.copy()

    # new_detector.kernel_templates = counts
    # new_detector.support = None
    # new_detector.use_alpha = False

    # Return model
    # return new_detector
    # NOTE: each_mix_N, detector.support and detector.mixture are only defined
    # when the disabled mixture-training block above is enabled.
    return counts, each_mix_N * duplicates, detector.support, detector.mixture
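def _process_file_star_sketch(args):
    # Minimal sketch (hypothetical name) of the _process_file_star helper that
    # gv.parallel.imap expects above; the real one is not in this section. It
    # unpacks the argument tuple and forwards to the per-file processor
    # referenced in the commented-out loop earlier in background_adjust_model.
    settings, bkg_stack, bkg_stack_num, fn, mixcomp = args
    return _process_file(settings, bkg_stack, bkg_stack_num, fn)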
def get_bkg_stack(settings, X_pad_size, M=20):
    descriptor = gv.load_descriptor(settings)

    bsettings = settings['edges'].copy()
    radius = bsettings['radius']
    bsettings['radius'] = 0

    descriptor_name = settings['detector']['descriptor']

    #neg_filenames= sorted(glob.glob(os.path.join(os.environ['UIUC_DIR'], 'TrainImages', 'neg-*.pgm')))
    neg_filenames = sorted(
        glob.glob(os.path.expandvars(
            settings[descriptor_name]['image_dir']))) * 100

    gen_raw = generate_random_patches(neg_filenames,
                                      X_pad_size,
                                      0,
                                      per_image=25)

    print descriptor.num_parts
    bkg_stack_num = np.zeros(descriptor.num_parts + 1)
    bkg_stack = np.zeros((
        descriptor.num_parts + 1,
        M,
    ) + X_pad_size)

    psize = settings['detector']['subsample_size']
    radii = settings['detector']['spread_radii']
    sett = dict(subsample_size=psize, spread_radii=radii)

    i = 0
    import matplotlib.pylab as plt
    N = 100000
    for patch in gen_raw:
        #edges = ag.features.bedges(patch, **bsettings)

        #plt.imshow(patch, interpolation='nearest', cmap=plt.cm.gray)
        #plt.show()

        #X_pad_spread = ag.features.bspread(edges, spread=bsettings['spread'], radius=radius)

        #padding = pad - 2  # `pad` is not defined in this function; only the disabled line below needed it
        #X_spread = X_pad_spread[padding:-padding,padding:-padding]

        # Code parts
        #parts = descriptor.extract_parts(X_spread.astype(np.uint8), edges, settings=sett)
        parts = descriptor.extract_features(patch, settings=sett)

        # Accumulate and return
        if parts[0, 0].sum() == 0:
            f = 0
        else:
            f = np.argmax(parts[0, 0]) + 1
            #cc[f] += 1

        # The i % 10 check avoids having all background images for f=0 come
        # from the same image (and thus likely overlapping patches)
        if bkg_stack_num[f] < M and (f != 0 or i % 10 == 0):
            bkg_stack[f, bkg_stack_num[f]] = patch
            bkg_stack_num[f] += 1

        if i % 10000 == 0:
            print i, bkg_stack_num
            if bkg_stack_num.min() == M:
                break
        i += 1
        if i == N:
            break

    #print 'i', i

    #print 'min', sorted(cc)[:10]
    #cc /= N
    #print cc[:10]
    #print bkg[:10]

    assert i != 0, "No images found"

    #print cc.sum()
    #print bkg.sum()
    return bkg_stack, bkg_stack_num
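# Usage sketch (values are hypothetical): the returned stack holds up to M
# background patches per part, with index 0 reserved for "no part coded", and
# bkg_stack_num[f] records how many patches were actually collected for part f.
# _process_file_kernel_basis above samples from it with
#   bkg_i = rs.randint(int(bkg_stack_num[f])); patch = bkg_stack[f, bkg_i]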
def background_adjust_model(settings, bkg, seed=0):
    offset = settings['detector'].get('train_offset', 0)
    limit = settings['detector'].get('train_limit')
    files = sorted(glob.glob(settings['detector']['train_dir']))[
        offset:limit]  # * settings['detector'].get('duplicate', 1)

    try:
        detector = gv.Detector.load(settings['detector']['file'])
    except KeyError:
        raise Exception("Need to train the model first")

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    sh = (28, 88)

    # Create accumulators for each mixture component
    # TODO: Temporary until multicomp
    #counts = np.zeros_like(detector.kernel_templates)
    counts = np.zeros((1, sh[0], sh[1], descriptor.num_parts))

    num_files = len(files)
    num_duplicates = settings['detector'].get('duplicate', 1)

    # Create several random states, so it's easier to measure
    # the influence of certain features
    prnds = [np.random.RandomState(seed + i) for i in xrange(10)]

    # Setup unspread bedges settings
    bsettings = settings['edges'].copy()
    radius = bsettings['radius']
    bsettings['radius'] = 0
    padding = radius

    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])

    padded_theta = descriptor.unspread_parts_padded

    X_pad_size = padded_theta.shape[1:3]

    for fn in files:
        ag.info("Processing file", fn)

        # Which mixture component does this image belong to?
        # TODO: Temporary until multicomp
        mixcomp = 0  #np.argmax(detector.affinities

        # Binarize support and Extract alpha
        color_img, alpha = gv.img.load_image_binarized_alpha(fn)
        img = gv.img.asgray(color_img)

        alpha_pad = ag.util.zeropad(alpha, padding)
        inv_alpha_pad_expanded = np.expand_dims(~alpha_pad, -1)

        # Iterate every duplicate
        for loop in xrange(num_duplicates):
            ag.info("Iteration {0}/{1}".format(loop + 1, num_duplicates))
            # Superimpose onto gray background
            graymap = create_graymap(img.shape, loop / float(num_duplicates - 1),
                                     prnds[0])

            # Composite
            img_with_gray = composite(img, graymap, alpha)

            # Retrieve unspread edges (with a given background gray level)
            edges = ag.features.bedges(img_with_gray, **bsettings)

            # Pad the edges
            edges_pad = ag.util.zeropad(edges, (padding, padding, 0))

            for i, j in product(locations0, locations1):
                selection = [
                    slice(i, i + X_pad_size[0]),
                    slice(j, j + X_pad_size[1])
                ]
                X_pad = edges_pad[selection].copy()
                nA_pad = inv_alpha_pad_expanded[selection]

                # Draw background part from categorical distribution
                f_bkg = weighted_choice_unit(bkg, prnds[1])
                probs_bkg = get_probs(padded_theta, f_bkg)
                probs = nA_pad * probs_bkg

                # Iterate over all locations

                # Draw from background edge probability over ~alpha
                X_pad |= (prnds[2].rand(*probs.shape) < probs)

                # Do spreading
                X_pad_spread = ag.features.bspread(X_pad,
                                                   spread=bsettings['spread'],
                                                   radius=radius)

                # De-pad
                X_spread = X_pad_spread[padding:-padding, padding:-padding]

                # Code parts
                parts = descriptor.extract_parts(X_spread.astype(np.uint8))

                # Accumulate and return
                counts[mixcomp, i, j] += parts[0, 0]
    """
    if 0:
        from multiprocessing import Pool
        p = Pool(7)
        mapf = p.map
    else:
        mapf = map
    def _process_file(fn): 
        return _process_file_full(fn, sh, descriptor, detector)

    # Iterate images
    all_counts = mapf(_process_file, files)

    for counti in all_counts:
        counts += counti
    """

    # Divide the accumulator to get the new distribution
    counts /= num_files * num_duplicates

    # Create a new model, with this distribution
    new_detector = detector.copy()

    new_detector.kernel_templates = counts
    new_detector.support = None
    new_detector.use_alpha = False

    # Return model
    return new_detector
def arrange_model(pos, settings, config, offset=None, mods=None):
    if offset is None:
        offset = settings['detector'].get('train_offset', 0)
    limit = settings['detector'].get('train_limit')
    if limit is not None:
        limit += offset

    nospreading = config.startswith('cor')

    files = sorted(glob.glob(settings['detector']['train_dir']))[offset:limit] * settings['detector'].get('duplicate', 1)
    def _load(fn):
        return load_and_crop(fn, pos)
    alpha_and_images = map(_load, files)
    if alpha_and_images[0][0] is None:
        alpha = None
        all_alphas = None
        alpha_padded = None
        alphas_padded = None
    else:
        all_alphas = np.asarray(map(itemgetter(0), alpha_and_images))
        #all_alphas = np.asarray(map(lambda x: x[0], alpha_and_images))
        side = 9+PAD*2
        alpha_padded = all_alphas[:,2:-2,2:-2].mean(axis=0)
        alphas_padded = all_alphas[:,2:-2,2:-2]
        alpha = all_alphas[:,side//2-4:side//2+1+4,side//2-4:side//2+1+4].mean(axis=0)

    if 0 and PLOT and alpha is not None:
        plt.clf()
        ag.plot.images([alpha])
        plt.savefig('outs/alpha.png')
    #np.save('_alpha.npy', alpha) 
    
    images = np.asarray(map(itemgetter(1), alpha_and_images))

    size = (9+PAD*2,)*2

    if config.startswith('bkg'):
        seed = int(config[3:])
        neg_gen = generate_random_patches(neg_filenames, size, seed=seed)
        for i in xrange(len(images)):
            # Superimpose it onto the negative patch
            images[i] = neg_gen.next()
        
    elif config.startswith('sup'):
        seed = int(config[3:])
        neg_gen = generate_random_patches(neg_filenames, size, seed=seed)
        for i in xrange(len(images)):
            # Superimpose it onto the negative patch
            images[i] = composite(images[i], neg_gen.next(), all_alphas[i])
    elif config == 'none' or config.startswith('cor'):
        # Add gray background
        if 1:
            D = settings['detector'].get('duplicate', 1)
            c = 0
            for i in xrange(len(images)//D):
                for j in xrange(D):
                    gray = np.ones_like(images[c]) * j / (D - 1)
                    gray = np.clip(gray + np.random.randn(*gray.shape) * 0.0001, 0, 1)
                    images[c] = composite(images[c], gray, all_alphas[c])
                    c += 1
    else:
        raise ValueError("Unknown config: {0}".format(config))
    
    setts = settings['edges'].copy()

    if nospreading:
        setts['radius'] = 0
        all_edges_unspread = ag.features.bedges(images, **setts)
        edge_patch_unspread = all_edges_unspread[:,BUF:-BUF,BUF:-BUF].astype(np.bool)
    else:
        edge_patch_unspread = None
    all_edges = ag.features.bedges(images, **settings['edges'])
    #edgies = ag.features.bedges(images, **settings['edges'])[:,1:-1,1:-1]

    #edges = ag.features.bedges(images, **settings['edges'])
    descriptor = gv.load_descriptor(settings) 

    #radii = settings['detector']['spread_radii']

    
    #feats = np.asarray(map(descriptor.extract_features, images))

    edge_patch = all_edges[:,BUF:-BUF,BUF:-BUF]

    #if mods is not None:
        #blackout = np.load('_blackout.npy')
        #blackin = np.load('_blackin.npy')

    #    mask = ~(mods.mean(axis=0).mean(axis=0) > 0.00001)
        
        #mask = ag.util.zeropad(~((blackout > 0) | (blackin > 0)), (1, 1, 0))
        #mask = ag.util.zeropad(((mods > 0.0001) | (blackin > 0.0001)), (1, 1, 0))
        #mask = ag.util.zeropad(~((blackin > 0)), (1, 1, 0))
        #edge_patch &= mask 
        

    #if 0 and PLOT:
    #    edges = all_edges#ag.features.bedges(images, **settings['edges'])
    #    edges_ = np.rollaxis(edges, 3, start=1)
    #    pledges = edges_.reshape((np.prod(edges_.shape[:2]),) + edges_.shape[2:])
#
#        #ag.plot.images([alpha])
#
#        #print edges.shape
#        plt.clf()
#        ag.plot.images(pledges[:,1:-1,1:-1], subplots=edges_.shape[:2], show=False)
#        plt.savefig('outs/edges-{0}.png'.format(config))


    feats = np.asarray(map(descriptor.extract_parts, edge_patch))

    return {
        'settings': settings, 
        'theta': feats[:,0,0].mean(axis=0), 
        'alpha': alpha,
        'alpha_padded': alpha_padded,
        'alphas_padded': alphas_padded,
        'edges': edge_patch.astype(np.bool),
        'edges_unspread': edge_patch_unspread,
    }
def correct_model(model, bkg=None, model_bkg=None, seed=0, mods=None):
    settings = model['settings']
    feats = model['theta']
    alpha = model['alpha']
    alpha_padded = model['alpha_padded']
    descriptor = gv.load_descriptor(settings)
    N = settings['detector']['train_limit'] * settings['detector']['duplicate']
    num_features = feats.size
    part_counts = np.zeros(num_features)
    num_edges = 4 

    USE_UNSPREAD = True 
    if USE_UNSPREAD:
        #edges = model['edges_unspread']
        edges = model['edges']
    else:
        edges = model['edges']


    if alpha is None:
        alpha = np.ones((9, 9))
    p_alpha = alpha
    p_alpha_padded = alpha_padded
    p_kernel = feats 
    if bkg is not None:
        good_back = p_back = bkg
    else:
        good_back = p_back = np.load('bkg2_nospread.npy')

    #ealpha = np.load('_edges.npy').astype(np.bool)

    Xs = []

    #blackout0 = np.load('_blackout.npy')
    #blackin0 = np.load('_blackin.npy')

    #bm = ag.stats.BernoulliMixture.load('_mix.npy') 
    #mods = np.load('_mods.npy') 


    neg_gen = generate_random_patches(neg_filenames, (9+PAD*2, 9+PAD*2), seed=seed)

    if USE_UNSPREAD:
        #theta = descriptor.parts
        """
        new_theta = np.ones(descriptor.parts.shape)
        #theta = 1 - (1 - descriptor.parts)**(1/9)
        sh = descriptor.parts.shape[1:]
        def cliprange(k, size):
            return xrange(max(0, k-1), min(size, k+2))
        for i in xrange(sh[0]):
            for j in xrange(sh[1]):
                for x in cliprange(i, sh[0]):
                    for y in cliprange(j, sh[1]):
                        new_theta[:,i,j] *= 1 - theta[:,x,y]

        new_theta = 1 - new_theta**(1/81)

        plt.clf()
        ag.plot.images([theta[160,...,0], new_theta[160,...,0]])
        plt.savefig('outs/debug.png')
        """
        #theta = new_theta
        #  theta = 1 - (1 - theta)**(1/9)
        padding = settings['edges']['radius']
        #padded_theta = ag.util.border_value_pad(descriptor.unspread_parts, (0, padding, padding, 0))
        padded_theta = descriptor.unspread_parts_padded
    else:
        theta = descriptor.parts

    cumX = None

    IN = 10 # Inner loop

    #import ipdb; ipdb.set_trace()

    FIXED_OBJ = True
    for loop in xrange(N): 
        randgen = np.random.RandomState(seed+loop)
        randgen2 = np.random.RandomState(seed+loop + 4)
        randgen3 = np.random.RandomState(seed+loop + 23)
        randgen4 = np.random.RandomState(seed+loop + 100)
        randgen5 = np.random.RandomState(seed+loop + 231)
        randgen6 = np.random.RandomState(seed+loop + 232)
        randgen7 = np.random.RandomState(seed+loop + 232)

        for inner_loop in xrange(IN):
            #if loop % 1000 == 0:
            #    print 'loop', loop
            if not FIXED_OBJ:
                f_obj = weighted_choice_unit(p_kernel, randgen)
                probs_obj = get_probs(theta, f_obj)

                parts = descriptor.extract_parts(edges[loop].astype(np.uint8))[0,0]
                if parts.sum() > 0:
                    f_obj = np.argmax(parts)
                else:
                    f_obj = -1

            #import pdb; pdb.set_trace()
            f_bkg = weighted_choice_unit(good_back, randgen)
            if USE_UNSPREAD:
                probs_bkg = get_probs(padded_theta, f_bkg) 
            else:
                probs_bkg = get_probs(theta, f_bkg) 

    
            if 1:
                # Draw from the alpha
                #A = (randgen2.rand(*p_alpha.shape) < p_alpha).astype(np.uint8) 
                #print p_alpha
                if USE_UNSPREAD:
                    #A = (randgen2.rand() < p_alpha_padded)
                    A = model['alphas_padded'][loop]
                else:
                    A = (randgen2.rand() < p_alpha)
                #A = (0.5 < p_alpha).astype(np.uint8)


                #if FIXED_OBJ:
                    #A = ~ag.util.inflate2d(~A, np.ones((3, 3)))         

                #AA = A.reshape(A.shape + (1,)).astype(np.bool)

                #print 'AA:', AA.sum()

                """
                if 0 and loop <= 5:
                    plt.clf()
                    ag.plot.images([AA[...,0]]) 
                    plt.savefig('outs/alpha-{1}-{0}.png'.format(inner_loop, loop))
                """

                if FIXED_OBJ:
                    if not USE_UNSPREAD:
                         
                        #A = ~ag.util.inflate2d(~A, np.ones((3, 3))).astype(np.bool)
                        Ab = np.tile(np.expand_dims(A, -1), 4)
                        Ab = ~ag.features.bspread(~Ab, spread=settings['edges']['spread'], radius=settings['edges']['radius']).astype(np.bool)
                        AA = Ab
                    else:
                        AA = np.expand_dims(A, -1).astype(np.bool)

                if USE_UNSPREAD:
                    #AApad = ag.util.border_value_pad(AA, (padding, padding, 0))
                    AApad = AA


                if FIXED_OBJ:
                    #probs_mixed = ag.util.inflate2d(~AA, np.ones((17, 17))) * probs_bkg 
                    if USE_UNSPREAD:
                        probs_mixed = ~AApad * probs_bkg
                    else:
                        probs_mixed = ~AA * probs_bkg
                    #probs_mixed = ~AA * probs_bkg
                else:
                    probs_mixed = AA * probs_obj + ~AA * probs_bkg 

                """
                if 0 and loop <= 5:
                    plt.clf()
                    ag.plot.images([probs_mixed[...,0]==0]) 
                    plt.savefig('outs/alpha-{1}-{0}b.png'.format(inner_loop, loop))
                """



                if 1:
                #if f_obj != -1:# or f_bkg != -1:

                    #print probs_mixed.shape
                    if not FIXED_OBJ:
                        X = (randgen3.rand(*probs_mixed.shape) < probs_mixed)

                    else:
                        if USE_UNSPREAD:
                            X = np.zeros((9+padding*2, 9+padding*2, 4), dtype=np.bool)
                        else:
                            X = np.zeros(edges.shape[1:], dtype=np.bool)
                        X0 = X.copy()
        
                        if 0:
                            Y = model_bkg['edges'][loop]
                            X |= ~AA & Y
                            
                            # What f_bkg is this?
                            f_bkg = np.argmax(descriptor.extract_parts(Y.astype(np.uint8))[0,0])
                        elif f_bkg != -1:
                            # Draw samples from the mixture components
                            X |= (randgen3.rand(*X.shape) < probs_mixed)
                            #print 'bkg:', X.sum()
                            #X = (randgen3.rand() < probs_mixed).astype(np.uint8)
                            #X = (1 - AA) * ag.features.bedges(neg_gen.next(), **settings['edges'])[1:-1,1:-1] 
                            X0 = X.copy()
                        #X[1:-1,1:-1] |= edges[loop] 

                    #X *= (1 - ealpha)
                    #r = randgen4.uniform(0, 0.5)
                    #mask = ~ealpha | ~(randgen4.rand(*X.shape) < 0.28)

                    #print '----'
                    #print np.rollaxis(mask, 2)
                    #X &= mask 

                    #X &= ((blackout0 > 0.0001) | (blackin0 > 0.0001)) 
        
                    #print 'sum:', np.sum(~(X ^ X0))
                
                    # Draw which blackout/in component
                    if 0:
                        f_comp = weighted_choice_unit(bm.weights, randgen6)
                        assert f_comp >= 0  
                        blackout = bm.templates[f_comp,0]
                        blackin = bm.templates[f_comp,1]
                    elif 0 and f_bkg != -1:
                        blackout = mods[f_bkg,0]
                        blackin = mods[f_bkg,1]
                        
                        mask = ~(randgen4.rand(*X.shape) < blackout)
                        mask2 = (randgen5.rand(*X.shape) < blackin)
                        #mask = ~(randgen4.rand() < blackout)
                        #Xmask = X & mask
                        Xmask2 = ~X & mask2
                        
                        X &= mask 
                        X |= Xmask2
                        #if randgen7.rand()>0.5:
                        #else:
                            #X &= mask 
                            #X |= mask2

                    
                    #mask = ~(mods.mean(axis=0).mean(axis=0) > 0.00001)
                    #X &= mask

                    #X &= ~((blackout > 0) | (blackin > 0)) 
                    #X &= ~((blackin > 0)) 

                    if loop == 0:
                        plt.clf()
                        ag.plot.images(np.rollaxis(X, 2))
                        plt.savefig('outs/pre-{0}.png'.format(inner_loop))
                    
                    # Now, do edge spreading!
                    if USE_UNSPREAD:
                        X = ag.features.bspread(X, spread=settings['edges']['spread'], radius=settings['edges']['radius'])

                        # Now, take the window
                        X = X[padding:-padding,padding:-padding]
                    
                    X |= edges[loop]

                    if loop == 0:
                        plt.clf()
                        ag.plot.images(np.rollaxis(X, 2))
                        plt.savefig('outs/post-{0}.png'.format(inner_loop))

                    #if PLOT:
                    #    Xs.append(X)    
                    if cumX is None:
                        cumX = X.astype(int)
                    else:
                        cumX += X

                    #print X
                    
                    parts = descriptor.extract_parts(X.astype(np.uint8))[0,0]

                    if parts.sum() > 0:
                        f_res = np.argmax(parts)
                    else:
                        f_res = -1
        
                    #print 'bkg: {0}, obj: {1}, res: {2}'.format(f_bkg, f_obj, f_res)
                    
                    part_counts += parts 

                if 0:
                    if X.sum() >= settings['parts']['threshold']:

                        # Check which part this is most similar to
                        scores = np.apply_over_axes(np.sum, X * np.log(descriptor.parts) + (1 - X) * np.log(1 - descriptor.parts), [1, 2, 3]).ravel()
                        f_best = np.argmax(scores)
                        #f_best = np.argmax(np.apply_over_axes(np.sum, np.fabs(self.descriptor.parts - X), [1, 2, 3]).ravel())
                        part_counts[f_best] += 1
                    
                        
                        #p = _integrate(integral_aa_log[mixcomp], i, j, i+istep, j+jstep)

                #if f_bkg != -1:
                #    part_counts[f_bkg] += 1

                #part_counts += descriptor.extract_parts(X)[0,0]
                # Or do it this way:
                #feats = descriptor.extract_parts(X)
                #print f_best, feats

    # Average part activations over all N * IN samples (float division)
    new_feats = part_counts / float(N * IN)

    if PLOT:
        plt.clf()
        ag.plot.images(np.rollaxis(cumX, 2) / float(N * IN))
        plt.savefig('outs/mean-cor.png')

    if PLOT and 0:
        plt.clf()
        Xs = np.asarray(Xs)
        Xs_ = np.rollaxis(Xs, 3, start=1)
        plXs = Xs_.reshape((np.prod(Xs_.shape[:2]),) + Xs_.shape[2:])
        ag.plot.images(plXs, subplots=Xs_.shape[:2], show=False)
        plt.savefig('outs/corrected.png')

    new_model = model.copy()
    new_model['theta'] = new_feats
    new_model['alpha'] = None
    new_model['alpha_padded'] = None
    return new_model
def _process_file(settings, bkg_stack, bkg_stack_num, fn, mixcomp):
    ag.info("Processing file", fn)
    seed = np.abs(hash(fn) % 123124)
    descriptor_name = settings['detector']['descriptor']
    img_size = settings['detector']['image_size']
    part_size = settings[descriptor_name]['part_size']
    psize = settings['detector']['subsample_size']

    # The 4 is for the edge border that falls off
    #orig_sh = (img_size[0] - part_size[0] - 4 + 1, img_size[1] - part_size[1] - 4 + 1)
    orig_sh = img_size
    sh = gv.sub.subsample_size(np.ones(orig_sh), psize)

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    counts = np.zeros((settings['detector']['num_mixtures'], sh[0], sh[1],
                       descriptor.num_parts + 1, descriptor.num_parts),
                      dtype=np.uint16)

    prnds = [np.random.RandomState(seed + i) for i in xrange(5)]

    # Binarize support and Extract alpha
    #color_img, alpha = gv.img.load_image_binarized_alpha(fn)
    color_img = gv.img.load_image(fn)

    from skimage.transform import pyramid_reduce, pyramid_expand
    # Rescale factor as a true ratio (avoid Python 2 integer division)
    f = color_img.shape[0] / float(settings['detector']['image_size'][0])
    if f > 1:
        color_img = pyramid_reduce(color_img, downscale=f)
    elif f < 1:
        color_img = pyramid_expand(color_img, upscale=1 / f)

    alpha = color_img[..., 3]
    img = gv.img.asgray(color_img)

    # Resize it
    # TODO: This only looks at the first axis

    assert img.shape == tuple(settings['detector']['image_size']), \
        "Target size not achieved: {0} != {1}".format(
            img.shape, settings['detector']['image_size'])

    # Settings
    bsettings = settings['edges'].copy()
    radius = bsettings['radius']
    bsettings['radius'] = 0

    #offsets = gv.sub.subsample_offset_shape(sh, psize)

    #locations0 = xrange(offsets[0], sh[0], psize[0])
    #locations1 = xrange(offsets[1], sh[1], psize[1])
    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])
    #locations0 = xrange(10-4, 10+5)
    #locations1 = xrange(10-4, 10+5)

    #locations0 = xrange(10, 11)
    #locations1 = xrange(10, 11)

    #padded_theta = descriptor.unspread_parts_padded

    #pad = 10
    pad = 5
    size = settings[descriptor_name]['part_size']
    X_pad_size = (size[0] + pad * 2, size[1] + pad * 2)

    img_pad = ag.util.zeropad(img, pad)

    alpha_pad = ag.util.zeropad(alpha, pad)

    # Iterate every duplicate

    dups = settings['detector'].get('duplicates', 1)

    bkgs = np.empty(((descriptor.num_parts + 1) * dups, ) + X_pad_size)
    #cads = np.empty((descriptor.num_parts,) + X_pad_size)
    #alphas = np.empty((descriptor.num_parts,) + X_pad_size, dtype=np.bool)

    radii = settings['detector']['spread_radii']
    psize = settings['detector']['subsample_size']
    cb = settings['detector'].get('crop_border')
    sett = dict(spread_radii=radii, subsample_size=psize, crop_border=cb)

    plt.clf()
    plt.imshow(img)
    plt.savefig('output/img.png')

    if 0:
        # NEW{
        totfeats = np.zeros(sh + (descriptor.num_parts, ) * 2)
        for f in xrange(descriptor.num_parts):
            num = bkg_stack_num[f]

            for d in xrange(dups):
                feats = np.zeros(sh + (descriptor.num_parts, ), dtype=np.uint8)

                for i, j in itr.product(locations0, locations1):
                    # Pixel offsets of this subsampled grid cell
                    x = i * psize[0]
                    y = j * psize[1]

                    bkg_i = prnds[4].randint(num)
                    bkg = bkg_stack[f, bkg_i]

                    selection = [
                        slice(x, x + X_pad_size[0]),
                        slice(y, y + X_pad_size[1])
                    ]
                    #X_pad = edges_pad[selection].copy()
                    patch = img_pad[selection]
                    alpha_patch = alpha_pad[selection]

                    #patch = np.expand_dims(patch, 0)
                    #alpha_patch = np.expand_dims(alpha_patch, 0)

                    # TODO: Which one?
                    #img_with_bkg = patch + bkg * (1 - alpha_patch)
                    img_with_bkg = patch * alpha_patch + bkg * (1 -
                                                                alpha_patch)

                    edges_pads = ag.features.bedges(img_with_bkg, **bsettings)
                    X_pad_spreads = ag.features.bspread(
                        edges_pads, spread=bsettings['spread'], radius=radius)

                    padding = pad - 2
                    X_spreads = X_pad_spreads[padding:-padding, padding:-padding]

                    partprobs = ag.features.code_parts(
                        X_spreads, descriptor._log_parts,
                        descriptor._log_invparts,
                        descriptor.settings['threshold'],
                        descriptor.settings['patch_frame'])

                    part = partprobs.argmax()
                    if part > 0:
                        feats[i, j, part - 1] = 1

                # Now spread the parts
                feats = ag.features.bspread(feats, spread='box', radius=2)

                totfeats[:, :, f] += feats

        # }

        kernels = totfeats[:, :, 0].astype(
            np.float32) / (descriptor.num_parts * dups)

        # Subsample kernels
        sub_kernels = gv.sub.subsample(kernels, psize, skip_first_axis=False)

        np.save('tmp2.npy', sub_kernels)
        print 'saved tmp2.npy'
        import sys
        sys.exit(0)

    #ag.info("Iteration {0}/{1}".format(loop+1, num_duplicates))
    #ag.info("Iteration")
    for i, j in itr.product(locations0, locations1):
        # Pixel offsets of this subsampled grid cell
        x = i * psize[0]
        y = j * psize[1]

        print 'processing', i, j
        selection = [slice(x, x + X_pad_size[0]), slice(y, y + X_pad_size[1])]
        #X_pad = edges_pad[selection].copy()
        patch = img_pad[selection]
        alpha_patch = alpha_pad[selection]

        patch = np.expand_dims(patch, 0)
        alpha_patch = np.expand_dims(alpha_patch, 0)

        for f in xrange(descriptor.num_parts + 1):
            num = bkg_stack_num[f]

            for d in xrange(dups):
                bkg_i = prnds[4].randint(num)
                bkgs[f * dups + d] = bkg_stack[f, bkg_i]

        img_with_bkgs = patch * alpha_patch + bkgs * (1 - alpha_patch)

        if 0:
            edges_pads = ag.features.bedges(img_with_bkgs, **bsettings)
            X_pad_spreads = ag.features.bspread(edges_pads,
                                                spread=bsettings['spread'],
                                                radius=radius)

            padding = pad - 2
            X_spreads = X_pad_spreads[:, padding:-padding, padding:-padding]

        #partprobs = ag.features.code_parts_many(X_spreads, descriptor._log_parts, descriptor._log_invparts,
        #descriptor.settings['threshold'], descriptor.settings['patch_frame'])

        #parts = partprobs.argmax(axis=-1)

        parts = np.asarray([
            descriptor.extract_features(im, settings=sett)[0, 0]
            for im in img_with_bkgs
        ])

        for f in xrange(descriptor.num_parts + 1):
            hist = np.bincount(parts[f * dups:(f + 1) * dups].ravel(),
                               minlength=descriptor.num_parts + 1)
            counts[mixcomp, i, j, f] += hist[1:]

        #import pdb; pdb.set_trace()

        #for f in xrange(descriptor.num_parts):
        #    for d in xrange(dups):
        #        # Code parts
        #        #parts = descriptor.extract_parts(X_spreads[f*dups+d].astype(np.uint8))


#
#                f_plus = parts[f*dups+d]
#                if f_plus > 0:
#tau = self.settings.get('tau')
#if self.settings.get('tau'):
#parts = partprobs.argmax(axis=-1)

# Accumulate and return
#                    counts[mixcomp,i,j,f,f_plus-1] += 1#parts[0,0]

    if 0:
        kernels = counts[:, :, :, 0].astype(
            np.float32) / (descriptor.num_parts * dups)

        import pdb
        pdb.set_trace()

        radii = (2, 2)

        aa_log = np.log(1 - kernels)
        aa_log = ag.util.zeropad(aa_log, (0, radii[0], radii[1], 0))

        integral_aa_log = aa_log.cumsum(1).cumsum(2)

        offsets = gv.sub.subsample_offset(kernels[0], psize)

        if 1:
            # Fix kernels
            istep = 2 * radii[0]
            jstep = 2 * radii[1]
            sh = kernels.shape[1:3]
            for mixcomp in xrange(1):
                # Note, we are going in strides of psize, given a certain offset, since
                # we will be subsampling anyway, so we don't need to do the rest.
                for i in xrange(offsets[0], sh[0], psize[0]):
                    for j in xrange(offsets[1], sh[1], psize[1]):
                        p = gv.img.integrate(integral_aa_log[mixcomp], i, j,
                                             i + istep, j + jstep)
                        kernels[mixcomp, i, j] = 1 - np.exp(p)

        # Subsample kernels
        sub_kernels = gv.sub.subsample(kernels, psize, skip_first_axis=True)

        np.save('tmp.npy', sub_kernels)
        print 'saved tmp.npy'
        import sys
        sys.exit(0)

    if 0:
        for f in xrange(descriptor.num_parts):

            # Pick only one background for this part and file
            num = bkg_stack_num[f]

            # Assumes num > 0

            bkg_i = prnds[4].randint(num)

            bkgmap = bkg_stack[f, bkg_i]

            # Composite
            img_with_bkg = gv.img.composite(patch, bkgmap, alpha_patch)

            # Retrieve unspread edges (with a given background gray level)
            edges_pad = ag.features.bedges(img_with_bkg, **bsettings)

            # Pad the edges
            #edges_pad = ag.util.zeropad(edges, (pad, pad, 0))

            # Do spreading
            X_pad_spread = ag.features.bspread(edges_pad,
                                               spread=bsettings['spread'],
                                               radius=radius)

            # De-pad
            padding = pad - 2
            X_spread = X_pad_spread[padding:-padding, padding:-padding]

            # Code parts
            parts = descriptor.extract_parts(X_spread.astype(np.uint8))

            # Accumulate and return
            counts[mixcomp, i, j, f] += parts[0, 0]

    # Translate counts to spread counts (since we're assuming independence of samples within one CAD image)

    return counts
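# The tensor returned by _process_file holds, per mixture component, location and
# superimposed background part, a histogram over coded parts accumulated across `dups`
# duplicates. A minimal post-processing sketch (hypothetical helper, not part of the
# original source) that turns the raw counts into conditional part frequencies:
def _counts_to_conditional_freqs(counts, dups):
    # Each (mixcomp, i, j, f_bkg) cell saw exactly `dups` superimposed samples, so dividing
    # by `dups` gives the frequency of each coded part given that background part.
    return counts.astype(np.float64) / dups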
def background_adjust_model(settings,
                            bkg_stack,
                            bkg_stack_num,
                            seed=0,
                            threading=True):
    offset = settings['detector'].get('train_offset', 0)
    limit = settings['detector'].get('train_limit')
    num_mixtures = settings['detector']['num_mixtures']
    assert limit is not None, "Must specify limit in the settings file"
    duplicates = settings['detector'].get('duplicates', 1)
    files = sorted(glob.glob(
        settings['detector']['train_dir']))[offset:offset + limit]

    #try:
    #    detector = gv.Detector.load(settings['detector']['file'])
    #except KeyError:
    #    raise Exception("Need to train the model first")

    # Create accumulates for each mixture component
    # TODO: Temporary until multicomp
    #counts = np.zeros_like(detector.kernel_templates)

    #num_files = len(files)
    #num_duplicates = settings['detector'].get('duplicate', 1)

    # Create several random states, so it's easier to measure
    # the influence of certain features

    # Setup unspread bedges settings
    #X_pad_size = padded_theta.shape[1:3]

    #for fn in files:
    #counts += _process_file(settings, bkg_stack, bkg_stack_num, fn)

    # Train a mixture model to get a clustering of the angles of the object

    descriptor = gv.load_descriptor(settings)

    if 0:
        detector = gv.BernoulliDetector(num_mixtures, descriptor,
                                        settings['detector'])
        detector.train_from_images(files)

        plt.clf()
        ag.plot.images(detector.support)
        plt.savefig('output/components.png')

        comps = detector.mixture.mixture_components()
        each_mix_N = np.bincount(comps, minlength=num_mixtures)

    # Every training image is assigned to component 0 for now
    comps = np.zeros(len(files), dtype=int)
    each_mix_N = np.bincount(comps, minlength=num_mixtures)

    argses = [(settings, bkg_stack, bkg_stack_num, files[i], comps[i])
              for i in xrange(len(files))]

    # Iterate images
    all_counts = gv.parallel.imap(_process_file_star, argses)

    # Can do this instead (sum the per-file count tensors):
    counts = sum(all_counts)

    # Divide accumulated counts to get the new distribution
    #counts /= num_files

    # Create a new model, with this distribution
    #new_detector = detector.copy()

    #new_detector.kernel_templates = counts
    #new_detector.support = None
    #new_detector.use_alpha = False

    # Return model
    #return new_detector
    # NOTE: detector.support and detector.mixture are only defined when the clustering
    # block above (currently behind 'if 0:') has been run.
    return counts, each_mix_N * duplicates, detector.support, detector.mixture
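# gv.parallel.imap above is given _process_file_star, which is not defined in this
# snippet. A minimal sketch, assuming it is the usual star-unpacking wrapper around
# _process_file (hypothetical, shown for completeness):
def _process_file_star(args):
    # Unpack the argument tuple so a single-argument parallel map can drive _process_file
    return _process_file(*args)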
def _process_file(settings, bkg_stack, bkg_stack_num, fn, mixcomp):
    ag.info("Processing file", fn)
    seed = np.abs(hash(fn) % 123124)
    descriptor_name = settings["detector"]["descriptor"]
    img_size = settings["detector"]["image_size"]
    part_size = settings[descriptor_name]["part_size"]
    psize = settings["detector"]["subsample_size"]

    # The 4 is for the edge border that falls off
    # orig_sh = (img_size[0] - part_size[0] - 4 + 1, img_size[1] - part_size[1] - 4 + 1)
    orig_sh = img_size
    sh = gv.sub.subsample_size(np.ones(orig_sh), psize)

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    counts = np.zeros(
        (settings["detector"]["num_mixtures"], sh[0], sh[1], descriptor.num_parts + 1, descriptor.num_parts),
        dtype=np.uint16,
    )

    prnds = [np.random.RandomState(seed + i) for i in xrange(5)]

    # Binarize support and Extract alpha
    # color_img, alpha = gv.img.load_image_binarized_alpha(fn)
    color_img = gv.img.load_image(fn)

    from skimage.transform import pyramid_reduce, pyramid_expand

    # Rescale factor as a true ratio (avoid Python 2 integer division)
    f = color_img.shape[0] / float(settings["detector"]["image_size"][0])
    if f > 1:
        color_img = pyramid_reduce(color_img, downscale=f)
    elif f < 1:
        color_img = pyramid_expand(color_img, upscale=1 / f)

    alpha = color_img[..., 3]
    img = gv.img.asgray(color_img)

    # Resize it
    # TODO: This only looks at the first axis

    assert img.shape == settings["detector"]["image_size"], "Target size not achieved: {0} != {1}".format(
        img.shape, settings["detector"]["image_size"]
    )

    # Settings
    bsettings = settings["edges"].copy()
    radius = bsettings["radius"]
    bsettings["radius"] = 0

    # offsets = gv.sub.subsample_offset_shape(sh, psize)

    # locations0 = xrange(offsets[0], sh[0], psize[0])
    # locations1 = xrange(offsets[1], sh[1], psize[1])
    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])
    # locations0 = xrange(10-4, 10+5)
    # locations1 = xrange(10-4, 10+5)

    # locations0 = xrange(10, 11)
    # locations1 = xrange(10, 11)

    # padded_theta = descriptor.unspread_parts_padded

    # pad = 10
    pad = 5
    size = settings[descriptor_name]["part_size"]
    X_pad_size = (size[0] + pad * 2, size[1] + pad * 2)

    img_pad = ag.util.zeropad(img, pad)

    alpha_pad = ag.util.zeropad(alpha, pad)

    # Iterate every duplicate

    dups = settings["detector"].get("duplicates", 1)

    bkgs = np.empty(((descriptor.num_parts + 1) * dups,) + X_pad_size)
    # cads = np.empty((descriptor.num_parts,) + X_pad_size)
    # alphas = np.empty((descriptor.num_parts,) + X_pad_size, dtype=np.bool)

    radii = settings["detector"]["spread_radii"]
    psize = settings["detector"]["subsample_size"]
    cb = settings["detector"].get("crop_border")
    sett = dict(spread_radii=radii, subsample_size=psize, crop_border=cb)

    plt.clf()
    plt.imshow(img)
    plt.savefig("output/img.png")

    if 0:
        # NEW{
        totfeats = np.zeros(sh + (descriptor.num_parts,) * 2)
        for f in xrange(descriptor.num_parts):
            num = bkg_stack_num[f]

            for d in xrange(dups):
                feats = np.zeros(sh + (descriptor.num_parts,), dtype=np.uint8)

                for i, j in itr.product(locations0, locations1):
                    # Pixel offsets of this subsampled grid cell
                    x = i * psize[0]
                    y = j * psize[1]

                    bkg_i = prnds[4].randint(num)
                    bkg = bkg_stack[f, bkg_i]

                    selection = [slice(x, x + X_pad_size[0]), slice(y, y + X_pad_size[1])]
                    # X_pad = edges_pad[selection].copy()
                    patch = img_pad[selection]
                    alpha_patch = alpha_pad[selection]

                    # patch = np.expand_dims(patch, 0)
                    # alpha_patch = np.expand_dims(alpha_patch, 0)

                    # TODO: Which one?
                    # img_with_bkg = patch + bkg * (1 - alpha_patch)
                    img_with_bkg = patch * alpha_patch + bkg * (1 - alpha_patch)

                    edges_pads = ag.features.bedges(img_with_bkg, **bsettings)
                    X_pad_spreads = ag.features.bspread(edges_pads, spread=bsettings["spread"], radius=radius)

                    padding = pad - 2
                    X_spreads = X_pad_spreads[padding:-padding, padding:-padding]

                    partprobs = ag.features.code_parts(
                        X_spreads,
                        descriptor._log_parts,
                        descriptor._log_invparts,
                        descriptor.settings["threshold"],
                        descriptor.settings["patch_frame"],
                    )

                    part = partprobs.argmax()
                    if part > 0:
                        feats[i, j, part - 1] = 1

                # Now spread the parts
                feats = ag.features.bspread(feats, spread="box", radius=2)

                totfeats[:, :, f] += feats

        # }

        kernels = totfeats[:, :, 0].astype(np.float32) / (descriptor.num_parts * dups)

        # Subsample kernels
        sub_kernels = gv.sub.subsample(kernels, psize, skip_first_axis=False)

        np.save("tmp2.npy", sub_kernels)
        print "saved tmp2.npy"
        import sys

        sys.exit(0)

    # ag.info("Iteration {0}/{1}".format(loop+1, num_duplicates))
    # ag.info("Iteration")
    for i, j in itr.product(locations0, locations1):
        # Pixel offsets of this subsampled grid cell
        x = i * psize[0]
        y = j * psize[1]

        print "processing", i, j
        selection = [slice(x, x + X_pad_size[0]), slice(y, y + X_pad_size[1])]
        # X_pad = edges_pad[selection].copy()
        patch = img_pad[selection]
        alpha_patch = alpha_pad[selection]

        patch = np.expand_dims(patch, 0)
        alpha_patch = np.expand_dims(alpha_patch, 0)

        for f in xrange(descriptor.num_parts + 1):
            num = bkg_stack_num[f]

            for d in xrange(dups):
                bkg_i = prnds[4].randint(num)
                bkgs[f * dups + d] = bkg_stack[f, bkg_i]

        img_with_bkgs = patch * alpha_patch + bkgs * (1 - alpha_patch)

        if 0:
            edges_pads = ag.features.bedges(img_with_bkgs, **bsettings)
            X_pad_spreads = ag.features.bspread(edges_pads, spread=bsettings["spread"], radius=radius)

            padding = pad - 2
            X_spreads = X_pad_spreads[:, padding:-padding, padding:-padding]

        # partprobs = ag.features.code_parts_many(X_spreads, descriptor._log_parts, descriptor._log_invparts,
        # descriptor.settings['threshold'], descriptor.settings['patch_frame'])

        # parts = partprobs.argmax(axis=-1)

        parts = np.asarray([descriptor.extract_features(im, settings=sett)[0, 0] for im in img_with_bkgs])

        for f in xrange(descriptor.num_parts + 1):
            hist = np.bincount(parts[f * dups : (f + 1) * dups].ravel(), minlength=descriptor.num_parts + 1)
            counts[mixcomp, i, j, f] += hist[1:]

        # import pdb; pdb.set_trace()

        # for f in xrange(descriptor.num_parts):
        #    for d in xrange(dups):
        #        # Code parts
        #        #parts = descriptor.extract_parts(X_spreads[f*dups+d].astype(np.uint8))
    #
    #                f_plus = parts[f*dups+d]
    #                if f_plus > 0:
    # tau = self.settings.get('tau')
    # if self.settings.get('tau'):
    # parts = partprobs.argmax(axis=-1)

    # Accumulate and return
    #                    counts[mixcomp,i,j,f,f_plus-1] += 1#parts[0,0]

    if 0:
        kernels = counts[:, :, :, 0].astype(np.float32) / (descriptor.num_parts * dups)

        import pdb

        pdb.set_trace()

        radii = (2, 2)

        aa_log = np.log(1 - kernels)
        aa_log = ag.util.zeropad(aa_log, (0, radii[0], radii[1], 0))

        integral_aa_log = aa_log.cumsum(1).cumsum(2)

        offsets = gv.sub.subsample_offset(kernels[0], psize)

        if 1:
            # Fix kernels
            istep = 2 * radii[0]
            jstep = 2 * radii[1]
            sh = kernels.shape[1:3]
            for mixcomp in xrange(1):
                # Note, we are going in strides of psize, given a certain offset, since
                # we will be subsampling anyway, so we don't need to do the rest.
                for i in xrange(offsets[0], sh[0], psize[0]):
                    for j in xrange(offsets[1], sh[1], psize[1]):
                        p = gv.img.integrate(integral_aa_log[mixcomp], i, j, i + istep, j + jstep)
                        kernels[mixcomp, i, j] = 1 - np.exp(p)

        # Subsample kernels
        sub_kernels = gv.sub.subsample(kernels, psize, skip_first_axis=True)

        np.save("tmp.npy", sub_kernels)
        print "saved tmp.npy"
        import sys

        sys.exit(0)

    if 0:
        for f in xrange(descriptor.num_parts):

            # Pick only one background for this part and file
            num = bkg_stack_num[f]

            # Assumes num > 0

            bkg_i = prnds[4].randint(num)

            bkgmap = bkg_stack[f, bkg_i]

            # Composite
            img_with_bkg = gv.img.composite(patch, bkgmap, alpha_patch)

            # Retrieve unspread edges (with a given background gray level)
            edges_pad = ag.features.bedges(img_with_bkg, **bsettings)

            # Pad the edges
            # edges_pad = ag.util.zeropad(edges, (pad, pad, 0))

            # Do spreading
            X_pad_spread = ag.features.bspread(edges_pad, spread=bsettings["spread"], radius=radius)

            # De-pad
            padding = pad - 2
            X_spread = X_pad_spread[padding:-padding, padding:-padding]

            # Code parts
            parts = descriptor.extract_parts(X_spread.astype(np.uint8))

            # Accumulate and return
            counts[mixcomp, i, j, f] += parts[0, 0]

    # Translate counts to spread counts (since we're assuming independence of samples within one CAD image)

    return counts
def background_adjust_model(settings, bkg, seed=0):
    offset = settings["detector"].get("train_offset", 0)
    limit = settings["detector"].get("train_limit")
    files = sorted(glob.glob(settings["detector"]["train_dir"]))[
        offset:limit
    ]  # * settings['detector'].get('duplicate', 1)

    try:
        detector = gv.Detector.load(settings["detector"]["file"])
    except KeyError:
        raise Exception("Need to train the model first")

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    sh = (28, 88)

    # Create accumulates for each mixture component
    # TODO: Temporary until multicomp
    # counts = np.zeros_like(detector.kernel_templates)
    counts = np.zeros((1, sh[0], sh[1], descriptor.num_parts))

    num_files = len(files)
    num_duplicates = settings["detector"].get("duplicate", 1)

    # Create several random states, so it's easier to measure
    # the influence of certain features
    prnds = [np.random.RandomState(seed + i) for i in xrange(10)]

    # Setup unspread bedges settings
    bsettings = settings["edges"].copy()
    radius = bsettings["radius"]
    bsettings["radius"] = 0

    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])

    padded_theta = descriptor.unspread_parts_padded

    # pad = 10
    pad = 5
    X_pad_size = (9 + pad * 2,) * 2
    # X_pad_size = padded_theta.shape[1:3]

    neg_filenames = sorted(glob.glob(os.path.join(os.environ["UIUC_DIR"], "TrainImages", "neg-*.pgm")))

    gen_raw = generate_random_patches(neg_filenames, X_pad_size, seed)
    # Pre-generate a bunch of background patches and loop them.
    bkgs = [gen_raw.next() for i in xrange(2000)]

    def new_gen():
        i = 0
        while True:
            yield bkgs[i]
            i += 1
            if i == 2000:
                i = 0

    gen = new_gen()
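    # Note: itertools.cycle(bkgs) would give the same endless rotation over the
    # pre-generated patches as the hand-rolled generator above.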

    for seed, fn in enumerate(files):
        ag.info("Processing file", fn)

        # Which mixture component does this image belong to?
        # TODO: Temporary until multicomp
        mixcomp = 0  # np.argmax(detector.affinities

        # Binarize support and Extract alpha
        color_img, alpha = gv.img.load_image_binarized_alpha(fn)
        img = gv.img.asgray(color_img)

        img_pad = ag.util.zeropad(img, pad)

        alpha_pad = ag.util.zeropad(alpha, pad)
        inv_alpha_pad_expanded = np.expand_dims(~alpha_pad, -1)

        # Iterate every duplicate

        # ag.info("Iteration {0}/{1}".format(loop+1, num_duplicates))
        # ag.info("Iteration")
        for i, j in product(locations0, locations1):
            selection = [slice(i, i + X_pad_size[0]), slice(j, j + X_pad_size[1])]
            # X_pad = edges_pad[selection].copy()
            patch = img_pad[selection]
            alpha_patch = alpha_pad[selection]

            # ag.info("Position {0} {1}".format(i, j))
            for loop in xrange(num_duplicates):
                bkgmap = gen.next()

                # Composite
                img_with_bkg = composite(patch, bkgmap, alpha_patch)

                # Retrieve unspread edges (with a given background gray level)
                edges_pad = ag.features.bedges(img_with_bkg, **bsettings)

                # Pad the edges
                # edges_pad = ag.util.zeropad(edges, (pad, pad, 0))

                # Do spreading
                X_pad_spread = ag.features.bspread(edges_pad, spread=bsettings["spread"], radius=radius)

                # De-pad
                padding = pad - 2
                X_spread = X_pad_spread[padding:-padding, padding:-padding]

                # Code parts
                parts = descriptor.extract_parts(X_spread.astype(np.uint8))

                # Accumulate and return
                counts[mixcomp, i, j] += parts[0, 0]

    """
    if 0:
        from multiprocessing import Pool
        p = Pool(7)
        mapf = p.map
    else:
        mapf = map
    def _process_file(fn): 
        return _process_file_full(fn, sh, descriptor, detector)

    # Iterate images
    all_counts = mapf(_process_file, files)

    for counti in all_counts:
        counts += counti
    """

    # Divide accumulated counts to get the new distribution
    counts /= num_files * num_duplicates

    # Create a new model, with this distribution
    new_detector = detector.copy()

    new_detector.kernel_templates = counts
    new_detector.support = None
    new_detector.use_alpha = False

    # Return model
    return new_detector
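# generate_random_patches is used above but not defined in this snippet (it presumably
# comes from a helper module such as superimpose_experiment). A minimal sketch of what it
# is assumed to do -- endlessly yield random gray-scale crops of the requested size from
# the negative image files (hypothetical, not the original implementation):
def _generate_random_patches_sketch(filenames, size, seed=0):
    rs = np.random.RandomState(seed)
    while True:
        # Pick a random negative image and cut out a random window of shape `size`
        im = gv.img.asgray(gv.img.load_image(filenames[rs.randint(len(filenames))]))
        if im.shape[0] < size[0] or im.shape[1] < size[1]:
            continue
        i = rs.randint(im.shape[0] - size[0] + 1)
        j = rs.randint(im.shape[1] - size[1] + 1)
        yield im[i:i + size[0], j:j + size[1]]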
#parser = argparse.ArgumentParser(description='Train mixture model on edge data')
#parser.add_argument('patches', metavar='<patches file>', type=argparse.FileType('rb'), help='Filename of patches file')
#parser.add_argument('model', metavar='<output model file>', type=argparse.FileType('wb'), help='Filename of the output models file')
#parser.add_argument('mixtures', metavar='<number mixtures>', type=int, help='Number of mixture components')
#parser.add_argument('--use-voc', action='store_true', help="Use VOC data to train model")

import gv
import glob
import os
import os.path
import amitgroup as ag

ag.set_verbose(True)

#descriptor = gv.load_descriptor(gv.BinaryDetector.DESCRIPTOR, sett)
descriptor = gv.load_descriptor(sett)
detector = gv.BernoulliDetector(dsettings['num_mixtures'], descriptor, dsettings)

if dsettings['use_voc']:
    files = gv.voc.load_object_images_of_size(sett['voc'], 'bicycle', dsettings['image_size'], dataset='train')
else:
    base_path = ''
    if 'base_path' in dsettings:
        base_path = os.environ[dsettings['base_path']]
    path = os.path.join(base_path, dsettings['train_dir'])
    files = sorted(glob.glob(path))
    # TEMP!
    from random import shuffle
    shuffle(files)

limit = dsettings.get('train_limit')
settings = load_settings(settings_file)

#import matplotlib
#matplotlib.use('Agg')
import glob
import sys
import os
import gv
import numpy as np
import amitgroup as ag
import matplotlib.pylab as plt

from superimpose_experiment import *
from operator import itemgetter

descriptor = gv.load_descriptor(settings)

def get_edges(settings, config):
    offset = settings['detector'].get('train_offset', 0)
    limit = settings['detector'].get('train_limit')
    if limit is not None:
        limit += offset
    files = sorted(glob.glob(settings['detector']['train_dir']))[offset:limit] * settings['detector'].get('duplicate', 1)
    alpha_and_images = map(load_and_crop, files)
    if alpha_and_images[0][0] is None:
        alpha = None
        all_alphas = None
    else:
        all_alphas = np.asarray(map(itemgetter(0), alpha_and_images))
        #all_alphas = np.asarray(map(lambda x: x[0], alpha_and_images))
        alpha = all_alphas[:,7-4:8+4,7-4:8+4].mean(axis=0)
def background_adjust_model(settings, bkg, seed=0):
    offset = settings['detector'].get('train_offset', 0)
    limit = settings['detector'].get('train_limit')
    files = sorted(glob.glob(settings['detector']['train_dir']))[
        offset:limit]  # * settings['detector'].get('duplicate', 1)

    try:
        detector = gv.Detector.load(settings['detector']['file'])
    except KeyError:
        raise Exception("Need to train the model first")

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    sh = (28, 88)

    # Create accumulates for each mixture component
    # TODO: Temporary until multicomp
    #counts = np.zeros_like(detector.kernel_templates)
    counts = np.zeros((1, sh[0], sh[1], descriptor.num_parts))

    num_files = len(files)
    num_duplicates = settings['detector'].get('duplicate', 1)

    # Create several random states, so it's easier to measure
    # the influence of certain features
    prnds = [np.random.RandomState(seed + i) for i in xrange(10)]

    # Setup unspread bedges settings
    bsettings = settings['edges'].copy()
    radius = bsettings['radius']
    bsettings['radius'] = 0

    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])

    padded_theta = descriptor.unspread_parts_padded

    #pad = 10
    pad = 5
    X_pad_size = (9 + pad * 2, ) * 2
    #X_pad_size = padded_theta.shape[1:3]

    neg_filenames = sorted(
        glob.glob(
            os.path.join(os.environ['UIUC_DIR'], 'TrainImages', 'neg-*.pgm')))

    gen_raw = generate_random_patches(neg_filenames, X_pad_size, seed)
    # Pre-generate a bunch of background patches and loop them.
    bkgs = [gen_raw.next() for i in xrange(2000)]

    def new_gen():
        i = 0
        while True:
            yield bkgs[i]
            i += 1
            if i == 2000:
                i = 0

    gen = new_gen()

    for seed, fn in enumerate(files):
        ag.info("Processing file", fn)

        # Which mixture component does this image belong to?
        # TODO: Temporary until multicomp
        mixcomp = 0  #np.argmax(detector.affinities

        # Binarize support and Extract alpha
        color_img, alpha = gv.img.load_image_binarized_alpha(fn)
        img = gv.img.asgray(color_img)

        img_pad = ag.util.zeropad(img, pad)

        alpha_pad = ag.util.zeropad(alpha, pad)
        inv_alpha_pad_expanded = np.expand_dims(~alpha_pad, -1)

        # Iterate every duplicate

        #ag.info("Iteration {0}/{1}".format(loop+1, num_duplicates))
        #ag.info("Iteration")
        for i, j in product(locations0, locations1):
            selection = [
                slice(i, i + X_pad_size[0]),
                slice(j, j + X_pad_size[1])
            ]
            #X_pad = edges_pad[selection].copy()
            patch = img_pad[selection]
            alpha_patch = alpha_pad[selection]

            #ag.info("Position {0} {1}".format(i, j))
            for loop in xrange(num_duplicates):
                bkgmap = gen.next()

                # Composite
                img_with_bkg = composite(patch, bkgmap, alpha_patch)

                # Retrieve unspread edges (with a given background gray level)
                edges_pad = ag.features.bedges(img_with_bkg, **bsettings)

                # Pad the edges
                #edges_pad = ag.util.zeropad(edges, (pad, pad, 0))

                # Do spreading
                X_pad_spread = ag.features.bspread(edges_pad,
                                                   spread=bsettings['spread'],
                                                   radius=radius)

                # De-pad
                padding = pad - 2
                X_spread = X_pad_spread[padding:-padding, padding:-padding]

                # Code parts
                parts = descriptor.extract_parts(X_spread.astype(np.uint8))

                # Accumulate and return
                counts[mixcomp, i, j] += parts[0, 0]
    """
    if 0:
        from multiprocessing import Pool
        p = Pool(7)
        mapf = p.map
    else:
        mapf = map
    def _process_file(fn): 
        return _process_file_full(fn, sh, descriptor, detector)

    # Iterate images
    all_counts = mapf(_process_file, files)

    for counti in all_counts:
        counts += counti
    """

    # Divide accumulated counts to get the new distribution
    counts /= num_files * num_duplicates

    # Create a new model, with this distribution
    new_detector = detector.copy()

    new_detector.kernel_templates = counts
    new_detector.support = None
    new_detector.use_alpha = False

    # Return model
    return new_detector
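# composite is used above but not defined in this snippet (one of the fragments
# star-imports superimpose_experiment). Judging from the inline expression used elsewhere
# in this file, patch * alpha_patch + bkg * (1 - alpha_patch), a minimal sketch would be
# (hypothetical, not the original implementation):
def _composite_sketch(foreground, background, alpha):
    # Alpha-blend the foreground over the background using the (possibly binary) mask
    return foreground * alpha + background * (1 - alpha)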
def background_adjust_model(settings, bkg, seed=0):
    offset = settings['detector'].get('train_offset', 0)
    limit = settings['detector'].get('train_limit')
    files = sorted(glob.glob(settings['detector']['train_dir']))[offset:limit]# * settings['detector'].get('duplicate', 1)

    try:
        detector = gv.Detector.load(settings['detector']['file'])
    except KeyError:
        raise Exception("Need to train the model first")

    # We need the descriptor to generate and manipulate images
    descriptor = gv.load_descriptor(settings)

    sh = (28, 88)

    # Create accumulates for each mixture component
    # TODO: Temporary until multicomp
    #counts = np.zeros_like(detector.kernel_templates)
    counts = np.zeros((1, sh[0], sh[1], descriptor.num_parts))

    num_files = len(files)
    num_duplicates = settings['detector'].get('duplicate', 1)

    # Create several random states, so it's easier to measure
    # the influence of certain features
    prnds = [np.random.RandomState(seed+i) for i in xrange(10)]

    # Setup unspread bedges settings
    bsettings = settings['edges'].copy()
    radius = bsettings['radius']
    bsettings['radius'] = 0
    padding = radius

    locations0 = xrange(sh[0])
    locations1 = xrange(sh[1])

    padded_theta = descriptor.unspread_parts_padded

    X_pad_size = padded_theta.shape[1:3]

    for fn in files:
        ag.info("Processing file", fn)

        # Which mixture component does this image belong to?
        # TODO: Temporary until multicomp
        mixcomp = 0#np.argmax(detector.affinities

        # Binarize support and Extract alpha
        color_img, alpha = gv.img.load_image_binarized_alpha(fn)
        img = gv.img.asgray(color_img) 

        alpha_pad = ag.util.zeropad(alpha, padding)
        inv_alpha_pad_expanded = np.expand_dims(~alpha_pad, -1)

        # Iterate every duplicate
        for loop in xrange(num_duplicates):
            ag.info("Iteration {0}/{1}".format(loop+1, num_duplicates)) 
            # Superimpose onto gray background
            # Sweep the background gray level evenly from 0 to 1 over the duplicates
            # (float division; guarded so a single duplicate does not divide by zero)
            graymap = create_graymap(img.shape, loop / float(max(num_duplicates - 1, 1)), prnds[0])

            # Composite
            img_with_gray = composite(img, graymap, alpha)

            # Retrieve unspread edges (with a given background gray level) 
            edges = ag.features.bedges(img_with_gray, **bsettings)

            # Pad the edges
            edges_pad = ag.util.zeropad(edges, (padding, padding, 0)) 

            for i, j in product(locations0, locations1):
                selection = [slice(i, i+X_pad_size[0]), slice(j, j+X_pad_size[1])]
                X_pad = edges_pad[selection].copy()
                nA_pad = inv_alpha_pad_expanded[selection]

                # Draw background part from categorical distribution
                f_bkg = weighted_choice_unit(bkg, prnds[1])
                probs_bkg = get_probs(padded_theta, f_bkg)
                probs = nA_pad * probs_bkg
            
                # Iterate over all locations
                
                # Draw from background edge probability over ~alpha 
                X_pad |= (prnds[2].rand(*probs.shape) < probs)

                # Do spreading
                X_pad_spread = ag.features.bspread(X_pad, spread=bsettings['spread'], radius=radius)

                # De-pad
                X_spread = X_pad_spread[padding:-padding,padding:-padding]

                # Code parts 
                parts = descriptor.extract_parts(X_spread.astype(np.uint8))

                # Accumulate and return
                counts[mixcomp,i,j] += parts[0,0]

    """
    if 0:
        from multiprocessing import Pool
        p = Pool(7)
        mapf = p.map
    else:
        mapf = map
    def _process_file(fn): 
        return _process_file_full(fn, sh, descriptor, detector)

    # Iterate images
    all_counts = mapf(_process_file, files)

    for counti in all_counts:
        counts += counti
    """

    # Divide accumulated counts to get the new distribution
    counts /= num_files * num_duplicates
    
    # Create a new model, with this distribution
    new_detector = detector.copy() 

    new_detector.kernel_templates = counts
    new_detector.support = None
    new_detector.use_alpha = False

    # Return model 
    return new_detector
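# weighted_choice_unit is used above but not defined in this snippet. Its usage -- a
# RandomState argument, and callers that accept a -1 result -- suggests a categorical draw
# whose weights may sum to less than one; a minimal sketch under that assumption
# (hypothetical, not the original implementation):
def _weighted_choice_unit_sketch(weights, randstate):
    u = randstate.uniform(0, 1)
    cum = 0.0
    for k, w in enumerate(weights):
        cum += w
        if u < cum:
            return k
    # Remaining probability mass: no component drawn
    return -1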