Example #1
def testAgainstMunkres(N=100, d=100, round=False):
    # compare Hungarian (munkres) algorithm to LAPJV
    import munkres
    success = 0
    for i in xrange(N):
        A = np.random.randn(d, d) / np.random.randn(d, d)
        if round:
            A = A.round()
        if i % 2 == 0:  # test some branch of the code
            A = A - np.mean(A)
        [rowsol, cost, v, u, costMat] = cy_lapjv(A)
        E = munkres.munkres(A)
        rowsol_munkres = np.nonzero(E)[1]
        cost_munkres = (A[E]).sum()
        acc = np.abs(cost - cost_munkres)
        if acc < 1e-8 and np.all(rowsol_munkres == rowsol):
            success += 1
        else:
            print i
            print 'failed with accuracy', acc
            print 'munkres:', cost_munkres
            print 'LAPJV:', cost
            print rowsol_munkres - rowsol
            print rowsol
            print A
            raise AssertionError('LAPJV and munkres disagree')
    print success, 'out of', N
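The same kind of cross-check can also be run against SciPy's Hungarian solver; below is a minimal sketch assuming only NumPy, SciPy, and the cython munkres wrapper used throughout these examples (the function name check_against_scipy is illustrative):

import numpy as np
from scipy.optimize import linear_sum_assignment
from munkres import munkres  # the cython wrapper used throughout these examples

def check_against_scipy(n_trials=10, d=50, seed=0):
    rng = np.random.default_rng(seed)
    for _ in range(n_trials):
        cost = rng.standard_normal((d, d))
        mask = munkres(cost)                      # boolean assignment matrix
        rows, cols = linear_sum_assignment(cost)  # SciPy's Hungarian-style solver
        # the chosen assignments may differ on ties, but the optimal totals must agree
        assert abs(cost[mask].sum() - cost[rows, cols].sum()) < 1e-8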
Example #2
def test_simple():
    a = np.array([i for i in range(64)], dtype=np.double).reshape((8, 8))
    b = munkres(a)
    truth = np.zeros((8, 8), dtype=bool)
    for i in range(8):
        truth[7 - i, i] = True
    np.testing.assert_array_equal(b, truth, 'simple 8x8 case failed, b=%s, truth=%s' % (str(b), str(truth)))
def test_big(k):
    a = np.empty((k,k))
    for i in range(k):
        for j in range(k):
            a[i,j] = (i+1)*(j+1)
    b = munkres(a)
    print k, b
Example #4
def test_big(k):
    a = np.empty((k, k))
    for i in range(k):
        for j in range(k):
            a[i, j] = (i + 1) * (j + 1)
    b = munkres(a)
    print k, b
Example #5
def testAgainstMunkres(N=100, d=100, round=False):
    # compare Hungarian (munkres) algorithm to LAPJV
    import munkres
    success = 0
    for i in xrange(N):
        A = np.random.randn(d,d) / np.random.randn(d,d)
        if round:
            A = A.round()
        if i % 2 == 0: # test some branch of the code
            A = A - np.mean(A)
        [rowsol, cost, v, u, costMat] = cy_lapjv(A)
        E = munkres.munkres(A)
        rowsol_munkres = np.nonzero(E)[1]
        cost_munkres = (A[E]).sum()
        acc = np.abs(cost - cost_munkres)
        if acc < 1e-8 and np.all(rowsol_munkres==rowsol):
            success += 1
        else:
            print i
            print 'failed with accuracy', acc
            print 'munkres:', cost_munkres
            print 'LAPJV:', cost
            print rowsol_munkres - rowsol
            print rowsol
            print A
    print success, 'out of', N
Example #6
def test_basic():
    a = np.array(list(map(float, '7 4 3 6 8 5 9 4 4'.split())), dtype=np.double).reshape((3, 3))
    b = munkres(a)
    truth = np.array([[False, False, True],
                      [ True, False, False],
                      [False, True, False]])
    np.testing.assert_array_equal(b, truth, 
                                  'basic 3x3 case failed\na=%s\ntruth=%s\ncost=%s\ntrue_cost=%s' % (str(a), str(truth), str(a[b]), str(a[truth])))
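For reference, the boolean matrix returned by this munkres wrapper can be turned into index pairs and a total cost directly with NumPy; a minimal sketch on the same 3x3 matrix as the test above:

import numpy as np
from munkres import munkres

a = np.array([7, 4, 3, 6, 8, 5, 9, 4, 4], dtype=np.double).reshape((3, 3))
assignment = munkres(a)           # boolean matrix, True at assigned cells
pairs = np.argwhere(assignment)   # [[0, 2], [1, 0], [2, 1]] for this matrix
total_cost = a[assignment].sum()  # 3 + 6 + 4 = 13
print(pairs, total_cost)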
Example #7
def perm_to_best_match(x, y):
    out = np.zeros(y.shape)
    for batchIdx in range(x.shape[0]):
        outBatch = np.zeros(y[batchIdx].shape)
        cost = distance.cdist(x[batchIdx], y[batchIdx])
        optimum = munkres(cost)
        for i in range(x.shape[1]):
            outBatch[i] = y[batchIdx, np.where(optimum[i])[0][0]]
        out[batchIdx] = outBatch
    #print(np.square(np.subtract(x, out)).sum())
    return out.astype(np.float32)
Example #8
def test_big():
    a = np.empty((100,100))
    for i in range(100):
        for j in range(100):
            a[i,j] = (i+1)*(j+1)
    b = munkres(a)

    truth = np.zeros((100, 100), dtype=bool)
    for i in range(100):
        truth[99 - i, i] = True
    np.testing.assert_array_equal(b, truth, 'simple 100x100 case failed, b=%s, truth=%s' % (str(b), str(truth)))
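The anti-diagonal truth in these test_big cases follows from the rearrangement inequality: with a[i, j] = (i + 1) * (j + 1), the sum of products over a permutation is minimised by pairing the largest row factors with the smallest column factors, which is exactly the anti-diagonal. A small brute-force check, illustrative and not part of the original test suite:

import itertools
import numpy as np

def brute_force_min(a):
    # exhaustively score every permutation; only feasible for small k
    k = a.shape[0]
    return min(sum(a[i, p[i]] for i in range(k))
               for p in itertools.permutations(range(k)))

k = 6
a = np.array([[(i + 1) * (j + 1) for j in range(k)] for i in range(k)], dtype=np.double)
anti_diagonal_cost = sum((i + 1) * (k - i) for i in range(k))
assert brute_force_min(a) == anti_diagonal_cost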
Example #9
def cython_link(costs, nl, nr):
    """use the cython munkres algorithm to link. it's faster than scipy linear_sum_assignment by a lot."""
    from munkres import munkres
    # this is the cython munkres program from here
    # https://github.com/jfrelinger/cython-munkres-wrapper
    # on a 1000x1000 matrix it is a 35% speedup
    t0 = time.time()
    mat = munkres(costs)  # do the munkres algorithm in cython
    inds = np.argwhere(mat)  # find all indices of True links
    inds = inds[(inds.T[0] < nl) & (inds.T[1] < nr)]  # remove dummy links
    print(
        '%d links generated from %d possibilities in %.2f seconds with CYTHON.' %
        (len(inds), min(nl, nr), time.time() - t0))
    return inds.T
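For comparison with the cython path above, the same link step can be written with scipy.optimize.linear_sum_assignment; a sketch (relative speed depends on matrix size and SciPy version, so the 35% figure above is not assumed here):

import time
import numpy as np
from scipy.optimize import linear_sum_assignment

def scipy_link(costs, nl, nr):
    """Same linking step as cython_link, but using SciPy's solver."""
    t0 = time.time()
    rows, cols = linear_sum_assignment(costs)
    inds = np.column_stack([rows, cols])
    inds = inds[(inds.T[0] < nl) & (inds.T[1] < nr)]  # remove dummy links
    print('%d links generated from %d possibilities in %.2f seconds with SCIPY.' %
          (len(inds), min(nl, nr), time.time() - t0))
    return inds.T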
Example #10
def munkres_safe(e):
    """Wrapper around Hungarian algorithm that allows infinite values"""

    e = e.copy()

    try:
        practicalInfinity = 2 * e[e < np.inf].max() + 1
    except ValueError:
        practicalInfinity = 1

    e[e == np.inf] = practicalInfinity
    e[e == -np.inf] = 0.0

    assignment = munkres(e)
    return np.argwhere(assignment)[:, 1]
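A quick illustration of munkres_safe on a matrix containing infinities (assuming the function above is in scope; the numbers are arbitrary):

import numpy as np

e = np.array([[1.0, np.inf, 3.0],
              [np.inf, 2.0, np.inf],
              [4.0, np.inf, 1.0]])
# +inf entries are replaced by a large finite value before assignment,
# so the solver avoids them whenever a finite choice exists
print(munkres_safe(e))  # column assigned to each row; here [0 1 2]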
Example #11
def call_lap(cost, costDie, costBorn):
    costMat = prepare_costmat(cost, costDie, costBorn)
    t = munkres(costMat)
    topleft = t[0:cost.shape[0], 0:cost.shape[1]]
    return topleft
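# prepare_costmat is not shown in this example. A common construction for this
# kind of tracking LAP -- only an assumption about what it actually does -- pads
# the link costs with per-track "death" and per-detection "birth" costs so the
# solver may leave tracks or detections unmatched (np assumed imported as above):
def prepare_costmat_sketch(cost, costDie, costBorn):
    n, m = cost.shape
    big = 10.0 * max(np.abs(cost).max(), 1.0)  # effectively forbidden cells
    die = np.full((n, n), big)
    np.fill_diagonal(die, costDie)             # track i may terminate at cost costDie
    born = np.full((m, m), big)
    np.fill_diagonal(born, costBorn)           # detection j may start a track at cost costBorn
    lower_right = cost.T.copy()                # keeps the padded matrix square and feasible
    return np.block([[cost, die], [born, lower_right]]).astype(np.double)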
def ransac_compute_rigid_transform(Dm, pts1, pts2, confidence_thresh=.01, ransac_iters=20, sample_size=5,
                                  matching_iter=10, n_neighbors=10, verbose=False):

#     q = time.time()
    
    high_confidence_thresh = np.sort(Dm.flat)[int(confidence_thresh * np.size(Dm))]
#     print 'high_confidence_thresh', high_confidence_thresh
    
    N1 = len(pts1)
    N2 = len(pts2)
    
    rs, cs = np.where(Dm < high_confidence_thresh)
    high_confidence_pairs = np.c_[rs,cs]
    
    if len(high_confidence_pairs) == 0:
        return None, [], None, np.inf
    
    if verbose:
        print 'high_confidence_pairs', high_confidence_pairs
    
#     from itertools import combinations
#     possible_samples = list(combinations(high_confidence_pairs, sample_size))
#     random.shuffle(possible_samples)
    
#     n_possible_samples = len([t for t in combinations(high_confidence_pairs, sample_size) 
#                         if allunique([tt[0] for tt in t]) and allunique([tt[1] for tt in t])])
#     print 'n_possible_samples', len(possible_samples)
#     random.shuffle(possible_samples)

#     print 'comb', time.time() - q

#     return
    
    p1s = np.sort(list(set(rs)))
    p2s = np.sort(list(set(cs)))
    n1 = len(p1s)
    n2 = len(p2s)
    
    if n1 < sample_size or n2 < sample_size:
        return None, [], None, np.inf
    
    offsets = []
    scores = []
    matches_list = []
    samples_list = []
    
    sample_counter = 0
    n_possible_samples = int(comb(len(high_confidence_pairs), sample_size, exact=False))

    if verbose:
        sys.stderr.write('n_possible_samples = %d\n' % n_possible_samples)
    
#     n_possible_samples = len(possible_samples)
    for ri in range(min(ransac_iters, n_possible_samples)):
#         sys.stderr.write('ri = %d\n' % ri)

        samples = []
        
        for tt in range(100):
#             sys.stderr.write('tt = %d\n' % tt)

#             s = possible_samples[sample_counter]
            s = random.sample(high_confidence_pairs, sample_size)
            sample_counter += 1
            w1, w2 = zip(*s)
            if len(set(w1)) == len(w1) and len(set(w2)) == len(w2):
                samples = s
                break
                
        if len(samples) == 0:
            continue
            
#         samples = np.array(possible_samples[ri])

        if verbose:
            sys.stderr.write('samples = %d\n' % ri)
#             print '\nsamples', ri, samples
        
        X = pts1[[s[0] for s in samples]]
        Y = pts2[[s[1] for s in samples]]
                
        # generate transform hypothesis
        T, angle = rigid_transform_from_pairs(X, Y)
        if np.abs(angle) > np.pi/4:
            if verbose:
                print 'angle too wide', np.rad2deg(angle)
            continue
        
        # apply transform hypothesis
        pts1_trans = rigid_transform_to(pts1, T)
        
        # iterative closest point association
        matches = None
        matches_prev = None
        
        for mi in range(matching_iter):
  
            # given transform, find matching

#             t1 = time.time()
        
#             b = time.time()
    
            Dh = cdist(pts1_trans, pts2, metric='euclidean')
            Dargmin1 = Dh.argsort(axis=1)
            Dargmin0 = Dh.argsort(axis=0)
#             print 'cdist', time.time() - b
        
#             b = time.time()
            
            D2 = Dh.copy()
            D2[np.arange(N1)[:,np.newaxis], Dargmin1[:,n_neighbors:]] = 999
            D2[Dargmin0[n_neighbors:,:], np.arange(N2)[np.newaxis,:]] = 999
            D_hc_pairs = D2[p1s[:,np.newaxis], p2s]
                
#             D_hc_pairs = 9999 * np.ones((n1, n2))
#             for i,j in high_confidence_pairs:
#                 if j in Dargmin1[i,:10] and i in Dargmin0[:10,j]:
#                     ii = p1s.index(i)
#                     jj = p2s.index(j)
#                     D_hc_pairs[ii, jj] = Dh[i,j]

#             print 'D_hc_pairs', time.time() - b

            if matches is not None:
                matches_prev = matches
        
#             b = time.time()
            matches_hc_pairs = np.array(zip(*np.nonzero(munkres(D_hc_pairs))))
#             print 'munkres', time.time() - b, mi
            
#             b = time.time()

#                 print [(p1s[ii], p2s[jj]) for (ii,jj) in matches_hc_pairs]
            matches = np.array([(p1s[ii], p2s[jj]) for (ii,jj) in matches_hc_pairs
                                if D_hc_pairs[ii, jj] != 999])
            # some 999 edges will be included; the "if" above removes them
#             print 'matches', time.time() - b
        
            expanded_matches = []
            matches1 = set([i for i,j in matches])
            matches2 = set([j for i,j in matches])
            rem1 = set(range(N1)) - matches1
            rem2 = set(range(N2)) - matches2
            add1 = set([])
            add2 = set([])
            for i in rem1:
                for j in rem2:
                    if j in Dargmin1[i,:3] and i in Dargmin0[:3,j] and i not in add1 and j not in add2:
                        add1.add(i)
                        add2.add(j)
                        expanded_matches.append((i,j))

            if len(expanded_matches) > 0 and len(matches) > 0 :
                matches = np.vstack([matches, np.array(expanded_matches)])
    
            if verbose:
#                 print 'considered pairs', w
#                 print 'matches', [(i,j) for i,j in matches
                q1, q2 = np.where(D_hc_pairs < 999)
                w = zip(*[p1s[q1], p2s[q2]])
                print 'matches', len(matches), '/', 'considered pairs', len(w), '/', 'all hc pairs', len(high_confidence_pairs)

#             t2 = time.time()
            
            if len(matches) < 3:
                s = np.inf
                break
            else:
                xs1 = pts1_trans[matches[:,0], 0]
                x_coverage1 = float(xs1.max() - xs1.min()) / (pts1_trans[:,0].max() - pts1_trans[:,0].min())
                ys1 = pts1_trans[matches[:,0], 1]
                y_coverage1 = float(ys1.max() - ys1.min()) / (pts1_trans[:,1].max() - pts1_trans[:,1].min())
                
                xs2 = pts2[matches[:,1], 0]
                x_coverage2 = float(xs2.max() - xs2.min())/ (pts2[:,0].max() - pts2[:,0].min())
                ys2 = pts2[matches[:,1], 1]
                y_coverage2 = float(ys2.max() - ys2.min())/ (pts2[:,1].max() - pts2[:,1].min())
                
                coverage = .5 * x_coverage1 * y_coverage1 + .5 * x_coverage2 * y_coverage2
                
                s = Dh[matches[:,0], matches[:,1]].mean() / coverage**2    
#             s = .5 * Dm[Dh.argmin(axis=0), np.arange(len(pts2))].mean() + .5 * Dm[np.arange(len(pts1)), Dh.argmin(axis=1)].mean()            
#             s = np.mean([np.mean(Dh.min(axis=0)), np.mean(Dh.min(axis=1))])
    
            X = pts1[matches[:,0]]
            Y = pts2[matches[:,1]]

            T, angle = rigid_transform_from_pairs(X, Y)
            if np.abs(angle) > np.pi/4:
                break

            pts1_trans = rigid_transform_to(pts1, T)
            
            if matches_prev is not None and all([(i,j) in matches_prev for i,j in matches]):
                break
                            
        samples_list.append(samples)
        offsets.append(T)
        matches_list.append(matches)
        scores.append(s)
    
        if verbose:
            print matches
            print s
            plot_two_pointsets(pts1_trans[:,::-1]*np.array([1,-1]), pts2[:,::-1]*np.array([1,-1]), 
                       center1=False, center2=False,
                       text=True, matchings=matches)
            
    if len(scores) > 0:
        best_i = np.argmin(scores)

        best_score = scores[best_i]
        best_T = offsets[best_i]
        best_sample = samples_list[best_i]
        best_matches = matches_list[best_i]    
    
        return best_T, best_matches, best_sample, best_score
    else:
        return None, [], None, np.inf
Example #13
    def evaluateFrame(self, frame):
        """Update statistics by evaluating a new frame."""

        timestamp = frame["timestamp"]
        # print("frame:",timestamp)
        groundtruths = frame["annotations"]
        hypotheses = self.get_hypotheses_frame(timestamp)["hypotheses"]

        visualDebugAnnotations = []

        # Save occurring ground truth ids
        for g in groundtruths:
            self.groundtruth_ids_.add(g["id"])

        # Save occurring hypothesis ids
        for h in hypotheses:
            self.hypothesis_ids_.add(h["id"])
        self.LOG = LOG
        LOG.info("")
        LOG.info("Timestamp: %s" % timestamp)
        
        LOG.info("DIFF")
        LOG.info("DIFF Time %.2f" % timestamp)
        
        logstr = ["DIFF Mappings:"]
        for gt_id in sorted(self.mappings_.keys()):
            logstr.append("%s-%s" % (gt_id, self.mappings_[gt_id]))
        LOG.info(" ".join(logstr))

        # No need to evaluate this frame.
        if len(groundtruths) == 0 and len(hypotheses) == 0:
            LOG.info("No gt and hypos for this frame.")
            return

        LOG.info("GTs:")
        for groundtruth in groundtruths:
            LOG.info(Rect(groundtruth))

        LOG.info("Hypos:")
        for hypothesis in hypotheses:
            LOG.info(Rect(hypothesis))
            

        # PAPER STEP 1
        # Valid mappings skip the Munkres algorithm if both the ground truth and the hypothesis are found in this frame.
        # We call these pairs correspondences and fill the list each frame.
        correspondences = {} # truth id -> hypothesis id
        
        listofprints = []
        LOG.info("")
        LOG.info("STEP 1: KEEP CORRESPONDENCE")
#            print "DIFF Keep correspondence"
            
        for gt_id in self.mappings_.keys():
            
            groundtruth = list(filter(lambda g: g["id"] == gt_id, groundtruths)) # Get ground truths with given ground truth id in current frame
            if len(groundtruth) > 1:
                LOG.warning("found %d > 1 ground truth tracks for id %s", len(groundtruth), gt_id)
            elif len(groundtruth) < 1:                
                continue
            
            hypothesis = list(filter(lambda h: h["id"] == self.mappings_[gt_id], hypotheses)) # Get hypothesis with hypothesis id according to mapping
            assert len(hypothesis) <= 1
            if len(hypothesis) != 1:               
                continue
            
            # Hypothesis found for known mapping
            # Check hypothesis for overlap
            # overlap = Rect(groundtruth[0]).overlap(Rect(hypothesis[0])) <- original
            overlap = Rect(groundtruth[0]).euclidDist(Rect(hypothesis[0])) # <- added by kg

            # NOTE: changed >= to <= to accommodate the overlap to euclidDist change
            if overlap <= self.overlap_threshold_:
                LOG.info("Keeping correspondence between %s and %s" % (groundtruth[0]["id"], hypothesis[0]["id"]))
#                    print "DIFF Keep corr %s %s %.2f" % (groundtruth[0]["id"], hypothesis[0]["id"], Rect(groundtruth[0]).overlap(Rect(hypothesis[0])))
                # listofprints.append("DIFF Keep corr %s %s %.2f" % (groundtruth[0]["id"], hypothesis[0]["id"], Rect(groundtruth[0]).overlap(Rect(hypothesis[0])))) <- original
                listofprints.append("DIFF Keep corr %s %s %.2f" % (groundtruth[0]["id"], hypothesis[0]["id"], Rect(groundtruth[0]).euclidDist(Rect(hypothesis[0])))) # <- added by kg
                correspondences[gt_id] = hypothesis[0]["id"]
                
                self.total_overlap_ += overlap                

        
        for p in sorted(listofprints):
            LOG.info(p)

        # PAPER STEP 2
        LOG.info("")
        LOG.info("STEP 2: FIND CORRESPONDENCE")
        
        # Fill hungarian matrix with +inf
        munkres_matrix = [ [ self.munkres_inf_ for i in range(len(hypotheses)) ] for j in range(len(groundtruths)) ] # TODO make square matrix

        # Find correspondences
        for i in range(len(groundtruths)):
            groundtruth = groundtruths[i]
            
            # Skip groundtruth with correspondence from mapping
            if groundtruth["id"] in correspondences.keys():
                LOG.info("Groundtruth %s already in correspondence" % groundtruth["id"])
                continue
            
            # Fill hungarian matrix with distance between gts and hypos
            for j in range(len(hypotheses)):
                hypothesis = hypotheses[j]
                
                # Skip hypotheses with correspondence from mapping
                if hypothesis["id"] in correspondences.values():
                    LOG.info("Hypothesis %s already in correspondence" % hypothesis["id"])
                    continue
                
                rect_groundtruth = Rect(groundtruth)
                rect_hypothesis = Rect(hypothesis)
                # overlap = rect_groundtruth.overlap(rect_hypothesis) <- original
                overlap = rect_groundtruth.euclidDist(rect_hypothesis) # <- added by kg

                # NOTE: changed >= to <= to accommodate the overlap to euclidDist change
                if overlap <= self.overlap_threshold_:
#                        print "Fill Hungarian", rect_groundtruth, rect_hypothesis, overlap

                    # NOTE: changed 1 / overlap to overlap to accommodate the overlap to euclidDist change
                    # munkres_matrix[i][j] = 1 / overlap
                    munkres_matrix[i][j] = overlap
                    LOG.info("DIFF candidate %s %s %.2f" % (groundtruth["id"], hypothesis["id"], overlap))
        
        # Do the Munkres
        LOG.debug(munkres_matrix)
        
        # Only run munkres on non-empty matrix
        if len(munkres_matrix) > 0:
            indices = munkres(np.array(munkres_matrix, dtype=np.float64))
            indices = np.nonzero(indices)
            indices = np.array(indices).T
            # print(indices)
            indices = [tuple(i) for i in indices]
            
        else:
            LOG.info("No need to run Hungarian with %d ground truths and %d hypothesis." % (len(groundtruths), len(hypotheses)))
            indices = []
        LOG.info(indices)
        
        correspondencelist = []
        mismatcheslist = []
        
        for gt_index, hypo_index in indices:
            
            # Skip invalid self.mappings_
            # Check for max float distance matches (since Hungarian returns complete mapping)
            if (munkres_matrix[gt_index][hypo_index] == self.munkres_inf_): # NO correspondence <=> overlap >= thresh
                continue
            
            gt_id   = groundtruths[gt_index]["id"]
            hypo_id = hypotheses[hypo_index]["id"]
            
            # Assert no known mappings have been added to hungarian, since keep correspondence should have considered this case.
            if gt_id in self.mappings_:
                assert self.mappings_[gt_id] != hypo_id 
            
            
            # Add to correspondences
            eps = 0.0000001
            # LOG.info("Correspondence found: %s and %s (overlap: %f)" % (gt_id, hypo_id, 1.0 / munkres_matrix[gt_index][hypo_index]))
#                correspondencelist.append("DIFF correspondence %s %s %.2f" % (gt_id, hypo_id, 1.0 / munkres_matrix[gt_index][hypo_index]))
            correspondencelist.append("DIFF correspondence %s %s" % (gt_id, hypo_id))
            correspondences[gt_id] = hypo_id

            ### KG: Possibly a bug, because this overlap is coming from a previous loop... try to fix with line below
            overlap = Rect(groundtruths[gt_index]).euclidDist(Rect(hypotheses[hypo_index]))
            self.total_overlap_ += overlap


            # Count "recoverable" and "non-recoverable" mismatches
            # "recoverable" mismatches
            if gt_id in self.gt_map_ and self.gt_map_[gt_id] != hypo_id and not groundtruths[gt_index].get("dco",False):
                LOG.info("Look ma! We got a recoverable mismatch over here! (%s-%s) -> (%s-%s)" % (gt_id, self.gt_map_[gt_id], gt_id, hypo_id))
                self.recoverable_mismatches_ += 1

            # "non-recoverable" mismatches
            if hypo_id in self.hypo_map_ and self.hypo_map_[hypo_id] != gt_id:
                # Do not count non-recoverable mismatch, if both old ground truth and current ground truth are DCO.
                old_gt_id = self.hypo_map_[hypo_id]
                old_gt_dco = list(filter(lambda g: g["id"] == old_gt_id and g.get("dco",False), groundtruths))

                assert len(old_gt_dco) <= 1
                if not (groundtruths[gt_index].get("dco",False) and len(old_gt_dco) == 1):
                    LOG.info("Look ma! We got a non-recoverable mismatch over here! (%s-%s) -> (%s-%s)" % (self.hypo_map_[hypo_id], hypo_id, gt_id, hypo_id))
                    self.non_recoverable_mismatches_ += 1

            # Update yin-yang maps                    
            self.gt_map_[gt_id] = hypo_id
            self.hypo_map_[hypo_id] = gt_id

            # Correspondence contradicts previous mapping. Mark and count as mismatch, if ground truth is not a DCO
            # Iterate over all gt-hypo pairs of mapping, since we have to perform a two way check:
            # Correspondence: A-1
            # Mapping: A-2, B-1
            # We have to detect both forms of conflicts
            # for mapping_gt_id, mapping_hypo_id in self.mappings_.items():
            for mapping_gt_id in list(self.mappings_.keys()):
                mapping_hypo_id = self.mappings_[mapping_gt_id]
                
                # CAVE: Other than in perl script:
                # Do not consider for mismatch, if both old gt and new gt are DCO
                gt_with_mapping_gt_id_dco = list(filter(lambda g: g["id"] == mapping_gt_id and g.get("dco",False), groundtruths))
                if len (gt_with_mapping_gt_id_dco) == 1 and groundtruths[gt_index].get("dco",False):
                    LOG.info("Ground truths %s and %s are DCO. Not considering for mismatch." % (mapping_gt_id, gt_id))
#                    print "DIFF DCO %s" % (gt_id), groundtruths[gt_index]
                    
                else:
                # Look ma, we got a conflict over here!
                # New hypothesis for mapped ground truth found
                    if (mapping_gt_id == gt_id and mapping_hypo_id != hypo_id)\
                    or (mapping_gt_id != gt_id and mapping_hypo_id == hypo_id):
                        LOG.info("Correspondence %s-%s contradicts mapping %s-%s. Counting as mismatch and updating mapping." % (gt_id, hypo_id, mapping_gt_id, mapping_hypo_id))
                        mismatcheslist.append("DIFF Mismatch %s-%s -> %s-%s" % (mapping_gt_id, mapping_hypo_id, gt_id, hypo_id))
                        self.mismatches_ = self.mismatches_ + 1

                        # find groundtruth and hypothesis with given ids
                        g = list(filter(lambda g: g["id"] == gt_id, groundtruths))
                        h = list(filter(lambda h: h["id"] == hypo_id, hypotheses))

                        #assert(len(g) == 1)
                        if len(g) != 1:
                            LOG.warning('more than one gt: %s', str(g))
                        assert(len(h) == 1)

                        g = g[0]
                        h = h[0]

                        g["class"] = "mismatch"
                        h["class"] = "mismatch"

                        visualDebugAnnotations.append(g)
                        visualDebugAnnotations.append(h)

                        # mapping will be updated after loop
                        del self.mappings_[mapping_gt_id]
            
#                print "YIN: %d %d" % (self.recoverable_mismatches_, self.non_recoverable_mismatches_)
#                assert(self.recoverable_mismatches_ + self.non_recoverable_mismatches_ == self.mismatches_)
            if(self.recoverable_mismatches_ + self.non_recoverable_mismatches_ != self.mismatches_):
                LOG.info("Look, mismatches differ: g %d b %d  other %d" % (self.recoverable_mismatches_, self.non_recoverable_mismatches_, self.mismatches_))
                LOG.info(self.gt_map_)
                LOG.info(self.hypo_map_)
        
            # Save (overwrite) mapping even if ground truth is dco
            self.mappings_[gt_id] = hypo_id # Update mapping
        
        # Sorted DIFF output
        for c in sorted(correspondencelist):
            LOG.info(c)
            
        for m in sorted(mismatcheslist):
            LOG.info(m)

        # Visual debug
        for g in groundtruths:
            if g["class"] != "mismatch" and g["id"] in correspondences.keys():
                g["class"] = "correspondence"
                visualDebugAnnotations.append(g)
            
        for h in hypotheses:
            if h["class"] != "mismatch" and h["id"] in correspondences.values():
                h["class"] = "correspondence"
                visualDebugAnnotations.append(h)

        
        # TODO get overlap ratio
        # Print out correspondences
#            for gt_id, hypo_id in correspondences.items():
#                print "Correspondence: %s-%s" % (gt_id, hypo_id)
        
        # PAPER STEP 4
        # Count miss, when groundtruth has no correspondence and is not dco
        for groundtruth in groundtruths:
            LOG.info("DCO:", groundtruth)
            if groundtruth["id"] not in correspondences.keys() and groundtruth.get("dco", False) != True:
                LOG.info("Miss: %s" % groundtruth["id"])
                LOG.info("DEBUGMISS: %.2f" % timestamp)
                LOG.info("DIFF Miss %s" % groundtruth["id"])
                groundtruth["class"] = "miss"
                visualDebugAnnotations.append(groundtruth)
                self.misses_ += 1

        # Count false positives
        for hypothesis in hypotheses:
            if hypothesis["id"] not in correspondences.values():
                LOG.info("False positive: %s" % hypothesis["id"])
                LOG.info("DIFF False positive %s" % hypothesis["id"])
                self.false_positive_ids.add(hypothesis["id"])
                self.false_positives_ += 1
                visualDebugAnnotations.append(hypothesis)
                hypothesis["class"] = "false positive"
        
        self.total_correspondences_ += len(correspondences)
        
        self.total_groundtruths_ += len(groundtruths) # Number of objects (ground truths) in current frame

        visualDebugFrame = {
            "timestamp": timestamp,
            "class": frame["class"],
            "annotations": visualDebugAnnotations
        }
        if "num" in frame:
            visualDebugFrame["num"] = frame["num"]

        self.visualDebugFrames_.append(visualDebugFrame)
Example #14
    def process_frame(self, frame_number, observations):

        incoming_n = observations.shape[0]

        if len(self._targets) == 0:

            # No targets, so initialize everything:
            for idx in range(incoming_n):
                self._add_target(observations[idx, :])
            return
        
        elif incoming_n == 0:
            assignment = np.full((len(self._targets), 1), False, dtype=bool)
        
        else:
            # We have observations & targets, so matching is required:
            matching_matrix = self._calculate_matching_matrix(observations)
            assignment = munkres(matching_matrix)
            
        # Advance all targets:
        new_target_starts = []

        for tidx, target in enumerate(self._targets):

            my_match = assignment[tidx, :]
            if np.any(my_match):
                data_point = observations[my_match, :].squeeze()
                associated_cost = matching_matrix[tidx, my_match]
                if associated_cost > self.max_distance:
                    new_target_starts.append(data_point)
                    data_point = None
            else:
                data_point = None

            target.advance(data_point)

        # Clean out targets:
        self._targets = [target for target in self._targets if target.is_alive]

        # Create new targets:
        for starting_point in new_target_starts:
            self._add_target(starting_point)
        
        # Document what's happening:
        if self._document:
            for tidx, target in enumerate(self._targets):
                values = {}

                values["frame_number"] = frame_number
                values["target_id"] = target.id
                values["x"] = target.filter.x[0]
                values["y"] = target.filter.x[2]
                values["z"] = target.filter.x[4]
                values["x_velocity"] = target.filter.x[1]
                values["y_velocity"] = target.filter.x[3]
                values["z_velocity"] = target.filter.x[5]
                values["x_variance"] = target.filter.P[0, 0]
                values["y_variance"] = target.filter.P[2, 2]
                values["z_variance"] = target.filter.P[4, 4]
                values["n_missed_observations"] = target.frames_without_observation

                self._storage.writerow(values)
Example #15
def munkres_wrapper(cost, cutoff, divertMat, initValues, pk, maxKvsAssigned):
    # condition the cost matrix by removing unavailable rows and columns
    allowed = np.logical_not(np.floor(cost))
    cutoffMat = (np.ones(cost.shape)) * cutoff
    costAdj = np.subtract(cost, cutoffMat)
    ndx = np.where(costAdj <= 0)
    costAdj[ndx] = 0
    ndx = np.where(costAdj >= (1 - cutoff))
    costAdj[ndx] = 1

    # MOA algorithm
    assignTmp = munkres(costAdj)

    # convert from True/False to 1/0 matrix
    assign = np.zeros(assignTmp.shape)
    iRow = 0
    for x in assignTmp:
        iCol = 0
        for y in x:
            if y:
                assign[iRow, iCol] = 1
            iCol = iCol + 1
        iRow = iRow + 1

    # remove any "not allowed" assignments (just in case)
    assign = np.multiply(assign, allowed)

    # update threat values based on assignments
    values = np.zeros(initValues.shape)
    nAssigned = np.zeros(initValues.shape)
    iRow = 0
    for row in assign:
        iCol = 0
        for elem in row:
            if elem == 1:
                values[iCol] = initValues[iCol] * (1.0 - pk)
                nAssigned[iCol] = nAssigned[iCol] + 1
            else:
                values[iCol] = initValues[iCol]
            iCol = iCol + 1
        iRow = iRow + 1
    ndx = np.where(values < cutoff)
    values[ndx] = 0

    # assign unassigned KVs
    iRow = 0
    for row in assign:
        ndx = np.where(row > 0)
        if np.size(ndx) == 0:
            # unassigned KV found, make an assignment if possible
            allowedRow = allowed[iRow]
            ndx2 = np.where(allowedRow == False)
            tmpValues = np.copy(values)
            tmpValues[ndx2] = 0
            maxIndx = np.argmax(tmpValues)  # highest value threat
            assign[iRow, maxIndx] = 1
            nAssigned[maxIndx] = nAssigned[maxIndx] + 1
            if nAssigned[maxIndx] >= maxKvsAssigned:
                values[maxIndx] = 0
        iRow = iRow + 1  # go to next row

    # minimize divert if possible
    nWeapons = assign.shape[0]
    nThreats = assign.shape[1]
    for iWpn in range(0, nWeapons):
        for iThrt in range(0, nThreats):
            for jWpn in range(0, nWeapons):
                for jThrt in range(0, nThreats):
                    if iWpn == jWpn and iThrt == jThrt:
                        continue  # same assignment
                    if assign[iWpn, iThrt] == 1 and assign[jWpn, jThrt] == 1:
                        if divertMat[iWpn, jThrt] < divertMat[
                                jWpn, iThrt] and divertMat[iWpn, jThrt] > 0:
                            # swap assignments to minimize divert
                            assign[iWpn, iThrt] = 0
                            assign[iWpn, jThrt] = 1
                            assign[jWpn, jThrt] = 0
                            assign[jWpn, iThrt] = 1

    # return the assignments
    return assign
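The True/False to 1/0 loop near the top of munkres_wrapper can be collapsed into a vectorized cast; an equivalent two-liner:

assign = assignTmp.astype(np.float64)  # True/False -> 1.0/0.0, same shape
assign = np.multiply(assign, allowed)  # drop any "not allowed" assignments, as above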
Example #16
def _solve_global_nearest_neighbour(delta_matrix,
                                    gate_distance=np.Inf,
                                    **kwargs):
    try:
        tic = time.time()
        DEBUG = kwargs.get('debug', False)
        # Copy and gating
        if DEBUG: print("delta matrix\n", delta_matrix)
        cost_matrix = np.copy(delta_matrix)
        cost_matrix[cost_matrix > gate_distance] = np.Inf
        if DEBUG: print("cost_matrix\n", cost_matrix)

        # Pre-processing
        valid_matrix = cost_matrix < np.Inf
        if np.all(valid_matrix == False):
            return []
        if DEBUG: print("Valid matrix\n", valid_matrix.astype(int))

        bigM = np.power(
            10.,
            1.0 + np.ceil(np.log10(1. + np.sum(cost_matrix[valid_matrix]))))
        cost_matrix[np.logical_not(valid_matrix)] = bigM
        if DEBUG: print("Modified cost matrix\n", cost_matrix)

        validCol = np.any(valid_matrix, axis=0)
        validRow = np.any(valid_matrix, axis=1)
        if DEBUG: print("validCol", validCol)
        if DEBUG: print("validRow", validRow)
        nRows = int(np.sum(validRow))
        nCols = int(np.sum(validCol))
        n = max(nRows, nCols)
        if DEBUG: print("nRows, nCols, n", nRows, nCols, n)

        maxv = 10. * np.max(cost_matrix[valid_matrix])
        if DEBUG: print("maxv", maxv)

        rows = np.arange(nRows)
        cols = np.arange(nCols)
        dMat = np.zeros((n, n)) + maxv
        dMat[np.ix_(rows, cols)] = cost_matrix[np.ix_(validRow, validCol)]
        if DEBUG: print("dMat\n", dMat)

        # Assignment
        preliminary_assignment_matrix = munkres(dMat.astype(np.double))
        if DEBUG:
            print("preliminary preliminary_assignment_matrix\n",
                  np.asarray(preliminary_assignment_matrix, dtype=int))
        preliminary_assignments = [
            (rowI, np.where(row)[0][0])
            for rowI, row in enumerate(preliminary_assignment_matrix)
        ]
        if DEBUG:
            print("preliminary assignments ", preliminary_assignments)

        # Post-processing
        rowIdx = np.where(validRow)[0]
        colIdx = np.where(validCol)[0]
        assignments = []
        for preliminary_assignment in preliminary_assignments:
            row = preliminary_assignment[0]
            col = preliminary_assignment[1]
            if (row >= nRows) or (col >= nCols):
                continue
            rowI = rowIdx[row]
            colI = colIdx[col]
            if valid_matrix[rowI, colI]:
                assignments.append((rowI, colI))
        assert all(
            [delta_matrix[a[0], a[1]] <= gate_distance for a in assignments])
        if DEBUG:
            print("final assignments", assignments)
        toc = time.time() - tic
        log.debug("_solve_global_nearest_neighbour runtime: {:.1f}ms".format(
            toc * 1000))
        return assignments
    except Exception as e:
        print("#" * 20, "CRASH DEBUG INFO", "#" * 20)
        print("deltaMatrix", delta_matrix.shape, "\n", delta_matrix)
        print("gateDistance", gate_distance)
        print("Valid matrix", valid_matrix.shape, "\n",
              valid_matrix.astype(int))
        print("validCol", validCol.astype(int))
        print("validRow", validRow.astype(int))
        print("dMat", dMat.shape, "\n", dMat)
        print("preliminary assignments", preliminary_assignments)
        print("rowIdx", rowIdx)
        print("colIdx", colIdx)
        print("assignments", assignments)
        print("#" * 20, "CRASH DEBUG INFO", "#" * 20)
        time.sleep(0.1)
        raise e
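The pre-processing above boils down to gating plus padding to a square matrix before calling munkres; a stripped-down sketch of just that idea (names illustrative, assumes non-negative distances):

import numpy as np
from munkres import munkres

def gated_assignment(delta, gate):
    cost = delta.astype(np.double).copy()
    valid = cost <= gate
    if not valid.any():
        return []
    big = 10.0 ** (1 + np.ceil(np.log10(1.0 + cost[valid].sum())))
    cost[~valid] = big                             # discourage gated-out pairs
    n = max(cost.shape)
    square = np.full((n, n), 10.0 * max(cost[valid].max(), 1.0))
    square[:cost.shape[0], :cost.shape[1]] = cost  # pad to square with dummy cells
    assignment = munkres(square)
    return [(r, c) for r, c in np.argwhere(assignment)
            if r < delta.shape[0] and c < delta.shape[1] and valid[r, c]]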
Example #17
def calculate_distance(X, Y):
    if not X and not Y:
        return 0
    cost = distance.cdist(X, Y)
    return cost[munkres(cost)].sum()
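Usage sketch for calculate_distance on two small point sets (assuming it is in scope, with the same scipy.spatial distance import as the snippet):

X = [(0.0, 0.0), (1.0, 0.0)]
Y = [(0.0, 0.1), (1.1, 0.0)]
# optimal pairing is (0 -> 0, 1 -> 1), so the summed distance is 0.1 + 0.1 = 0.2
print(calculate_distance(X, Y))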
Example #18
def my_munkres(scores):
    # add small random noise!
    random_noise = cs.MUNKRES_RANDOM_NOISE * np.random.rand(*np.shape(scores))
    return munkres(scores + random_noise)
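cs.MUNKRES_RANDOM_NOISE is an external constant; with an explicit scale in its place, the tie-breaking idea reads as follows (a sketch):

import numpy as np
from munkres import munkres

def noisy_munkres(scores, noise_scale=1e-9):
    # tiny jitter breaks ties between equal-cost assignments
    # without materially changing the optimum
    jitter = noise_scale * np.random.rand(*np.shape(scores))
    return munkres(np.asarray(scores, dtype=np.double) + jitter)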
Example #19
	def match(self, pair):
		s1l = len(pair.s1["vector"])
		s2l = len(pair.s2["vector"])
		self.tsim = float('-9999')
		self.lsim = float('-9999')
		self.minlen = min(s1l, s2l)
		self.maxlen = max(s1l, s2l)
		self.nmatches = 0
		self.start = -1
		self.end = -1
		if (self.minlen == 0 or
				self.maxlen >= 100):
			return self.tsim

		# For simplicity in later code, make the shorter one first
		if s1l < s2l:
			self.s1 = pair.s1
			self.s2 = pair.s2
			s1l = len(pair.s1["vector"])
			s2l = len(pair.s2["vector"])
		else:
			self.s1 = pair.s2
			self.s2 = pair.s1

		wc = self.wordcount
		if "wv_idfs" not in self.s1:
			self.s1["wv_idfs"] = [math.log(wc / self.vocab[x], 2) for x in self.s1["wv_tokens"]]
		if "wv_idfs" not in self.s2:
			self.s2["wv_idfs"] = [math.log(wc / self.vocab[x], 2) for x in self.s2["wv_tokens"]]

		if self.ngram > 1:
			ng = self.ngram
			v1 = self.s1["vector"]
			v2 = self.s2["vector"]
			t1 = self.s1["wv_tokens"]
			t2 = self.s2["wv_tokens"]
			#idf1 = self.s1["wv_idfs"]
			#idf2 = self.s2["wv_idfs"]
			weights1 = self.s1["weights"]
			weights2 = self.s2["weights"]
			nv1 = [sum(v1[i:i + ng]) for i in range(max(1, len(v1) - ng + 1))]
			nv2 = [sum(v2[i:i + ng]) for i in range(max(1, len(v2) - ng + 1))]
			nt1 = ["_".join(t1[i:i + ng]) for i in range(max(1, len(t1) - ng + 1))]
			nt2 = ["_".join(t2[i:i + ng]) for i in range(max(1, len(t2) - ng + 1))]
			#nidf1 = [max(idf1[i:i + ng]) for i in range(max(1, len(idf1) - ng + 1))]
			#nidf2 = [max(idf2[i:i + ng]) for i in range(max(1, len(idf2) - ng + 1))]
			nweights1 = [max(weights1[i:i + ng]) for i in range(max(1, len(weights1) - ng + 1))]
			nweights2 = [max(weights2[i:i + ng]) for i in range(max(1, len(weights2) - ng + 1))]
			#self.s1 = {"vector": nv1, "wv_tokens": nt1, "wv_idfs": nidf1}
			#self.s2 = {"vector": nv2, "wv_tokens": nt2, "wv_idfs": nidf2}
			self.s1 = {"vector": nv1, "wv_tokens": nt1, "weights": nweights1}
			self.s2 = {"vector": nv2, "wv_tokens": nt2, "weights": nweights2}

			self.minlen = max(self.minlen - ng + 1, 1)
			self.maxlen = max(self.maxlen - ng + 1, 1)

		self.dists = [1] * self.minlen

		self.pair = pair
		#self.dist = pairdist(self.s1["vector"], self.s2["vector"], fn=self.metric)
		#self.dist = pairdist(self.s1, self.s2, fn=self.metric)
		dist = self.metric(self.s1, self.s2)

		# scale by max of idf
		#for i in range(dist.shape[0]):
		#	for j in range(dist.shape[1]):
		#		dist[i][j] *= max(self.s1["wv_idfs"][i], self.s2["wv_idfs"][j])

		self.matchv = np.zeros(dist.shape, int)
		np.fill_diagonal(self.matchv, 1)
		if np.sum(dist) == 0:
			self.tsim = 1
			self.nmatches = min(dist.shape)
			self.start = 0
			self.end = dist.shape[1] - 1
			return self.tsim
		if (dist == dist[0]).all():
			self.tsim = 1 - sum(dist[0])
			self.nmatches = min(dist.shape)
			self.start = 0
			self.end = dist.shape[1] - 1
			return self.tsim
		if (dist.T == dist[:, 0]).all():
			self.tsim = 1 - sum(dist[:, 0])
			self.nmatches = min(dist.shape)
			self.start = 0
			self.end = dist.shape[1] - 1
			return self.tsim

		signal.signal(signal.SIGALRM, munkres_handler)
		signal.alarm(10)
		try:
			matches = munkres(dist)
		except Exception, e:
			printd(e)
			printd("dist: " + dist.shape)
			printd(dist)
			return self.tsim
Example #20
    def sample(self, niter=1000, nburn=0, thin=1, ident=False):
        """
        Samples niter + nburn iterations, only storing the last niter
        draws, thinned as indicated.

        If ident is True, the munkres identification algorithm is used
        to match against the INITIAL VALUES. These should be selected
        with great care; we recommend using the EM algorithm. Also,
        burn-in doesn't make much sense in this case.
        """

        self._setup_storage(niter)

        # start threads
#         if self.parallel:
#             for w in self.workers:
#                 w.start()

        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)

        alpha = self._alpha0
        weights = self._weights0
        mu = self._mu0
        Sigma = self._Sigma0

        if self.verbose:
            if self.gpu:
                print "starting GPU enabled MCMC"
            else:
                print "starting MCMC"

        for i in range(-nburn, niter):

            if i==0 and ident:
                labels, zref = self._update_labels(mu, Sigma, weights, True)
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                for j in xrange(self.ncomp):
                    c0[j,:] = np.sum(zref==j)
                zhat = zref.copy()


            if isinstance(self.verbose, int) and self.verbose and \
                    not isinstance(self.verbose, bool):
                if i % self.verbose == 0:
                    print i

            labels, zhat = self._update_labels(mu, Sigma, weights, ident)
            counts = self._update_mu_Sigma(mu, Sigma, labels)

            stick_weights, weights = self._update_stick_weights(counts, alpha)

            alpha = self._update_alpha(stick_weights)


            ## relabel if needed:
            if i>0 and ident:
                cost = c0.copy()
                try:
                    _get_cost(zref, zhat, cost) #cython!!
                except IndexError:
                    print 'Something strange happened ... do zref and zhat look correct?'
                    import pdb; pdb.set_trace()

                _, iii = np.where(munkres(cost))
                weights = weights[iii]
                mu = mu[iii]
                Sigma = Sigma[iii]
            if i>=0:
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.mu[i] = mu
                self.Sigma[i] = Sigma

        # clean up threads
#         if self.parallel:
#             for i in xrange(self.num_cores):
#                 self.work_queue[i].put(None)
        if self.gpu:
            kill_GPUWorkers(self.gpu_workers)
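_get_cost above is a cython helper that is not shown here. As an illustration only of the relabelling idea (not necessarily what _get_cost computes), a permutation that best aligns the current labels with the reference labels can be built like this:

import numpy as np
from munkres import munkres

def relabel_permutation(zref, zhat, ncomp):
    # cost[j, k]: reference-component-j points that are NOT currently labelled k,
    # so the assignment minimising total cost maximises label agreement
    cost = np.zeros((ncomp, ncomp), dtype=np.double)
    for j in range(ncomp):
        for k in range(ncomp):
            cost[j, k] = np.sum((zref == j) & (zhat != k))
    _, perm = np.where(munkres(cost))
    return perm  # then: weights = weights[perm]; mu = mu[perm]; Sigma = Sigma[perm]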
Example #21
    def sample(self, niter=1000, nburn=0, thin=1, ident=False):
        """
        Samples niter + nburn iterations, only storing the last niter
        draws, thinned as indicated.

        If ident is True, the munkres identification algorithm is used
        to match against the INITIAL VALUES. These should be selected
        with great care; we recommend using the EM algorithm. Also,
        burn-in doesn't make much sense in this case.
        """

        self._setup_storage(niter)

        # start threads
        if self.parallel:
            for w in self.workers:
                w.start()

        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)

        alpha = self._alpha0
        weights = self._weights0
        mu = self._mu0
        Sigma = self._Sigma0

        if self.verbose:
            if self.gpu:
                print "starting GPU enabled MCMC"
            else:
                print "starting MCMC"

        for i in range(-nburn, niter):

            if i == 0 and ident:
                labels, zref = self._update_labels(mu, Sigma, weights, True)
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                for j in xrange(self.ncomp):
                    c0[j, :] = np.sum(zref == j)
                zhat = zref.copy()


            if isinstance(self.verbose, int) and self.verbose and \
                    not isinstance(self.verbose, bool):
                if i % self.verbose == 0:
                    print i

            labels, zhat = self._update_labels(mu, Sigma, weights, ident)
            mu, Sigma, counts = self._update_mu_Sigma(Sigma, labels)

            stick_weights, weights = self._update_stick_weights(counts, alpha)

            alpha = self._update_alpha(stick_weights)

            ## relabel if needed:
            if i > 0 and ident:
                cost = c0.copy()
                try:
                    _get_cost(zref, zhat, cost)  #cython!!
                except IndexError:
                    print 'Something strange happened ... do zref and zhat look correct?'
                    import pdb
                    pdb.set_trace()
                _, iii = np.where(munkres(cost))
                weights = weights[iii]
                mu = mu[iii]
                Sigma = Sigma[iii]
            if i >= 0:
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.mu[i] = mu
                self.Sigma[i] = Sigma

        # clean up threads
        if self.parallel:
            for i in xrange(self.num_cores):
                self.work_queue[i].put(None)
        if self.gpu:
            kill_GPUWorkers(self.gpu_workers)
Example #22
    def sample(self, niter=1000, nburn=100, thin=1, tune_interval=100, ident=False):
        """
        Performs MCMC sampling of the posterior. \beta must be sampled
        using Metropolis-Hastings, and its proposal distribution is
        tuned every tune_interval iterations during the burn-in period.
        An ample burn-in is suggested; the AR parameter stores the
        acceptance rates for the stick weights of \beta and \alpha_0.
        """
        if self.verbose:
            if self.gpu:
                print "starting GPU enabled MCMC"
            else:
                print "starting MCMC"
        # multiGPU init
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)

        self._ident = ident
        self._setup_storage(niter, thin)
        self._tune_interval = tune_interval

        alpha = self._alpha0
        alpha0 = self._alpha00
        weights = self._weights0
        beta = self._beta0
        stick_beta = self._stick_beta0
        mu = self._mu0
        Sigma = self._Sigma0

        for i in range(-nburn, niter):
            if isinstance(self.verbose, int) and self.verbose and \
                    not isinstance(self.verbose, bool):
                if i % self.verbose == 0:
                    print i
            # update labels
            labels, zhat = self._update_labels(mu, Sigma, weights)

            # Get initial reference if needed
            if i == 0 and ident:
                zref = []
                for ii in xrange(self.ngroups):
                    zref.append(zhat[ii].copy())
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                for j in xrange(self.ncomp):
                    for ii in xrange(self.ngroups):
                        c0[j, :] += np.sum(zref[ii] == j)

            # update mu and sigma
            counts = self._update_mu_Sigma(mu, Sigma, labels, self.alldata)

            # update weights, masks
            stick_weights, weights = self._update_stick_weights(counts, beta, alpha0)
            stick_beta, beta = sampler.sample_beta(
                stick_beta, beta, stick_weights, alpha0,
                alpha, self.AR, self.prop_scale, self.parallel
            )
            # hyper parameters
            alpha = self._update_alpha(stick_beta)
            alpha0 = sampler.sample_alpha0(stick_weights, beta, alpha0,
                                           self.e0, self.f0,
                                           self.prop_scale, self.AR)

            # Relabel
            if i > 0 and ident:
                cost = c0.copy()
                for Z, Zr in zip(zhat, zref):
                    _get_cost(Zr, Z, cost)
                _, iii = np.where(munkres(cost))
                beta = beta[iii]
                weights = weights[:, iii]
                mu = mu[iii]
                Sigma = Sigma[iii]
            # save
            if i >= 0:
                self.beta[i] = beta
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.alpha0[i] = alpha0
                self.mu[i] = mu
                self.Sigma[i] = Sigma
            elif (nburn+i+1) % self._tune_interval == 0:
                self._tune()
        self.stick_beta = stick_beta.copy()
        if self.gpu:
            kill_GPUWorkers(self.gpu_workers)
    def score(self, baselines, scorenames=["rr", "rp"]):
        """
        Scores two sets of modules

        """
        scores = {}

        # recovery and relevance
        if "rr" in scorenames:
            if (self.membershipsA.shape[1] == 0) or (self.membershipsB.shape[1]
                                                     == 0):
                scores["recoveries"] = scores["relevances"] = np.zeros(1)
            else:
                scores["recoveries"] = self.jaccards.max(1)
                scores["relevances"] = self.jaccards.max(0)
            scores["recovery"] = scores["recoveries"].mean()
            scores["relevance"] = scores["relevances"].mean()
            scores["F1rr"] = harmonic_mean(
                [scores["recovery"], scores["relevance"]])

        # recall and precision
        if "rp" in scorenames:
            if (self.membershipsA.shape[1] == 0) or (self.membershipsB.shape[1]
                                                     == 0):
                scores["recalls"] = scores["precisions"] = np.zeros(1)
            else:
                scores["recalls"], scores["precisions"] = ebcubed.cal_ebcubed(
                    self.membershipsA.as_matrix(),
                    self.membershipsB.as_matrix(),
                    self.jaccards.T.astype(np.float64))
            scores["recall"] = scores["recalls"].mean()
            scores["precision"] = scores["precisions"].mean()
            scores["F1rp"] = harmonic_mean(
                [scores["recall"], scores["precision"]])

        # consensus score, uses the python munkres package
        if "consensus" in scorenames:
            if (self.membershipsA.shape[1] == 0) or (self.membershipsB.shape[1]
                                                     == 0):
                scores["consensus"] = 0
            else:
                cost_matrix = np.array(1 - self.jaccards,
                                       dtype=np.double).copy()
                indexes = munkres(cost_matrix)
                scores["consensus"] = (1 - cost_matrix[indexes]).sum() / max(
                    self.jaccards.shape)

        if ("rr" in scorenames) and ("rp" in scorenames):
            scores["F1rprr"] = harmonic_mean([
                scores["recall"], scores["precision"], scores["recovery"],
                scores["relevance"]
            ])

        # compare with baseline
        if baselines is not None:
            for baseline_name, baseline in baselines.items():
                if "rr" in scorenames:
                    scores["F1rr_" + baseline_name] = harmonic_mean([
                        (scores[scorename] / baseline[scorename])
                        for scorename in ["recovery", "relevance"]
                    ])
                if "rp" in scorenames:
                    scores["F1rp_" + baseline_name] = harmonic_mean([
                        (scores[scorename] / baseline[scorename])
                        for scorename in ["recall", "precision"]
                    ])
                if ("rr" in scorenames) and ("rp" in scorenames):
                    scores["F1rprr_" + baseline_name] = harmonic_mean([
                        (scores[scorename] / baseline[scorename]) for scorename
                        in ["recovery", "relevance", "recall", "precision"]
                    ])
                if "consensus" in scorenames:
                    scores["consensus" + baseline_name] = harmonic_mean([
                        (scores[scorename] / baseline[scorename])
                        for scorename in ["consensus"]
                    ])

        # alternative scores (for non-overlapping and exhaustive clustering)
        if "fmeasure_wiwie" in scorenames:
            scores["fmeasure_wiwie"] = fmeasure_wiwie(self.modulesA,
                                                      self.modulesB)
        if "fmeasure_flowcap" in scorenames:
            scores["fmeasure_flowcap"] = fmeasure_flowcap(
                self.modulesA, self.modulesB)
        if "vmeasure_wiwie" in scorenames:
            scores["vmeasure_wiwie"] = vmeasure_wiwie(self.modulesA,
                                                      self.modulesB)

        return scores
Example #24

if __name__ == '__main__':
    cluster1 = stats.DPCluster(.5, np.array([0, 0]), np.eye(2))
    cluster2 = stats.DPCluster(.5, np.array([0, 4]), np.eye(2))
    cluster3 = stats.DPCluster(.25, np.array([0, 0]), np.eye(2))
    cluster4 = stats.DPCluster(.25, np.array([4, 0]), np.eye(2))
    cluster5 = stats.DPCluster(.5, np.array([0, 4]), np.eye(2))
    A = stats.DPMixture([cluster1, cluster2])
    B = stats.DPMixture([cluster3, cluster4, cluster5])
    from munkres import munkres
    print 'Ref has means', A.mus, 'with weights', A.pis
    print 'Test has means', B.mus, 'with weights', B.pis
    print 'mean distance'
    print mean_distance(A, B)
    print munkres(mean_distance(A, B))
    mA = A.make_modal()
    mB = B.make_modal()
    print 'modal distance'
    print mean_distance(mA, mB)
    print munkres(mean_distance(mA, mB))
    print 'modal using means'
    print mean_distance(mA, mB, use_means=True)
    print munkres(mean_distance(mA, mB, use_means=True))

    print 'classification'
    print classification_distance(A, B)
    print munkres(classification_distance(A, B))
    print 'modal classification'
    print classification_distance(mA, mB)
    print munkres(classification_distance(mA, mB))
Example #25
    def sample(self,
               niter=1000,
               nburn=100,
               thin=1,
               tune_interval=100,
               ident=False):
        """
        Performs MCMC sampling of the posterior. \beta must be sampled
        using Metropolis-Hastings, and its proposal distribution is
        tuned every tune_interval iterations during the burn-in period.
        An ample burn-in is suggested; the AR parameter stores the
        acceptance rates for the stick weights of \beta and \alpha_0.
        """
        if self.verbose:
            if self.gpu:
                print "starting GPU enabled MCMC"
            else:
                print "starting MCMC"
        # multiGPU init
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)

        if self.parallel:
            for w in self.workers:
                w.start()

        self._ident = ident
        self._setup_storage(niter, thin)
        self._tune_interval = tune_interval

        alpha = self._alpha0
        alpha0 = self._alpha00
        weights = self._weights0
        beta = self._beta0
        stick_beta = self._stick_beta0
        mu = self._mu0
        Sigma = self._Sigma0

        for i in range(-nburn, niter):
            if isinstance(self.verbose, int) and self.verbose and \
                    not isinstance(self.verbose, bool):
                if i % self.verbose == 0:
                    print i
            ## update labels
            labels, zhat = self._update_labels(mu, Sigma, weights)
            ## Get initial reference if needed
            if i == 0 and ident:
                zref = []
                for ii in xrange(self.ngroups):
                    zref.append(zhat[ii].copy())
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                for j in xrange(self.ncomp):
                    for ii in xrange(self.ngroups):
                        c0[j, :] += np.sum(zref[ii] == j)

            ## update mu and sigma
            mu, Sigma, counts = self._update_mu_Sigma(Sigma, labels,
                                                      self.alldata)

            ## update weights, masks
            stick_weights, weights = self._update_stick_weights(
                counts, beta, alpha0)
            stick_beta, beta = self._update_beta(stick_beta, beta,
                                                 stick_weights, alpha0, alpha)
            ## hyper parameters
            alpha = self._update_alpha(stick_beta)
            alpha0 = self._update_alpha0(stick_weights, beta, alpha0)

            ## Relabel
            if i > 0 and ident:
                cost = c0.copy()
                for Z, Zr in zip(zhat, zref):
                    _get_cost(Zr, Z, cost)
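                # Hungarian assignment: munkres(cost) returns a boolean
                # matrix; the column indices of its True entries form the
                # permutation that re-aligns the current components with
                # the reference labeling taken at iteration 0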
                _, iii = np.where(munkres(cost))
                beta = beta[iii]
                weights = weights[:, iii]
                mu = mu[iii]
                Sigma = Sigma[iii]
            ## save
            if i >= 0:
                self.beta[i] = beta
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.alpha0[i] = alpha0
                self.mu[i] = mu
                self.Sigma[i] = Sigma
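            # still in burn-in: retune the Metropolis-Hastings proposal for
            # the beta stick weights every tune_interval iterations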
            elif (nburn + i + 1) % self._tune_interval == 0:
                self._tune()
        self.stick_beta = stick_beta.copy()
        if self.gpu:
            kill_GPUWorkers(self.gpu_workers)
        if self.parallel:
            for ii in range(len(self.workers)):
                self.work_queue[ii].put(None)
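The relabelling step above (_, iii = np.where(munkres(cost))) boils down to solving an assignment between the reference labeling and the current one. A minimal sketch of that idea; the agreement-count cost used here is only an illustration and stands in for whatever _get_cost accumulates:

import numpy as np
from munkres import munkres

def relabel_permutation(zref, zhat, ncomp):
    # cost[j, k] is minus the number of points carrying reference label j
    # and current label k, so minimizing total cost maximizes agreement
    cost = np.zeros((ncomp, ncomp), dtype=np.double)
    for j in range(ncomp):
        for k in range(ncomp):
            cost[j, k] = -np.sum((zref == j) & (zhat == k))
    # munkres returns a boolean assignment matrix; the column indices of
    # its True entries give the permutation applied to beta, weights, mu, Sigma
    _, perm = np.where(munkres(cost))
    return perm

zref = np.array([0, 0, 1, 1, 2, 2])
zhat = np.array([1, 1, 2, 2, 0, 0])
print(relabel_permutation(zref, zhat, 3))   # -> [1 2 0]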
Example #26
0
def ransac_compute_rigid_transform(Dm,
                                   pts1,
                                   pts2,
                                   confidence_thresh=.01,
                                   ransac_iters=20,
                                   sample_size=5,
                                   matching_iter=10,
                                   n_neighbors=10):

    #     q = time.time()

    high_confidence_thresh = np.sort(Dm.flat)[int(confidence_thresh *
                                                  np.size(Dm))]
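    # the threshold is the confidence_thresh quantile of all entries of Dm;
    # only pairs below it are treated as high-confidence correspondences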
    #     print 'high_confidence_thresh', high_confidence_thresh

    N1 = len(pts1)
    N2 = len(pts2)

    rs, cs = np.where(Dm < high_confidence_thresh)
    high_confidence_pairs = np.c_[rs, cs]

    if len(high_confidence_pairs) == 0:
        return None, [], None, np.inf

    if OUTPUT:
        print 'high_confidence_pairs', high_confidence_pairs

#     from itertools import combinations
#     possible_samples = list(combinations(high_confidence_pairs, sample_size))
#     random.shuffle(possible_samples)

#     n_possible_samples = len([t for t in combinations(high_confidence_pairs, sample_size)
#                         if allunique([tt[0] for tt in t]) and allunique([tt[1] for tt in t])])
#     print 'n_possible_samples', len(possible_samples)
#     random.shuffle(possible_samples)

#     print 'comb', time.time() - q

#     return

    p1s = np.sort(list(set(rs)))
    p2s = np.sort(list(set(cs)))
    n1 = len(p1s)
    n2 = len(p2s)

    if n1 < sample_size or n2 < sample_size:
        return None, [], None, np.inf

    offsets = []
    scores = []
    matches_list = []
    samples_list = []

    sample_counter = 0
    n_possible_samples = int(
        comb(len(high_confidence_pairs), sample_size, exact=False))

    #     n_possible_samples = len(possible_samples)
    for ri in range(min(ransac_iters, n_possible_samples)):

        samples = []

        for tt in range(100):
            #             s = possible_samples[sample_counter]
            s = random.sample(high_confidence_pairs, sample_size)
            sample_counter += 1
            w1, w2 = zip(*s)
            if len(set(w1)) == len(w1) and len(set(w2)) == len(w2):
                samples = s
                break

#         samples = np.array(possible_samples[ri])

        if OUTPUT:
            print '\nsamples', ri, samples

        X = pts1[[s[0] for s in samples]]
        Y = pts2[[s[1] for s in samples]]

        # generate transform hypothesis
        T, angle = rigid_transform_from_pairs(X, Y)
        if np.abs(angle) > np.pi / 4:
            if OUTPUT:
                print 'angle too wide', np.rad2deg(angle)
            continue

        # apply transform hypothesis
        pts1_trans = rigid_transform_to(pts1, T)

        # iterative closest point association
        matches = None
        matches_prev = None

        for mi in range(matching_iter):

            # given transform, find matching

            #             t1 = time.time()

            #             b = time.time()

            Dh = cdist(pts1_trans, pts2, metric='euclidean')
            Dargmin1 = Dh.argsort(axis=1)
            Dargmin0 = Dh.argsort(axis=0)
            #             print 'cdist', time.time() - b

            #             b = time.time()

            D2 = Dh.copy()
            D2[np.arange(N1)[:, np.newaxis], Dargmin1[:, n_neighbors:]] = 999
            D2[Dargmin0[n_neighbors:, :], np.arange(N2)[np.newaxis, :]] = 999
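            # keep only candidates that are within each other's n_neighbors
            # nearest points in both directions; everything else gets the
            # large 999 sentinel so the assignment below avoids those pairs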
            D_hc_pairs = D2[p1s[:, np.newaxis], p2s]

            #             D_hc_pairs = 9999 * np.ones((n1, n2))
            #             for i,j in high_confidence_pairs:
            #                 if j in Dargmin1[i,:10] and i in Dargmin0[:10,j]:
            #                     ii = p1s.index(i)
            #                     jj = p2s.index(j)
            #                     D_hc_pairs[ii, jj] = Dh[i,j]

            #             print 'D_hc_pairs', time.time() - b

            if matches is not None:
                matches_prev = matches

#             b = time.time()
            matches_hc_pairs = np.array(zip(*np.nonzero(munkres(D_hc_pairs))))
            #             print 'munkres', time.time() - b, mi

            #             b = time.time()

            #                 print [(p1s[ii], p2s[jj]) for (ii,jj) in matches_hc_pairs]
            matches = np.array([(p1s[ii], p2s[jj])
                                for (ii, jj) in matches_hc_pairs
                                if D_hc_pairs[ii, jj] != 999])
            # the assignment can include 999-sentinel pairs; the "if" above filters them out
            #             print 'matches', time.time() - b

            expanded_matches = []
            matches1 = set([i for i, j in matches])
            matches2 = set([j for i, j in matches])
            rem1 = set(range(N1)) - matches1
            rem2 = set(range(N2)) - matches2
            add1 = set([])
            add2 = set([])
            for i in rem1:
                for j in rem2:
                    if (j in Dargmin1[i, :3] and i in Dargmin0[:3, j]
                            and i not in add1 and j not in add2):
                        add1.add(i)
                        add2.add(j)
                        expanded_matches.append((i, j))

            if len(expanded_matches) > 0 and len(matches) > 0:
                matches = np.vstack([matches, np.array(expanded_matches)])

            if OUTPUT:
                #                 print 'considered pairs', w
                #                 print 'matches', [(i,j) for i,j in matches
                q1, q2 = np.where(D_hc_pairs < 999)
                w = zip(*[p1s[q1], p2s[q2]])
                print 'matches', len(matches), '/', 'considered pairs', \
                    len(w), '/', 'all hc pairs', len(high_confidence_pairs)

#             t2 = time.time()

            if len(matches) < 3:
                s = np.inf
                break
            else:
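                # score: mean matched distance divided by the squared
                # coverage, where coverage averages how much of each point
                # set's bounding box the matches span; lower is better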
                xs1 = pts1_trans[matches[:, 0], 0]
                x_coverage1 = float(xs1.max() - xs1.min()) / (
                    pts1_trans[:, 0].max() - pts1_trans[:, 0].min())
                ys1 = pts1_trans[matches[:, 0], 1]
                y_coverage1 = float(ys1.max() - ys1.min()) / (
                    pts1_trans[:, 1].max() - pts1_trans[:, 1].min())

                xs2 = pts2[matches[:, 1], 0]
                x_coverage2 = float(xs2.max() - xs2.min()) / (
                    pts2[:, 0].max() - pts2[:, 0].min())
                ys2 = pts2[matches[:, 1], 1]
                y_coverage2 = float(ys2.max() - ys2.min()) / (
                    pts2[:, 1].max() - pts2[:, 1].min())

                coverage = .5 * x_coverage1 * y_coverage1 + .5 * x_coverage2 * y_coverage2

                s = Dh[matches[:, 0], matches[:, 1]].mean() / coverage**2
#             s = .5 * Dm[Dh.argmin(axis=0), np.arange(len(pts2))].mean() + .5 * Dm[np.arange(len(pts1)), Dh.argmin(axis=1)].mean()
#             s = np.mean([np.mean(Dh.min(axis=0)), np.mean(Dh.min(axis=1))])

            X = pts1[matches[:, 0]]
            Y = pts2[matches[:, 1]]

            T, angle = rigid_transform_from_pairs(X, Y)
            if np.abs(angle) > np.pi / 4:
                break

            pts1_trans = rigid_transform_to(pts1, T)

            if matches_prev is not None and all([(i, j) in matches_prev
                                                 for i, j in matches]):
                break

#             print 'coverage and remaining', mi, time.time() - t2

#             print mi, time.time() - t1

#         Dh = cdist(pts1_trans, pts2, metric='euclidean')
#         Dargmin1 = Dh.argsort(axis=1)
#         Dargmin0 = Dh.argsort(axis=0)

#         expanded_matches = []
#         matches1 = set([i for i,j in matches])
#         matches2 = set([j for i,j in matches])
#         rem1 = set(range(N1)) - matches1
#         rem2 = set(range(N2)) - matches2
#         add1 = set([])
#         add2 = set([])
#         for i in rem1:
#             for j in rem2:
#                 if j in Dargmin1[i,:3] and i in Dargmin0[:3,j] and i not in add1 and j not in add2:
#                     add1.add(i)
#                     add2.add(j)
#                     expanded_matches.append((i,j))

#         if len(expanded_matches) > 0 and len(matches) > 0 :
#             matches = np.vstack([matches, np.array(expanded_matches)])

#         print matches

        samples_list.append(samples)
        offsets.append(T)
        matches_list.append(matches)
        scores.append(s)


#         print matches
#         print s
#         plot_two_pointsets(pts1_trans[:,::-1]*np.array([1,-1]), pts2[:,::-1]*np.array([1,-1]),
#                    center1=False, center2=False,
#                    text=True, matchings=matches)

    if len(scores) > 0:
        best_i = np.argmin(scores)

        best_score = scores[best_i]
        best_T = offsets[best_i]
        best_sample = samples_list[best_i]
        best_matches = matches_list[best_i]

        return best_T, best_matches, best_sample, best_score
    else:
        return None, [], None, np.inf
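A minimal usage sketch for ransac_compute_rigid_transform, assuming the module defining it (together with its rigid_transform_from_pairs / rigid_transform_to helpers) is importable; the descriptor-distance matrix Dm is faked here with plain Euclidean distances purely for illustration:

import numpy as np
from scipy.spatial.distance import cdist

np.random.seed(0)
pts1 = np.random.rand(30, 2)
theta = np.deg2rad(10.)
R = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta),  np.cos(theta)]])
pts2 = pts1.dot(R.T) + np.array([.05, -.02])   # rotated and shifted copy of pts1

# stand-in for a descriptor-distance matrix
Dm = cdist(pts1, pts2, metric='euclidean')

T, matches, sample, score = ransac_compute_rigid_transform(
    Dm, pts1, pts2, confidence_thresh=.01, ransac_iters=20, sample_size=5)
print(score)          # lower is better; np.inf means no acceptable hypothesis
print(len(matches))   # number of matched point pairs under the best transform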
Example #27
0
if __name__ == '__main__':
    cluster1 = stats.DPCluster(.005, np.array([0, 0]), np.eye(2))
    cluster2 = stats.DPCluster(.995, np.array([0, 4]), np.eye(2))
    cluster3 = stats.DPCluster(.25, np.array([0, 0]), np.eye(2))
    cluster4 = stats.DPCluster(.25, np.array([4, 0]), np.eye(2))
    cluster5 = stats.DPCluster(.5, np.array([0, 4]), np.eye(2))
    A = stats.DPMixture([cluster1, cluster2]).get_submodel([0, 1])
    B = stats.DPMixture([cluster3, cluster4, cluster5]).get_submodel([0, 1, 2])
    from munkres import munkres
    print 'Ref has means', A.mus, 'with weights', A.pis
    print 'Test has means', B.mus, 'with weights', B.pis
    mA = A.make_modal()
    mB = B.make_modal()

    print 'classification'
    print classification_distance(A, B)
    print classification_distance(A, B).mean(), classification_distance(A, B).sum()
    print munkres(classification_distance(A, B))
    print 'modal classification'
    print classification_distance(mA, mB)
    print classification_distance(mA, mB).mean()
    print munkres(classification_distance(mA, mB))

    print 'kldiv'
    print kldiv_distance(A, B)
    print munkres(kldiv_distance(A, B))

    print 'modal kldiv'
    print kldiv_distance(mA, mB)
    print munkres(kldiv_distance(mA, mB))

if __name__ == '__main__':
    cluster1 = stats.DPCluster(.5, np.array([0, 0]), np.eye(2))
    cluster2 = stats.DPCluster(.5, np.array([0, 4]), np.eye(2))
    cluster3 = stats.DPCluster(.25, np.array([0, 0]), np.eye(2))
    cluster4 = stats.DPCluster(.25, np.array([4, 0]), np.eye(2))
    cluster5 = stats.DPCluster(.5, np.array([0, 4]), np.eye(2))
    A = stats.DPMixture([cluster1, cluster2])
    B = stats.DPMixture([cluster3, cluster4, cluster5])
    from munkres import munkres
    print 'Ref has means', A.mus, 'with weights', A.pis
    print 'Test has means', B.mus, 'with weights', B.pis
    print 'mean distance'
    print mean_distance(A, B)
    print munkres(mean_distance(A, B))
    mA = A.make_modal()
    mB = B.make_modal()
    print 'modal distance'
    print mean_distance(mA, mB)
    print munkres(mean_distance(mA, mB))
    print 'modal using means'
    print mean_distance(mA, mB, use_means=True)
    print munkres(mean_distance(mA, mB, use_means=True))

    print 'classification'
    print classification_distance(A, B)
    print munkres(classification_distance(A, B))
    print 'modal classification'
    print classification_distance(mA, mB)
    print munkres(classification_distance(mA, mB))