def cameo_internal_rankcorr():
    """Report how well fine-grained code scores agree with their 2-char prefix.

    This routine is retired: it raises ``AssertionError`` immediately and
    nothing below the guard executes.  The legacy body is kept for reference.

    Legacy behaviour (unreachable): for every path in ``path2code`` whose
    code is not exactly two characters long, whose ``path2count`` is at
    least 5, and whose code has an entry in ``cameo_code2score``, pair the
    code's score with the score of its first two characters.  Two rank
    correlations (``method='gk'``) are then printed: one with unit weights
    and one weighted by ``path2count``.

    Raises:
        AssertionError: always.
    """
    # Explicit raise instead of `assert False, ...`: assert statements are
    # removed under `python -O`, which would silently re-enable this code.
    raise AssertionError("not sure we should be using this anymore")

    fine_scores = []
    coarse_scores = []
    weights = []
    for path in path2code:
        code = path2code[path]
        # Two-character codes are themselves the coarse level; rarely seen
        # paths (fewer than 5 occurrences) are ignored.
        if len(code) == 2 or path2count[path] < 5:
            continue
        fine = cameo_code2score.get(code)
        if fine is None:
            continue
        fine_scores.append(fine)
        coarse_scores.append(cameo_code2score[code[:2]])
        weights.append(path2count[path])

    print("%d path types over %d corpus event instances" % (len(weights), sum(weights)))
    unweighted = rankcorr.rankcorr(fine_scores, coarse_scores, [1] * len(weights), method='gk')
    weighted = rankcorr.rankcorr(fine_scores, coarse_scores, weights, method='gk')
    print("raw rank corr = %.4f, weighted by corpus freq = %.4f" % (unweighted, weighted))
def rankcorr_purity_null():
    """Print 10000 Kendall rank correlations for random frame assignments.

    Reads the module-level ``scored_paths``, ``path2score``, ``path2count``
    and ``cPathFrame``.  On each round every scored path gets a uniformly
    random integer in ``[0, K)``, where ``K = cPathFrame.shape[1]``.  That
    random assignment is correlated against ``path2score`` with
    ``path2count`` as weights, and the resulting value is printed.
    """
    n_frames = cPathFrame.shape[1]
    for _ in xrange(10000):
        # One (gold, random prediction, weight) triple per path, built in
        # the same per-path order the three lists are consumed in.
        triples = [
            (path2score[path], random.randrange(n_frames), path2count[path])
            for path in scored_paths
        ]
        gold = [t[0] for t in triples]
        shuffled_pred = [t[1] for t in triples]
        weights = [t[2] for t in triples]
        tau = rankcorr.rankcorr(shuffled_pred, gold, weights, method='kendall')
        print(tau)
def rankcorr_purity():
    """Print the Kendall rank correlation between path scores and frame scales.

    Only works for scaled models (it needs the module-level ``frameScales``).

    For every frame ``k`` and every scored path with a positive entry in
    ``cPathFrame[path2num[path], k]``, one observation is recorded: the
    path's ``path2score`` value, the frame's ``frameScales[k]`` value, and
    the matrix entry itself as the weight.  The number of observations is
    printed, followed by the weighted correlation.
    """
    # NOTE(review): the original locals were called model_scores (holding
    # path2score values) and gold_scores (holding frameScales values), which
    # looks swapped; neutral names are used here.  Argument order to
    # rankcorr.rankcorr is unchanged.
    path_side = []
    frame_side = []
    weights = []
    for frame in range(cPathFrame.shape[1]):
        active = [
            path for path in scored_paths
            if cPathFrame[path2num[path], frame] > 0
        ]
        rows = [
            (path2score[path], frameScales[frame], cPathFrame[path2num[path], frame])
            for path in active
        ]
        path_side.extend(r[0] for r in rows)
        frame_side.extend(r[1] for r in rows)
        weights.extend(r[2] for r in rows)

    print(len(weights))
    tau = rankcorr.rankcorr(path_side, frame_side, weights, method='kendall')
    print("rankcorr = %.4f" % tau)