def irbo(topics, weight=0.9, topk=10):
    """Compute the inverted rank-biased overlap (a topic-diversity score).

    Parameters
    ----------
    topics : list of lists of words
        One word list per topic.
    weight : float, default 0.9
        RBO persistence ``p``; each agreement at depth ``d`` is weighted
        by ``p**(d-1)``.  With ``p == 1.0`` there is no weighting and RBO
        reduces to average overlap.
        (The original docstring claimed a default of 1.0, contradicting
        the signature; fixed to match the actual default of 0.9.)
    topk : int, default 10
        Number of top words per topic used in the comparison.

    Returns
    -------
    float
        ``1 - mean(pairwise RBO)`` over all topic pairs; higher means
        more diverse topics.

    Raises
    ------
    ValueError
        If ``topk`` exceeds the number of words in the first topic.
    """
    if topk > len(topics[0]):
        # ValueError is more precise than a bare Exception and is still
        # caught by any caller that handles Exception.
        raise ValueError('Words in topics are less than topk')
    collect = []
    for list1, list2 in combinations(topics, 2):
        # Map words to shared integer ids so rbo() compares ranked
        # positions rather than raw strings.
        word2index = get_word2index(list1, list2)
        indexed_list1 = [word2index[word] for word in list1]
        indexed_list2 = [word2index[word] for word in list2]
        # rbo(...)[2] is the extrapolated ("ext") RBO estimate.
        rbo_val = rbo(indexed_list1[:topk], indexed_list2[:topk], p=weight)[2]
        collect.append(rbo_val)
    return 1 - np.mean(collect)
def calc_rbo(a, b, p, count):
    """Return the extrapolated RBO of two rankings and a two-sided p-value.

    Parameters
    ----------
    a, b : rankings to compare (passed straight through to ``rbo``).
    p : RBO persistence parameter.
    count : number of ranked items; appears in the variance denominator,
        so it must be >= 2 to avoid division by zero.

    Returns
    -------
    (ext, p_value)
        The ``'ext'`` RBO estimate and its two-sided p-value under a
        normal approximation.
    """
    scores = rbo(a, b, p)
    # Only the extrapolated estimate is used.  The original code also
    # bound scores['min'] / scores['res'] — shadowing the builtin ``min``
    # — but never read them; those bindings are removed.
    ext = scores['ext']
    # NOTE(review): this variance term has the same form as the Kendall
    # tau significance test; its applicability to an RBO score is assumed
    # here — confirm with the original author.
    z = ext / sqrt((2 * (2 * count + 5)) / (9 * count * (count - 1)))
    p_value = scipy.stats.norm.sf(abs(z)) * 2
    return ext, p_value
def compute_rank_biased_overlap(ranking_A, ranking_B, rbo_parameter=0.99):
    """Print and return the RBO estimate alongside the average overlap.

    Parameters
    ----------
    ranking_A, ranking_B : the two rankings to compare.
    rbo_parameter : float, default 0.99
        Persistence parameter ``p`` forwarded to ``rbo``.

    Returns
    -------
    (rbo_estimate, reference_overlap)
        The extrapolated RBO point estimate and the plain average-overlap
        baseline for the same pair of rankings.
    """
    rbo_output = rbo(ranking_A, ranking_B, p=rbo_parameter)
    # Only the extrapolated point estimate is used; the original code also
    # bound rbo_output.min and rbo_output.res without ever reading them.
    rbo_estimate = rbo_output.ext
    reference_overlap = average_overlap(ranking_A, ranking_B)
    print('Rank-biased overlap estimate: {:.4f}'.format(rbo_estimate))
    print('Average overlap = {:.4f}'.format(reference_overlap))
    return rbo_estimate, reference_overlap
def main():
    """Compare two ranked-run files query by query using rank-biased overlap.

    Reads both runs from ``args.file1`` / ``args.file2``, computes RBO
    (min / res / ext) for every query the runs share, prints one CSV row
    per query plus a final ``mean`` summary row, and returns the
    per-query records.

    Returns
    -------
    list of dict
        One record per shared query with keys
        ``qno, intersection, union, p, min, res, ext``.
    """
    args = parse_args()
    p = args.p

    run1 = parse_run(args.file1.readlines())
    run2 = parse_run(args.file2.readlines())

    # Only queries present in both runs can be compared.
    queries = list(run1.keys() & run2.keys())
    try:
        # Prefer numeric ordering when every query id parses as an int.
        queries.sort(key=int)
    except ValueError:
        queries.sort()

    results = []
    print('qno,intersection,union,p,min,res,ext')
    for q in queries:
        # (Removed commented-out debug prints that were left in the loop.)
        result = rbo.rbo(run1[q], run2[q], p)
        overlap = len(set(run1[q]) & set(run2[q]))
        total = len(set(run1[q]) | set(run2[q]))
        results.append({
            'qno': q,
            'intersection': overlap,
            'union': total,
            'p': p,
            'min': result['min'],
            'res': result['res'],
            'ext': result['ext'],
        })
        print('{},{},{},{},{:f},{:f},{:f}'.format(
            q, overlap, total, p,
            result['min'], result['res'], result['ext']))

    # Summary row: mean of every numeric column across all queries.
    min_mean = np.mean([r['min'] for r in results])
    res_mean = np.mean([r['res'] for r in results])
    ext_mean = np.mean([r['ext'] for r in results])
    overlap_mean = np.mean([r['intersection'] for r in results])
    total_mean = np.mean([r['union'] for r in results])
    print('{},{},{},{},{:f},{:f},{:f}'.format(
        'mean', overlap_mean, total_mean, p, min_mean, res_mean, ext_mean))
    return results
# Load the four ranked gene-expression lists to compare.
# NOTE: ``max`` deliberately keeps its original name (it shadows the
# builtin) because later sections of the file reference it.
experiment = read_list(
    "/home/gentoo/src/similarity/test_strategies/ex_vs_ex_experiment/expression.csv",
    count)
mean = read_list(
    "/home/gentoo/src/similarity/test_strategies/ex_vs_ex_gene_mean/expression.csv",
    count)
max = read_list(
    "/home/gentoo/src/similarity/test_strategies/ex_vs_ex_gene_max/expression.csv",
    count)
abi = read_list2("ABI-correlation-Znfx1-69524632.csv", count)
print(experiment, mean, max, abi)

# Pairwise RBO comparisons at persistence p = 0.9; the two labels are
# printed before each score.
for label_a, label_b, ranking_a, ranking_b in (
        ("exp", "mean", experiment, mean),
        ("exp", "max", experiment, max),
        ("mean", "max", mean, max),
        ("ABI", "exp", abi, experiment),
):
    print(label_a, label_b)
    re = rbo(ranking_a, ranking_b, 0.9)
    print(re)

# Final comparison: the original code computes this result without
# printing it (the script may continue past this chunk), so that exact
# behavior is preserved here.
print("ABI", "mean")
re = rbo(abi, mean, 0.9)
    # (Continuation of a function — presumably ``getVector``, called below —
    # whose ``def`` line lies outside this chunk.  ``ranks`` and ``myMap``
    # are assumed to be built earlier in that function; TODO confirm
    # against the full file.)
    ranks.sort(reverse=True)  # highest rank values first
    vector = []
    # Fall back to "use everything" when topK is None/0 or exceeds the
    # number of available ranks.
    if not topK or topK > len(ranks):
        topK = len(ranks)
    i = 0
    while i < topK:
        rank = ranks[i]
        # Each output entry is the set of items sharing this rank value.
        vector.append(set(myMap[rank]))
        i += 1
    return vector


if __name__ == "__main__":
    # Usage: script file1 file2 [topK]; topK is optional.
    topK = None
    if len(sys.argv) == 4:
        topK = int(sys.argv[3])
    v1 = getVector(sys.argv[1], topK)
    v2 = getVector(sys.argv[2], topK)
    # Rank-biased overlap with persistence p = 0.8; only the extrapolated
    # point estimate ("ext") is reported.
    result = rbo(v1, v2, p=0.8)
    print(str(result["ext"]))
    # print("res " + str(result["res"]))
    # print("min " + str(result["min"]))
ind = max.index(elem) max_rank.append([mean_rank[ind][0], elem]) except ValueError: ind = -1 max_rank.append([1, elem]) score_max = [x[0] for x in max_rank] score_mean = [x[0] for x in mean_rank] print(score_max) print(score_mean) s, p = wilcoxon(score_max, score_mean, zero_method="pratt") spear print("wilcoxon") print(p) cor, p = weightedtau(score_max, score_mean) print("weighted tau") print(cor) #print(experiment,mean,max, abi) print("mean", "max") re = rbo(mean, max, 0.9) print(re) cor, p = spearmanr(mean, max) print(cor, p)
# Load the top-200 ABI correlation rankings for each probe/gene.
Mef2c_667 = read_list("ABI-correlation-Mef2c-667.csv", 200)
Znfx1 = read_list("ABI-correlation-Znfx1-69524632.csv", 200)
# Top-1 hit when Mef2c is queried; should rank very close to Mef2c
# itself, which makes it a useful comparison point.
Nlk = read_list("ABI-correlation-Nlk-76085742.csv", 200)
Mef2c_668 = read_list("ABI-correlation-Mef2c-668.csv", 200)
Stx1a = read_list("ABI-correlation-Stx1a-2645.csv", 200)
Mef2c_79567505 = read_list("ABI-correlation-Mef2c-79567505.csv", 200)

# Each comparison: print the pair of labels, then the RBO result
# (persistence p = 0.9).  Mef2c_667 vs itself is a sanity check.
comparisons = (
    ("Mef2c_667", "Mef2c_667", Mef2c_667, Mef2c_667),
    ("Mef2c_667", "Nlk", Mef2c_667, Nlk),
    ("Mef2c_667", "Mef2c_668", Mef2c_667, Mef2c_668),
    ("Mef2c_667", "Mef2c_79567505", Mef2c_667, Mef2c_79567505),
)
for name_a, name_b, ranking_a, ranking_b in comparisons:
    print(name_a, name_b)
    re = rbo(ranking_a, ranking_b, 0.9)
    print(re)