def prepare_rankers(self, A, y1, y2):
    """Project weight vectors y1 and y2 through random matrix A and wrap
    each projection in a ProbabilisticRankingFunction.

    Returns a pair (r1, r2) of rankers built from A.y1 and A.y2.
    """
    # (A y^T)^T for each vector, flattened into the comma-separated
    # init string the ranker constructor expects.
    projections = [np.dot(A, vec.T).T for vec in (y1, y2)]
    init_strings = [', '.join(map(str, np.squeeze(np.asarray(p))))
                    for p in projections]
    r1, r2 = [ranker.ProbabilisticRankingFunction(
                  '3', 'random', 64, init=s, sample='sample_unit_sphere')
              for s in init_strings]
    return r1, r2
def compareSystems(vali_queries,classifierPath,basic_ranker_path,clust_data_path,data,click): print "-Loading Data-" clf = pickle.load( open( classifierPath ) ) basic_ranker=pickle.load( open( basic_ranker_path ) ) clusterData=pickle.load(open(clust_data_path)) queryData=pickle.load(open(data)) ranker_tie="random" feature_count=basic_ranker.feature_count ranker_args="3" arg_str="" sample_send="sample_unit_sphere" iterations=100 rankers=[0]*2 rankers[0]=basic_ranker user_model = environment.CascadeUserModel(click) training_queries = query.load_queries(vali_queries, feature_count) compar_interleave=ProbabilisticInterleave(None) first_win=0 print "-Calculating-" for i in range(iterations): if i%(iterations/10)==0: print str(float(i)*100/float(iterations))+"%" q = training_queries.get_query(random.choice(queryData.query_ranker.keys())) test=queryData.query_ranker[q.get_qid()][0] testWeights=str(test) testWeights=testWeights.replace("[", "") testWeights=testWeights.replace("]", "") weights = np.array([float(num) for num in testWeights.split(",")]) print len(weights) ranker_tie="random" ranker_args="3" sample_send="sample_unit_sphere" rankers[1]=rankerClass.ProbabilisticRankingFunction(ranker_args, ranker_tie, feature_count, sample=sample_send, init=testWeights) l, a = compar_interleave.interleave(rankers[0], rankers[1], q, 10) c = user_model.get_clicks(l, q.get_labels()) o = compar_interleave.infer_outcome(l, a, c, q) if(o<0): first_win+=1 elif(o==0): coin=random.random() if(coin>0.5): first_win+=1 result_com=float(first_win)/float(iterations) print "Basic ranker win rate:"+ str(result_com)
def prepare_ranker(self, A, y):
    """Project weight vector y through random matrix A and wrap the
    result in a ProbabilisticRankingFunction."""
    # (A y^T)^T, flattened to the comma-separated init string the
    # ranker constructor expects.
    projected = np.dot(A, y.T).T
    init_string = ', '.join(str(v) for v in np.squeeze(np.asarray(projected)))
    return ranker.ProbabilisticRankingFunction(
        '3', 'random', 64, init=init_string, sample='sample_unit_sphere')
def list_distance(self, a, b, query):
    """Kendall-tau distance in [0, 1] between the document orderings
    that weight vectors a and b induce on `query`.

    0 means identical orderings, 1 means maximal disagreement.
    """
    def _as_init(weights):
        # "x, y, z" init string expected by the ranker constructor.
        return str(weights.tolist()).replace('[', '').replace(']', '')

    first = ranker.ProbabilisticRankingFunction(
        ['3'], "random", 64, _as_init(a), "sample_unit_sphere")
    second = ranker.ProbabilisticRankingFunction(
        ['3'], "random", 64, _as_init(b), "sample_unit_sphere")

    query = self.training_queries.get_query(query)
    first.init_ranking(query)
    second.init_ranking(query)

    ids_first = [str(doc.docid) for doc in first.getDocs()]
    ids_second = [str(doc.docid) for doc in second.getDocs()]

    tau, _ = scipy.stats.kendalltau(ids_first, ids_second)
    # kendalltau lies in [-1, 1] (1 = strong agreement); rescale to
    # [0, 1] and invert so the result is a distance, not an agreement.
    tau = (tau + 1) / 2
    return 1 - tau
def getRanker(clf, basic_ranker, query, clusterData):
    """Pick the ranker whose cluster best matches the classifier's
    predictions on the top documents of `query`.

    The classifier `clf` votes on at most the first 100 documents of the
    basic ranker's ranking; the cluster label with the most votes selects
    a weight vector from clusterData.clusterToRanker, which is wrapped in
    a new ProbabilisticRankingFunction and returned.
    """
    max_docs = 100  # was `max`, which shadowed the builtin
    basic_ranker.init_ranking(query)
    doc_ids = basic_ranker.get_ranking()

    # Tally predicted cluster labels over the top documents.
    # BUG FIX: the original checked `if i > max` before incrementing and
    # therefore scanned 101 documents; cap at exactly max_docs.
    votes = {}
    for i, doc_id in enumerate(doc_ids):
        if i >= max_docs:
            break
        features = query.get_feature_vector(doc_id)
        label = clf.predict(features)[0]
        votes[label] = votes.get(label, 0) + 1

    # Majority cluster (arbitrary dict order breaks ties, as before).
    best_label = 0
    best_count = 0
    for label, count in votes.items():
        if count > best_count:
            best_count = count
            best_label = label

    ranker_vec = clusterData.clusterToRanker[best_label][0]
    # Comma-separated init string expected by the ranker constructor.
    init_weights = str(ranker_vec).replace("[", "").replace("]", "")
    return ranker.ProbabilisticRankingFunction(
        "3", "random", len(ranker_vec),
        sample="sample_unit_sphere", init=init_weights)
def perform(): # init test_num_features = 64 queries = query.load_queries('../../data/NP2004/Fold1/test.txt', 64) bi = comparison.BalancedInterleave() user_model = environment.CascadeUserModel( '--p_click 0:0.0,1:1 --p_stop 0:0.0,1:0.0') # make rankers rankers = [] for i in range(0, 5): rankers.append( ranker.ProbabilisticRankingFunction( '3', 'random', 64, init=parseRanker('../../data/features64/ranker-0' + str(i) + '.txt'), sample='sample_unit_sphere')) # main loop for N in [100, 1000, 10000]: pref_matrix = [[0 for x in xrange(5)] for x in xrange(5)] for iter in range(0, N): q = queries[random.choice(queries.keys())] for i in range(0, 5): for j in range(0, 5): if i != j: list, context = bi.interleave(rankers[i], rankers[j], q, 10) clicks = user_model.get_clicks(list, q.get_labels()) result = bi.infer_outcome(list, context, clicks, q) if result < 0: pref_matrix[i][j] += 1 else: pref_matrix[i][j] = 0.50 pref_matrix = generateProbabilityMatrix(pref_matrix, N) printMatrix(pref_matrix) print 'Best ranker is ' + '0' + str( getBestRanker(pref_matrix)) + ' (N = ' + str(N) + ').' print 'done!'
def compareSystemsHist(vali_queries, classifierPath, basic_ranker_path,
                       clust_data_path, data, click):
    """Histogram the win rates of every stored per-query ranker against
    the pickled basic ranker.

    For each query id in queryData.query_ranker and each weight vector
    stored for it, runs `iterations` probabilistic-interleaving rounds
    with simulated cascade-model clicks, records the per-query ranker's
    win rate (coin-flip on ties), then plots all win rates as a
    20-bin histogram via pylab (`P`).

    All path arguments name pickle files; `vali_queries` is a query file
    loadable by query.load_queries; `click` configures CascadeUserModel.
    """
    print "-Loading Data-"
    # NOTE(review): clf and clusterData are loaded but never used in this
    # function — presumably kept for symmetry with compareSystems; verify.
    clf = pickle.load( open( classifierPath ) )
    basic_ranker=pickle.load( open( basic_ranker_path ) )
    clusterData=pickle.load(open(clust_data_path))
    queryData=pickle.load(open(data))
    ranker_tie="random"
    feature_count=basic_ranker.feature_count
    ranker_args="3"
    arg_str=""
    sample_send="sample_unit_sphere"
    iterations=100
    # rankers[0] = baseline, rankers[1] = per-query challenger (set below)
    rankers=[0]*2
    rankers[0]=basic_ranker
    user_model = environment.CascadeUserModel(click)
    training_queries = query.load_queries(vali_queries, feature_count)
    compar_interleave=ProbabilisticInterleave(None)
    print "-Calculating-"
    ii=0
    results=[]
    for qid in queryData.query_ranker.keys():
        # progress: percentage of query ids processed so far
        print str(float(ii)*100/float(len(queryData.query_ranker.keys())))+"%"
        ii+=1
        q=training_queries.get_query(qid)
        for val in queryData.query_ranker[qid]:
            test=val
            #test=queryData.query_ranker[q][0]
            # build the comma-separated init string the ranker expects
            # (tolist() so the numpy array stringifies with commas)
            testWeights=str(test.tolist())
            testWeights=testWeights.replace("[", "")
            testWeights=testWeights.replace("]", "")
            #weights = np.array([float(num) for num in testWeights.split(",")])
            #print len(weights)
            ranker_tie="random"
            ranker_args="3"
            sample_send="sample_unit_sphere"
            rankers[1]=rankerClass.ProbabilisticRankingFunction(ranker_args, ranker_tie, feature_count, sample=sample_send, init=testWeights)
            second_win=0
            # repeated interleaving rounds on the same query; o > 0 means
            # the per-query ranker (rankers[1]) won the round
            for i in range(iterations):
                #q = training_queries.get_query(random.choice(training_queries.keys()))
                l, a = compar_interleave.interleave(rankers[0], rankers[1], q, 10)
                c = user_model.get_clicks(l, q.get_labels())
                o = compar_interleave.infer_outcome(l, a, c, q)
                if(o>0):
                    second_win+=1
                elif(o==0):
                    # fair coin breaks exact ties
                    coin=random.random()
                    if(coin>0.5):
                        second_win+=1
            # fraction of rounds won by this per-query ranker
            result_com=float(second_win)/float(iterations)
            results.append(result_com)
    # histogram of all per-ranker win rates
    g=P.hist(results, bins = 20,range=[0,1])
    P.xlabel("The win rate of the ranker",fontsize=20)
    P.ylabel("Number of rankers",fontsize=20)
    P.show(g)