예제 #1
0
	def prepare_rankers (self, A, y1, y2):
		"""Build two probabilistic rankers whose weight vectors are the
		projections A.y1 and A.y2 through the random matrix A."""
		rankers = []
		for vec in (y1, y2):
			# project the weight vector through A, then serialise it as the
			# comma-separated init string the ranker constructor expects
			projected = np.dot(A, vec.T).T
			init_string = ', '.join(map(str, np.squeeze(np.asarray(projected))))
			rankers.append(ranker.ProbabilisticRankingFunction('3', 'random', 64, init=init_string, sample='sample_unit_sphere'))
		return rankers[0], rankers[1]
예제 #2
0
def compareSystems(vali_queries,classifierPath,basic_ranker_path,clust_data_path,data,click):
    """Estimate how often the pickled baseline ranker beats per-query rankers.

    For `iterations` rounds: pick a random query, build a challenger ranker
    from the first weight vector stored for that query, probabilistically
    interleave it with the baseline, simulate clicks with the given click
    model string, and count a win for the baseline when it is inferred as
    the winner (ties are broken by a coin flip).  Prints the win rate.
    """
    
    print "-Loading Data-"
    # NOTE(review): pickle.load executes arbitrary code if these files are untrusted
    clf = pickle.load( open( classifierPath ) )
    basic_ranker=pickle.load( open( basic_ranker_path ) )
    clusterData=pickle.load(open(clust_data_path))
    queryData=pickle.load(open(data))
    
    # configuration for the challenger ranker rebuilt each iteration
    ranker_tie="random"
    feature_count=basic_ranker.feature_count
    ranker_args="3"
    arg_str=""
    sample_send="sample_unit_sphere"
    iterations=100
    
    # rankers[0] = baseline, rankers[1] = per-query challenger (set in the loop)
    rankers=[0]*2
    rankers[0]=basic_ranker
    
    
    user_model = environment.CascadeUserModel(click)
    training_queries = query.load_queries(vali_queries, feature_count)
    compar_interleave=ProbabilisticInterleave(None)

    first_win=0
    print "-Calculating-"
    
    for i in range(iterations):
        # progress indicator, printed every 10% of the iterations
        if i%(iterations/10)==0:
            print str(float(i)*100/float(iterations))+"%"
        q = training_queries.get_query(random.choice(queryData.query_ranker.keys()))
        
        # first stored weight vector for this query; strip the list brackets
        # so it can be passed to the ranker as a comma-separated init string
        test=queryData.query_ranker[q.get_qid()][0]
        testWeights=str(test)
        testWeights=testWeights.replace("[", "")
        testWeights=testWeights.replace("]", "")
        weights = np.array([float(num) for num in testWeights.split(",")])
        print len(weights)
        ranker_tie="random"
        ranker_args="3"
        sample_send="sample_unit_sphere"

        rankers[1]=rankerClass.ProbabilisticRankingFunction(ranker_args,
                                                ranker_tie,
                                                feature_count,
                                                sample=sample_send,
                                                init=testWeights)
        
        
        # interleave the two rankers, simulate clicks, infer the winner:
        # o < 0 means the first (baseline) ranker won; o == 0 -> coin flip
        l, a = compar_interleave.interleave(rankers[0], rankers[1], q, 10)
        c = user_model.get_clicks(l, q.get_labels())
        o = compar_interleave.infer_outcome(l, a, c, q)
        if(o<0):
            first_win+=1
        elif(o==0):
            coin=random.random()
            if(coin>0.5):
                first_win+=1
    result_com=float(first_win)/float(iterations)
    print "Basic ranker win rate:"+ str(result_com)
예제 #3
0
	def prepare_ranker (self, A, y):
		"""Build a probabilistic ranker whose weight vector is the projection A.y."""
		# project the weight vector through the random matrix A and serialise
		# it as the comma-separated init string the ranker constructor expects
		projected = np.dot(A, y.T).T
		init_string = ', '.join(map(str, np.squeeze(np.asarray(projected))))
		return ranker.ProbabilisticRankingFunction('3', 'random', 64, init=init_string, sample='sample_unit_sphere')
예제 #4
0
 def list_distance(self, a, b, query):
     """Return a Kendall's-tau based distance in [0, 1] between the rankings
     that weight vectors a and b induce for the given query id
     (0 = identical ordering, 1 = completely reversed)."""
     # serialise each weight vector as a comma-separated string (no brackets)
     weight_strings = []
     for vec in (a, b):
         weight_strings.append(str(vec.tolist()).replace('[', '').replace(']', ''))
     rankerA = ranker.ProbabilisticRankingFunction(['3'], "random", 64, weight_strings[0],
                                                   "sample_unit_sphere")
     rankerB = ranker.ProbabilisticRankingFunction(['3'], "random", 64, weight_strings[1],
                                                   "sample_unit_sphere")
     query = self.training_queries.get_query(query)
     # rank the query's documents with both rankers and collect doc-id strings
     doc_lists = []
     for r in (rankerA, rankerB):
         r.init_ranking(query)
         doc_lists.append([str(doc.docid) for doc in r.getDocs()])
     tau, p_value = scipy.stats.kendalltau(doc_lists[0], doc_lists[1])
     # tau is agreement in [-1, 1]; rescale to [0, 1] and invert so the
     # returned value is a distance rather than an agreement score
     return 1 - (tau + 1) / 2
예제 #5
0
def getRanker(clf, basic_ranker,query,clusterData):
        """Select a ranker for *query* by majority vote over predicted clusters.

        The baseline ranker ranks the query's documents; the classifier `clf`
        predicts a cluster label for each of the top documents, and the
        cluster receiving the most votes supplies the weight vector for the
        returned ProbabilisticRankingFunction.
        """
        # renamed from `max`, which shadowed the builtin
        max_docs=100
        basic_ranker.init_ranking(query)
        docIds=basic_ranker.get_ranking()
        results={}
        # tally predicted cluster labels for the top documents
        # (was `if i > max`, an off-by-one that classified 101 documents)
        for i, docId in enumerate(docIds):
            if i>=max_docs:
                break
            features=query.get_feature_vector(docId)
            y=clf.predict(features)[0]
            results[y]=results.get(y, 0)+1
            
        # cluster with the most votes; first-seen label wins ties, and 0 is
        # the fallback when no documents were classified (as before)
        found_max=0
        arg_max=0
        for k in results:
            if results[k]>found_max:
                found_max=results[k]
                arg_max=k
                
        rankerVec=clusterData.clusterToRanker[arg_max][0]
        
        # strip the list brackets so the weights form a comma-separated
        # init string for the ranker constructor
        testWeights=str(rankerVec)
        testWeights=testWeights.replace("[", "")
        testWeights=testWeights.replace("]", "")
        
        resultRanker=ranker.ProbabilisticRankingFunction("3",
                                                "random",
                                                len(rankerVec),
                                                sample="sample_unit_sphere",
                                                init=testWeights)
        
        return resultRanker
예제 #6
0
def perform():
    # init
    test_num_features = 64
    queries = query.load_queries('../../data/NP2004/Fold1/test.txt', 64)
    bi = comparison.BalancedInterleave()
    user_model = environment.CascadeUserModel(
        '--p_click 0:0.0,1:1 --p_stop 0:0.0,1:0.0')

    # make rankers
    rankers = []
    for i in range(0, 5):
        rankers.append(
            ranker.ProbabilisticRankingFunction(
                '3',
                'random',
                64,
                init=parseRanker('../../data/features64/ranker-0' + str(i) +
                                 '.txt'),
                sample='sample_unit_sphere'))

    # main loop
    for N in [100, 1000, 10000]:
        pref_matrix = [[0 for x in xrange(5)] for x in xrange(5)]
        for iter in range(0, N):
            q = queries[random.choice(queries.keys())]
            for i in range(0, 5):
                for j in range(0, 5):
                    if i != j:
                        list, context = bi.interleave(rankers[i], rankers[j],
                                                      q, 10)
                        clicks = user_model.get_clicks(list, q.get_labels())
                        result = bi.infer_outcome(list, context, clicks, q)
                        if result < 0:
                            pref_matrix[i][j] += 1
                    else:
                        pref_matrix[i][j] = 0.50
        pref_matrix = generateProbabilityMatrix(pref_matrix, N)
        printMatrix(pref_matrix)
        print 'Best ranker is ' + '0' + str(
            getBestRanker(pref_matrix)) + ' (N = ' + str(N) + ').'
    print 'done!'
예제 #7
0
def compareSystemsHist(vali_queries,classifierPath,basic_ranker_path,clust_data_path,data,click):
    """Plot a histogram of per-query ranker win rates against the baseline.

    For every query and every weight vector stored for it, build a challenger
    ranker, run `iterations` rounds of probabilistic interleaving against the
    pickled baseline ranker with simulated clicks, record the challenger's
    win rate (ties broken by a coin flip), and show the win-rate histogram.
    """
    
    print "-Loading Data-"
    # NOTE(review): pickle.load executes arbitrary code if these files are untrusted
    clf = pickle.load( open( classifierPath ) )
    basic_ranker=pickle.load( open( basic_ranker_path ) )
    clusterData=pickle.load(open(clust_data_path))
    queryData=pickle.load(open(data))
    
    # configuration for the challenger ranker rebuilt for each weight vector
    ranker_tie="random"
    feature_count=basic_ranker.feature_count
    ranker_args="3"
    arg_str=""
    sample_send="sample_unit_sphere"
    iterations=100
    
    # rankers[0] = baseline, rankers[1] = per-query challenger (set in the loop)
    rankers=[0]*2
    rankers[0]=basic_ranker
    
    
    user_model = environment.CascadeUserModel(click)
    training_queries = query.load_queries(vali_queries, feature_count)
    compar_interleave=ProbabilisticInterleave(None)

    print "-Calculating-"
    
    ii=0


    results=[]
    for qid in queryData.query_ranker.keys():
        # progress indicator: percentage of queries processed so far
        print str(float(ii)*100/float(len(queryData.query_ranker.keys())))+"%"
        ii+=1
        q=training_queries.get_query(qid)
        for val in queryData.query_ranker[qid]:
            test=val
            #test=queryData.query_ranker[q][0]
            # strip the list brackets so the weights form a comma-separated
            # init string for the ranker constructor
            testWeights=str(test.tolist())
            testWeights=testWeights.replace("[", "")
            testWeights=testWeights.replace("]", "")
            #weights = np.array([float(num) for num in testWeights.split(",")])
            #print len(weights)
            ranker_tie="random"
            ranker_args="3"
            sample_send="sample_unit_sphere"
    
            rankers[1]=rankerClass.ProbabilisticRankingFunction(ranker_args,
                                                    ranker_tie,
                                                    feature_count,
                                                    sample=sample_send,
                                                    init=testWeights)
           
            # interleave, simulate clicks, infer the winner:
            # o > 0 means the second (challenger) ranker won; ties -> coin flip
            second_win=0
            for i in range(iterations):
                #q = training_queries.get_query(random.choice(training_queries.keys()))          
                l, a = compar_interleave.interleave(rankers[0], rankers[1], q, 10)
                c = user_model.get_clicks(l, q.get_labels())
                o = compar_interleave.infer_outcome(l, a, c, q)
                if(o>0):
                    second_win+=1
                elif(o==0):
                    coin=random.random()
                    if(coin>0.5):
                        second_win+=1
            result_com=float(second_win)/float(iterations)
            results.append(result_com)

    # histogram of challenger win rates across all stored weight vectors
    g=P.hist(results, bins = 20,range=[0,1])
    P.xlabel("The win rate of the ranker",fontsize=20)
    P.ylabel("Number of rankers",fontsize=20)
    P.show(g)