Ejemplo n.º 1
0
 def generateBM25FScoreFile(queryUrlFeaturesFile, bm25fScoreFile, corpus):
   """Rank the pages of every query with BM25F and write the scores out.

   Loads the query/URL features, sets the BM25F tuning attributes on
   ``QueryPageBM25F``, ranks through
   ``Pa3Utils.bm25fRankQueries_withScores`` and passes the result to
   ``Pa3Utils.printResults``.

   Args:
     queryUrlFeaturesFile: handed to ``DocUtils.extractFeatures``.
     bm25fScoreFile: output destination handed to ``Pa3Utils.printResults``
       (e.g. "bm25f_scores.txt").
     corpus: forwarded unchanged to the ranking routine.

   Side effects:
     Overwrites ``bm25f_B``, ``bm25f_W``, ``K1``, ``lamd``, ``lamd_prime``,
     ``lamd_prime2`` and ``Vf`` on ``QueryPageBM25F``; the new values stay
     in place after this function returns.
   """
   # Only the feature map is needed; the first element of the pair is ignored.
   _, feature_map = DocUtils.extractFeatures(queryUrlFeaturesFile)

   # Per-field lists follow the order [url, title, header, body, anchor].
   tuning = (
     ("bm25f_B",     [1.0, 0.1, 1.0, 1.0, 0.1]),
     ("bm25f_W",     [1.0, 0.9, 0.8, 0.9, 0.7]),
     ("K1",          1),
     ("lamd",        3.0),
     ("lamd_prime",  2.0),
     ("lamd_prime2", 1.0),
   )
   for attr_name, attr_value in tuning:
     setattr(QueryPageBM25F, attr_name, attr_value)
   QueryPageBM25F.Vf = Pa3Utils.v_logarithmic

   # The parameters are set before any Pa3Utils ranking helper runs,
   # matching the original statement order.
   avg_field_lengths = Pa3Utils.features_avg_len(feature_map)
   ranked = Pa3Utils.bm25fRankQueries_withScores(feature_map, avg_field_lengths, corpus)
   Pa3Utils.printResults(ranked, bm25fScoreFile)
Ejemplo n.º 2
0
    def generateWindowSizesFile(queryUrlFeaturesFile, windowSizesFile, corpus):
      """Write the window sizes of every (query, page) pair to a file.

      For each query in the extracted features, a ``Query`` object is built
      and ``Pa3Utils.findSmallestWindow`` is called for each of its pages.
      Each page becomes one string, ``"<page> <w1> <w2> ..."``, where any
      window size equal to ``sys.maxsize`` is written as ``0``. The
      per-query lists of strings are handed to ``Pa3Utils.printResults``.

      Args:
        queryUrlFeaturesFile: handed to ``DocUtils.extractFeatures``.
        windowSizesFile: output destination handed to
          ``Pa3Utils.printResults`` (e.g. "window_sizes.txt").
        corpus: not used by this function.
      """
      # Only the feature map is needed; the first element of the pair is ignored.
      _, features = DocUtils.extractFeatures(queryUrlFeaturesFile)

      # Value compared against to detect an "infinite" window size.
      INFINITE = sys.maxsize
      window_sizes = {}
      for query in features:
        queryObject = Query(query, features[query])

        urls = []
        # .items() instead of .iteritems(): the latter does not exist on
        # Python 3 dicts, while .items() iterates the same pairs on 2 and 3.
        # NOTE(review): assumes queryObject.pages is a dict-like mapping - confirm.
        for pageStr, pageObject in queryObject.pages.items():
          # The first returned value (smallest window) is not needed here.
          _, windowSizesList = Pa3Utils.findSmallestWindow(queryObject, pageObject)
          sizes_str = " ".join(str(0 if w == INFINITE else w) for w in windowSizesList)
          urls.append(pageStr + " " + sizes_str)

        window_sizes[query] = urls

      Pa3Utils.printResults(window_sizes, windowSizesFile)