예제 #1
0
파일: l2r.py 프로젝트: plusbzz/cs276-pa4
def test(test_data_file, model, task):
  """Score a test feature file with a trained model and emit the ranking.

  Args:
    test_data_file: feature file to rank; forwarded to the feature
      extractors (and, for task 3, to the Pa3Utils generators).
    model: trained model. It is forwarded to pointwise_testing /
      pairwise_testing, or used directly through model.predict() in the
      fallback branch.
    task: integer selecting the pipeline:
      1 -- pointwise features + pointwise prediction
      2 -- pairwise features + pairwise prediction
      3 -- pairwise features extended with BM25F scores and window sizes
      4 -- extra credit; currently the same pipeline as task 1
      anything else -- a tiny hard-coded data set scored by model.predict()

  Returns:
    None. The ranking is built by DocUtils.getRankedQueries and handed to
    DocUtils.printRankedResults together with the name "ranked.txt".

  Note:
    Task 3 reads the names `corpus` and `extraFeaturesInfo`, which are not
    parameters and must exist outside this function. It also passes the
    names "bm25f_scores.txt" and "window_sizes.txt" to the Pa3Utils
    generators before loading them into `extraFeaturesInfo`.
  """
  # Progress messages go through sys.stderr.write throughout: the
  # `print >> sys.stderr, ...` statement form only works on Python 2.
  sys.stderr.write('\n## Testing with feature_file = %s ... \n' % (test_data_file))

  if task == 1:
    # Step (1): construct the test feature arrays
    (X, queries, index_map) = pointwise_test_features(test_data_file)

    # Step (2): predict
    y = pointwise_testing(X, model)

  elif task == 2:
    # Step (1): construct the test feature arrays
    (X, queries, index_map) = pairwise_test_features(test_data_file)

    # Step (2): predict
    y = pairwise_testing(X, model)

  elif task == 3:
    # Same bytes the old print statement produced: the text plus the
    # newline that `print` appended.
    sys.stderr.write("Task 3\n\n")

    # Generate the BM25F scores and window sizes for test_data_file, then
    # load both into the shared extra-features object.
    bm25f_scores_output_file = "bm25f_scores.txt"
    Pa3Utils.generateBM25FScoreFile(test_data_file, bm25f_scores_output_file, corpus)

    window_sizes_output_file = "window_sizes.txt"
    Pa3Utils.generateWindowSizesFile(test_data_file, window_sizes_output_file, corpus)

    extraFeaturesInfo.load(bm25f_scores_output_file, window_sizes_output_file)

    # Step (1): construct the test feature arrays (pairwise, with the
    # extra features attached)
    (X, queries, index_map) = pairwise_test_features(test_data_file, extraFeaturesInfo)

    # Step (2): predict
    y = pairwise_testing(X, model)

  elif task == 4:
    # Extra credit
    sys.stderr.write("Extra credit\n\n")

    # Step (1): construct the test feature arrays
    (X, queries, index_map) = pointwise_test_features(test_data_file)

    # Step (2): predict
    y = pointwise_testing(X, model)

  else:
    # Unknown task: score a two-row dummy data set.
    queries = ['query1', 'query2']
    index_map = {'query1' : {'url1':0}, 'query2': {'url2':1}}
    X = [[0.5, 0.5], [1.5, 1.5]]
    y = model.predict(X)

  # Step (3): build the per-query ranking and emit it. The destination
  # passed here is "ranked.txt" (presumably an output file name -- confirm
  # against DocUtils.printRankedResults); the result is scored by ndcg.py.
  rankedQueries = DocUtils.getRankedQueries(queries,index_map,y)
  DocUtils.printRankedResults(rankedQueries,"ranked.txt")
예제 #2
0
 def generateBM25FScoreFile(queryUrlFeaturesFile, bm25fScoreFile, corpus):
   """Compute BM25F-scored rankings for a feature file and print them.

   Reads the features with DocUtils.extractFeatures, installs a fixed set
   of BM25F parameters as attributes on QueryPageBM25F, ranks with
   Pa3Utils.bm25fRankQueries_withScores and passes the result to
   Pa3Utils.printResults together with `bm25fScoreFile`.

   Args:
     queryUrlFeaturesFile: feature file handed to DocUtils.extractFeatures.
     bm25fScoreFile: output name handed to Pa3Utils.printResults
       (e.g. "bm25f_scores.txt").
     corpus: forwarded unchanged to the ranking routine.

   Note:
     The parameters are assigned on QueryPageBM25F itself, so they remain
     set after this function returns.
   """
   # Only the feature map is used; the query list that comes with it is not.
   (_query_list, feature_map) = DocUtils.extractFeatures(queryUrlFeaturesFile)

   # Fixed BM25F parameters. The two list-valued entries are ordered as
   # [url, title, header, body, anchor].
   bm25f_settings = (
     ("bm25f_B",     [1.0,0.1,1.0,1.0,0.1]),
     ("bm25f_W",     [1.0,0.9,0.8,0.9,0.7]),
     ("K1",          1),
     ("lamd",        3.0),
     ("lamd_prime",  2.0),
     ("lamd_prime2", 1.0),
   )
   for setting_name, setting_value in bm25f_settings:
     setattr(QueryPageBM25F, setting_name, setting_value)

   QueryPageBM25F.Vf = Pa3Utils.v_logarithmic
   avg_field_lengths = Pa3Utils.features_avg_len(feature_map)

   scored_ranking = Pa3Utils.bm25fRankQueries_withScores(feature_map,avg_field_lengths,corpus)
   Pa3Utils.printResults(scored_ranking,bm25fScoreFile)
예제 #3
0
    def generateWindowSizesFile(queryUrlFeaturesFile, windowSizesFile, corpus):
      """Compute per-page window sizes for every query and print them.

      For each query in the feature file a Query object is built, and for
      each of its pages Pa3Utils.findSmallestWindow supplies a list of
      window sizes. Sizes equal to sys.maxsize (the "no window" marker used
      here) are written as 0. Each page contributes one string of the form
      "<page key> <size> <size> ...". The resulting {query: [strings]}
      mapping is passed to Pa3Utils.printResults together with
      `windowSizesFile`.

      Args:
        queryUrlFeaturesFile: feature file handed to DocUtils.extractFeatures.
        windowSizesFile: output name handed to Pa3Utils.printResults
          (e.g. "window_sizes.txt").
        corpus: accepted for signature parity with the other generators;
          not used by this function.
      """
      # Only the feature map is used; the query list that comes with it is not.
      (_, features) = DocUtils.extractFeatures(queryUrlFeaturesFile)

      INFINITE     = sys.maxsize
      window_sizes = {}
      for query in features:
        queryObject = Query(query,features[query])

        urls = []
        # items() rather than iteritems(): the latter exists only on
        # Python 2 dicts, while items() iterates the same pairs on 2 and 3.
        for pageStr, pageObject in queryObject.pages.items():
          # The first element (the smallest window) is not needed here.
          _, windowSizesList = Pa3Utils.findSmallestWindow(queryObject,pageObject)
          sizes_str = " ".join(str(0 if w == INFINITE else w) for w in windowSizesList)
          urls.append(pageStr + " " + sizes_str)

        window_sizes[query] = urls

      Pa3Utils.printResults(window_sizes,windowSizesFile)
예제 #4
0
파일: l2r.py 프로젝트: plusbzz/cs276-pa4
def pairwise_test_features(test_data_file, extraFeaturesInfo=None):
  """Build the pairwise test inputs for a feature file.

  Delegates to DocUtils.extractX_pairWise, passing along `corpus` (a name
  resolved outside this function) and the optional extra feature info.

  Returns:
    The three values produced by the extractor, as an
    (X, queries, index_map) tuple.
  """
  feature_rows, query_list, url_index = DocUtils.extractX_pairWise(
      test_data_file, corpus, extraFeaturesInfo)
  return (feature_rows, query_list, url_index)
예제 #5
0
파일: l2r.py 프로젝트: plusbzz/cs276-pa4
def pairwise_train_features(train_data_file, train_rel_file, extraFeaturesInfo=None):
  """Build the pairwise training inputs from a feature file and a relevance file.

  Delegates to DocUtils.extractXy_pairWise, passing along `corpus` (a name
  resolved outside this function) and the optional extra feature info.

  Returns:
    The two values produced by the extractor, as an (X, y) tuple.
  """
  feature_rows, labels = DocUtils.extractXy_pairWise(
      train_data_file, train_rel_file, corpus, extraFeaturesInfo)
  return (feature_rows, labels)