def print_statistics(self, methods):
    """Print collection statistics and derived parameters for one-term queries.

    Written to standard output, in order:

    1. the value of ``CollectionStats.get_avdl()``;
    2. the mean, over all queries of length 1, of
       ``get_term_collection_occur(title) / get_total_terms()``;
    3. for every entry returned by
       ``Performances.gen_optimal_performances_queries``: its label and,
       when the label contains ``'okapi'`` or ``'pivoted'``, the parsed
       parameter together with two values derived from it.

    Args:
        methods: forwarded unchanged to
            ``gen_optimal_performances_queries``. Presumably the ranking
            methods to report on -- confirm against that method.

    Returns:
        None.
    """
    single_queries = Query(self.collection_path).get_queries_of_length(1)
    # Query number -> query title.
    queries = {ele['num']: ele['title'] for ele in single_queries}
    cs = CollectionStats(self.collection_path)
    performance = Performances(self.collection_path)
    res = performance.gen_optimal_performances_queries(methods, queries.keys())
    avdl = cs.get_avdl()
    total_terms = cs.get_total_terms()

    # ``* 1.0`` forces true division even if both counts are integers.
    collection_freq = [
        cs.get_term_collection_occur(title) * 1.0 / total_terms
        for title in queries.values()
    ]

    # Every print takes exactly one argument so that it produces the same
    # output as a print statement and as the print function.
    print(avdl)
    print(np.mean(collection_freq))

    for ele in res:
        label = ele[0]
        # ele[2] is split on ':' and the second piece is the parameter value.
        para = float(ele[2].split(':')[1])
        print(label)
        # Two independent checks: a label matching both substrings prints both.
        if 'okapi' in label:
            print('b: %s beta: %s c2: %s'
                  % (para, 1.2 * para / avdl, 1.2 * (1 - para)))
        if 'pivoted' in label:
            print('s: %s beta: %s c2: %s' % (para, para / avdl, 1 - para))
def print_best_performances(self, methods=None):
    """Print the result of the optimal-performance lookup for one-term queries.

    Collects the queries of length 1 for ``self.collection_path``, passes
    their numbers together with ``methods`` to
    ``Performances.gen_optimal_performances_queries`` and prints whatever
    that call returns.

    Args:
        methods: forwarded to ``gen_optimal_performances_queries``.
            Defaults to a fresh empty list on every call. Presumably the
            ranking methods to report on -- confirm against that method.

    Returns:
        None.
    """
    # A new list per call: a shared default list would leak any mutation
    # made by the callee into later calls.
    if methods is None:
        methods = []
    single_queries = Query(self.collection_path).get_queries_of_length(1)
    # Query number -> query title; only the numbers are used below.
    queries = {ele['num']: ele['title'] for ele in single_queries}
    performance = Performances(self.collection_path)
    res = performance.gen_optimal_performances_queries(methods, queries.keys())
    # Single-argument form behaves the same as a print statement.
    print(res)