def __init__(self):
    """Load the engine configuration, build the reverse index from the
    CACM collection, then start the interactive engine.
    """
    self.config = Config_loader('config.ini').load_config()
    index_settings = self.config['Reverse_index']
    builder = Reverse_index_builder(
        ponderation_method=index_settings['ponderation'],
        index_type=index_settings['index_type'],
        save_folder_path=index_settings['save_folder_path'])
    self.reverse_index = builder.create_reverse_index(
        'sources/cacm.all', 'sources/common_words')
    # NOTE(review): '_lauch_engine' looks like a typo for '_launch_engine',
    # but it is the method's actual name elsewhere in this class.
    self._lauch_engine()
def __init__(self):
    """Read 'config.ini', construct the reverse index and launch the engine.

    NOTE(review): this definition duplicates the preceding __init__ almost
    byte-for-byte; only the last one bound on the class takes effect.
    Consider removing one copy.
    """
    loader = Config_loader('config.ini')
    self.config = loader.load_config()
    reverse_index_section = self.config['Reverse_index']
    reverse_index_builder = Reverse_index_builder(
        ponderation_method=reverse_index_section['ponderation'],
        index_type=reverse_index_section['index_type'],
        save_folder_path=reverse_index_section['save_folder_path'])
    self.reverse_index = reverse_index_builder.create_reverse_index(
        'sources/cacm.all', 'sources/common_words')
    self._lauch_engine()
def run_testing(self): print 'Launching tests!' print 'Loading documents...', reverse_index_builder = Reverse_index_builder( ponderation_method=self.config['Reverse_index']['ponderation'], index_type=self.config['Reverse_index']['index_type'], save_folder_path=self.config['Reverse_index']['save_folder_path'] ) reverse_index = reverse_index_builder.create_reverse_index('sources/cacm.all', 'sources/common_words') print ' Done' print 'Loading test data...', # {query: [answer1, answer2...]} self.query_answer = self._parse_queries_answers(self.queries_filename, self.answers_filename) print ' Done' print 'Initializing variables...', time_parsing_queries = 0. time_doing_researches = 0. precision = [] recall = [] r_measure = [] f_measure = [] average_precision = [] if self.config['Research_engine']['type'] == 'vectorial': search_engine = Vectorial_search(reverse_index, self.similarity_method) elif self.config['Research_engine']['type'] == 'boolean': search_engine = Boolean_search(reverse_index, self.p_norm, self.default_similarity) elif self.config['Research_engine']['type'] == 'probabilistic': search_engine = Probabilistic_search(reverse_index, self.rsv_relevant_method) query_processor = Process_query(stop_list_filename='sources/cacm.all', format_type=self.config['Research_engine']['type']) print ' Done' t0 = time.time() print 'Let\'s get to it! 
(this may take 5-10 seconds)' for query in self.query_answer: expected_answers = self.query_answer[query] t_init = time.time() processed_query = query_processor.format_query(query) t_parse = time.time() time_parsing_queries += t_parse - t_init answers_with_score = search_engine.do_search(processed_query) answers = map(lambda (x, y): x, answers_with_score) t_query = time.time() time_doing_researches += t_query - t_parse precision.append(self._compute_precision(answers, expected_answers)) recall.append(self._compute_recall(answers, expected_answers)) r_measure.append(self._compute_r_measure(answers, expected_answers)) f_measure.append(self._compute_f_measure(precision[-1], recall[-1])) average_precision.append(self._compute_average_precision(answers, expected_answers)) number_of_tests = float(len(self.query_answer)) print 'Number of queries tested:', int(number_of_tests), 'in', round(time.time() - t0, 2), 'seconds' print 'Average time spent on query processing:', time_parsing_queries / number_of_tests, 'seconds', print ', doing the research:', time_doing_researches / number_of_tests, 'seconds' print 'Average time spent on a query (total):', (time_doing_researches + time_parsing_queries) / number_of_tests, 'seconds' print """ ################################### # PERFORMANCE MEASURES # ###################################""" print 'Max Precision:', max(precision), 'average:', reduce(lambda x, y: x + y, precision) / float(len(precision)) print 'Max Recall:', max(recall), 'average:', reduce(lambda x, y: x + y, recall) / float(len(recall)) print 'Max F-measure', max(f_measure), 'average:', reduce(lambda x, y: x + y, f_measure) / float(len(f_measure)) print 'Min E-measure', 1 - max(f_measure), 'average:', 1 - reduce(lambda x, y: (x + y), f_measure) / float(len(f_measure)) print 'Max R-measure', max(r_measure), 'average:', reduce(lambda x, y: x + y, r_measure) / float(len(r_measure)) print 'Mean Average Precision (MAP)', reduce(lambda x, y: x + y, average_precision) / 
float(len(average_precision))