Example #1
    def __init__(self):
        # Load settings from config.ini.
        config_loader = Config_loader('config.ini')
        self.config = config_loader.load_config()

        # Build the inverted index over the CACM collection, using the
        # weighting scheme, index type and save location chosen in the config.
        reverse_index_builder = Reverse_index_builder(
            ponderation_method=self.config['Reverse_index']['ponderation'],
            index_type=self.config['Reverse_index']['index_type'],
            save_folder_path=self.config['Reverse_index']['save_folder_path'])
        self.reverse_index = reverse_index_builder.create_reverse_index(
            'sources/cacm.all', 'sources/common_words')

        self._lauch_engine()
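Both the constructor above and the test runner below read their settings from config.ini, which is not shown on this page. A minimal sketch of that file, covering only the keys the code actually reads (every value here is a placeholder, not taken from the project):

    [Reverse_index]
    ponderation = tf_idf
    index_type = simple
    save_folder_path = indexes/

    [Research_engine]
    type = vectorial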
Example #2
    def run_testing(self):
        """Run every test query against the index and report timing and IR measures."""
        print 'Launching tests!'
        print 'Loading documents...',

        # Rebuild the inverted index from the CACM collection and its stop list.
        reverse_index_builder = Reverse_index_builder(
            ponderation_method=self.config['Reverse_index']['ponderation'],
            index_type=self.config['Reverse_index']['index_type'],
            save_folder_path=self.config['Reverse_index']['save_folder_path']
        )

        reverse_index = reverse_index_builder.create_reverse_index('sources/cacm.all', 'sources/common_words')
        print ' Done'

        print 'Loading test data...',
        # Maps each test query to the list of document ids it should return:
        # {query: [answer1, answer2, ...]}
        self.query_answer = self._parse_queries_answers(self.queries_filename, self.answers_filename)
        print ' Done'

        print 'Initializing variables...',
        time_parsing_queries = 0.
        time_doing_researches = 0.
        # One entry per query; averaged at the end.
        precision = []
        recall = []
        r_measure = []
        f_measure = []
        average_precision = []

        # Instantiate the search engine selected in the configuration.
        if self.config['Research_engine']['type'] == 'vectorial':
            search_engine = Vectorial_search(reverse_index, self.similarity_method)
        elif self.config['Research_engine']['type'] == 'boolean':
            search_engine = Boolean_search(reverse_index, self.p_norm, self.default_similarity)
        elif self.config['Research_engine']['type'] == 'probabilistic':
            search_engine = Probabilistic_search(reverse_index, self.rsv_relevant_method)
        else:
            raise ValueError('Unknown engine type: %s' % self.config['Research_engine']['type'])
        # The stop list is sources/common_words; cacm.all is the document collection.
        query_processor = Process_query(stop_list_filename='sources/common_words', format_type=self.config['Research_engine']['type'])

        print ' Done'

        t0 = time.time()
        print 'Let\'s get to it! (this may take 5-10 seconds)'
        for query in self.query_answer:
            expected_answers = self.query_answer[query]

            t_init = time.time()
            processed_query = query_processor.format_query(query)
            t_parse = time.time()
            time_parsing_queries += t_parse - t_init

            answers_with_score = search_engine.do_search(processed_query)
            # Keep only the document ids, dropping the relevance scores.
            answers = [document_id for document_id, score in answers_with_score]

            t_query = time.time()
            time_doing_researches += t_query - t_parse

            precision.append(self._compute_precision(answers, expected_answers))
            recall.append(self._compute_recall(answers, expected_answers))
            r_measure.append(self._compute_r_measure(answers, expected_answers))
            f_measure.append(self._compute_f_measure(precision[-1], recall[-1]))
            average_precision.append(self._compute_average_precision(answers, expected_answers))

        number_of_tests = float(len(self.query_answer))
        print 'Number of queries tested:', int(number_of_tests), 'in', round(time.time() - t0, 2), 'seconds'
        print 'Average time spent on query processing:', time_parsing_queries / number_of_tests, 'seconds,',
        print 'doing the research:', time_doing_researches / number_of_tests, 'seconds'
        print 'Average time spent on a query (total):', (time_doing_researches + time_parsing_queries) / number_of_tests, 'seconds'
        print """
###################################
#      PERFORMANCE MEASURES       #
###################################"""
        print 'Max Precision:', max(precision), 'average:', sum(precision) / float(len(precision))
        print 'Max Recall:', max(recall), 'average:', sum(recall) / float(len(recall))
        print 'Max F-measure:', max(f_measure), 'average:', sum(f_measure) / float(len(f_measure))
        print 'Min E-measure:', 1 - max(f_measure), 'average:', 1 - sum(f_measure) / float(len(f_measure))
        print 'Max R-measure:', max(r_measure), 'average:', sum(r_measure) / float(len(r_measure))
        print 'Mean Average Precision (MAP):', sum(average_precision) / float(len(average_precision))
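The measure helpers called in the loop (_compute_precision, _compute_recall, _compute_r_measure, _compute_f_measure, _compute_average_precision) are not shown on this page. A sketch of what they plausibly compute, assuming the standard IR definitions rather than this project's actual code:

    # Sketch only: standard IR definitions, not code from this project.
    def _compute_precision(self, answers, expected_answers):
        # Fraction of returned documents that are relevant.
        if not answers:
            return 0.
        relevant = set(expected_answers)
        return len([a for a in answers if a in relevant]) / float(len(answers))

    def _compute_recall(self, answers, expected_answers):
        # Fraction of relevant documents that were returned.
        if not expected_answers:
            return 0.
        relevant = set(expected_answers)
        return len([a for a in answers if a in relevant]) / float(len(relevant))

    def _compute_r_measure(self, answers, expected_answers):
        # Precision over the first R results, R being the number of relevant documents.
        return self._compute_precision(answers[:len(expected_answers)], expected_answers)

    def _compute_f_measure(self, precision, recall):
        # Harmonic mean of precision and recall (F1).
        if precision + recall == 0:
            return 0.
        return 2. * precision * recall / (precision + recall)

    def _compute_average_precision(self, answers, expected_answers):
        # Mean of the precision values taken at each relevant document's rank.
        relevant = set(expected_answers)
        if not relevant:
            return 0.
        hits, precisions = 0, []
        for rank, answer in enumerate(answers, start=1):
            if answer in relevant:
                hits += 1
                precisions.append(hits / float(rank))
        return sum(precisions) / float(len(relevant))

The E-measure printed above is derived as 1 - F, which is why the script reuses f_measure instead of keeping a separate list for it.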