コード例 #1
0
 def __init__(self, books_folder, pages_index_folder, csv_path):
     """Remember the data locations and create the two search engines.

     Args:
         books_folder: Stored as ``self.books_folder``.
         pages_index_folder: Stored and handed to ``TSearchEngine`` as
             its ``index_location``.
         csv_path: Stored and handed to ``TCustomFieldsSearchEngine``.
     """
     # Keep the raw constructor arguments available on the instance.
     self.books_folder, self.pages_index_folder, self.csv_path = (
         books_folder,
         pages_index_folder,
         csv_path,
     )

     # Engine over the pages index.
     index_location = self.pages_index_folder
     self.pages_search_engine = TSearchEngine(index_location=index_location)

     # Engine over the custom fields described by the CSV file.
     self.cfields_search_engine = TCustomFieldsSearchEngine(self.csv_path)
コード例 #2
0
class TSearchServer():
    """Combine a custom-fields search engine with a pages index search engine.

    The custom-fields engine (``TCustomFieldsSearchEngine``) filters books by
    their metadata; the pages engine (``TSearchEngine``) searches inside the
    indexed page segments stored under ``books_folder``.
    """

    def __init__(self, books_folder, pages_index_folder, csv_path):
        """Store the data locations and create both search engines.

        Args:
            books_folder: Root folder; segment data is read from
                ``books_folder/<obj_id>/<field_id>``.
            pages_index_folder: Passed to ``TSearchEngine`` as
                ``index_location``.
            csv_path: Passed to ``TCustomFieldsSearchEngine``.
        """
        self.books_folder = books_folder
        self.pages_index_folder = pages_index_folder
        self.csv_path = csv_path
        self.pages_search_engine = TSearchEngine(index_location=self.pages_index_folder)
        self.cfields_search_engine = TCustomFieldsSearchEngine(csv_path)

    def get_pages_segment_data(self, segment_id):
        """Read the raw bytes of one indexed segment from its source file.

        Args:
            segment_id: Identifier resolved through the pages engine's
                ``segment_index.get_segment``.

        Returns:
            Tuple ``(obj_id, field_id, snippet)`` where ``snippet`` holds the
            ``length`` bytes found at offset ``start`` of the segment's file.
        """
        import os
        obj_id, field_id, start, length = self.pages_search_engine.segment_index.get_segment(segment_id)
        location = os.path.join(self.books_folder, obj_id, field_id)
        # The context manager closes the handle even if seek/read raises.
        with open(location, "rb") as segment_file:
            segment_file.seek(start)
            snippet = segment_file.read(length)
        return obj_id, field_id, snippet

    def select_words_in_snippet(self, words2select, snippet):
        """Wrap the selected words in ``<b>`` tags and tidy the snippet text.

        Args:
            words2select: Iterable of ``(token, position)`` pairs; ``position``
                indexes the matches returned by the pages engine's parser for
                this snippet. The token itself is not used.
            snippet: Byte string encoded in windows-1251.

        Returns:
            The decoded snippet with the selected words wrapped in
            ``<b>...</b>``, double quotes replaced by single quotes and every
            whitespace run (including line breaks) collapsed to one space.
        """
        import re
        matches = self.pages_search_engine.parsers.parse_buffer(snippet, "windows-1251")
        # De-duplicate the spans: wrapping the same span twice would insert
        # the second pair of tags inside the first and corrupt the markup.
        spans = sorted(set(
            (matches[position].start, matches[position].start + matches[position].length)
            for _token, position in words2select
        ))
        # Insert tags from the end so that earlier offsets remain valid.
        for sel_start, sel_end in reversed(spans):
            # Byte literals keep the concatenation valid for byte snippets on
            # Python 3 as well as Python 2.
            snippet = (snippet[:sel_start] + b"<b>" + snippet[sel_start:sel_end]
                       + b"</b>" + snippet[sel_end:])
        text = snippet.decode("windows-1251")
        text = text.replace("\r", " ").replace("\n", " ").replace('"', "'")
        return re.sub(r"\s+", " ", text)

    def get_book_data(self, object_id):
        """Return the object of class TBook stored for ``object_id``.

        ``object_id`` is converted with ``int`` before the lookup in the
        custom-fields engine's ``objects`` container.
        """
        return self.cfields_search_engine.objects[int(object_id)]

    def search(self, params):
        """Run a combined custom-fields and pages search.

        Args:
            params: Mapping with the keys ``title``, ``author``, ``udc``,
                ``year``, ``year_max``, ``year_min``, ``pages_count``,
                ``pages_count_max``, ``pages_count_min``,
                ``filter_lib_section``, ``filter_object_id``, ``pages_query``,
                ``start`` and ``len``.

        Returns:
            ``[[], 0]`` when nothing can match (filters exclude every book, the
            requested ``filter_object_id`` is filtered out, or ``pages_query``
            is empty). Otherwise a tuple ``(results, total_results_count)``
            where ``results`` lists custom-field matches first, followed by
            the results of the pages engine.
        """
        EMPTY_RESPONSE = [[], 0]
        filtered_object_ids = self.cfields_search_engine.process_query(
            title=params["title"],
            author=params["author"],
            udc=params["udc"],
            year=params["year"],
            year_max=params["year_max"],
            year_min=params["year_min"],
            pages_count=params["pages_count"],
            pages_count_max=params["pages_count_max"],
            pages_count_min=params["pages_count_min"],
            lib_section=params["filter_lib_section"])
        # -1 signals that no book satisfies the filters.
        if filtered_object_ids == -1:
            return EMPTY_RESPONSE

        if params["filter_object_id"]:
            filter_object_id = int(params["filter_object_id"])
            # 0 signals "no restrictions"; otherwise the requested book must
            # be among the books that passed the filters.
            if filtered_object_ids != 0 and filter_object_id not in filtered_object_ids:
                return EMPTY_RESPONSE
            filtered_object_ids = [filter_object_id]

        if filtered_object_ids == 0:  # all books are accepted
            filtered_object_ids = None

        # Without a query to the pages index there is nothing to return,
        # whether or not filters were supplied.
        if not params["pages_query"]:
            return EMPTY_RESPONSE

        objects_matching_custom_field = self.cfields_search_engine.find_mentions_of_author_and_title(params["pages_query"])
        if filtered_object_ids is not None:
            if len(filtered_object_ids) == 1:  # searching inside one book
                objects_matching_custom_field = []
            else:
                # Build the lookup set once instead of scanning per element.
                allowed_ids = set(filtered_object_ids)
                objects_matching_custom_field = [obj_id for obj_id in objects_matching_custom_field
                                                 if obj_id in allowed_ids]

        requested_start = params["start"]
        requested_count = params["len"]
        custom_count = len(objects_matching_custom_field)

        # Custom-field matches occupy the head of the combined result list;
        # the requested window is served from them first.
        first_from_custom_matchings = min(custom_count, requested_start)
        take_custom_matchings_count = min(requested_count, max(0, custom_count - requested_start))

        # Whatever is left of the window is requested from the pages engine.
        objects2return = max(0, requested_count - take_custom_matchings_count)
        first_object2return = max(0, requested_start - custom_count)

        custom_window = objects_matching_custom_field[
            first_from_custom_matchings: first_from_custom_matchings + take_custom_matchings_count]
        joined_results = [TSearchEngineResult(obj_id, 0, 0) for obj_id in custom_window]

        pages_results, pages_results_count = self.pages_search_engine.search(
            query=params["pages_query"],
            filter_objects=filtered_object_ids,
            first_object2return=first_object2return,
            objects2return=objects2return)
        joined_results += pages_results
        # NOTE(review): only the custom matches taken for this window are
        # counted, so the total can differ between pages -- confirm whether
        # len(objects_matching_custom_field) was intended here.
        total_results_count = take_custom_matchings_count + pages_results_count
        return joined_results, total_results_count