def __init__(self, books_folder, pages_index_folder, csv_path):
    """Store the data locations and construct the two search engines.

    books_folder       -- folder holding the book files
    pages_index_folder -- location handed to TSearchEngine as index_location
    csv_path           -- path handed to TCustomFieldsSearchEngine
    """
    # Keep the raw locations around on the instance.
    self.books_folder = books_folder
    self.pages_index_folder = pages_index_folder
    self.csv_path = csv_path

    # Engine over the pages index.
    self.pages_search_engine = TSearchEngine(index_location=pages_index_folder)
    # Engine over the custom fields read from the CSV file.
    self.cfields_search_engine = TCustomFieldsSearchEngine(csv_path)
class TSearchServer:
    """Search front end that combines a pages index with custom-field filters.

    Two engines are used: ``pages_search_engine`` (a TSearchEngine built over
    ``pages_index_folder``) and ``cfields_search_engine`` (a
    TCustomFieldsSearchEngine built from ``csv_path``).
    """

    def __init__(self, books_folder, pages_index_folder, csv_path):
        """Store the data locations and construct both search engines.

        books_folder       -- root folder; segments are read from
                              <books_folder>/<obj_id>/<field_id>
        pages_index_folder -- passed to TSearchEngine as index_location
        csv_path           -- passed to TCustomFieldsSearchEngine
        """
        self.books_folder = books_folder
        self.pages_index_folder = pages_index_folder
        self.csv_path = csv_path
        self.pages_search_engine = TSearchEngine(index_location=pages_index_folder)
        self.cfields_search_engine = TCustomFieldsSearchEngine(csv_path)

    def get_pages_segment_data(self, segment_id):
        """Read the raw bytes of one indexed segment from disk.

        The segment index maps ``segment_id`` to
        ``(obj_id, field_id, start, length)``; ``length`` bytes are read at
        offset ``start`` from the file <books_folder>/<obj_id>/<field_id>.

        Returns the tuple ``(obj_id, field_id, snippet)`` where ``snippet`` is
        the undecoded byte string.
        """
        import os

        obj_id, field_id, start, length = \
            self.pages_search_engine.segment_index.get_segment(segment_id)
        location = os.path.join(self.books_folder, obj_id, field_id)
        # The context manager closes the file even if seek/read raises.
        with open(location, "rb") as segment_file:
            segment_file.seek(start)
            snippet = segment_file.read(length)
        return obj_id, field_id, snippet

    def select_words_in_snippet(self, words2select, snippet):
        """Wrap the selected words of a snippet in <b>...</b> and clean it up.

        words2select -- iterable of ``(token, position)`` pairs; ``position``
                        indexes the match list the parser returns for
                        ``snippet`` (the token itself is not used here)
        snippet      -- byte string encoded in windows-1251

        Returns the decoded text with the words marked, CR/LF turned into
        spaces, double quotes turned into single quotes and every whitespace
        run collapsed to one space.
        """
        import re

        matches = self.pages_search_engine.parsers.parse_buffer(snippet, "windows-1251")
        # The set drops repeated positions: wrapping the same range twice
        # would put the second pair of tags inside the first opening tag.
        to_select = sorted(set(
            (matches[position].start,
             matches[position].start + matches[position].length)
            for _token, position in words2select
        ))
        # Insert tags from the end of the buffer towards its start so the
        # offsets of the ranges not yet processed stay valid.
        for sel_start, sel_end in reversed(to_select):
            # Byte literals: the buffer is still undecoded at this point.
            snippet = (snippet[:sel_start] + b"<b>" + snippet[sel_start:sel_end]
                       + b"</b>" + snippet[sel_end:])
        snippet = snippet.decode("windows-1251")
        snippet = snippet.replace("\r", " ").replace("\n", " ").replace('"', "'")
        return re.sub(r"\s+", " ", snippet)

    def get_book_data(self, object_id):
        """Return the object of class TBook stored under ``int(object_id)``."""
        return self.cfields_search_engine.objects[int(object_id)]

    def search(self, params):
        """Run a combined custom-fields / pages search.

        ``params`` is a mapping with the keys ``title``, ``author``, ``udc``,
        ``year``, ``year_max``, ``year_min``, ``pages_count``,
        ``pages_count_max``, ``pages_count_min``, ``filter_lib_section``,
        ``filter_object_id``, ``pages_query``, ``start`` and ``len``.

        Objects whose custom fields mention the query come first, followed by
        the hits of the pages index; ``start``/``len`` select a window over
        that joined sequence.

        Returns ``[[], 0]`` when nothing can match or no pages query is given,
        otherwise the tuple ``(results, total_results_count)``.
        """
        EMPTY_RESPONSE = [[], 0]

        filtered_object_ids = self.cfields_search_engine.process_query(
            title=params["title"],
            author=params["author"],
            udc=params["udc"],
            year=params["year"],
            year_max=params["year_max"],
            year_min=params["year_min"],
            pages_count=params["pages_count"],
            pages_count_max=params["pages_count_max"],
            pages_count_min=params["pages_count_min"],
            lib_section=params["filter_lib_section"])

        # -1: no books satisfy the filters.
        if filtered_object_ids == -1:
            return EMPTY_RESPONSE

        if params["filter_object_id"]:
            filter_object_id = int(params["filter_object_id"])
            # 0 means the filters introduced no restrictions, so the requested
            # book is accepted unconditionally; otherwise it must have passed.
            if filtered_object_ids == 0 or filter_object_id in filtered_object_ids:
                filtered_object_ids = [filter_object_id]
            else:
                return EMPTY_RESPONSE

        if filtered_object_ids == 0:
            # All books are accepted.
            filtered_object_ids = None

        # Without a query to the pages index there is nothing to return,
        # whatever the filters are.
        if not params["pages_query"]:
            return EMPTY_RESPONSE

        objects_matching_custom_field = \
            self.cfields_search_engine.find_mentions_of_author_and_title(params["pages_query"])
        if filtered_object_ids is not None:
            if len(filtered_object_ids) == 1:
                # Searching inside one book: custom-field matches are dropped.
                objects_matching_custom_field = []
            else:
                allowed_ids = set(filtered_object_ids)
                objects_matching_custom_field = [
                    obj_id for obj_id in objects_matching_custom_field
                    if obj_id in allowed_ids]

        first_object2return = params["start"]
        objects2return = params["len"]
        custom_count = len(objects_matching_custom_field)

        # Split the requested window between the custom-field matches (which
        # come first) and the pages index (which gets the remainder).
        first_from_custom_matchings = min(custom_count, first_object2return)
        take_custom_matchings_count = min(
            objects2return, max(0, custom_count - first_object2return))
        objects2return = max(0, objects2return - take_custom_matchings_count)
        first_object2return = max(0, first_object2return - custom_count)

        # NOTE(review): only the custom matches taken for this window are
        # counted, not all of them -- confirm this is the intended total.
        total_results_count = take_custom_matchings_count

        custom_window = objects_matching_custom_field[
            first_from_custom_matchings:
            first_from_custom_matchings + take_custom_matchings_count]
        joined_results = [TSearchEngineResult(obj_id, 0, 0) for obj_id in custom_window]

        pages_results, pages_results_count = self.pages_search_engine.search(
            query=params["pages_query"],
            filter_objects=filtered_object_ids,
            first_object2return=first_object2return,
            objects2return=objects2return)
        joined_results += pages_results
        total_results_count += pages_results_count
        return joined_results, total_results_count