def run_search_dynamic(self, method):
    """Produce a follow-up result page using the configured method.

    Every supported method stores its ranking in ``self.docs``, records the
    served ids in ``self.submitted_docs`` and mirrors that set onto
    ``self.controller.docsSubmittedID`` before formatting the output.

    Args:
        method: One of ``"solr"``, ``"lda"``, ``"kmeans"`` or ``"combined"``.

    Returns:
        Whatever ``self.doc_ids_to_results`` returns for the ranked ids, or
        an empty list when ``method`` is not one of the supported names.
    """

    def lda_on_refined_query():
        # LDA runs on a dedicated controller built from the refined query so
        # that the state of ``self.controller`` is left untouched.
        controller_lda = Controller(self.new_query, self.domain_name)
        controller_lda.docsSubmittedID = list(self.submitted_docs)
        controller_lda.query = self.query
        return controller_lda.run_lda()

    if method == "solr":
        self.docs = [doc.key for doc in self.controller.docStore.docsList[0:5]]
        newly_submitted = self.docs[0:5]
    elif method == "lda":
        self.docs_lda = lda_on_refined_query()
        self.docs = self.docs_lda
        newly_submitted = self.docs
    elif method == "kmeans":
        self.docs_kmean = self.controller.run_kmeans()
        logging.info("========DOCS KMEAN==========")
        logging.info(str(self.docs_kmean))
        self.docs = self.docs_kmean
        # Only the first five ids are recorded as submitted for this method.
        newly_submitted = self.docs[0:5]
    elif method == "combined":
        self.docs_solr = [doc.key for doc in self.controller.docStore.docsList[0:5]]
        self.docs_lda = lda_on_refined_query()
        self.docs_kmean = self.controller.run_kmeans()
        self.docs = self.controller.run_combine(
            self.docs_lda, self.docs_kmean, self.docs_solr, self.poids
        )
        newly_submitted = self.docs
    else:
        # Unknown method: hand back an empty page instead of None.
        return []

    self.submitted_docs.update(newly_submitted)
    # Keep the main controller's view of submitted ids in sync for every
    # method (the "lda" path used to skip this step).
    self.controller.docsSubmittedID = list(self.submitted_docs)
    return self.doc_ids_to_results(self.docs)
def search(self, query, page_number):
    """Return the result list for one page of the current topic.

    Page 1 starts a new topic and is answered by ``run_search_static``.
    Page 2 advances the controller with ``self.new_query`` and is answered
    by ``run_search_dynamic``. Every other page yields an empty list.

    Args:
        query: Raw query, handed to ``self.query_object.process_query``.
        page_number: Page index; ``1`` marks the first page of a new topic.

    Returns:
        The list built by ``self.doc_ids_to_results`` for the served
        documents, or ``[]`` when nothing is served.
    """
    if page_number == 1:
        # A first page opens a new topic: reset the per-topic state.
        self.new_query = None
        self.words_to_add = []
        self.id_topic += 1

    # NOTE(review): only topic number 3 is ever answered. This looks like a
    # leftover debugging filter -- confirm before relying on or removing it.
    if self.id_topic != 3:
        return []

    self.query_object.process_query(query)
    self.domain_name = self.query_object.domain_name
    self.query = self.query_object.query
    method = self.method
    logging.info("QUERY informations : " + " ".join([self.query, self.domain_name, str(page_number), method]))

    if self.current_topic == 0 or self.current_topic != self.id_topic:
        # First request seen for this topic: build a fresh controller.
        self.query = self.query.lower()
        self.controller = Controller(self.query, self.domain_name)
        self.current_topic = self.id_topic

    if page_number == 1:
        if not self.domain_name:
            return []
        docs_static = self.run_search_static(method)
        print(docs_static)
        return docs_static

    # NOTE(review): a fallback serving ``self.controller.new_req_random_doc()``
    # for topics without feedback used to be guarded by
    # ``page_number > 2 and page_number < 1``, which can never hold, so pages
    # past the second have always come back empty. That behaviour is kept,
    # stated explicitly; pages below 1 now also get ``[]`` instead of None.
    if page_number != 2:
        return []

    self.data = self.controller.data
    self.docs_id = self.controller.docs_id
    try:
        self.controller.new_turn(self.new_query, page_number)
    except ValueError:
        # The controller could not start a new turn: fall back to the first
        # five entries of its document store.
        self.docs = [doc.key for doc in self.controller.docStore.docsList[0:5]]
        self.submitted_docs.update(self.docs)
        self.controller.docsSubmittedID = list(self.submitted_docs)
        return self.doc_ids_to_results(self.docs)

    # Re-read the controller state after the new turn.
    self.data = self.controller.data
    self.docs_id = self.controller.docs_id
    return self.run_search_dynamic(method)
class SearchSystem(object):
    """Search system that serves result pages topic by topic.

    The first page of a topic comes from ``run_search_static``; the second
    page comes from ``run_search_dynamic`` after the controller has been
    advanced with the query derived from feedback. Results are flat lists
    that alternate a document id with a random confidence string.
    """

    def __init__(self, doc_store, method, options, poids):
        """Store the configuration and reset all per-session state.

        Args:
            doc_store: Document store handle; only stored on the instance.
            method: Search method name (``"solr"``, ``"kmeans"``, ``"lda"``
                or ``"combined"``).
            options: Feedback options; ``process_feedback`` reads its
                ``pos``, ``nb_words_add``, ``Ne_list_pos`` and ``rocchio``
                attributes.
            poids: Passed as the last argument to the controller's
                ``run_combine`` (presumably per-method weights -- confirm).
        """
        print("===========INIT================")
        self.doc_store = doc_store
        self.submitted_docs = set()
        self.feedbacks = {}
        self.current_topic = 0
        self.rel_docs = []
        self.non_rel_docs = []
        self.id_topic = 0
        self.words_to_add = []
        self.feedback_options = options
        self.method = method
        self.poids = poids
        self.query = None
        self.domain_name = None
        self.new_query = None
        self.query_object = Query()
        self.allreadyHere = 0

    def search(self, query, page_number):
        """Return the result list for one page of the current topic.

        Page 1 starts a new topic and is answered by ``run_search_static``.
        Page 2 advances the controller with ``self.new_query`` and is
        answered by ``run_search_dynamic``. Every other page yields ``[]``.

        Args:
            query: Raw query, handed to ``self.query_object.process_query``.
            page_number: Page index; ``1`` marks the first page of a topic.

        Returns:
            The list built by ``doc_ids_to_results`` for the served
            documents, or ``[]`` when nothing is served.
        """
        if page_number == 1:
            # A first page opens a new topic: reset the per-topic state.
            self.new_query = None
            self.words_to_add = []
            self.id_topic += 1

        # NOTE(review): only topic number 3 is ever answered. This looks
        # like a leftover debugging filter -- confirm before removing it.
        if self.id_topic != 3:
            return []

        self.query_object.process_query(query)
        self.domain_name = self.query_object.domain_name
        self.query = self.query_object.query
        method = self.method
        logging.info("QUERY informations : " + " ".join([self.query, self.domain_name, str(page_number), method]))

        if self.current_topic == 0 or self.current_topic != self.id_topic:
            # First request seen for this topic: build a fresh controller.
            self.query = self.query.lower()
            self.controller = Controller(self.query, self.domain_name)
            self.current_topic = self.id_topic

        if page_number == 1:
            if not self.domain_name:
                return []
            docs_static = self.run_search_static(method)
            print(docs_static)
            return docs_static

        # NOTE(review): a fallback serving
        # ``self.controller.new_req_random_doc()`` for topics without
        # feedback used to be guarded by ``page_number > 2 and
        # page_number < 1``, which can never hold, so pages past the second
        # have always come back empty. That behaviour is kept, stated
        # explicitly; pages below 1 now also get ``[]`` instead of None.
        if page_number != 2:
            return []

        self.data = self.controller.data
        self.docs_id = self.controller.docs_id
        try:
            self.controller.new_turn(self.new_query, page_number)
        except ValueError:
            # The controller could not start a new turn: fall back to the
            # first five entries of its document store.
            self.docs = [doc.key for doc in self.controller.docStore.docsList[0:5]]
            return self._submit_and_format(self.docs)

        # Re-read the controller state after the new turn.
        self.data = self.controller.data
        self.docs_id = self.controller.docs_id
        return self.run_search_dynamic(method)

    def _submit_and_format(self, newly_submitted):
        """Record ids as submitted, sync the controller, format ``self.docs``."""
        self.submitted_docs.update(newly_submitted)
        self.controller.docsSubmittedID = list(self.submitted_docs)
        return self.doc_ids_to_results(self.docs)

    def _run_lda_on_refined_query(self):
        """Run LDA on a dedicated controller built from ``self.new_query``."""
        controller_lda = Controller(self.new_query, self.domain_name)
        controller_lda.docsSubmittedID = list(self.submitted_docs)
        controller_lda.query = self.query
        return controller_lda.run_lda()

    def run_search_static(self, method):
        """Produce the first result page of a topic.

        Args:
            method: ``"solr"``, ``"kmeans"``, ``"lda"`` or ``"combined"``.
                Any other value leaves ``self.docs`` as it was.

        Returns:
            ``doc_ids_to_results(self.docs)``; the first five ids of
            ``self.docs`` are recorded as submitted.
        """
        if method == "solr":
            self.docs_solr = [doc.key for doc in self.controller.docStore.docsList[0:5]]
            self.docs = self.docs_solr[0:5]
        elif method == "kmeans":
            self.docs_kmean = self.controller.run_kmeans()
            logging.info("========DOCS KMEAN==========")
            logging.info(str(self.docs_kmean))
            self.docs = self.docs_kmean
        elif method == "lda":
            self.docs_lda = self.controller.run_lda()
            logging.info("========DOCS LDA==========")
            logging.info(str(self.docs_lda))
            self.docs = self.docs_lda
        elif method == "combined":
            self.docs_solr = [doc.key for doc in self.controller.docStore.docsList[0:5]]
            self.docs_kmean = self.controller.run_kmeans()
            self.docs_lda = self.controller.run_lda()
            self.docs = self.controller.run_combine(
                self.docs_lda, self.docs_kmean, self.docs_solr, self.poids
            )
        return self._submit_and_format(self.docs[0:5])

    def run_search_dynamic(self, method):
        """Produce a follow-up result page using the configured method.

        Every supported method stores its ranking in ``self.docs``, records
        the served ids in ``self.submitted_docs`` and mirrors that set onto
        ``self.controller.docsSubmittedID`` before formatting the output.

        Args:
            method: ``"solr"``, ``"lda"``, ``"kmeans"`` or ``"combined"``.

        Returns:
            ``doc_ids_to_results(self.docs)``, or ``[]`` when ``method`` is
            not one of the supported names.
        """
        if method == "solr":
            self.docs = [doc.key for doc in self.controller.docStore.docsList[0:5]]
            newly_submitted = self.docs[0:5]
        elif method == "lda":
            self.docs_lda = self._run_lda_on_refined_query()
            self.docs = self.docs_lda
            newly_submitted = self.docs
        elif method == "kmeans":
            self.docs_kmean = self.controller.run_kmeans()
            logging.info("========DOCS KMEAN==========")
            logging.info(str(self.docs_kmean))
            self.docs = self.docs_kmean
            # Only the first five ids are recorded as submitted here.
            newly_submitted = self.docs[0:5]
        elif method == "combined":
            self.docs_solr = [doc.key for doc in self.controller.docStore.docsList[0:5]]
            self.docs_lda = self._run_lda_on_refined_query()
            self.docs_kmean = self.controller.run_kmeans()
            self.docs = self.controller.run_combine(
                self.docs_lda, self.docs_kmean, self.docs_solr, self.poids
            )
            newly_submitted = self.docs
        else:
            # Unknown method: hand back an empty page instead of None.
            return []
        # The controller sync now happens for every method (the "lda" path
        # used to skip it).
        return self._submit_and_format(newly_submitted)

    def doc_ids_to_results(self, doc_ids):
        """Interleave each document id with a random confidence string.

        Args:
            doc_ids: Sequence of document ids.

        Returns:
            ``[id_0, conf_0, id_1, conf_1, ...]`` where each confidence is
            the string form of a random integer in ``[0, 1000)``.
        """
        confidences = [str(int(1000 * random.random())) for _ in doc_ids]
        results = list(chain.from_iterable(zip(doc_ids, confidences)))
        logging.info("results returned" + str(results))
        return results

    def process_feedback(self, feedbacks):
        """Store feedback for the current topic and derive a refined query.

        The feedback is appended to ``example.log``, each subtopic is
        recorded under ``self.feedbacks[self.current_topic]``, and the first
        five entries of ``self.docs`` are split into relevant and
        non-relevant ids. The result of ``FeedbackPocessing.use_feedback``
        becomes both ``self.new_query`` and ``self.query``.

        Args:
            feedbacks: Iterable of mappings with a ``"stream_id"`` key and
                a ``"subtopics"`` list; every subtopic provides
                ``"subtopic_id"``, ``"subtopic_name"`` and
                ``"passage_text"``.
        """
        # ``with`` guarantees the log file is closed even if pprint fails.
        with open("example.log", "a") as file_log:
            pprint(feedbacks, file_log)

        self.rel_docs = []
        self.non_rel_docs = []
        for feedback in feedbacks:
            for subtopic in feedback["subtopics"]:
                self.feedbacks.setdefault(self.current_topic, []).append(
                    (
                        subtopic["subtopic_id"],
                        feedback["stream_id"],
                        subtopic["subtopic_name"],
                        subtopic["passage_text"],
                    )
                )
                # One entry per subtopic, so a document may appear repeatedly.
                self.rel_docs.append(feedback["stream_id"])
                logging.info("subtopic_name" + subtopic["subtopic_name"])
                logging.info("subtopic_id" + str(subtopic["subtopic_id"]))

        # Served documents that drew no feedback count as non-relevant.
        relevant = set(self.rel_docs)
        self.non_rel_docs = [doc for doc in self.docs[0:5] if doc not in relevant]

        feedback_processor = FeedbackPocessing(
            self.feedbacks,
            self.query_object.raw_query,
            self.controller,
            self.id_topic,
            self.domain_name,
            self.rel_docs,
            self.non_rel_docs,
        )
        self.new_query = feedback_processor.use_feedback(
            self.feedback_options.pos,
            self.feedback_options.nb_words_add,
            self.feedback_options.Ne_list_pos,
            self.feedback_options.rocchio,
        )
        self.query = self.new_query