def on_receive(self, message):
    """Route an incoming message to the component that handles its method.

    Supported methods:

    * ``'load_file'`` -- forwards the message unchanged to
      ``self.document_parser`` when ``message['data']['file']`` is truthy.
    * ``'search'`` -- sends a ``'query'`` request carrying
      ``message['data']`` to ``self.query_processor`` when
      ``message['data']['query']`` is truthy.

    Args:
        message: Mapping with at least a ``'method'`` key and, for the two
            supported methods, a ``'data'`` mapping.

    Returns:
        A response built by ``msg.build_response`` (or, for a successful
        search, the query processor's own response). Status codes:
        ``0`` success, ``-1`` missing file/query, ``-2`` the downstream
        component reported a failure, ``-13`` unknown method.
    """
    log.log_info("Orchestrator received message: {:}".format(message))
    method = message['method']

    if method == 'load_file':
        if not message['data']['file']:
            return msg.build_response(
                status=-1,
                error_msg="No file provided to Orchestrator.load_file")
        response = self.document_parser.ask(message)
        if response['status'] == 0:
            return msg.build_response(status=0)
        return msg.build_response(
            status=-2,
            error_msg="Orchestrator.load_file failed: {:}".format(
                response['error_msg']))

    if method == 'search':
        if not message['data']['query']:
            return msg.build_response(
                status=-1,
                error_msg="No word provided to Orchestrator.search")
        response = self.query_processor.ask(
            msg.build_request(method='query', data=message['data']))
        if response['status'] == 0:
            return response
        # A failed query used to fall through to the "No method to process
        # message" response below, which hid the real cause. Report it the
        # same way the load_file branch reports a parser failure.
        return msg.build_response(
            status=-2,
            error_msg="Orchestrator.search failed: {:}".format(
                response['error_msg']))

    return msg.build_response(
        status=-13,
        error_msg="No method to process message: {:}".format(message))
def _get_doc_info(self, doc_id):
    """Fetch the stored fields of a document from the document parser.

    Args:
        doc_id: Identifier passed through as ``data['doc_id']`` of a
            ``'get_doc'`` request.

    Returns:
        A ``(title, text, author, date)`` tuple when the parser answers with
        status 0. On any other status the error is logged and ``None`` is
        returned, so callers must not unpack the result blindly.
    """
    request = msg.build_request(method='get_doc', data={'doc_id': doc_id})
    reply = self.document_parser.ask(request)

    if reply['status'] != 0:
        log.log_error("Could not get info for doc: {:}".format(
            reply['error_msg']))
        return None

    doc = reply['data']
    return doc['title'], doc['text'], doc['author'], doc['date']
def _stem_word(self, word):
    """Ask the document parser for the stem of ``word``.

    Args:
        word: The word sent as ``data['word']`` of a ``'stem_word'`` request.

    Returns:
        ``response['data']['stem']`` when the parser answers with status 0;
        otherwise the error is logged and an empty string is returned.
    """
    request = msg.build_request(method='stem_word', data={'word': word})
    reply = self.document_parser.ask(request)

    if reply['status'] != 0:
        log.log_error("Orchestrator could not stem word: {:}".format(
            reply['error_msg']))
        return ""

    return reply['data']['stem']
def main(argv):
    """Run the interactive search prompt.

    Starts an ``Orchestrator``, optionally loads the file named after a
    ``-d`` argument, then reads queries from standard input until the user
    enters ``q`` or ``quit`` (or stdin reaches end-of-file). Successful
    search results are printed; failures are logged.

    Args:
        argv: Command-line arguments. ``-d <path>`` names a document to load
            before the prompt starts.
    """
    log.flush_log()
    log.log_info("Hello")

    # process arguments
    document = None
    if '-d' in argv:
        value_index = argv.index('-d') + 1
        if value_index < len(argv):
            document = argv[value_index]
        else:
            # "-d" was the last argument; indexing past it would raise
            # IndexError before anything useful was logged.
            log.log_error("Option -d requires a file path")

    # launch orchestrator
    orchestrator = Orchestrator.start()
    try:
        if document:
            response = orchestrator.ask(
                msg.build_request(method='load_file',
                                  data={'file': document}))
            if response['status'] != 0:
                log.log_error(response['error_msg'])
            else:
                log.log_info("Loaded file")

        while True:
            try:
                query = raw_input("Search: ")
            except EOFError:
                # stdin closed (Ctrl-D or piped input exhausted): treat it
                # like an explicit quit instead of crashing with a traceback.
                log.log_info("Query: <EOF>")
                break
            log.log_info("Query: {:}".format(query))
            if query in ("q", "quit"):
                break

            response = orchestrator.ask(
                msg.build_request(method='search', data={'query': query}))
            if response['status'] == 0:
                # Parenthesised single argument: identical output under the
                # Python 2 print statement and the Python 3 print function.
                print(response['data'])
            else:
                log.log_error(response['error_msg'])
    finally:
        # Always stop the orchestrator, even when the loop is interrupted
        # by an exception such as KeyboardInterrupt.
        orchestrator.stop()

    log.log_info("Goodbye")
def _load_file(self, _file):
    """Parse an XML file and hand each of its pages to the index handler.

    Every ``page`` element found by ``root.findall('page')`` is converted
    with ``self._parse_xml_page`` and sent to ``self.index_handler`` in a
    ``'store_page'`` request. ``_file`` is appended to ``self.parsed_docs``
    only after all pages have been sent.

    Any ordinary exception aborts the load: it is logged (traceback at debug
    level) and not re-raised. Pages sent before the failure are not rolled
    back, and ``_file`` is not recorded in ``self.parsed_docs``.

    Args:
        _file: Source handed to ``ET.parse`` (presumably a path or file
            object accepted by an ElementTree-compatible parser -- confirm
            against the module bound to ``ET``).

    Returns:
        None. NOTE(review): the replies from ``index_handler.ask`` are not
        inspected, so a failed store is not detected here.
    """
    try:
        log.log_info("DocumentParser.load_file parsing xml...")
        tree = ET.parse(_file)
        root = tree.getroot()

        log.log_info("Loading pages...")
        for page in root.findall('page'):
            page_id = int(page.find('id').text)
            log.log_info("Loading page: {:}".format(page_id))
            page_data = self._parse_xml_page(page)
            self.index_handler.ask(
                msg.build_request(method='store_page',
                                  data={'page': page_data}))
        log.log_info("Done loading pages")

        self.parsed_docs.append(_file)
    except Exception:
        # Was a bare "except:", which also swallowed KeyboardInterrupt and
        # SystemExit and made a long load impossible to interrupt.
        log.log_error("DocumentParser.load_file error parsing xml")
        log.log_debug(traceback.format_exc())
def _process_query(self, query):
    """Stem a single-word query and look it up through the index handler.

    The query is converted with ``str()`` and lower-cased, stemmed via
    ``self._stem_word``, and the stem is sent to ``self.index_handler`` in a
    ``'search'`` request.

    Args:
        query: The search term; treated as a single word.

    Returns:
        A response built by ``msg.build_response``: status 0 with the
        beautified result as data, status -2 when the stem is an empty
        string, or status -3 when the index handler reports a failure.
    """
    # assume single word query
    query = str(query).lower()
    stem = self._stem_word(query)

    if stem == "":
        return msg.build_response(status=-2,
                                  error_msg="Stemmed word is empty string")

    lookup = self.index_handler.ask(
        msg.build_request(method='search', data={'word': stem}))

    if lookup['status'] != 0:
        return msg.build_response(
            status=-3,
            error_msg="QueryProcessor.process_query failed: {:}".format(
                lookup['error_msg']))

    # TODO(JC): Add document retrieval
    pretty = self._beautify_result(lookup['data'], query)
    return msg.build_response(status=0, data=pretty)