def run(self):
    """Process protocols which have not been processed yet, indefinitely.

    Every iteration acquires ``self.sem`` and then asks the database client
    for the next protocol. If there is none, the loop starts over. Otherwise
    the protocol is downloaded into ``self.protocols_directory``, parsed
    using ``self.dtd_file``, analyzed, and the speeches are inserted into
    the database.

    Parsing and analysis failures are logged and the protocol is skipped.
    Any other exception propagates to the caller, but only after the
    protocol has been marked as done.

    This method never returns normally.
    """
    while True:
        logging.info("Updater requests semaphore.")
        self.sem.acquire()
        logging.info("Updater obtained semaphore.")

        protocol = self.db_client.protocol_get_next()
        if protocol is None:
            continue

        try:
            fpath = os.path.join(self.protocols_directory, protocol.fname)
            wget.download(protocol.url, fpath)
            speeches = parsing.get_speeches(fpath, self.dtd_file)
            processing.analyze_speeches(speeches)
            self.db_client.speech_insert_collection(speeches)
        except myexceptions.SpeechParsingException as parse_exception:
            # Route the failure details through logging (not stdout) so the
            # cause ends up next to the error record in the log.
            logging.error(
                "Failed parsing speeches in protocol %s: %s",
                protocol.url,
                parse_exception,
            )
        except myexceptions.SpeechAnalysisException:
            logging.error(
                "Failed analyzing speech in protocol %s", protocol.url
            )
        finally:
            # Execute this even if a failure occurred, to prevent the
            # broken protocol from being updated multiple times.
            self.db_client.protocol_is_done(protocol)
def run(
        protocol_file: str,
        dtd_file: str,
        output_file: str,
        parse_only: bool
) -> None:
    """Parse and process speeches of test protocol.

    Tries to parse and process the given protocol. The result is written
    to the given output file (JSON format), replacing any previous content.
    The time spent on parsing and on processing is printed to stdout.

    Args:
        protocol_file (str): protocol to parse
        dtd_file (str): document type definition
        output_file (str): output file
        parse_only (bool): skip processing (nothing is written in this case)
    """
    ts_start = time.time()
    # Materialize the result up front. NOTE(review): get_speeches looks like
    # it may return a lazy iterator - confirm. If it does, the timing below
    # would otherwise measure nothing, and the iterator would already be
    # exhausted by the analysis step when the JSON list is built.
    speeches = list(parsing.get_speeches(protocol_file, dtd_file))
    print("Parsing speeches took {:3.2f} seconds".format(time.time()-ts_start))

    if parse_only:
        return

    ts_start = time.time()
    processing.analyze_speeches(speeches)
    print("Processing speeches took {:3.2f} seconds".format(
        time.time()-ts_start
    ))

    speeches_json = [speech.to_json() for speech in speeches]
    # Overwrite rather than append: a second JSON document appended to an
    # existing file would leave the file unparseable.
    with open(output_file, "w") as out:
        json.dump(speeches_json, out, indent=4)
    print("Check result in {}".format(output_file))
def test_get_speeches_filepath_no_xml(self):
    """Expect FileNotFoundError for a Python file given as protocol file.

    The path "./parsing.py" is passed together with a valid dtd file and
    the returned speeches are consumed with list().
    """
    protocol_path = "./parsing.py"

    def consume_speeches():
        return list(parsing.get_speeches(protocol_path, TestClass.DTD_FILE))

    self.assertRaises(FileNotFoundError, consume_speeches)
def test_get_speeches_filepath_not_exists(self):
    """Expect FileNotFoundError for a protocol path that does not exist.

    The path "XYZ" is passed together with a valid dtd file and the
    returned speeches are consumed with list().
    """
    missing_path = "XYZ"

    def consume_speeches():
        return list(parsing.get_speeches(missing_path, TestClass.DTD_FILE))

    self.assertRaises(FileNotFoundError, consume_speeches)
def test_get_speeches_filepath_empty(self):
    """Expect FileNotFoundError for an empty string as protocol file.

    The empty path is passed together with a valid dtd file and the
    returned speeches are consumed with list().
    """
    empty_path = ""

    def consume_speeches():
        return list(parsing.get_speeches(empty_path, TestClass.DTD_FILE))

    self.assertRaises(FileNotFoundError, consume_speeches)
def test_get_speeches_filepath_valid(self):
    """Parse the example protocol file with a valid dtd file.

    Asserts that the number of parsed speeches equals the number of
    speeches in the protocol.
    """
    parsed = parsing.get_speeches(TestClass.PROTOCOL, TestClass.DTD_FILE)
    speech_count = len(list(parsed))
    self.assertEqual(speech_count, TestClass.PROTOCOL_NUMBER_OF_SPEECHES)