コード例 #1
0
 def run(self):
     """Process protocols which have not been processed yet, forever.

     Each iteration acquires ``self.sem`` and then asks the database client
     for the next protocol. If there is one, it is downloaded into
     ``self.protocols_directory``, parsed, analyzed, and its speeches are
     inserted into the database. The protocol is marked as done afterwards
     whether or not processing succeeded.

     Parsing and analysis failures are logged and the loop carries on with
     the next iteration. Any other exception propagates to the caller after
     the protocol has been marked as done.
     """
     while True:
         logging.info("Updater requests semaphore.")
         self.sem.acquire()
         logging.info("Updater obtained semaphore.")
         protocol = self.db_client.protocol_get_next()
         if protocol is None:
             # Nothing to process; go back to requesting the semaphore.
             continue
         try:
             fpath = os.path.join(self.protocols_directory, protocol.fname)
             wget.download(protocol.url, fpath)
             speeches = parsing.get_speeches(fpath, self.dtd_file)
             processing.analyze_speeches(speeches)
             self.db_client.speech_insert_collection(speeches)
         except myexceptions.SpeechParsingException as parse_exception:
             # Keep the failure details in the log next to the error
             # instead of writing them to stdout.
             logging.error("Failed parsing speeches in protocol %s: %s",
                           protocol.url, parse_exception)
         except myexceptions.SpeechAnalysisException as analysis_exception:
             logging.error("Failed analyzing speech in protocol %s: %s",
                           protocol.url, analysis_exception)
         finally:
             # Execute this even if a failure occurred, to prevent the
             # broken protocol from being updated multiple times.
             self.db_client.protocol_is_done(protocol)
コード例 #2
0
def run(
        protocol_file: str, dtd_file: str, output_file: str,
        parse_only: bool
) -> None:
    """Parse and process speeches of test protocol.

    Tries to parse and process the given protocol. The result is written to
    the given output file (JSON format). An existing output file is
    overwritten so that the file always holds exactly one JSON document.

    The time spent on parsing and on processing is printed to stdout.

    Args:
        protocol_file (str): protocol to parse
        dtd_file (str): document type definition
        output_file (str): output file
        parse_only (bool): skip processing; nothing is written to the
            output file in that case

    """
    # perf_counter is monotonic, so the measured durations are not affected
    # by adjustments of the system clock.
    ts_start = time.perf_counter()
    speeches = parsing.get_speeches(protocol_file, dtd_file)
    print("Parsing speeches took {:3.2f} seconds".format(
        time.perf_counter()-ts_start
    ))
    if parse_only:
        return
    ts_start = time.perf_counter()
    processing.analyze_speeches(speeches)
    print("Processing speeches took {:3.2f} seconds".format(
        time.perf_counter()-ts_start
    ))
    speeches_json = [speech.to_json() for speech in speeches]
    # Write mode instead of append mode: appending a second JSON document to
    # an existing file would leave a file that is no longer valid JSON.
    with open(output_file, "w") as out:
        json.dump(speeches_json, out, indent=4)
    print("Check result in {}".format(output_file))
コード例 #3
0
ファイル: parsing.py プロジェクト: Kexplx/meinbundestag
    def test_get_speeches_filepath_no_xml(self):
        """Expect FileNotFoundError when the protocol file is a Python file.

        Additionally, a valid dtd file is provided, so only the protocol
        path is at fault.
        """
        non_xml_path = "./parsing.py"
        with self.assertRaises(FileNotFoundError):
            speeches = parsing.get_speeches(non_xml_path, TestClass.DTD_FILE)
            list(speeches)
コード例 #4
0
ファイル: parsing.py プロジェクト: Kexplx/meinbundestag
    def test_get_speeches_filepath_not_exists(self):
        """Expect FileNotFoundError when the protocol path does not exist.

        Additionally, a valid dtd file is provided, so only the protocol
        path is at fault.
        """
        missing_path = "XYZ"
        with self.assertRaises(FileNotFoundError):
            speeches = parsing.get_speeches(missing_path, TestClass.DTD_FILE)
            list(speeches)
コード例 #5
0
ファイル: parsing.py プロジェクト: Kexplx/meinbundestag
    def test_get_speeches_filepath_empty(self):
        """Expect FileNotFoundError when the protocol path is an empty string.

        Additionally, a valid dtd file is provided, so only the protocol
        path is at fault.
        """
        empty_path = ""
        with self.assertRaises(FileNotFoundError):
            speeches = parsing.get_speeches(empty_path, TestClass.DTD_FILE)
            list(speeches)
コード例 #6
0
ファイル: parsing.py プロジェクト: Kexplx/meinbundestag
    def test_get_speeches_filepath_valid(self):
        """Parse the example protocol file and check the speech count.

        Additionally, a valid dtd file is provided. Assert that the number
        of parsed speeches is equal to the number of speeches in the
        protocol.
        """
        parsed = parsing.get_speeches(TestClass.PROTOCOL, TestClass.DTD_FILE)
        speech_count = len(list(parsed))
        self.assertEqual(speech_count, TestClass.PROTOCOL_NUMBER_OF_SPEECHES)