def test_from_data(self):
    """Check the data extracted from the raw bytes of ``20_ptm_381742.doc``.

    The fixture is read in binary mode and handed to
    ``PlenumProtocolFile.get_from_data``; the values gathered by
    ``self._get_protocol_data`` must equal the expected mapping below.
    """
    fixture_path = os.path.join(os.path.dirname(__file__),
                                '20_ptm_381742.doc')
    raw_bytes = myopen(fixture_path, 'rb').read()

    # The value expected under 'booklet_num' is an error-message string
    # whose wording differs between Python 2 and Python 3.
    if six.PY2:
        booklet_num_error = "'NoneType' object has no attribute 'decode'"
    elif six.PY3:
        booklet_num_error = "'NoneType' object is not iterable"

    expected = {
        "knesset_num_heb": 'עשרים',
        "meeting_num_heb": 'מאתיים-ותשע-עשרה',
        "booklet_num_heb": None,
        "booklet_meeting_num_heb": 'רי"ט',
        "date_string_heb": ('21', 'מרס', '2017'),
        "time_string": ('16', '00'),
        "datetime": datetime(2017, 3, 21, 16, 0),
        "knesset_num": 20,
        "booklet_num": booklet_num_error,
        "booklet_meeting_num": 219,
    }

    with PlenumProtocolFile.get_from_data(raw_bytes) as protocol:
        actual = self._get_protocol_data(protocol, expected)
        self.assertEqual(actual, expected)
 def __init__(self, name, parent_datapackage_path):
     """Set up the resource with a merged meeting + protocol schema.

     The schema passed to the parent constructor is the merge of the
     ``PlenumMeetings`` and ``PlenumProtocolFile`` table schemas plus
     three extra string fields declared here. ``protocol_original`` and
     ``protocol_antiword_text`` are also passed as ``file_fields``.
     Finally an empty ``plenum_errors`` list is stored in the descriptor.

     :param name: forwarded unchanged to the parent constructor
     :param parent_datapackage_path: forwarded unchanged to the parent
         constructor
     """
     extra_fields = [
         {"name": "protocol_original",
          "type": "string",
          "description": "original file (without processing), in case of error will be empty"},
         {"name": "protocol_antiword_text",
          "type": "string",
          "description": "text after antiword processing, in case of error will be empty"},
         {"name": "scraper_errors",
          "type": "string",
          "description": "comma separated list of errors encountered"},
     ]
     file_field_names = ["protocol_original", "protocol_antiword_text"]

     self._meeting_schema = PlenumMeetings.get_json_table_schema()
     self._protocol_schema = PlenumProtocolFile.get_json_table_schema()
     combined_schema = merge_table_schemas(
         self._meeting_schema,
         self._protocol_schema,
         {"fields": extra_fields},
     )

     super(PlenumMeetingsResource, self).__init__(
         name,
         parent_datapackage_path,
         combined_schema,
         file_fields=file_field_names,
     )
     self.descriptor["plenum_errors"] = []
Example #3
0
 def test_from_file(self):
     """Run the shared protocol assertions on a protocol built from an
     open file object for the ``20_ptm_318579.doc`` fixture.
     """
     fixture_path = os.path.join(os.path.dirname(__file__),
                                 '20_ptm_318579.doc')
     # A .doc file is binary: open it in 'rb' so that Python 3 does not try
     # to decode it as text or translate newlines.
     with open(fixture_path, 'rb') as f:
         with PlenumProtocolFile.get_from_file(f) as protocol:
             plenum_protocol_assertions(self, protocol)
Example #4
0
 def test_from_url(self):
     """Run the shared protocol assertions on a protocol obtained via
     ``PlenumProtocolFile.get_from_url``.
     """
     protocol_url = 'http://fs.knesset.gov.il/20/Plenum/20_ptm_318579.doc'
     source = PlenumProtocolFile.get_from_url(protocol_url)
     with source as protocol:
         plenum_protocol_assertions(self, protocol)
 def test_from_file(self):
     """Run the shared protocol assertions on a protocol built from the
     path of the ``20_ptm_318579.doc`` fixture next to this module.
     """
     here = os.path.dirname(__file__)
     fixture_path = os.path.join(here, '20_ptm_318579.doc')
     with PlenumProtocolFile.get_from_filename(fixture_path) as protocol:
         plenum_protocol_assertions(self, protocol)
Example #6
0
 def test_from_content(self):
     """Run the shared protocol assertions on a protocol built from the
     in-memory content of the ``20_ptm_318579.doc`` fixture.
     """
     fixture_path = os.path.join(os.path.dirname(__file__),
                                 '20_ptm_318579.doc')
     # A .doc file is binary: read it in 'rb' so the exact bytes are passed
     # on, instead of text-decoded (and newline-translated) content that is
     # corrupted or raises on Python 3.
     with open(fixture_path, 'rb') as f:
         content = f.read()
     with PlenumProtocolFile.get_from_data(content) as protocol:
         plenum_protocol_assertions(self, protocol)
Example #7
0
 def test_from_url(self):
     """Fetch the protocol through ``PlenumProtocolFile.get_from_url`` and
     run the shared protocol assertions on it.
     """
     doc_url = 'http://fs.knesset.gov.il/20/Plenum/20_ptm_318579.doc'
     with PlenumProtocolFile.get_from_url(doc_url) as protocol:
         plenum_protocol_assertions(self, protocol)
Example #8
0
 def __init__(self, url, protocol, date):
     """Store the URL and date and wrap the raw protocol data.

     :param url: stored as-is on ``self.url``
     :param protocol: raw data passed to ``PlenumProtocolFile.get_from_data``;
         the returned object is stored on ``self.protocol``
     :param date: stored as-is on ``self.date``
     """
     self.url = url
     # NOTE(review): other code in this file uses the result of
     # get_from_data as a context manager; here it is stored without being
     # entered -- confirm who is responsible for cleanup.
     protocol_file = PlenumProtocolFile.get_from_data(protocol)
     self.protocol = protocol_file
     self.date = date