def test_from_data(self):
    """Parse a plenum protocol from raw bytes and compare the extracted fields.

    Reads the ``20_ptm_381742.doc`` fixture located next to this module,
    passes its bytes to ``PlenumProtocolFile.get_from_data`` and checks that
    ``self._get_protocol_data`` returns exactly ``expected_data``.
    """
    fixture_path = os.path.join(os.path.dirname(__file__), '20_ptm_381742.doc')
    # Read inside a ``with`` block so the fixture handle is closed instead of
    # being left for the garbage collector.
    with myopen(fixture_path, 'rb') as fixture:
        data = fixture.read()
    with PlenumProtocolFile.get_from_data(data) as protocol:
        # The expected 'booklet_num' value is an error message whose wording
        # differs between Python 2 and Python 3.
        # NOTE(review): presumably _get_protocol_data records the exception
        # text for attributes that fail to parse - confirm against the helper.
        if six.PY2:
            expected_exception = "'NoneType' object has no attribute 'decode'"
        else:
            expected_exception = "'NoneType' object is not iterable"
        expected_data = {
            'knesset_num_heb': 'עשרים',
            'meeting_num_heb': 'מאתיים-ותשע-עשרה',
            "booklet_num_heb": None,
            'booklet_meeting_num_heb': 'רי"ט',
            'date_string_heb': ('21', 'מרס', '2017'),
            'time_string': ('16', '00'),
            'datetime': datetime(2017, 3, 21, 16, 0),
            "knesset_num": 20,
            'booklet_num': expected_exception,
            "booklet_meeting_num": 219
        }
        actual_data = self._get_protocol_data(protocol, expected_data)
        self.assertEqual(actual_data, expected_data)
def __init__(self, name, parent_datapackage_path):
    """Build the resource schema and initialise the parent resource.

    The schema is the merge of the ``PlenumMeetings`` table schema, the
    ``PlenumProtocolFile`` table schema and three additional string fields
    (``protocol_original``, ``protocol_antiword_text``, ``scraper_errors``).
    The two protocol fields are passed to the parent as ``file_fields``.
    An empty ``plenum_errors`` list is stored on the descriptor afterwards.
    """
    self._meeting_schema = PlenumMeetings.get_json_table_schema()
    self._protocol_schema = PlenumProtocolFile.get_json_table_schema()
    # Fields added on top of the meeting and protocol schemas.
    extra_fields = [
        {
            "name": "protocol_original",
            "type": "string",
            "description": "original file (without processing), in case of error will be empty",
        },
        {
            "name": "protocol_antiword_text",
            "type": "string",
            "description": "text after antiword processing, in case of error will be empty",
        },
        {
            "name": "scraper_errors",
            "type": "string",
            "description": "comma separated list of errors encountered",
        },
    ]
    combined_schema = merge_table_schemas(
        self._meeting_schema,
        self._protocol_schema,
        {"fields": extra_fields},
    )
    file_field_names = ["protocol_original", "protocol_antiword_text"]
    super(PlenumMeetingsResource, self).__init__(
        name,
        parent_datapackage_path,
        combined_schema,
        file_fields=file_field_names,
    )
    self.descriptor["plenum_errors"] = []
def test_from_file(self):
    """Open the ``20_ptm_318579.doc`` fixture and parse it via ``get_from_file``.

    The resulting protocol object is checked with
    ``plenum_protocol_assertions``.
    """
    doc_path = os.path.join(os.path.dirname(__file__), '20_ptm_318579.doc')
    # A .doc file is binary: opening it in text mode would decode the bytes
    # on Python 3 and translate newlines on Windows, so use 'rb'.
    with open(doc_path, 'rb') as f:
        with PlenumProtocolFile.get_from_file(f) as protocol:
            plenum_protocol_assertions(self, protocol)
def test_from_url(self):
    """Fetch a protocol through ``get_from_url`` and run ``plenum_protocol_assertions`` on it."""
    protocol_url = 'http://fs.knesset.gov.il/20/Plenum/20_ptm_318579.doc'
    with PlenumProtocolFile.get_from_url(protocol_url) as protocol:
        plenum_protocol_assertions(self, protocol)
def test_from_file(self):
    """Parse the ``20_ptm_318579.doc`` fixture by filename and run ``plenum_protocol_assertions`` on it."""
    # The fixture lives in the same directory as this module.
    fixture_path = os.path.join(os.path.dirname(__file__), '20_ptm_318579.doc')
    with PlenumProtocolFile.get_from_filename(fixture_path) as protocol:
        plenum_protocol_assertions(self, protocol)
def test_from_content(self):
    """Read the ``20_ptm_318579.doc`` fixture and parse its content via ``get_from_data``.

    The resulting protocol object is checked with
    ``plenum_protocol_assertions``.
    """
    doc_path = os.path.join(os.path.dirname(__file__), '20_ptm_318579.doc')
    # A .doc file is binary: reading it in text mode would try to decode the
    # bytes on Python 3 and translate newlines on Windows, so use 'rb'.
    with open(doc_path, 'rb') as f:
        content = f.read()
    # The content is fully read, so the file can be closed before parsing.
    with PlenumProtocolFile.get_from_data(content) as protocol:
        plenum_protocol_assertions(self, protocol)
def test_from_url(self):
    """Load a protocol from its URL with ``get_from_url`` and run ``plenum_protocol_assertions`` on it."""
    source_url = 'http://fs.knesset.gov.il/20/Plenum/20_ptm_318579.doc'
    protocol_context = PlenumProtocolFile.get_from_url(source_url)
    with protocol_context as protocol:
        plenum_protocol_assertions(self, protocol)
def __init__(self, url, protocol, date):
    """Store ``url`` and ``date`` and wrap ``protocol`` with ``PlenumProtocolFile``.

    ``protocol`` is passed to ``PlenumProtocolFile.get_from_data``; whatever
    that call returns is kept in ``self.protocol``.
    """
    self.url = url
    # NOTE(review): the result of get_from_data is stored as-is, not entered
    # here - presumably callers use it in a ``with`` block; confirm.
    protocol_file = PlenumProtocolFile.get_from_data(protocol)
    self.protocol = protocol_file
    self.date = date