def test_get_dmi_documento_militar_de_indentidad(self):
    """get_dni must split 'DMI : 803802300' into ('DMI', '803802300')."""
    raw_document = 'DMI : 803802300'
    parsed = get_dni(raw_document)
    self.assertEqual(('DMI', '803802300'), parsed)
def test_get_dni_from_dni_and_number(self):
    """get_dni must split 'DNI 08675405' into ('DNI', '08675405')."""
    raw_document = 'DNI 08675405'
    parsed = get_dni(raw_document)
    self.assertEqual(('DNI', '08675405'), parsed)
def test_get_dni_from_cedula_and_number(self):
    """get_dni must keep a multi-word document type separate from its number."""
    document_type = 'CEDULA DIPLOMATICA DE IDENTIDAD'
    document_number = 'CS469187'
    # The input is the type and the number joined by a single space.
    raw_document = 'CEDULA DIPLOMATICA DE IDENTIDAD CS469187'
    parsed = get_dni(raw_document)
    self.assertEqual((document_type, document_number), parsed)
def parse(self, response):
    """Save the raw page to disk and yield one ManoloItem per visitor row.

    The response body is dumped to ``page_<YYYY-MM-DD>_.html`` (a relative
    path), named after ``response.meta['date']``.  Every ``<tr>`` holding
    more than six ``td`` text nodes is then turned into an item.  The ``td``
    index (0-based) each field is read from depends on that date:

    * before 2008-05-29: name, document type, document number, reason and
      host from cells 2-6, start time from cell 1, end time from cell 8;
    * from 2008-05-29 up to (not including) 2014-08-01: the same, except
      that the end time is read from cell 7;
    * from 2014-08-01 on: name from cell 1, a combined document cell 2
      that is split by ``utils.get_dni``, then entity, reason, host,
      office, start time and end time from cells 3-8.

    A cell that is missing or has no text node yields an empty string.

    Args:
        response: page response; ``response.meta['date']`` must hold the
            date the page refers to.

    Yields:
        One item per qualifying row, with ``institution`` and ``date``
        set, after being passed through ``utils.make_hash``.
    """
    this_date = response.meta['date']

    # Scrapy exposes the body as bytes, so the dump must be opened in binary
    # mode: a text-mode handle raises TypeError on Python 3 and may rewrite
    # line endings on Python 2.
    dump_path = "page_" + this_date.strftime("%Y-%m-%d") + "_.html"
    with open(dump_path, "wb") as handle:
        handle.write(response.body)

    def cell_text(cells, index):
        """Return the first text node of ``cells[index]``, or '' if absent."""
        try:
            return cells[index].xpath('text()').extract()[0]
        except IndexError:
            return ''

    # (field, td index) pairs shared by both pre-2014-08-01 layouts; only
    # the position of the end time differs between them.
    legacy_columns = (
        ('full_name', 2),
        ('id_document', 3),
        ('id_number', 4),
        ('reason', 5),
        ('host_name', 6),
        ('time_start', 1),
    )
    # Fields that follow the combined document cell in the newest layout.
    current_columns = (
        ('entity', 3),
        ('reason', 4),
        ('host_name', 5),
        ('office', 6),
        ('time_start', 7),
        ('time_end', 8),
    )

    for row in response.xpath('//tr'):
        # Only rows with more than six text cells are treated as records.
        if len(row.xpath('td/text()').extract()) <= 6:
            continue

        cells = row.xpath('td')
        item = ManoloItem()

        if this_date < datetime.date(2014, 8, 1):
            for field, index in legacy_columns:
                item[field] = cell_text(cells, index)
            if this_date < datetime.date(2008, 5, 29):
                end_index = 8
            else:
                end_index = 7
            item['time_end'] = cell_text(cells, end_index)
        else:
            item['full_name'] = cell_text(cells, 1)
            # Document type and number share one cell here; get_dni is only
            # called when that cell actually has text.
            try:
                raw_document = cells[2].xpath('text()').extract()[0]
                item['id_document'], item['id_number'] = utils.get_dni(
                    raw_document)
            except IndexError:
                item['id_document'] = ''
                item['id_number'] = ''
            for field, index in current_columns:
                item[field] = cell_text(cells, index)

        item['institution'] = 'Trib.Const.'
        item['date'] = this_date
        yield utils.make_hash(item)
def test_get_dni_from_numbers(self):
    """A bare number must be reported by get_dni with the 'DNI' type."""
    bare_number = '47174595'
    parsed = get_dni(bare_number)
    self.assertEqual(('DNI', '47174595'), parsed)
def test_get_cex(self):
    """get_dni must split 'CEX : 1478-2011' into ('CEX', '1478-2011')."""
    raw_document = 'CEX : 1478-2011'
    parsed = get_dni(raw_document)
    self.assertEqual(('CEX', '1478-2011'), parsed)
def test_get_passport(self):
    """get_dni must split 'PSP : AI0386093' into ('PSP', 'AI0386093')."""
    raw_document = 'PSP : AI0386093'
    parsed = get_dni(raw_document)
    self.assertEqual(('PSP', 'AI0386093'), parsed)
def test_get_otr(self):
    """get_dni must split 'OTR : 0196307704' into ('OTR', '0196307704')."""
    raw_document = 'OTR : 0196307704'
    parsed = get_dni(raw_document)
    self.assertEqual(('OTR', '0196307704'), parsed)
def test_get_dmi_with_dash(self):
    """A dash inside the number ('O-95505') must survive get_dni intact."""
    raw_document = 'DMI : O-95505'
    parsed = get_dni(raw_document)
    self.assertEqual(('DMI', 'O-95505'), parsed)
def test_get_cdi(self):
    """get_dni must split 'CDI : EA814654' into ('CDI', 'EA814654')."""
    raw_document = 'CDI : EA814654'
    parsed = get_dni(raw_document)
    self.assertEqual(('CDI', 'EA814654'), parsed)
def test_get_brevete(self):
    """A non-numeric value ('BREVETE') must still be returned as the number."""
    raw_document = 'ATG : BREVETE'
    parsed = get_dni(raw_document)
    self.assertEqual(('ATG', 'BREVETE'), parsed)
def parse(self, response):
    """Save the raw page to disk and yield one ManoloItem per visitor row.

    The response body is dumped to ``page_<YYYY-MM-DD>_.html`` (a relative
    path), named after ``response.meta['date']``.  Every ``<tr>`` holding
    more than six ``td`` text nodes is then turned into an item.  The ``td``
    index (0-based) each field is read from depends on that date:

    * before 2008-05-29: name, document type, document number, reason and
      host from cells 2-6, start time from cell 1, end time from cell 8;
    * from 2008-05-29 up to (not including) 2014-08-01: the same, except
      that the end time is read from cell 7;
    * from 2014-08-01 on: name from cell 1, a combined document cell 2
      that is split by ``utils.get_dni``, then entity, reason, host,
      office, start time and end time from cells 3-8.

    A cell that is missing or has no text node yields an empty string.

    Args:
        response: page response; ``response.meta['date']`` must hold the
            date the page refers to.

    Yields:
        One item per qualifying row, with ``institution`` and ``date``
        set, after being passed through ``utils.make_hash``.
    """
    this_date = response.meta['date']

    # Scrapy exposes the body as bytes, so the dump must be opened in binary
    # mode: a text-mode handle raises TypeError on Python 3 and may rewrite
    # line endings on Python 2.
    dump_path = "page_" + this_date.strftime("%Y-%m-%d") + "_.html"
    with open(dump_path, "wb") as handle:
        handle.write(response.body)

    def cell_text(cells, index):
        """Return the first text node of ``cells[index]``, or '' if absent."""
        try:
            return cells[index].xpath('text()').extract()[0]
        except IndexError:
            return ''

    # (field, td index) pairs shared by both pre-2014-08-01 layouts; only
    # the position of the end time differs between them.
    legacy_columns = (
        ('full_name', 2),
        ('id_document', 3),
        ('id_number', 4),
        ('reason', 5),
        ('host_name', 6),
        ('time_start', 1),
    )
    # Fields that follow the combined document cell in the newest layout.
    current_columns = (
        ('entity', 3),
        ('reason', 4),
        ('host_name', 5),
        ('office', 6),
        ('time_start', 7),
        ('time_end', 8),
    )

    for row in response.xpath('//tr'):
        # Only rows with more than six text cells are treated as records.
        if len(row.xpath('td/text()').extract()) <= 6:
            continue

        cells = row.xpath('td')
        item = ManoloItem()

        if this_date < datetime.date(2014, 8, 1):
            for field, index in legacy_columns:
                item[field] = cell_text(cells, index)
            if this_date < datetime.date(2008, 5, 29):
                end_index = 8
            else:
                end_index = 7
            item['time_end'] = cell_text(cells, end_index)
        else:
            item['full_name'] = cell_text(cells, 1)
            # Document type and number share one cell here; get_dni is only
            # called when that cell actually has text.
            try:
                raw_document = cells[2].xpath('text()').extract()[0]
                item['id_document'], item['id_number'] = utils.get_dni(
                    raw_document)
            except IndexError:
                item['id_document'] = ''
                item['id_number'] = ''
            for field, index in current_columns:
                item[field] = cell_text(cells, index)

        item['institution'] = 'Trib.Const.'
        item['date'] = this_date
        yield utils.make_hash(item)