Ejemplo n.º 1
0
 def test_get_dmi_documento_militar_de_indentidad(self):
     # A 'DMI : <number>' string must come back as a (type, number) pair.
     raw_text = 'DMI : 803802300'
     wanted = ('DMI', '803802300')
     self.assertEqual(wanted, get_dni(raw_text))
Ejemplo n.º 2
0
 def test_get_dni_from_dni_and_number(self):
     # 'DNI' followed by a number must be split into ('DNI', number).
     raw_text = 'DNI 08675405'
     wanted = ('DNI', '08675405')
     self.assertEqual(wanted, get_dni(raw_text))
Ejemplo n.º 3
0
 def test_get_dni_from_cedula_and_number(self):
     # A multi-word document name must be kept whole, apart from the number.
     raw_text = 'CEDULA DIPLOMATICA DE IDENTIDAD CS469187'
     wanted = ('CEDULA DIPLOMATICA DE IDENTIDAD', 'CS469187')
     self.assertEqual(wanted, get_dni(raw_text))
Ejemplo n.º 4
0
    def parse(self, response):
        """Turn one day's visitor table into ``ManoloItem`` records.

        The raw response body is first dumped to ``page_<YYYY-MM-DD>_.html``
        (a relative path).  Every ``<tr>`` holding more than six ``td`` text
        nodes is then converted into an item.  Column positions depend on
        ``response.meta['date']``; three layouts are handled:

        * before 2008-05-29: name, document, number, reason and host in
          columns 2-6, start time in column 1, end time in column 8;
        * from 2008-05-29 up to (not including) 2014-08-01: the same, but
          the end time is in column 7;
        * from 2014-08-01 on: name in column 1, a combined document/number
          cell in column 2 (split by ``utils.get_dni``), then entity,
          reason, host, office, start time and end time in columns 3-8.

        A missing cell, or a cell without text, yields an empty string for
        that field.

        Yields:
            The result of ``utils.make_hash(item)`` for each accepted row.
        """
        this_date = response.meta['date']

        # ``response.body`` is the raw (bytes) payload, so the dump file has
        # to be opened in binary mode; text mode rejects bytes on Python 3.
        dump_name = "page_" + this_date.strftime("%Y-%m-%d") + "_.html"
        with open(dump_name, "wb") as handle:
            handle.write(response.body)

        def cell_text(cells, index):
            # First text node of cells[index], or '' when the cell or its
            # text is missing.
            try:
                return cells[index].xpath('text()').extract()[0]
            except IndexError:
                return ''

        # The layout depends only on the page date, so decide it once
        # instead of once per row.
        combined_id_layout = this_date >= datetime.date(2014, 8, 1)
        # Only used by the two older layouts: column 8 before 2008-05-29,
        # column 7 afterwards.
        if this_date < datetime.date(2008, 5, 29):
            old_time_end_column = 8
        else:
            old_time_end_column = 7

        for sel in response.xpath('//tr'):
            record = sel.xpath('td/text()').extract()
            if len(record) <= 6:
                # Rows without enough text cells are skipped.
                continue

            cells = sel.xpath('td')
            item = ManoloItem()
            if combined_id_layout:
                item['full_name'] = cell_text(cells, 1)
                try:
                    item['id_document'], item['id_number'] = utils.get_dni(
                        cells[2].xpath('text()').extract()[0])
                except IndexError:
                    item['id_document'] = ''
                    item['id_number'] = ''
                item['entity'] = cell_text(cells, 3)
                item['reason'] = cell_text(cells, 4)
                item['host_name'] = cell_text(cells, 5)
                item['office'] = cell_text(cells, 6)
                item['time_start'] = cell_text(cells, 7)
                item['time_end'] = cell_text(cells, 8)
            else:
                item['full_name'] = cell_text(cells, 2)
                item['id_document'] = cell_text(cells, 3)
                item['id_number'] = cell_text(cells, 4)
                item['reason'] = cell_text(cells, 5)
                item['host_name'] = cell_text(cells, 6)
                item['time_start'] = cell_text(cells, 1)
                item['time_end'] = cell_text(cells, old_time_end_column)

            item['institution'] = 'Trib.Const.'
            item['date'] = this_date

            yield utils.make_hash(item)
Ejemplo n.º 5
0
 def test_get_dni_from_numbers(self):
     # A bare number is expected to be reported with the 'DNI' type.
     raw_text = '47174595'
     wanted = ('DNI', '47174595')
     self.assertEqual(wanted, get_dni(raw_text))
Ejemplo n.º 6
0
 def test_get_cex(self):
     # The dash inside the CEX number must survive the split.
     raw_text = 'CEX : 1478-2011'
     wanted = ('CEX', '1478-2011')
     self.assertEqual(wanted, get_dni(raw_text))
Ejemplo n.º 7
0
 def test_get_passport(self):
     # 'PSP : <id>' must be split into the ('PSP', id) pair.
     raw_text = 'PSP : AI0386093'
     wanted = ('PSP', 'AI0386093')
     self.assertEqual(wanted, get_dni(raw_text))
Ejemplo n.º 8
0
 def test_get_otr(self):
     # 'OTR : <id>' must be split into the ('OTR', id) pair.
     raw_text = 'OTR : 0196307704'
     wanted = ('OTR', '0196307704')
     self.assertEqual(wanted, get_dni(raw_text))
Ejemplo n.º 9
0
 def test_get_cex(self):
     # get_dni must return the ('CEX', '1478-2011') pair for this input.
     parsed = get_dni('CEX : 1478-2011')
     self.assertEqual(('CEX', '1478-2011'), parsed)
Ejemplo n.º 10
0
 def test_get_dmi_documento_militar_de_indentidad(self):
     # get_dni must return the ('DMI', '803802300') pair for this input.
     parsed = get_dni('DMI : 803802300')
     self.assertEqual(('DMI', '803802300'), parsed)
Ejemplo n.º 11
0
 def test_get_dmi_with_dash(self):
     # A DMI number containing a letter and a dash must be kept intact.
     raw_text = 'DMI : O-95505'
     wanted = ('DMI', 'O-95505')
     self.assertEqual(wanted, get_dni(raw_text))
Ejemplo n.º 12
0
 def test_get_dni_from_cedula_and_number(self):
     # The document name spans several words; only the last token is the
     # number.
     parsed = get_dni('CEDULA DIPLOMATICA DE IDENTIDAD CS469187')
     self.assertEqual(('CEDULA DIPLOMATICA DE IDENTIDAD', 'CS469187'), parsed)
Ejemplo n.º 13
0
 def test_get_dni_from_dni_and_number(self):
     # get_dni must return the ('DNI', '08675405') pair for this input.
     parsed = get_dni('DNI 08675405')
     self.assertEqual(('DNI', '08675405'), parsed)
Ejemplo n.º 14
0
 def test_get_dni_from_numbers(self):
     # With no document name in the input, 'DNI' is the expected type.
     parsed = get_dni('47174595')
     self.assertEqual(('DNI', '47174595'), parsed)
Ejemplo n.º 15
0
 def test_get_dmi_with_dash(self):
     # get_dni must return the ('DMI', 'O-95505') pair for this input.
     parsed = get_dni('DMI : O-95505')
     self.assertEqual(('DMI', 'O-95505'), parsed)
Ejemplo n.º 16
0
 def test_get_cdi(self):
     # 'CDI : <id>' must be split into the ('CDI', id) pair.
     raw_text = 'CDI : EA814654'
     wanted = ('CDI', 'EA814654')
     self.assertEqual(wanted, get_dni(raw_text))
Ejemplo n.º 17
0
 def test_get_otr(self):
     # get_dni must return the ('OTR', '0196307704') pair for this input.
     parsed = get_dni('OTR : 0196307704')
     self.assertEqual(('OTR', '0196307704'), parsed)
Ejemplo n.º 18
0
 def test_get_passport(self):
     # get_dni must return the ('PSP', 'AI0386093') pair for this input.
     parsed = get_dni('PSP : AI0386093')
     self.assertEqual(('PSP', 'AI0386093'), parsed)
Ejemplo n.º 19
0
 def test_get_cdi(self):
     # get_dni must return the ('CDI', 'EA814654') pair for this input.
     parsed = get_dni('CDI : EA814654')
     self.assertEqual(('CDI', 'EA814654'), parsed)
Ejemplo n.º 20
0
 def test_get_brevete(self):
     # The value after the colon is a word here, not a number; it must still
     # be returned as the second element.
     raw_text = 'ATG : BREVETE'
     wanted = ('ATG', 'BREVETE')
     self.assertEqual(wanted, get_dni(raw_text))
Ejemplo n.º 21
0
 def test_get_brevete(self):
     # get_dni must return the ('ATG', 'BREVETE') pair for this input.
     parsed = get_dni('ATG : BREVETE')
     self.assertEqual(('ATG', 'BREVETE'), parsed)
Ejemplo n.º 22
0
    def parse(self, response):
        """Turn one day's visitor table into ``ManoloItem`` records.

        The raw response body is first dumped to ``page_<YYYY-MM-DD>_.html``
        (a relative path).  Every ``<tr>`` holding more than six ``td`` text
        nodes is then converted into an item.  Column positions depend on
        ``response.meta['date']``; three layouts are handled:

        * before 2008-05-29: name, document, number, reason and host in
          columns 2-6, start time in column 1, end time in column 8;
        * from 2008-05-29 up to (not including) 2014-08-01: the same, but
          the end time is in column 7;
        * from 2014-08-01 on: name in column 1, a combined document/number
          cell in column 2 (split by ``utils.get_dni``), then entity,
          reason, host, office, start time and end time in columns 3-8.

        A missing cell, or a cell without text, yields an empty string for
        that field.

        Yields:
            The result of ``utils.make_hash(item)`` for each accepted row.
        """
        this_date = response.meta['date']

        # ``response.body`` is the raw (bytes) payload, so the dump file has
        # to be opened in binary mode; text mode rejects bytes on Python 3.
        dump_name = "page_" + this_date.strftime("%Y-%m-%d") + "_.html"
        with open(dump_name, "wb") as handle:
            handle.write(response.body)

        def cell_text(cells, index):
            # First text node of cells[index], or '' when the cell or its
            # text is missing.
            try:
                return cells[index].xpath('text()').extract()[0]
            except IndexError:
                return ''

        # The layout depends only on the page date, so decide it once
        # instead of once per row.
        combined_id_layout = this_date >= datetime.date(2014, 8, 1)
        # Only used by the two older layouts: column 8 before 2008-05-29,
        # column 7 afterwards.
        if this_date < datetime.date(2008, 5, 29):
            old_time_end_column = 8
        else:
            old_time_end_column = 7

        for sel in response.xpath('//tr'):
            record = sel.xpath('td/text()').extract()
            if len(record) <= 6:
                # Rows without enough text cells are skipped.
                continue

            cells = sel.xpath('td')
            item = ManoloItem()
            if combined_id_layout:
                item['full_name'] = cell_text(cells, 1)
                try:
                    item['id_document'], item['id_number'] = utils.get_dni(
                        cells[2].xpath('text()').extract()[0])
                except IndexError:
                    item['id_document'] = ''
                    item['id_number'] = ''
                item['entity'] = cell_text(cells, 3)
                item['reason'] = cell_text(cells, 4)
                item['host_name'] = cell_text(cells, 5)
                item['office'] = cell_text(cells, 6)
                item['time_start'] = cell_text(cells, 7)
                item['time_end'] = cell_text(cells, 8)
            else:
                item['full_name'] = cell_text(cells, 2)
                item['id_document'] = cell_text(cells, 3)
                item['id_number'] = cell_text(cells, 4)
                item['reason'] = cell_text(cells, 5)
                item['host_name'] = cell_text(cells, 6)
                item['time_start'] = cell_text(cells, 1)
                item['time_end'] = cell_text(cells, old_time_end_column)

            item['institution'] = 'Trib.Const.'
            item['date'] = this_date

            yield utils.make_hash(item)