Beispiel #1
0
 def test_hash_missing_id_number(self):
     """Hash a visit record that has no ``id_number`` key.

     The ``sha1`` entry of the value returned by ``make_hash`` must equal
     the digest recorded below.
     """
     visit = {
         'date': '2015-08-11',
         'entity': u'SCOTIANK BANCK',
         'full_name': u'DOMINGUEZ OCAÑA, SANDRA BEATRIZ',
         'host_name': u'AGUADO ALFARO, JOSE ALBERTO',
         'id_document': u'DNI/LE',
         'institution': u'congreso',
         'location': '',
         'meeting_place': '',
         'office': u'ADMINISTRACION LUIS ALBERTO SANCHEZ - FERNANDO BELAUNDE TERRY',
         'reason': u'MANTENIMIENTO PROGRAMADO',
         'time_end': u'15:06',
         'time_start': u'09:04',
         'title': u'ADMINISTRADOR DE EDIFICIOS',
     }
     hashed = make_hash(visit)
     self.assertEqual(
         'daf54933e2164e0c2da44ea0fc2b66dce011ecc1',
         hashed['sha1'],
     )
 def test_correct_hash_sha1_for_legacy_data(self):
     """Check the ``sha1`` produced for a 2012 'minem' record.

     The record has an empty ``time_end``; the digest returned by
     ``make_hash`` must match the stored value.
     """
     legacy_visit = {
         'date': '2012-01-04',
         'entity': u'PARTICULAR',
         'full_name': u'BUENO NINAHUANCA, JHON BILL',
         'host_name': u'RIVAS CIFUENTES, BENJAMIN DIONISIO',
         'id_document': u'DNI',
         'id_number': u'40748332',
         'institution': u'minem',
         'location': '',
         'meeting_place': '',
         'office': u'DGER - SALA DEP',
         'reason': u'CONSULTA CIUDADANA',
         'time_start': u'08:39',
         'time_end': u'',
         'title': u'Especialista I',
     }
     hashed = make_hash(legacy_visit)
     self.assertEqual(
         '09dc4688afd00bb9ba60e69a4d1369b09dc261cf',
         hashed['sha1'],
     )
Beispiel #3
0
 def test_correct_hash_sha1_for_legacy_data(self):
     """Check the ``sha1`` produced for a 2008 'produce' record.

     The record carries second-resolution times and an empty
     ``objective``; the digest from ``make_hash`` must match the stored one.
     """
     legacy_visit = {
         'full_name': u'LAVERIAN HERRERA, EFRAIN',
         'entity': '',
         'meeting_place': '',
         'office': u'OFICINA DE LOGISTICA',
         'host_name': u'URDANEGUI CABREJOS, FABRIZIO MARIO RAUL',
         'reason': u'DEJAR DOCUMENTO',
         'institution': u'produce',
         'location': '',
         'id_number': u'32613418',
         'id_document': u'DNI',
         'date': u'2008-01-02',
         'time_start': u'16:16:51',
         'time_end': u'17:15:31',
         'objective': '',
     }
     hashed = make_hash(legacy_visit)
     self.assertEqual(
         'af716f0ed4aa8e3d3f4e1b05908c30f02f3e74fa',
         hashed['sha1'],
     )
Beispiel #4
0
 def test_hash_using_complete_data(self):
     """Hash a visit record that includes ``id_number``.

     The ``sha1`` entry returned by ``make_hash`` must equal the digest
     recorded below.
     """
     visit = {
         'date': '2015-08-11',
         'entity': u'SCOTIANK BANCK',
         'full_name': u'DOMINGUEZ OCAÑA, SANDRA BEATRIZ',
         'host_name': u'AGUADO ALFARO, JOSE ALBERTO',
         'id_document': u'DNI/LE',
         'id_number': u'10153798',
         'institution': u'congreso',
         'location': '',
         'meeting_place': '',
         'office': u'ADMINISTRACION LUIS ALBERTO SANCHEZ - FERNANDO BELAUNDE TERRY',
         'reason': u'MANTENIMIENTO PROGRAMADO',
         'time_end': u'15:06',
         'time_start': u'09:04',
         'title': u'ADMINISTRADOR DE EDIFICIOS',
     }
     hashed = make_hash(visit)
     self.assertEqual(
         '4784d22af48c79154d69b4dd4c1562b8f3a7d182',
         hashed['sha1'],
     )
 def test_correct_hash_sha1_for_legacy_data(self):
     """Check the ``sha1`` produced for a 2013 'defensa' record.

     ``entity``, ``office`` and ``title`` are empty strings; the digest
     from ``make_hash`` must match the stored value.
     """
     legacy_visit = {
         'date': '2013-10-24',
         'entity': u'',
         'full_name': u'JUAN PONCE VILLARROEL',
         'host_name': u'FERNANDO NOBLECILLA ZUÑIGA',
         'id_document': u'DNI',
         'id_number': u'08882615',
         'institution': u'defensa',
         'location': '',
         'meeting_place': '',
         'office': u'',
         'reason': u'VISITA PERSONAL',
         'time_start': u'17:28',
         'time_end': u'18:00',
         'title': u'',
     }
     hashed = make_hash(legacy_visit)
     self.assertEqual(
         'dd3e23e4a1b146e250f759666bd0cfdcf0c3db8d',
         hashed['sha1'],
     )
 def test_correct_hash_sha1_for_legacy_data(self):
     """Check the ``sha1`` produced for a 2008 'produce' record.

     The record carries second-resolution times and an empty
     ``objective``; the digest from ``make_hash`` must match the stored one.
     """
     legacy_visit = {
         'full_name': u'LAVERIAN HERRERA, EFRAIN',
         'entity': '',
         'meeting_place': '',
         'office': u'OFICINA DE LOGISTICA',
         'host_name': u'URDANEGUI CABREJOS, FABRIZIO MARIO RAUL',
         'reason': u'DEJAR DOCUMENTO',
         'institution': u'produce',
         'location': '',
         'id_number': u'32613418',
         'id_document': u'DNI',
         'date': u'2008-01-02',
         'time_start': u'16:16:51',
         'time_end': u'17:15:31',
         'objective': '',
     }
     hashed = make_hash(legacy_visit)
     self.assertEqual(
         'af716f0ed4aa8e3d3f4e1b05908c30f02f3e74fa',
         hashed['sha1'],
     )
 def test_correct_hash_sha1_for_legacy_data(self):
     """Check the ``sha1`` produced for a 2012 'minem' record.

     The record has an empty ``time_end``; the digest returned by
     ``make_hash`` must match the stored value.
     """
     legacy_visit = {
         'date': '2012-01-04',
         'entity': u'PARTICULAR',
         'full_name': u'BUENO NINAHUANCA, JHON BILL',
         'host_name': u'RIVAS CIFUENTES, BENJAMIN DIONISIO',
         'id_document': u'DNI',
         'id_number': u'40748332',
         'institution': u'minem',
         'location': '',
         'meeting_place': '',
         'office': u'DGER - SALA DEP',
         'reason': u'CONSULTA CIUDADANA',
         'time_start': u'08:39',
         'time_end': u'',
         'title': u'Especialista I',
     }
     hashed = make_hash(legacy_visit)
     self.assertEqual(
         '09dc4688afd00bb9ba60e69a4d1369b09dc261cf',
         hashed['sha1'],
     )
 def test_correct_hash_sha1_for_legacy_data(self):
     """Check the ``sha1`` produced for a 2013 'defensa' record.

     ``entity``, ``office`` and ``title`` are empty strings; the digest
     from ``make_hash`` must match the stored value.
     """
     legacy_visit = {
         'date': '2013-10-24',
         'entity': u'',
         'full_name': u'JUAN PONCE VILLARROEL',
         'host_name': u'FERNANDO NOBLECILLA ZUÑIGA',
         'id_document': u'DNI',
         'id_number': u'08882615',
         'institution': u'defensa',
         'location': '',
         'meeting_place': '',
         'office': u'',
         'reason': u'VISITA PERSONAL',
         'time_start': u'17:28',
         'time_end': u'18:00',
         'title': u'',
     }
     hashed = make_hash(legacy_visit)
     self.assertEqual(
         'dd3e23e4a1b146e250f759666bd0cfdcf0c3db8d',
         hashed['sha1'],
     )
 def test_hash_missing_id_number(self):
     """Hash a visit record that has no ``id_number`` key.

     The ``sha1`` entry of the value returned by ``make_hash`` must equal
     the digest recorded below.
     """
     visit = {
         'date': '2015-08-11',
         'entity': u'SCOTIANK BANCK',
         'full_name': u'DOMINGUEZ OCAÑA, SANDRA BEATRIZ',
         'host_name': u'AGUADO ALFARO, JOSE ALBERTO',
         'id_document': u'DNI/LE',
         'institution': u'congreso',
         'location': '',
         'meeting_place': '',
         'office': u'ADMINISTRACION LUIS ALBERTO SANCHEZ - FERNANDO BELAUNDE TERRY',
         'reason': u'MANTENIMIENTO PROGRAMADO',
         'time_end': u'15:06',
         'time_start': u'09:04',
         'title': u'ADMINISTRADOR DE EDIFICIOS',
     }
     hashed = make_hash(visit)
     self.assertEqual(
         'daf54933e2164e0c2da44ea0fc2b66dce011ecc1',
         hashed['sha1'],
     )
Beispiel #10
0
 def test_hash_using_complete_data(self):
     """Hash a visit record that includes ``id_number``.

     The ``sha1`` entry returned by ``make_hash`` must equal the digest
     recorded below.
     """
     visit = {
         'date': '2015-08-11',
         'entity': u'SCOTIANK BANCK',
         'full_name': u'DOMINGUEZ OCAÑA, SANDRA BEATRIZ',
         'host_name': u'AGUADO ALFARO, JOSE ALBERTO',
         'id_document': u'DNI/LE',
         'id_number': u'10153798',
         'institution': u'congreso',
         'location': '',
         'meeting_place': '',
         'office': u'ADMINISTRACION LUIS ALBERTO SANCHEZ - FERNANDO BELAUNDE TERRY',
         'reason': u'MANTENIMIENTO PROGRAMADO',
         'time_end': u'15:06',
         'time_start': u'09:04',
         'title': u'ADMINISTRADOR DE EDIFICIOS',
     }
     hashed = make_hash(visit)
     self.assertEqual(
         '4784d22af48c79154d69b4dd4c1562b8f3a7d182',
         hashed['sha1'],
     )
 def test_correct_hash_sha1_for_legacy_data(self):
     """Check the ``sha1`` produced for a 2012 'min. mujer' record.

     ``reason``, ``objective``, ``num_visit`` and ``title`` are empty; the
     digest from ``make_hash`` must match the stored value.
     """
     legacy_visit = {
         'full_name': u'FIGUEROA BERMUDEZ FRANKLIN',
         'entity': u'SCOTIABANK',
         'meeting_place': '',
         'office': u'DESPACHO VICE - MINISTERIAL DE LA MUJER',
         'host_name': u'CENTRO DOCUMENTARIO',
         'reason': '',
         'institution': u'min. mujer',
         'location': '',
         'id_number': u'42982496',
         'id_document': u'DNI',
         'date': u'2012-02-29',
         'time_start': u'12:33',
         'time_end': u'13:18',
         'objective': '',
         'num_visit': '',
         'title': '',
     }
     hashed = make_hash(legacy_visit)
     self.assertEqual(
         '50aa11295b04317f97e6e27dcb965c50d3e78a3b',
         hashed['sha1'],
     )
 def test_correct_hash_sha1_for_legacy_data(self):
     """Check the ``sha1`` produced for a 2012 'min. mujer' record.

     ``reason``, ``objective``, ``num_visit`` and ``title`` are empty; the
     digest from ``make_hash`` must match the stored value.
     """
     legacy_visit = {
         'full_name': u'FIGUEROA BERMUDEZ FRANKLIN',
         'entity': u'SCOTIABANK',
         'meeting_place': '',
         'office': u'DESPACHO VICE - MINISTERIAL DE LA MUJER',
         'host_name': u'CENTRO DOCUMENTARIO',
         'reason': '',
         'institution': u'min. mujer',
         'location': '',
         'id_number': u'42982496',
         'id_document': u'DNI',
         'date': u'2012-02-29',
         'time_start': u'12:33',
         'time_end': u'13:18',
         'objective': '',
         'num_visit': '',
         'title': '',
     }
     hashed = make_hash(legacy_visit)
     self.assertEqual(
         '50aa11295b04317f97e6e27dcb965c50d3e78a3b',
         hashed['sha1'],
     )
Beispiel #13
0
 def test_correct_hash_sha1_for_legacy_data(self):
     """Check the ``sha1`` produced for a 2011 'inpe' record.

     The record has a non-empty ``objective`` and empty ``num_visit`` and
     ``title``; the digest from ``make_hash`` must match the stored value.
     """
     legacy_visit = {
         'full_name': u'Victor Alberto Menacho Aguinaga',
         'entity': u'E.P.Ancon 1',
         'meeting_place': '',
         'office': u'Unidad De Recursos Humanos',
         'host_name': u'WENDY DIANA HINOSTROZA HUARANGA',
         'reason': u'DOCUMENTACION',
         'institution': u'inpe',
         'location': '',
         'id_number': u'42000454',
         'id_document': u'DNI/LE',
         'date': u'2011-08-01',
         'time_start': u'11:35',
         'time_end': u'11:49',
         'objective': u'Documentacion',
         'num_visit': '',
         'title': '',
     }
     hashed = make_hash(legacy_visit)
     self.assertEqual(
         'cad139c1cc501911d881edc6587f0ce887c2d6ce',
         hashed['sha1'],
     )
Beispiel #14
0
    def parse(self, response):
        """Save the fetched page to disk and yield one hashed item per table row.

        The raw body is written to ``page_<YYYY-MM-DD>_.html`` (relative path).
        Each ``<tr>`` with more than six ``td`` text nodes becomes a
        ``ManoloItem``.  The ``td`` index read for each field depends on
        ``response.meta['date']``: one mapping before 2008-05-29, another from
        that day until 2014-07-31, and a third from 2014-08-01 on.  A cell
        that is missing or has no text yields an empty string.

        :param response: response whose ``meta['date']`` is compared against
            ``datetime.date`` values and formatted with ``strftime``.
        :returns: generator of the objects returned by ``utils.make_hash``.
        """
        this_date = response.meta['date']

        # response.body is raw bytes, so the dump has to be opened in binary
        # mode; text mode raises TypeError on Python 3.
        with open("page_" + this_date.strftime("%Y-%m-%d") + "_.html", "wb") as handle:
            handle.write(response.body)

        def cell_text(cells, column):
            # First text node of cells[column]; '' when the cell or its text is absent.
            try:
                return cells[column].xpath('text()').extract()[0]
            except IndexError:
                return ''

        # Marker for the single cell that utils.get_dni splits into two fields.
        dni_fields = ('id_document', 'id_number')

        # (field, td index) pairs, listed in the order the fields are assigned.
        early_columns = (
            ('full_name', 2),
            ('id_document', 3),
            ('id_number', 4),
            ('reason', 5),
            ('host_name', 6),
            ('time_start', 1),
        )
        if this_date < datetime.date(2008, 5, 29):
            columns = early_columns + (('time_end', 8),)
        elif this_date < datetime.date(2014, 8, 1):
            columns = early_columns + (('time_end', 7),)
        else:
            columns = (
                ('full_name', 1),
                (dni_fields, 2),
                ('entity', 3),
                ('reason', 4),
                ('host_name', 5),
                ('office', 6),
                ('time_start', 7),
                ('time_end', 8),
            )

        for row in response.xpath('//tr'):
            if len(row.xpath('td/text()').extract()) <= 6:
                continue

            cells = row.xpath('td')
            item = ManoloItem()
            for field, column in columns:
                if field is dni_fields:
                    # get_dni stays inside the try so an IndexError raised by
                    # it is still mapped to two empty fields.
                    try:
                        raw = cells[column].xpath('text()').extract()[0]
                        item['id_document'], item['id_number'] = utils.get_dni(raw)
                    except IndexError:
                        item['id_document'] = ''
                        item['id_number'] = ''
                else:
                    item[field] = cell_text(cells, column)

            item['institution'] = 'Trib.Const.'
            item['date'] = this_date

            yield utils.make_hash(item)
Beispiel #15
0
    def parse(self, response):
        """Save the fetched page to disk and yield one hashed item per row.

        The raw body is written to ``page_<YYYY-MM-DD>_.html`` (relative
        path).  Each ``<tr>`` with more than six ``td`` text nodes becomes a
        ``ManoloItem``.  The ``td`` index read for each field depends on
        ``response.meta['date']``: one mapping before 2008-05-29, another
        from that day until 2014-07-31, and a third from 2014-08-01 on.  A
        cell that is missing or has no text yields an empty string.

        :param response: response whose ``meta['date']`` is compared against
            ``datetime.date`` values and formatted with ``strftime``.
        :returns: generator of the objects returned by ``utils.make_hash``.
        """
        this_date = response.meta['date']

        # response.body is raw bytes, so the dump has to be opened in binary
        # mode; text mode raises TypeError on Python 3.
        dump_name = "page_" + this_date.strftime("%Y-%m-%d") + "_.html"
        with open(dump_name, "wb") as handle:
            handle.write(response.body)

        def cell_text(cells, column):
            # First text node of cells[column]; '' when the cell or its
            # text is absent.
            try:
                return cells[column].xpath('text()').extract()[0]
            except IndexError:
                return ''

        # Marker for the single cell that utils.get_dni splits in two.
        dni_fields = ('id_document', 'id_number')

        # (field, td index) pairs, listed in the order they are assigned.
        early_columns = (
            ('full_name', 2),
            ('id_document', 3),
            ('id_number', 4),
            ('reason', 5),
            ('host_name', 6),
            ('time_start', 1),
        )
        if this_date < datetime.date(2008, 5, 29):
            columns = early_columns + (('time_end', 8),)
        elif this_date < datetime.date(2014, 8, 1):
            columns = early_columns + (('time_end', 7),)
        else:
            columns = (
                ('full_name', 1),
                (dni_fields, 2),
                ('entity', 3),
                ('reason', 4),
                ('host_name', 5),
                ('office', 6),
                ('time_start', 7),
                ('time_end', 8),
            )

        for row in response.xpath('//tr'):
            if len(row.xpath('td/text()').extract()) <= 6:
                continue

            cells = row.xpath('td')
            item = ManoloItem()
            for field, column in columns:
                if field is dni_fields:
                    # get_dni stays inside the try so an IndexError raised
                    # by it is still mapped to two empty fields.
                    try:
                        raw = cells[column].xpath('text()').extract()[0]
                        item['id_document'], item['id_number'] = (
                            utils.get_dni(raw))
                    except IndexError:
                        item['id_document'] = ''
                        item['id_number'] = ''
                else:
                    item[field] = cell_text(cells, column)

            item['institution'] = 'Trib.Const.'
            item['date'] = this_date

            yield utils.make_hash(item)