def test_parse_item(self):
        """Check the first two items and the remaining count for the defensa fixture."""
        fixture = os.path.join('data/defensa', '19-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'19/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected_first = (
            ('full_name', u'HORTENCIA VANESSA GONZALES VALDIVIA'),
            ('time_start', u'08:48'),
            ('institution', u'defensa'),
            ('id_document', u'DNI'),
            ('id_number', u'41795231'),
            ('entity', u'CONIDA'),
            ('reason', u'REUNION DE TRABAJO'),
            ('host_name', u'DUPEYRAT LUQUE WOLFGANG CARLOS DOUGLAS'),
            ('time_end', u'09:49'),
            ('date', u'08/11/2017'),
            ('sha1', u'd7d3fc2a9a0f123473817b201dac7e651aee445a'),
        )
        for field, value in expected_first:
            self.assertEqual(first.get(field), value)

        # Expected (field, value) pairs for the second yielded item.
        second = next(records)
        expected_second = (
            ('full_name', u'RIGOBERTO SALAS ASENCIOS'),
            ('institution', u'defensa'),
            ('id_document', u'DNI'),
            ('id_number', u'43847220'),
            ('entity', u'AGENCIA DE COMPRAS DE LAS FF. AA.'),
            ('reason', u'REUNION DE TRABAJO'),
            ('host_name', u'DUPEYRAT LUQUE WOLFGANG CARLOS DOUGLAS'),
            ('time_start', u'09:00'),
            ('time_end', u'12:35'),
            ('date', u'08/11/2017'),
            ('sha1', u'eb0ba95644a1e97f93d2ca8332ecece7007627f2'),
        )
        for field, value in expected_second:
            self.assertEqual(second.get(field), value)

        # Counts only the items left after the two consumed above.
        remaining = len(list(records))
        self.assertEqual(remaining, 8)
    def test_parse_item(self):
        """Check the first two items and the item count for the defensa fixture."""
        fixture = os.path.join('data/defensa', '19-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'19/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected_first = (
            ('full_name', u'AURELIO COREDOR MIRANO'),
            ('time_start', u'08:38'),
            ('institution', u'defensa'),
            ('id_document', u'DNI'),
            ('id_number', u'43447287'),
            ('entity', None),
            ('reason', u'REUNIÓN DE TRABAJO'),
            ('host_name', u'HUGO DAVID MEJIA HUAMAN'),
            ('time_end', None),
            ('date', u'2015-08-19'),
            ('sha1', u'd9f07e3a5effd7f0b9164dfc14822c5395ed3b58'),
        )
        for field, value in expected_first:
            self.assertEqual(first.get(field), value)

        # Expected (field, value) pairs for the second yielded item.
        second = next(records)
        expected_second = (
            ('full_name', u'LUIS ANIBAL OLIVERA SANTA CRUZ'),
            ('institution', u'defensa'),
            ('id_document', u'DNI'),
            ('id_number', u'09392580'),
            ('entity', u'FAP'),
            ('reason', u'REUNIÓN DE TRABAJO'),
            ('host_name', u'JORGE RICARDO TORRES MONTEZA'),
            ('time_start', u'08:44'),
            ('time_end', u'11:49'),
            ('date', u'2015-08-19'),
            ('sha1', u'8d0e1ee7b60b8b2ee9e26d30e708d606a8d06a45'),
        )
        for field, value in expected_second:
            self.assertEqual(second.get(field), value)

        # Items still left in the generator, plus one, must total 13.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 13)
    def test_parse_item(self):
        """Check the first item and the item count for the justicia fixture."""
        fixture = os.path.join('data/justicia', '27-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'27/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'CERON GUTIERREZ, NANCY'),
            ('id_document', u'DNI'),
            ('id_number', u'07862529'),
            ('entity', u'PARTICULAR'),
            ('reason', u'CONSULTA DE EXPEDIENTE DEL REGISTRO UNICO DE VICTIMAS'),
            ('host_name', u'SOTO PEREZ, ADRIEL EDUARDO'),
            ('office', u'CONSEJO DE REPARACIONES'),
            # The meeting_place check is disabled:
            # ('meeting_place', u'POR DEFINIR'),
            ('time_start', u'10:12'),
            ('time_end', u'11:12'),
            ('institution', u'minjus'),
            ('location',
             u'CONSEJO DE REPARACIONES - SECRETARIA TECNICA, NEISER LLACZA ARCE 158, MIRAFLORES'),
            ('date', u'2015-08-27'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 30)
    def test_parse_item(self):
        """Check the first item and the item count for the minem fixture."""
        fixture = os.path.join('data/minem', '19-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'19/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'CARMEN ALICIA GUTIERREZ VELASCO'),
            ('id_document', u'DNI'),
            ('id_number', u'08241251'),
            ('entity', u'ESLOM'),
            ('reason', u'REUNI\xc3\u201cN DE TRABAJO'),
            ('host_name', u'OMAR FRANCO CHAMBERGO RODRIGUEZ'),
            ('office', u'DIRECCION GENERAL DE HIDROCARBUROS-N'),
            ('time_start', u'08:01'),
            ('time_end', u'08:17'),
            ('meeting_place', u'OFICINA DEL FUNCIONARIO'),
            ('institution', u'minem'),
            ('date', u'2015-08-19'),
            ('sha1', u'fa5238c796089a49ed8583ce36457c30e5e58e05'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 20)
Esempio n. 5
0
    def test_parse_item(self):
        """Check the first item and the item count for the pcm fixture."""
        fixture = os.path.join('data/pcm', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('date', u'2015-08-18'),
            ('full_name', u'CUEVA FRANCISCO GUERRA GARCIA'),
            ('id_document', u'DNI'),
            ('id_number', u'09179830'),
            ('entity', u'COMISION DINI'),
            ('reason', u'MOTIVO INSTITUCIONAL'),
            ('location', u'PALACIO'),
            ('host_name', u'Pedro Cateriano Bellido'),
            ('office',
             u'DESPACHO MINISTERIAL [PRESIDENTE DEL CONSEJO DE MINISTROS]'),
            ('meeting_place', u'SALA TELLO'),
            ('time_start', u'19:00'),
            ('time_end', u'19:40'),
            ('institution', u'pcm'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
Esempio n. 6
0
    def test_parse_item(self):
        """Check the first and third items and the item count for the produce fixture."""
        fixture = os.path.join('data/produce', '20-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'20/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected_first = (
            ('full_name', u'MAGUIÑA ROBLES, WILFREDO HERIBERTO'),
            ('time_start', u'09:08:34'),
            ('institution', u'produce'),
            ('id_document', u'DNI'),
            ('id_number', u'32824731'),
            ('office', u'DESPACHO VICEMINISTERIAL DE PESQUERIA'),
            ('reason', u'ENTREVISTA'),
            ('host_name', u'KASTNER URIBE, MONICA CARLOTA'),
            ('time_end', u'09:16:28'),
            ('date', u'2015-08-20'),
            ('sha1', u'1edbaca51007f25bd6bd07b0025bc94309544e3e'),
        )
        for field, value in expected_first:
            self.assertEqual(first.get(field), value)

        # The second yielded item is pulled and discarded; only the third is checked.
        next(records)
        third = next(records)
        expected_third = (
            ('full_name', u'REGALO QUIJANO, WALTER MANUEL'),
            ('time_start', u'09:16:39'),
            ('institution', u'produce'),
            ('id_document', u'DNI'),
            ('id_number', u'08182131'),
            ('office', u'DESPACHO VICEMINISTERIAL DE PESQUERIA'),
            ('reason', u'ENTREVISTA'),
            ('host_name', u'KASTNER URIBE, MONICA CARLOTA'),
            ('time_end', u'11:53:16'),
            ('date', u'2015-08-20'),
            ('sha1', u'd435683995c845a4a947895d3197725e255753ef'),
        )
        for field, value in expected_third:
            self.assertEqual(third.get(field), value)

        # Items still left in the generator, plus one, must total 28.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 28)
    def test_parse_item(self):
        """Check the first item and the item count for the ambiente fixture."""
        fixture = os.path.join('data/ambiente', '19-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'19/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'PATRICIA ITURREGUI BYRNE'),
            ('id_document', u'DNI'),
            ('id_number', u'07231335'),
            ('entity', u'PERSONAL'),
            ('reason', u'OTROS'),
            ('host_name', u'RUPERTO ANDRES TABOADA DELGADO'),
            ('office', u'POR DEFINIR'),
            ('meeting_place', u'POR DEFINIR'),
            ('time_start', u'04:01:20 p.m.'),
            ('time_end', u'09:07:00 a.m.'),
            ('institution', u'ambiente'),
            ('date', u'2015-08-19'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
    def test_parse_item(self):
        """Check the first item and the item count for the minvi fixture."""
        fixture = os.path.join('data/minvi', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'CESAR MAMANI ROMERO'),
            ('time_start', u'18:07'),
            ('institution', u'vivienda'),
            ('id_document', u'DNI'),
            ('id_number', u'10157944'),
            ('entity', u'TRABAJADOR - PARH'),
            ('reason', u'REUNI\xc3\u201cN DE TRABAJO'),
            ('host_name', u'DAYANA FARRO .'),
            ('title', u'P.A.H.R. [OTROS]'),
            ('office', u'PNT'),
            ('time_end', None),
            ('date', u'2015-08-18'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
    def test_parse_item(self):
        """Check the first item and the item count for the osce fixture."""
        fixture = os.path.join('data/osce', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'Silvia Sousa Cristofol'),
            ('time_start', u'16:38'),
            ('institution', u'osce'),
            ('id_document', u'CARNET DE EXTRANJERIA'),
            ('id_number', u'000904735'),
            ('entity', u'everis'),
            ('reason', u'REUNIÓN DE TRABAJO'),
            ('host_name', u'Isabel Rosario Vega Palomino'),
            ('title', u'[Ninguno]'),
            ('office', u'Sala de Espera'),
            ('time_end', None),
            ('date', u'2015-08-18'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
    def test_parse_item(self):
        """Check the first item and the item count for the mujer fixture."""
        fixture = os.path.join('data/mujer', '20-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'20/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'VIGO LOPEZ BETTY CATHERINE'),
            ('time_start', u'07:56 AM'),
            ('institution', u'min. mujer'),
            ('id_document', u'DNI'),
            ('id_number', u'43521527'),
            ('entity', u'SAN MIGUEL'),
            ('reason', None),
            ('host_name', u'PNCVFS ,'),
            ('office', u'PROGRAMA NACIONAL CONTRA LA VIOLENCIA FAMILIAR Y SEXUAL'),
            ('time_end', u'10:42 AM'),
            ('date', u'2015-08-20'),
            ('sha1', u'a00e952857d8c86ab3877ee3805bda686bd3a999'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 20)
    def test_parse_item(self):
        """Check the first item and the item count for the minagr fixture."""
        fixture = os.path.join('data/minagr', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'VICTOR HUGO SEVERINO VALLE'),
            ('time_start', u'17:04'),
            ('institution', u'minagr'),
            ('id_document', u'DNI'),
            ('id_number', u'25856019'),
            ('entity', u'MAKA SAC'),
            ('reason', u'DOCUMENTOS'),
            ('host_name', u'CARLOS ANTONIO LARA PALACIOS'),
            ('title', u'[CONTADOR P\xc3\u0161BLICO PARA ALMAC\xc3\u2030N Y PATRIMONIO]'),
            ('office', u'ALMACEN CENTRAL'),
            ('time_end', u'17:23'),
            ('date', u'2015-08-18'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
    def test_parse_item(self):
        """Check the first item and the item count for the mincu fixture."""
        fixture = os.path.join('data/mincu', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'INGRID BARRIONUEVO ECHEGARAY'),
            ('time_start', u'16:40'),
            ('institution', u'mincu'),
            ('id_document', u'DNI'),
            ('id_number', u'10085172'),
            ('entity', u'PARTICULAR'),
            ('reason', u'REUNIÓN DE TRABAJO'),
            ('host_name', u'JOIZ ELIZABETH DOBLADILLO ORTIZ'),
            ('title', u'[SERVICIOS DE UN ASISTENTE EN COMUNICACIONES]'),
            ('office', u'QHAPAQ ÑAN'),
            ('time_end', u'16:53'),
            ('date', u'2015-08-18'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
    def test_parse_item(self):
        """Check the first item and the item count for the minvi fixture."""
        fixture = os.path.join('data/minvi', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'CESAR MAMANI ROMERO'),
            ('time_start', u'18:07'),
            ('institution', u'vivienda'),
            ('id_document', u'DNI'),
            ('id_number', u'10157944'),
            ('entity', u'TRABAJADOR - PARH'),
            ('reason', u'REUNI\xc3\u201cN DE TRABAJO'),
            ('host_name', u'DAYANA FARRO .'),
            ('title', u'P.A.H.R. [OTROS]'),
            ('office', u'PNT'),
            ('time_end', None),
            ('date', u'2015-08-18'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
    def test_parse_item(self):
        """Check the first item and the item count for the presidencia fixture."""
        fixture = os.path.join('data/presidencia', '31-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'31/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'PAJARES CRIADO SERGIO ADOLFO'),
            ('id_document', u'DNI'),
            ('id_number', u'09272448'),
            ('entity', u'COSAPI DATA.S.A.'),
            ('reason', u'OTROS: PROYECTO DE CABLEADO (SUPERVISAR)'),
            ('host_name', u'CALDERON MONTOYA BLAS'),
            ('office', u'DIRECCION DE OPERACIONES'),
            ('time_start', u'19:04'),
            ('time_end', u'19:53'),
            ('institution', u'presidencia'),
            ('meeting_place', u'DESAMPARADOS'),
            ('date', u'2015-08-31'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 47)
    def test_parse_item(self):
        """Check the first item and the item count for the minsa fixture."""
        fixture = os.path.join('data/minsa', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'MELENDEZ ARISTA GREIDY'),
            ('time_start', u'18:45:09'),
            ('institution', u'minsa'),
            ('id_document', u'DNI'),
            ('id_number', u'41339966'),
            ('entity', u'DIRESA AMAZONAS'),
            ('reason', u'TRAMITE'),
            ('host_name', u'VELASQUEZ VALDIVIA ANIBAL'),
            ('title', u'MINISTRO DE SALUD'),
            ('office', u'DESPACHO MINISTERIAL'),
            ('time_end', None),
            ('date', u'2015-08-18'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 20)
    def test_parse_item(self):
        """Check the first item and the item count for the congreso fixture."""
        fixture = os.path.join('data/congreso', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'ZEVALLOS FLOREZ, CESAR'),
            ('id_document', u'DNI/LE'),
            ('id_number', u'07632139'),
            ('entity', u'EL ROCOTO'),
            ('reason', u'PERSONAL DEL CONCESIONARIO'),
            ('host_name', u'PEREYRA SALAZAR, WALTER'),
            ('office', u'G.F. DE SERVICIO SOCIAL'),
            ('time_start', u'08:15'),
            ('time_end', None),
            ('institution', u'congreso'),
            ('title', u'TECNICO ADMINISTRATIVO'),
            ('date', u'2015-08-18'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 10)
    def test_parse_item(self):
        """Check the first item and the item count for the ambiente fixture."""
        fixture = os.path.join('data/ambiente', '19-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'19/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'PATRICIA ITURREGUI BYRNE'),
            ('id_document', u'DNI'),
            ('id_number', u'07231335'),
            ('entity', u'PERSONAL'),
            ('reason', u'OTROS'),
            ('host_name', u'RUPERTO ANDRES TABOADA DELGADO'),
            ('office', u'POR DEFINIR'),
            ('meeting_place', u'POR DEFINIR'),
            ('time_start', u'04:01:20 p.m.'),
            ('time_end', u'09:07:00 a.m.'),
            ('institution', u'ambiente'),
            ('date', u'2015-08-19'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
Esempio n. 18
0
    def test_parse_item(self):
        """Check the first item and the item count for the pcm fixture."""
        fixture = os.path.join('data/pcm', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('date', u'2015-08-18'),
            ('full_name', u'CUEVA FRANCISCO GUERRA GARCIA'),
            ('id_document', u'DNI'),
            ('id_number', u'09179830'),
            ('entity', u'COMISION DINI'),
            ('reason', u'MOTIVO INSTITUCIONAL'),
            ('location', u'PALACIO'),
            ('host_name', u'Pedro Cateriano Bellido'),
            ('office',
             u'DESPACHO MINISTERIAL [PRESIDENTE DEL CONSEJO DE MINISTROS]'),
            ('meeting_place', u'SALA TELLO'),
            ('time_start', u'19:00'),
            ('time_end', u'19:40'),
            ('institution', u'pcm'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
Esempio n. 19
0
    def test_parse_item(self):
        """Check the first item and the item count for the inpe fixture."""
        fixture = os.path.join('data/inpe', '19-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'19/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'MARIA DEL PILAR LLUEN MACALOPU'),
            ('time_start', u'08:39:00'),
            ('institution', u'inpe'),
            ('id_document', u'DNI/LE'),
            ('id_number', u'17434996'),
            ('entity', u'Particular'),
            ('reason', u'Reunion'),
            ('host_name', u'MILAGROS MAGDALENA MU\xc3\u2018OZ GONZALES'),
            ('title', u'---'),
            ('office', u'Unidad De Recursos Humanos'),
            ('time_end', u'08:54:00'),
            ('date', u'2015-08-19'),
            ('sha1', u'b5e65b73ec827ec50734d748e09e641b2990d679'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 60)
    def test_parse_item(self):
        """Check the first item and the item count for the osce fixture."""
        fixture = os.path.join('data/osce', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'Silvia Sousa Cristofol'),
            ('time_start', u'16:38'),
            ('institution', u'osce'),
            ('id_document', u'CARNET DE EXTRANJERIA'),
            ('id_number', u'000904735'),
            ('entity', u'everis'),
            ('reason', u'REUNIÓN DE TRABAJO'),
            ('host_name', u'Isabel Rosario Vega Palomino'),
            ('title', u'[Ninguno]'),
            ('office', u'Sala de Espera'),
            ('time_end', None),
            ('date', u'2015-08-18'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
    def test_parse_item(self):
        """Check the first two items and the remaining count for the defensa fixture."""
        fixture = os.path.join('data/defensa', '19-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'19/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected_first = (
            ('full_name', u'HORTENCIA VANESSA GONZALES VALDIVIA'),
            ('time_start', u'08:48'),
            ('institution', u'defensa'),
            ('id_document', u'DNI'),
            ('id_number', u'41795231'),
            ('entity', u'CONIDA'),
            ('reason', u'REUNION DE TRABAJO'),
            ('host_name', u'DUPEYRAT LUQUE WOLFGANG CARLOS DOUGLAS'),
            ('time_end', u'09:49'),
            ('date', u'08/11/2017'),
            ('sha1', u'd7d3fc2a9a0f123473817b201dac7e651aee445a'),
        )
        for field, value in expected_first:
            self.assertEqual(first.get(field), value)

        # Expected (field, value) pairs for the second yielded item.
        second = next(records)
        expected_second = (
            ('full_name', u'RIGOBERTO SALAS ASENCIOS'),
            ('institution', u'defensa'),
            ('id_document', u'DNI'),
            ('id_number', u'43847220'),
            ('entity', u'AGENCIA DE COMPRAS DE LAS FF. AA.'),
            ('reason', u'REUNION DE TRABAJO'),
            ('host_name', u'DUPEYRAT LUQUE WOLFGANG CARLOS DOUGLAS'),
            ('time_start', u'09:00'),
            ('time_end', u'12:35'),
            ('date', u'08/11/2017'),
            ('sha1', u'eb0ba95644a1e97f93d2ca8332ecece7007627f2'),
        )
        for field, value in expected_second:
            self.assertEqual(second.get(field), value)

        # Counts only the items left after the two consumed above.
        remaining = len(list(records))
        self.assertEqual(remaining, 8)
    def test_parse_item(self):
        """Check the first item and the item count for the minem fixture."""
        fixture = os.path.join('data/minem', '19-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'19/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'CARMEN ALICIA GUTIERREZ VELASCO'),
            ('id_document', u'DNI'),
            ('id_number', u'08241251'),
            ('entity', u'ESLOM'),
            ('reason', u'REUNI\xc3\u201cN DE TRABAJO'),
            ('host_name', u'OMAR FRANCO CHAMBERGO RODRIGUEZ'),
            ('office', u'DIRECCION GENERAL DE HIDROCARBUROS-N'),
            ('time_start', u'08:01'),
            ('time_end', u'08:17'),
            ('meeting_place', u'OFICINA DEL FUNCIONARIO'),
            ('institution', u'minem'),
            ('date', u'2015-08-19'),
            ('sha1', u'fa5238c796089a49ed8583ce36457c30e5e58e05'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 20)
Esempio n. 23
0
    def test_parse_item(self):
        """Check the first item and the item count for the minedu fixture."""
        fixture = os.path.join('data/minedu', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'CARLOS MANUEL RIVERA BARDALES'),
            ('time_start', u'17:19'),
            ('institution', u'minedu'),
            ('id_document', u'DNI'),
            ('id_number', u'43531636'),
            ('entity', u'PARTICULAR'),
            ('reason', u'MOTIVO INSTITUCIONAL'),
            ('host_name', u'BERTHA ANGELA BANICH ALLEON'),
            ('title', u'[SECRETARIA / O IV]'),
            ('office', u'EDIFICIO L PISO 02'),
            ('time_end', None),
            ('date', u'2015-08-18'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 15)
Esempio n. 24
0
    def test_parse_item(self):
        """Check the first item and the item count for the minsa fixture."""
        fixture = os.path.join('data/minsa', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'MELENDEZ ARISTA GREIDY'),
            ('time_start', u'18:45:09'),
            ('institution', u'minsa'),
            ('id_document', u'DNI'),
            ('id_number', u'41339966'),
            ('entity', u'DIRESA AMAZONAS'),
            ('reason', u'TRAMITE'),
            ('host_name', u'VELASQUEZ VALDIVIA ANIBAL'),
            ('title', u'MINISTRO DE SALUD'),
            ('office', u'DESPACHO MINISTERIAL'),
            ('time_end', None),
            ('date', u'2015-08-18'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 20)
    def test_parse_item(self):
        """Check the first item and the item count for the presidencia fixture."""
        fixture = os.path.join('data/presidencia', '31-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'31/08/2015'})
        records = self.spider.parse(response)

        # Expected (field, value) pairs for the first yielded item, in check order.
        first = next(records)
        expected = (
            ('full_name', u'RICCE CHUMBE WALTER HUMBERTO'),
            ('id_document', u'DNI'),
            ('id_number', u'25424360'),
            ('entity', u'MINISTERIO DE AMBIENTE'),
            ('reason', u'REUNION DE TRABAJO'),
            ('host_name', u'MONTALVA DE FALLA JOSE'),
            ('office', u'SUBSECRETARIA GENERAL'),
            ('title', u'DIRECTOR GENERAL'),
            ('time_start', u'10:50'),
            ('time_end', u'10:54'),
            ('institution', u'presidencia'),
            ('meeting_place', u'EDIFICIO PALACIO'),
            ('date', u'2015-08-31'),
        )
        for field, value in expected:
            self.assertEqual(first.get(field), value)

        # The first item was consumed above, so add it back to the remainder.
        remaining = len(list(records))
        self.assertEqual(1 + remaining, 12)
    def test_parse_item(self):
        """Check the first parsed item and the item count for justicia.

        Feeds the stored ``27-08-2015.html`` fixture to ``self.spider.parse``
        and compares each listed field of the first yielded item against
        its expected value, then checks how many items were yielded.
        """
        fixture = os.path.join('data/justicia', '27-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'27/08/2015'})
        visits = self.spider.parse(response)

        first_visit = next(visits)
        # (field, expected value) pairs, checked in this order.
        # The 'meeting_place' check (u'POR DEFINIR') is currently disabled.
        expected_fields = (
            ('full_name', u'CERON GUTIERREZ, NANCY'),
            ('id_document', u'DNI'),
            ('id_number', u'07862529'),
            ('entity', u'PARTICULAR'),
            ('reason', u'CONSULTA DE EXPEDIENTE DEL REGISTRO UNICO DE VICTIMAS'),
            ('host_name', u'SOTO PEREZ, ADRIEL EDUARDO'),
            ('office', u'CONSEJO DE REPARACIONES'),
            ('time_start', u'10:12'),
            ('time_end', u'11:12'),
            ('institution', u'minjus'),
            ('location', u'CONSEJO DE REPARACIONES - SECRETARIA TECNICA, NEISER LLACZA ARCE 158, MIRAFLORES'),
            ('date', u'2015-08-27'),
        )
        for field, expected in expected_fields:
            self.assertEqual(first_visit.get(field), expected)

        # One item was already consumed above; add it back to the tally.
        remaining = sum(1 for _ in visits)
        self.assertEqual(1 + remaining, 30)
    def test_parse_item(self):
        """Check the first parsed item and the item count for congreso.

        Feeds the stored ``18-08-2015.html`` fixture to ``self.spider.parse``
        and compares each listed field of the first yielded item against
        its expected value (``time_end`` is expected to be ``None``), then
        checks how many items were yielded.
        """
        fixture = os.path.join('data/congreso', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        visits = self.spider.parse(response)

        first_visit = next(visits)
        # (field, expected value) pairs, checked in this order.
        expected_fields = (
            ('full_name', u'ZEVALLOS FLOREZ, CESAR'),
            ('id_document', u'DNI/LE'),
            ('id_number', u'07632139'),
            ('entity', u'EL ROCOTO'),
            ('reason', u'PERSONAL DEL CONCESIONARIO'),
            ('host_name', u'PEREYRA SALAZAR, WALTER'),
            ('office', u'G.F. DE SERVICIO SOCIAL'),
            ('time_start', u'08:15'),
            ('time_end', None),
            ('institution', u'congreso'),
            ('title', u'TECNICO ADMINISTRATIVO'),
            ('date', u'2015-08-18'),
        )
        for field, expected in expected_fields:
            self.assertEqual(first_visit.get(field), expected)

        # One item was already consumed above; add it back to the tally.
        remaining = sum(1 for _ in visits)
        self.assertEqual(1 + remaining, 10)
    def test_parse_item(self):
        """Check the first two parsed items and the item tally for defensa.

        Feeds the stored ``19-08-2015.html`` fixture to ``self.spider.parse``
        and compares the listed fields (including ``sha1``) of the first two
        yielded items against their expected values, then checks the tally
        of the remaining items.
        """
        fixture = os.path.join('data/defensa', '19-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'19/08/2015'})
        visits = self.spider.parse(response)

        # One tuple of (field, expected value) pairs per item, in yield order.
        expected_visits = (
            (
                ('full_name', u'AURELIO COREDOR MIRANO'),
                ('time_start', u'08:38'),
                ('institution', u'defensa'),
                ('id_document', u'DNI'),
                ('id_number', u'43447287'),
                ('entity', None),
                ('reason', u'REUNIÓN DE TRABAJO'),
                ('host_name', u'HUGO DAVID MEJIA HUAMAN'),
                ('time_end', None),
                ('date', u'2015-08-19'),
                ('sha1', u'd9f07e3a5effd7f0b9164dfc14822c5395ed3b58'),
            ),
            (
                ('full_name', u'LUIS ANIBAL OLIVERA SANTA CRUZ'),
                ('institution', u'defensa'),
                ('id_document', u'DNI'),
                ('id_number', u'09392580'),
                ('entity', u'FAP'),
                ('reason', u'REUNIÓN DE TRABAJO'),
                ('host_name', u'JORGE RICARDO TORRES MONTEZA'),
                ('time_start', u'08:44'),
                ('time_end', u'11:49'),
                ('date', u'2015-08-19'),
                ('sha1', u'8d0e1ee7b60b8b2ee9e26d30e708d606a8d06a45'),
            ),
        )
        for expected_fields in expected_visits:
            visit = next(visits)
            for field, expected in expected_fields:
                self.assertEqual(visit.get(field), expected)

        # NOTE: two items were consumed above but only one is added back,
        # so the asserted value is (remaining items + 1).
        remaining = sum(1 for _ in visits)
        self.assertEqual(1 + remaining, 13)
    def test_parse_item(self):
        """Check the first parsed item and the item count for mincu.

        Feeds the stored ``18-08-2015.html`` fixture to ``self.spider.parse``
        and compares each listed field of the first yielded item against
        its expected value, then checks how many items were yielded.
        """
        fixture = os.path.join('data/mincu', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        visits = self.spider.parse(response)

        first_visit = next(visits)
        # (field, expected value) pairs, checked in this order.
        expected_fields = (
            ('full_name', u'INGRID BARRIONUEVO ECHEGARAY'),
            ('time_start', u'16:40'),
            ('institution', u'mincu'),
            ('id_document', u'DNI'),
            ('id_number', u'10085172'),
            ('entity', u'PARTICULAR'),
            ('reason', u'REUNIÓN DE TRABAJO'),
            ('host_name', u'JOIZ ELIZABETH DOBLADILLO ORTIZ'),
            ('title', u'[SERVICIOS DE UN ASISTENTE EN COMUNICACIONES]'),
            ('office', u'QHAPAQ ÑAN'),
            ('time_end', u'16:53'),
            ('date', u'2015-08-18'),
        )
        for field, expected in expected_fields:
            self.assertEqual(first_visit.get(field), expected)

        # One item was already consumed above; add it back to the tally.
        remaining = sum(1 for _ in visits)
        self.assertEqual(1 + remaining, 15)
    def test_parse_item(self):
        """Check the first and third parsed items and the tally for produce.

        Feeds the stored ``20-08-2015.html`` fixture to ``self.spider.parse``
        and compares the listed fields (including ``sha1``) of the first and
        third yielded items against their expected values; the second item
        is consumed without being checked. Finally checks the tally of the
        remaining items.
        """
        fixture = os.path.join('data/produce', '20-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'20/08/2015'})
        visits = self.spider.parse(response)

        first_expected = (
            ('full_name', u'MAGUIÑA ROBLES, WILFREDO HERIBERTO'),
            ('time_start', u'09:08:34'),
            ('institution', u'produce'),
            ('id_document', u'DNI'),
            ('id_number', u'32824731'),
            ('office', u'DESPACHO VICEMINISTERIAL DE PESQUERIA'),
            ('reason', u'ENTREVISTA'),
            ('host_name', u'KASTNER URIBE, MONICA CARLOTA'),
            ('time_end', u'09:16:28'),
            ('date', u'2015-08-20'),
            ('sha1', u'1edbaca51007f25bd6bd07b0025bc94309544e3e'),
        )
        third_expected = (
            ('full_name', u'REGALO QUIJANO, WALTER MANUEL'),
            ('time_start', u'09:16:39'),
            ('institution', u'produce'),
            ('id_document', u'DNI'),
            ('id_number', u'08182131'),
            ('office', u'DESPACHO VICEMINISTERIAL DE PESQUERIA'),
            ('reason', u'ENTREVISTA'),
            ('host_name', u'KASTNER URIBE, MONICA CARLOTA'),
            ('time_end', u'11:53:16'),
            ('date', u'2015-08-20'),
            ('sha1', u'd435683995c845a4a947895d3197725e255753ef'),
        )

        visit = next(visits)
        for field, expected in first_expected:
            self.assertEqual(visit.get(field), expected)

        # Discard the second item; only the first and third are inspected.
        next(visits)

        visit = next(visits)
        for field, expected in third_expected:
            self.assertEqual(visit.get(field), expected)

        # NOTE: three items were consumed above but only one is added back,
        # so the asserted value is (remaining items + 1).
        remaining = sum(1 for _ in visits)
        self.assertEqual(1 + remaining, 28)
    def test_parse_item(self):
        """Check the first parsed item and the item count for minagr.

        Feeds the stored ``18-08-2015.html`` fixture to ``self.spider.parse``
        and compares each listed field of the first yielded item against
        its expected value, then checks how many items were yielded.
        """
        fixture = os.path.join('data/minagr', '18-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'18/08/2015'})
        visits = self.spider.parse(response)

        first_visit = next(visits)
        # (field, expected value) pairs, checked in this order.
        # The escaped characters in the 'title' value are asserted exactly
        # as written; do not "tidy" them without re-checking the spider output.
        expected_fields = (
            ('full_name', u'VICTOR HUGO SEVERINO VALLE'),
            ('time_start', u'17:04'),
            ('institution', u'minagr'),
            ('id_document', u'DNI'),
            ('id_number', u'25856019'),
            ('entity', u'MAKA SAC'),
            ('reason', u'DOCUMENTOS'),
            ('host_name', u'CARLOS ANTONIO LARA PALACIOS'),
            ('title',
             u'[CONTADOR P\xc3\u0161BLICO PARA ALMAC\xc3\u2030N Y PATRIMONIO]'),
            ('office', u'ALMACEN CENTRAL'),
            ('time_end', u'17:23'),
            ('date', u'2015-08-18'),
        )
        for field, expected in expected_fields:
            self.assertEqual(first_visit.get(field), expected)

        # One item was already consumed above; add it back to the tally.
        remaining = sum(1 for _ in visits)
        self.assertEqual(1 + remaining, 15)
    def test_parse_item(self):
        """Verify the leading item and the total yielded for presidencia.

        Runs ``self.spider.parse`` on a fake response built from the stored
        ``31-08-2015.html`` fixture, asserts every listed field of the first
        yielded item, and then asserts the number of items yielded.
        """
        html_path = os.path.join('data/presidencia', '31-08-2015.html')
        fake_response = fake_response_from_file(
            html_path, meta={'date': u'31/08/2015'})
        parsed = self.spider.parse(fake_response)

        record = next(parsed)
        # Expected values for the first record, asserted top to bottom.
        expectations = [
            ('full_name', u'RICCE CHUMBE WALTER HUMBERTO'),
            ('id_document', u'DNI'),
            ('id_number', u'25424360'),
            ('entity', u'MINISTERIO DE AMBIENTE'),
            ('reason', u'REUNION DE TRABAJO'),
            ('host_name', u'MONTALVA DE FALLA JOSE'),
            ('office', u'SUBSECRETARIA GENERAL'),
            ('title', u'DIRECTOR GENERAL'),
            ('time_start', u'10:50'),
            ('time_end', u'10:54'),
            ('institution', u'presidencia'),
            ('meeting_place', u'EDIFICIO PALACIO'),
            ('date', u'2015-08-31'),
        ]
        for key, wanted in expectations:
            self.assertEqual(record.get(key), wanted)

        # The record taken with next() is no longer in the iterator,
        # so count it explicitly alongside the rest.
        leftover = sum(1 for _ in parsed)
        self.assertEqual(1 + leftover, 12)
    def test_parse_item(self):
        """Check the first parsed item and the item count for mujer.

        Feeds the stored ``20-08-2015.html`` fixture to ``self.spider.parse``
        and compares each listed field of the first yielded item (including
        ``sha1``; ``reason`` is expected to be ``None``) against its expected
        value, then checks how many items were yielded.
        """
        fixture = os.path.join('data/mujer', '20-08-2015.html')
        response = fake_response_from_file(fixture, meta={'date': u'20/08/2015'})
        visits = self.spider.parse(response)

        first_visit = next(visits)
        # (field, expected value) pairs, checked in this order.
        expected_fields = (
            ('full_name', u'VIGO LOPEZ BETTY CATHERINE'),
            ('time_start', u'07:56 AM'),
            ('institution', u'min. mujer'),
            ('id_document', u'DNI'),
            ('id_number', u'43521527'),
            ('entity', u'SAN MIGUEL'),
            ('reason', None),
            ('host_name', u'PNCVFS ,'),
            ('office',
             u'PROGRAMA NACIONAL CONTRA LA VIOLENCIA FAMILIAR Y SEXUAL'),
            ('time_end', u'10:42 AM'),
            ('date', u'2015-08-20'),
            ('sha1', u'a00e952857d8c86ab3877ee3805bda686bd3a999'),
        )
        for field, expected in expected_fields:
            self.assertEqual(first_visit.get(field), expected)

        # One item was already consumed above; add it back to the tally.
        remaining = sum(1 for _ in visits)
        self.assertEqual(1 + remaining, 20)