def scrape_civil_escrito(self, causa_id, causa_obj, soup):
    """Searches for escritos pendientes in a civil causa."""
    # Double .parent because this table keeps its header <tr> inside a <thead>.
    rows = soup.find(id='titTablaCivEsc').parent.parent.find_all('tr')
    header = True
    for row in rows:
        if not header:  # skip the header row
            tds = row.find_all('td')
            input_el = row.find('input', attrs={'name': 'id_documento'})
            id_documento = input_el.attrs['value'] if input_el else None
            if id_documento:
                # id = causa_id__id_documento
                doc_id = '{}__{}'.format(causa_id, id_documento)
                created = False
                try:
                    doc_obj = EscritoCivilPorResolver.objects.get(id=doc_id)
                except EscritoCivilPorResolver.DoesNotExist:
                    doc_obj = EscritoCivilPorResolver(
                        id=doc_id,
                        causa=causa_obj,
                        fecha=simplify_string(tds[2].contents[0]),
                        tipo=simplify_string(tds[3].contents[0]),
                        solicitante=simplify_string(tds[4].contents[0]),
                    )
                    doc_obj.save()
                    created = True
                if created and self.profile.initial_migration_done:
                    print('Sending notification: {}'.format(doc_obj))
                    send_new_doc_notification(doc_obj)
        else:
            header = False
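# A note on the cell lookups above: each scraper indexes tds[...] and reads
# .contents[0] directly, which assumes the portal always renders every cell.
# The sketch below is a hypothetical guard (not used anywhere in this file)
# showing how those reads could be made defensive; it only relies on
# simplify_string, which this module already uses.
def _cell_text(tds, index, default=''):
    """Hypothetical helper: safely read and normalize a table cell's text."""
    try:
        return simplify_string(tds[index].contents[0])
    except (IndexError, AttributeError):
        # Missing cell or empty .contents: fall back to a default value.
        return default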
def scrape_familia_document(self, causa):
    """Opens the detail of a causa. `causa` is the soup form element."""
    session = self.session
    url = Scraper.CAUSA_TYPES['familia']['detail']

    # Collect the hidden form inputs needed to request the causa detail.
    data = {}
    for input_elm in causa.find_all('input'):
        if 'name' in input_elm.attrs and 'value' in input_elm.attrs:
            data[input_elm.attrs['name']] = input_elm.attrs['value']

    causa_id = 'FAM_{}_{}_{}'.format(data['tipo_causa'], data['rol_causa'],
                                     data['era_causa'])
    tr = causa.parent.parent
    tds = tr.find_all('td')
    caratulado = simplify_string(tds[3].contents[0])

    try:
        causa_obj = Causa.objects.get(id=causa_id)
    except Causa.DoesNotExist:
        causa_obj = Causa(id=causa_id,
                          user=self.profile,
                          type=Causa.TYPE_CHOICES_FAMILIA,
                          archived=False,
                          rol='{}-{}-{}'.format(data['tipo_causa'],
                                                data['rol_causa'],
                                                data['era_causa']),
                          caratulado=caratulado)
        causa_obj.save()
        if self.profile.initial_migration_done:
            send_new_causa_notification(causa_obj)

    if causa_obj and causa_id:
        # Open causa details:
        resp = session.post(url, data=data, headers=Scraper.SCRAPER_HEADERS)
        if 'Causa Familia' in resp.text:
            resp_text = resp.content.decode('ISO-8859-1').replace(
                '\r', ' ').replace('\n', '')
            html = '<html><body>{}</body></html>'.format(resp_text)
            soup = BeautifulSoup(html, 'html.parser')
            rows = soup.find(id='titTablaFam').parent.find_all('tr')
            header = True
            for row in rows:
                if not header:  # skip the header row
                    link = row.find('a')
                    if link and 'onclick' in link.attrs:
                        # E.g. "vvbbFF('Resolución',3,59987979,0);"
                        onclick = link.attrs['onclick']
                        doc_id = onclick.replace('vvbbFF(', '').replace(
                            ');', '').replace(',', '-').replace("'", '')
                        tds = row.find_all('td')
                        if doc_id:
                            created = False
                            try:
                                doc_obj = DocFamilia.objects.get(id=doc_id)
                            except DocFamilia.DoesNotExist:
                                doc_obj = DocFamilia(
                                    id=doc_id,
                                    causa=causa_obj,
                                    etapa=simplify_string(tds[2].contents[0]),
                                    tramite=simplify_string(tds[3].contents[0]),
                                    desc_tramite=simplify_string(tds[4].contents[0]),
                                    referencia=simplify_string(tds[5].contents[0]),
                                    fecha=simplify_string(tds[6].contents[0]),
                                )
                                doc_obj.save()
                                created = True
                            if created and self.profile.initial_migration_done:
                                print('Sending notification: {}'.format(doc_obj))
                                send_new_doc_notification(doc_obj)
                else:
                    header = False
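# The chained .replace() calls in scrape_familia_document turn an onclick such
# as "vvbbFF('Resolución',3,59987979,0);" into "Resolución-3-59987979-0".
# Below is an equivalent, purely illustrative regex-based sketch (the name
# _parse_vvbbff is hypothetical and nothing in this file calls it); it assumes
# the onclick always has the vvbbFF(...) shape shown above.
def _parse_vvbbff(onclick):
    """Hypothetical helper: extract the vvbbFF(...) arguments as a doc id."""
    import re  # local import so the sketch stays self-contained

    match = re.match(r"vvbbFF\((.*)\);?\s*$", onclick or '')
    if not match:
        return None
    # Split the argument list, drop quotes, and join with dashes,
    # e.g. "Resolución-3-59987979-0".
    parts = [part.strip().strip("'") for part in match.group(1).split(',')]
    return '-'.join(parts)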
def scrape_cobranza_document(self, causa):
    """Opens the detail of a causa. `causa` is the soup form element."""
    session = self.session
    url = Scraper.CAUSA_TYPES['cobranza']['detail']

    # Collect the hidden form inputs needed to request the causa detail.
    data = {}
    for input_elm in causa.find_all('input'):
        if 'name' in input_elm.attrs and 'value' in input_elm.attrs:
            data[input_elm.attrs['name']] = input_elm.attrs['value']

    causa_id = 'COB_{}_{}_{}'.format(data['tipo_causa'], data['rol_causa'],
                                     data['era_causa'])
    tr = causa.parent.parent
    tds = tr.find_all('td')
    caratulado = simplify_string(tds[3].contents[0])

    try:
        causa_obj = Causa.objects.get(id=causa_id)
    except Causa.DoesNotExist:
        causa_obj = Causa(id=causa_id,
                          user=self.profile,
                          type=Causa.TYPE_CHOICES_COBRANZA,
                          archived=False,
                          rol='{}-{}-{}'.format(data['tipo_causa'],
                                                data['rol_causa'],
                                                data['era_causa']),
                          caratulado=caratulado)
        causa_obj.save()
        if self.profile.initial_migration_done:
            send_new_causa_notification(causa_obj)

    if causa_obj and causa_id:
        # Open causa details:
        resp = session.post(url, data=data, headers=Scraper.SCRAPER_HEADERS)
        if 'Causa Cobranza' in resp.text:
            resp_text = resp.content.decode('ISO-8859-1').replace(
                '\r', ' ').replace('\n', '')
            html = '<html><body>{}</body></html>'.format(resp_text)
            soup = BeautifulSoup(html, 'html.parser')
            rows = soup.find(id='titTablaCob').parent.find_all('tr')
            header = True
            for row in rows:
                if not header:  # skip the header row
                    doc_data = {}
                    for doc_input in row.find_all('input'):
                        try:
                            doc_data[doc_input.attrs['name']] = \
                                doc_input.attrs['value']
                        except KeyError:
                            pass
                    doc_id = None
                    if 'cod_tribunal' in doc_data and 'crr_iddocumento' in doc_data:
                        doc_id = 'COB-{}-{}'.format(doc_data['cod_tribunal'],
                                                    doc_data['crr_iddocumento'])
                    tds = row.find_all('td')
                    if doc_id:
                        created = False
                        try:
                            doc_obj = DocCobranza.objects.get(id=doc_id)
                        except DocCobranza.DoesNotExist:
                            doc_obj = DocCobranza(
                                id=doc_id,
                                causa=causa_obj,
                                etapa=simplify_string(tds[1].contents[0]),
                                tramite=simplify_string(tds[2].contents[0]),
                                desc_tramite=simplify_string(tds[3].contents[0]),
                                fecha=simplify_string(tds[4].contents[0]),
                            )
                            doc_obj.save()
                            created = True
                        if created and self.profile.initial_migration_done:
                            print('Sending notification: {}'.format(doc_obj))
                            send_new_doc_notification(doc_obj)
                else:
                    header = False
def scrape_civil_document(self, causa):
    """Opens the detail of a causa. `causa` is the soup form element."""
    session = self.session
    url = Scraper.CAUSA_TYPES['civil']['detail']

    # Collect the hidden form inputs needed to request the causa detail.
    data = {}
    for input_elm in causa.find_all('input'):
        if 'name' in input_elm.attrs and 'value' in input_elm.attrs:
            data[input_elm.attrs['name']] = input_elm.attrs['value']

    causa_id = 'CIV_{}_{}_{}'.format(data['tipo'], data['rol'], data['ano'])
    tr = causa.parent.parent
    tds = tr.find_all('td')
    caratulado = simplify_string(tds[3].contents[0])

    try:
        causa_obj = Causa.objects.get(id=causa_id)
    except Causa.DoesNotExist:
        causa_obj = Causa(id=causa_id,
                          user=self.profile,
                          type=Causa.TYPE_CHOICES_CIVIL,
                          archived=False,
                          rol='{}-{}-{}'.format(data['tipo'], data['rol'],
                                                data['ano']),
                          caratulado=caratulado)
        causa_obj.save()
        if self.profile.initial_migration_done:
            send_new_causa_notification(causa_obj)

    if causa_obj and causa_id:
        # Open causa details:
        resp = session.post(url, data=data, headers=Scraper.SCRAPER_HEADERS)
        if 'Causa Civil' in resp.text:
            resp_text = resp.content.decode('ISO-8859-1').replace(
                '\r', ' ').replace('\n', '')
            html = '<html><body>{}</body></html>'.format(resp_text)
            soup = BeautifulSoup(html, 'html.parser')

            # Escritos pendientes live in a separate table on the same page.
            self.scrape_civil_escrito(causa_id, causa_obj, soup)

            # Double .parent because this table keeps its header <tr> inside a <thead>.
            rows = soup.find(id='titTablaCiv').parent.parent.find_all('tr')
            header = True
            for row in rows:
                if not header:  # skip the header row
                    tds = row.find_all('td')
                    if tds[0].contents[0] and tds[0].contents[0].strip() != '':
                        # id = causa_id__folio
                        doc_id = '{}__{}'.format(causa_id,
                                                 tds[0].contents[0].strip())
                        created = False
                        try:
                            doc_obj = DocCivil.objects.get(id=doc_id)
                        except DocCivil.DoesNotExist:
                            tribunal = ''
                            try:
                                strongs = soup.find_all('strong')
                                if len(strongs) >= 8:
                                    tribunal = simplify_string(
                                        strongs[7].parent.contents[1])
                            except Exception:
                                pass
                            doc_obj = DocCivil(
                                id=doc_id,
                                causa=causa_obj,
                                etapa=simplify_string(tds[3].contents[0]),
                                tramite=simplify_string(tds[4].contents[0]),
                                descripcion=simplify_string(tds[5].contents[0]),
                                fecha=simplify_string(tds[6].contents[0]),
                                foja=simplify_string(tds[7].contents[0]),
                                tribunal=tribunal)
                            doc_obj.save()
                            created = True
                        if created and self.profile.initial_migration_done:
                            print('Sending notification: {}'.format(doc_obj))
                            send_new_doc_notification(doc_obj)
                else:
                    header = False
def scrape_apelaciones_document(self, causa):
    """Opens the detail of a causa. `causa` is the soup form element."""
    session = self.session
    url = Scraper.CAUSA_TYPES['apelaciones']['detail']

    # Collect the hidden form inputs needed to request the causa detail.
    data = {}
    for input_elm in causa.find_all('input'):
        if 'name' in input_elm.attrs and 'value' in input_elm.attrs:
            data[input_elm.attrs['name']] = input_elm.attrs['value']

    causa_id = 'APE_{}_{}'.format(data['rol_causa'], data['era_causa'])
    tr = causa.parent.parent
    tds = tr.find_all('td')
    caratulado = simplify_string(tds[3].contents[0])

    try:
        causa_obj = Causa.objects.get(id=causa_id)
    except Causa.DoesNotExist:
        causa_obj = Causa(id=causa_id,
                          user=self.profile,
                          type=Causa.TYPE_CHOICES_APELACIONES,
                          archived=False,
                          rol='{}-{}'.format(data['rol_causa'],
                                             data['era_causa']),
                          caratulado=caratulado)
        causa_obj.save()
        if self.profile.initial_migration_done:
            send_new_causa_notification(causa_obj)

    if causa_obj and causa_id:
        # Open causa details:
        resp = session.post(url, data=data, headers=Scraper.SCRAPER_HEADERS)
        if 'Recurso Corte de Apelaciones' in resp.text:
            resp_text = resp.content.decode('ISO-8859-1').replace(
                '\r', ' ').replace('\n', '')
            html = '<html><body>{}</body></html>'.format(resp_text)
            soup = BeautifulSoup(html, 'html.parser')
            rows = soup.find(id='titTablaApeGrid').parent.find_all('tr')
            header = True
            for row in rows:
                if not header:  # skip the header row
                    tds = row.find_all('td')
                    if tds[2].contents[0] and tds[2].contents[0].strip() != '':
                        doc_id = '{}__{}'.format(causa_id,
                                                 tds[2].contents[0].strip())
                        created = False
                        try:
                            doc_obj = DocApelaciones.objects.get(id=doc_id)
                        except DocApelaciones.DoesNotExist:
                            libro = ''
                            nro_ingreso = ''
                            try:
                                descPopUps = soup.find_all(
                                    'tr', attrs={'class': 'descPopUp'})
                                if len(descPopUps) >= 2:
                                    descPopUpData = descPopUps[1].find_all('td')
                                    libro = simplify_string(
                                        descPopUpData[0].contents[0])
                                    nro_ingreso = simplify_string(
                                        descPopUpData[1].contents[0])
                            except Exception:
                                pass
                            doc_obj = DocApelaciones(
                                id=doc_id,
                                causa=causa_obj,
                                tipo=simplify_string(tds[1].contents[0]),
                                descripcion=simplify_string(tds[3].contents[0]),
                                fecha=simplify_string(tds[4].contents[0]),
                                salas=simplify_string(tds[5].contents[0]),
                                foja_inicial=simplify_string(tds[6].contents[0]),
                                libro=libro,
                                nro_ingreso=nro_ingreso)
                            doc_obj.save()
                            created = True
                        if created and self.profile.initial_migration_done:
                            print('Sending notification: {}'.format(doc_obj))
                            send_new_doc_notification(doc_obj)
                else:
                    header = False
def scrape_suprema_document(self, causa):
    """Opens the detail of a causa."""
    session = self.session
    url = Scraper.CAUSA_TYPES['suprema']['detail']

    # Collect the hidden form inputs needed to request the causa detail.
    data = {}
    for input_elm in causa.find_all('input'):
        if 'name' in input_elm.attrs and 'value' in input_elm.attrs:
            data[input_elm.attrs['name']] = input_elm.attrs['value']

    causa_id = 'SUP_{}_{}'.format(data['rol_causa'], data['era_causa'])

    try:
        causa_obj = Causa.objects.get(id=causa_id)
    except Causa.DoesNotExist:
        causa_obj = Causa(id=causa_id,
                          user=self.profile,
                          type=Causa.TYPE_CHOICES_SUPREMA,
                          archived=False,
                          rol='{}-{}'.format(data['rol_causa'],
                                             data['era_causa']),
                          caratulado=data['caratulado'])
        causa_obj.save()
        if self.profile.initial_migration_done:
            send_new_causa_notification(causa_obj)

    if causa_obj and causa_id:
        # Open causa details:
        resp = session.post(url, data=data, headers=Scraper.SCRAPER_HEADERS)
        if 'Recurso Corte Suprema' in resp.text:
            resp_text = resp.content.decode('ISO-8859-1').replace(
                '\r', ' ').replace('\n', '')
            html = '<html><body>{}</body></html>'.format(resp_text)
            soup = BeautifulSoup(html, 'html.parser')
            rows = soup.find(id='titTablaSup').parent.find_all('tr')
            header = True
            for row in rows:
                if not header:  # skip the header row
                    tds = row.find_all('td')
                    iddoc_input = row.find('input', attrs={'name': 'iddoc'})
                    if iddoc_input:
                        doc_id = '{}__{}'.format(causa_id,
                                                 iddoc_input.attrs['value'])
                        created = False
                        try:
                            doc_obj = DocSuprema.objects.get(id=doc_id)
                        except DocSuprema.DoesNotExist:
                            doc_obj = DocSuprema(
                                id=doc_id,
                                causa=causa_obj,
                                anio=simplify_string(tds[2].contents[0]),
                                fecha=simplify_string(tds[3].contents[0]),
                                tipo=simplify_string(tds[4].contents[0]),
                                nomenclatura=simplify_string(tds[5].contents[0]),
                                descripcion=simplify_string(tds[6].contents[0]),
                                salas=simplify_string(tds[7].contents[0]))
                            doc_obj.save()
                            created = True
                        if created and self.profile.initial_migration_done:
                            print('Sending notification: {}'.format(doc_obj))
                            send_new_doc_notification(doc_obj)
                else:
                    header = False
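# Every scraper above repeats the same "look the document up by id, create it
# if missing, then notify once the initial migration is done" flow. A minimal
# sketch of how that could be factored out is shown below; _save_and_notify is
# a hypothetical name, is not called anywhere in this file, and assumes every
# Doc* model uses a string primary key named id.
def _save_and_notify(self, model_cls, doc_id, **fields):
    """Hypothetical helper: get-or-create a document and notify if it is new."""
    try:
        return model_cls.objects.get(id=doc_id)
    except model_cls.DoesNotExist:
        doc_obj = model_cls(id=doc_id, **fields)
        doc_obj.save()
        if self.profile.initial_migration_done:
            print('Sending notification: {}'.format(doc_obj))
            send_new_doc_notification(doc_obj)
        return doc_obj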