class SenadoresScrapper(object):
    """Scrape the senator roster from senado.gov.py and persist it.

    Pages are loaded through the browser returned by
    ``utils.get_new_browser()``, parsed with ``SenadoresParser`` and stored
    one record at a time through ``SilpyMongoClient.update_senador``.
    """

    def __init__(self):
        """Create the Mongo client, parser and browser, then open the host."""
        self.host = "http://www.senado.gov.py/"
        self.mongo = SilpyMongoClient()
        self.parser = SenadoresParser()
        self.browser = utils.get_new_browser()
        self.browser.get(self.host)

    def driver_quit(self):
        """Quit the underlying browser."""
        self.browser.quit()

    def obtener_lista_de_senadores(self):
        """Load the alphabetical roster page and return the parsed senators.

        Returns:
            The result of ``self.parser.parse_senator_list`` applied to the
            page source (``extract_senators_data`` iterates over it).
        """
        # NOTE(review): ``base_url`` is not defined inside this class;
        # presumably a module-level constant -- confirm it exists
        # (``self.host`` holds the site root as well).
        url = base_url + "index.php/senado/nomina/nomina-alfabetica"
        self.browser.get(url)
        # Let the helper wait on the document before reading the page source.
        utils.wait_for_document_ready(self.browser)
        data = self.browser.page_source
        # Records are not bulk-saved here; they are stored one by one in
        # extract_senators_data().
        return self.parser.parse_senator_list(data)

    def extract_senators_data(self):
        """Fetch every senator, enrich each record and store it.

        Any exception is reported on stdout/stderr (warning line plus
        traceback) and swallowed, so this method does not raise. The browser
        is closed whether or not the scrape succeeded.
        """
        try:
            try:
                for senador in self.obtener_lista_de_senadores():
                    # NOTE(review): get_member_info is not defined in the
                    # visible part of this class -- confirm it is provided.
                    senador = self.get_member_info(senador)
                    self.mongo.update_senador(senador)
            finally:
                # Close on failure too, so an aborted run does not leave the
                # browser window open. Kept inside the outer ``try`` so an
                # error from close() is reported instead of propagating.
                self.browser.close()
        except Exception:
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print("WARNING: Improve Exception handling.")
            traceback.print_exc()
def get_members_data(self):
    """Fetch details and CV for every member and store each record.

    For each entry returned by ``self.get_member_list()`` the record is
    updated with ``self.get_member_details(id)``, given a ``'cv'`` key from
    ``self.get_member_cv(id)`` and passed to
    ``SilpyMongoClient.update_diputado``. A failure while processing one
    member is printed with its traceback and does not stop the others.
    """
    mongo_client = SilpyMongoClient()
    for member in self.get_member_list():
        try:
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print("Procesando diputado " + member['name'])
            member_id = member['diputado_id']
            member.update(self.get_member_details(member_id))
            member['cv'] = self.get_member_cv(member_id)
            mongo_client.update_diputado(member)
        except Exception:
            # Best-effort: report and move on to the next member.
            print("Improve exception handling")
            traceback.print_exc()
def __init__(self):
    """Set up the scraper's collaborators and open the host page.

    Stores the site root in ``self.host``, creates the Mongo client and
    the parser, obtains a browser from ``utils.get_new_browser()`` and
    points it at the host.
    """
    self.host = "http://www.senado.gov.py/"
    self.mongo = SilpyMongoClient()
    self.parser = SenadoresParser()

    # Keep a local handle so the first navigation reads as a single step.
    browser = utils.get_new_browser()
    self.browser = browser
    browser.get(self.host)