예제 #1
0
def exp_pag_enlaces_limites(metadata, url):
    """Visit a run of numbered pages and process each one.

    For every integer from ``metadata['lim_inf']`` through
    ``metadata['lim_sup']`` (inclusive), the number is appended to
    ``metadata['urls'][0]``, the resulting address is loaded in the driver
    and the driver is handed to ``exp_pag_sencilla_sub``. The value
    returned by ``exp_pag_sencilla_sub`` is not used here.

    Args:
        metadata: Mapping read for the keys ``'lim_inf'``, ``'lim_sup'`` and
            ``'urls'``; also forwarded unchanged to ``exp_pag_sencilla_sub``.
        url: Address passed to ``driver.setup`` before the loop starts.

    Returns:
        None.
    """
    driver = dcrawl()
    try:
        driver.setup(url, 2)
        for i in range(metadata['lim_inf'], metadata['lim_sup'] + 1):
            driver.get(metadata['urls'][0] + str(i))
            exp_pag_sencilla_sub(metadata, driver)
            driver.back()
    finally:
        # Release the driver even when a page fails to load or parse;
        # otherwise an exception mid-loop would leave it open.
        driver.close()
예제 #2
0
def exp_pag_enlaces(metadata, url):
    """Follow every link collected from a page and process each target.

    The driver is set up on ``url``, the links are obtained with
    ``driver.getlinks(metadata['enlaces'])``, and each link is loaded and
    handed (via the driver) to ``exp_pag_sencilla_sub``. The value returned
    by ``exp_pag_sencilla_sub`` is not used here.

    Args:
        metadata: Mapping read for the key ``'enlaces'``; also forwarded
            unchanged to ``exp_pag_sencilla_sub``.
        url: Address passed to ``driver.setup``.

    Returns:
        None.
    """
    driver = dcrawl()
    try:
        driver.setup(url, 1)
        enlaces = driver.getlinks(metadata['enlaces'])
        for enlace in enlaces:
            driver.get(enlace)
            exp_pag_sencilla_sub(metadata, driver)
            driver.back()
    finally:
        # Release the driver even when one of the links fails; otherwise an
        # exception mid-loop would leave it open.
        driver.close()
예제 #3
0
def exp_pag_contenedor(metadata, url):
    """Build a ``Pagina`` from the rows found on a single page.

    The driver is set up on ``url`` and the rows are obtained with
    ``driver.getrows(metadata['contenedores'])``. Each row is processed and
    its result is added to the page with ``agregar_persona``. The page is
    finalized after the driver has been closed.

    Args:
        metadata: Mapping read for the keys ``'contenedores'`` and
            ``'entidad'``; also forwarded unchanged to the per-row call.
        url: Address passed to ``driver.setup`` and to ``Pagina``.

    Returns:
        The finalized ``Pagina`` instance.
    """
    driver = dcrawl()
    try:
        driver.setup(url, 1)
        filas = driver.getrows(metadata['contenedores'])
        new_version = Pagina(url, metadata['entidad'])
        for fila in filas:
            # NOTE(review): here exp_pag_sencilla_sub is called as a method
            # of the driver, while sibling functions call a free function of
            # the same name -- confirm this is intended.
            result = driver.exp_pag_sencilla_sub(metadata, fila)
            new_version.agregar_persona(result)
    finally:
        # Release the driver even when a row fails; otherwise an exception
        # mid-loop would leave it open.
        driver.close()
    new_version.finalizar()
    return new_version
예제 #4
0
 def __init__(self):
     """Create the crawl driver and set the initial state and XPath strings."""
     self.driver = dcrawl()

     # State attributes: unset / empty until filled in elsewhere.
     self.selectElement = None
     self.all_options = []
     self.entidad = None
     self.nombre = None

     # XPath expressions stored for later use. Note that xpathRegresar is
     # the only relative one (".//").
     self.xpathNombre = "//input[@id='ctl00_MainContent_txtNombreAspirante']"
     self.xpathSelect = "//select[@id='ctl00_MainContent_ddlAreacon']"
     self.xpathBusqAvan = "//div[@id='ctl00_MainContent_btnBusquedaAvanzada']"
     self.xpathBuscar = "//div[@id='ctl00_MainContent_btOK_CD']"
     self.xpathCancelar = "//div[@id='ctl00_MainContent_btnCancel_CD']"
     self.xpathRegresar = ".//div[@id='ctl00_MainContent_btnRegresar_CD']"
예제 #5
0
def exp_pag_sencilla(metadata, url):
    """Build a ``Pagina`` holding at most one person read from a single page.

    For every key in ``metadata['info']`` the associated value is passed to
    ``driver.explotar_tipo`` and the result is stored on a ``Person`` under
    that key. After the driver is closed the person is timestamped and, if
    ``persona.name`` is truthy, ``persona.persona`` is added to the page.

    Args:
        metadata: Mapping read for the keys ``'info'`` and ``'entidad'``.
        url: Address passed to ``driver.setup`` and to ``Pagina``.

    Returns:
        The finalized ``Pagina`` instance. It has no person added when
        ``persona.name`` is falsy.
    """
    driver = dcrawl()
    try:
        driver.setup(url, 2)
        persona = Person()
        new_version = Pagina(url, metadata['entidad'])
        info = metadata['info']
        for dato in info:
            result_data = driver.explotar_tipo(info[dato])
            persona.add_attribute(dato, result_data)
    finally:
        # Release the driver even when an attribute fails to extract;
        # otherwise an exception mid-loop would leave it open.
        driver.close()
    persona.set_timestamp()
    if persona.name:
        new_version.agregar_persona(persona.persona)
    new_version.finalizar()
    return new_version
예제 #6
0
def update_enlaces(metadata, url):
    """Re-read the links on a page and reconcile them with redis.

    The links are obtained with ``driver.getlinks(metadata['enlaces'])``.
    Each link is then classified by ``estado_pagina(enlace)`` and
    ``redis.existe(enlace)``:

    * both truthy: its ``'metadata_id'`` hash attribute is read;
    * only ``estado_pagina`` truthy: the link is collected as new;
    * only ``redis.existe`` truthy: its ``'estado'`` attribute is set to 2;
    * neither: nothing is done.

    Finally ``redis.updated_link(url)`` is called and the new links are
    passed to ``nuevos_enlaces``.

    Args:
        metadata: Mapping read for the key ``'enlaces'``; also forwarded
            unchanged to ``nuevos_enlaces``.
        url: Address passed to ``driver.setup`` and ``redis.updated_link``.

    Returns:
        None.
    """
    driver = dcrawl()
    try:
        driver.setup(url, 1)
        enlaces = driver.getlinks(metadata['enlaces'])
    finally:
        # Release the driver even when setup or link collection fails.
        driver.close()

    # Builtin set instead of the legacy ``Set`` class (removed in Python 3).
    nuevos = set()
    # Holds the metadata id of the LAST link that passed both checks; it
    # stays "" when no link did.
    id_metadata = ""
    for enlace in enlaces:
        pagina_ok = estado_pagina(enlace)
        registrado = redis.existe(enlace)
        if pagina_ok and registrado:
            id_metadata = redis.get_hash_atribute(enlace, 'metadata_id')
        elif pagina_ok:
            nuevos.add(enlace)
        elif registrado:
            redis.put_hash_atribute(enlace, 'estado', 2)
    redis.updated_link(url)
    nuevos_enlaces(nuevos, metadata, id_metadata)
예제 #7
0
 def __init__(self):
     """Create the crawl driver, an empty link list and the XPath strings."""
     self.driver = dcrawl()
     # Starts empty; presumably filled with collected links elsewhere.
     self.total_links = []

     # XPath expressions stored for later use.
     self.SigPagXpath = "//ul[1]/li[@class='next']/a[@class='step']"
     self.BusqFieldXpath = "//input[@id='query']"
     self.BusqButXpath = "//input[@id='find']"