Ejemplo n.º 1
0
def scrap_cmp(doctor):
    """Scrape the CMP record for *doctor* and persist the result.

    A proxy is requested for the ``cmp`` service; when none is available the
    function logs the problem and returns without touching *doctor*.
    Otherwise the record returned by ``scrap_and_recognize`` is copied onto
    *doctor*, one ``DoctorSpecialty`` row is created per listed specialty,
    and the browser driver is closed whether or not the attempt succeeded.

    Status codes written here: ``2`` (Invalid) when the scraped ``cmp`` value
    differs from ``doctor.id`` and ``3`` (Error) when the retry budget is
    exhausted; any other status comes straight from the scraped record.

    Args:
        doctor: Record to update. It must expose ``id``, the attributes
            assigned below and a ``save()`` method.

    Returns:
        None.
    """
    max_retries = 3
    logger.info('\tStarting')
    logger.info('\tQuerying {}'.format(doctor.id))
    proxy_server = get_proxy(service='cmp')
    if not proxy_server:
        logger.info('\tError: Out of proxies!')
        logger.info('\tFinished')
        return

    processed = False
    # NOTE(review): nothing in this function increments ``retries`` and every
    # failure ends the loop, so exactly one attempt is made and the
    # 'Giving up!' branch is currently unreachable. Confirm whether a real
    # retry policy was intended.
    retries = 0
    while not processed:
        # ``>=`` instead of ``==`` so the guard still fires if the counter
        # ever advances by more than one per iteration.
        if retries >= max_retries:
            logger.info('\tGiving up!')
            # Update doctor status record
            doctor.status = 3  # Error
            doctor.save()
            break

        # Reset per attempt so ``finally`` never touches an unbound name (or
        # a driver left over from a previous iteration) when ``get_driver``
        # itself fails.
        driver = None
        try:
            logger.info('\tUsing proxy {}'.format(proxy_server))
            driver = get_driver(proxy_server)
            record = scrap_and_recognize(driver, doctor)
            if record['cmp'] != doctor.id:
                record['status'] = 2  # Invalid

            # Update doctor fields and create specialties if applicable
            doctor.name = record['name']
            doctor.surname = record['surname']
            doctor.state = record['state']
            doctor.email = record['email']
            doctor.region = record['region']
            doctor.notes = record['notes']
            doctor.image_path = record['image_path']
            doctor.status = record['status']
            doctor.save()
            # ``or ()`` keeps the original behaviour of skipping a missing or
            # empty specialties value.
            for specialty in record['specialties'] or ():
                DoctorSpecialty.create(
                    doctor=doctor,
                    name=specialty['name'],
                    type=specialty['type'],
                    code=specialty['code'],
                    end_date=specialty['end_date'],
                )

            processed = True
            logger.info('\tProcessed')
        except Exception as e:
            # Any failure ends the run for this doctor.
            # NOTE(review): the doctor's status is left unchanged on this
            # path -- confirm that is intended.
            logger.info('\tError: {}'.format(e))
            processed = True
        finally:
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    # Best-effort cleanup: a failure to close the browser
                    # must not mask the outcome of the scrape.
                    pass

    logger.info('\tFinished')
Ejemplo n.º 2
0
def scrap_movistar_lines(rrll):
    """Scrape the Movistar telephone lines for *rrll* and persist them.

    A proxy is requested for the ``movistar_line`` service; when none is
    available the function logs the problem and returns without touching
    *rrll*. Otherwise the status returned by ``scrap_and_recognize`` is saved
    on *rrll*, one ``TelephoneLine`` row is created per scraped record, and
    the browser driver is closed whether or not the attempt succeeded.

    Status codes written here: ``2`` (Invalid) when neither the scraped
    ``ruc`` nor the scraped ``dni`` matches *rrll*, and ``3`` (Error) when an
    exception is raised or the retry budget is exhausted.

    Args:
        rrll: Record to update. It must expose ``ruc``, ``dni``, ``status``
            and a ``save()`` method.

    Returns:
        None.
    """
    max_retries = 3
    logger.info('\tStarting')
    logger.info('\tQuerying {}'.format(rrll.ruc))
    proxy_server = get_proxy(service='movistar_line')
    if not proxy_server:
        logger.info('\tError: Out of proxies!')
        logger.info('\tFinished')
        return

    processed = False
    # NOTE(review): nothing in this function increments ``retries`` and every
    # failure ends the loop, so exactly one attempt is made and the
    # 'Giving up!' branch is currently unreachable. Confirm whether a real
    # retry policy was intended.
    retries = 0
    while not processed:
        # ``>=`` instead of ``==`` so the guard still fires if the counter
        # ever advances by more than one per iteration.
        if retries >= max_retries:
            logger.info('\tGiving up!')
            # Update rrll status record
            rrll.status = 3  # Error
            rrll.save()
            break

        # Reset per attempt so ``finally`` never touches an unbound name (or
        # a driver left over from a previous iteration) when ``get_driver``
        # itself fails.
        driver = None
        try:
            logger.info('\tUsing proxy {}'.format(proxy_server))
            driver = get_driver(proxy_server)
            v_json = scrap_and_recognize(driver, rrll)

            # Flagged invalid only when BOTH identifiers differ.
            # NOTE(review): confirm ``and`` (rather than ``or``) is intended.
            if v_json['ruc'] != rrll.ruc and v_json['dni'] != rrll.dni:
                v_json['status'] = 2  # Invalid

            # Update rrll fields and create rrll records
            rrll.status = v_json['status']
            rrll.save()
            # ``or ()`` keeps the original behaviour of skipping a missing or
            # empty records value.
            for line in v_json['records'] or ():
                TelephoneLine.create(
                    rrll=rrll,
                    modality=line['modality'],
                    telephone=line['telephone'],
                )
            processed = True
            logger.info('\tProcessed')

        except Exception as e:
            # Any failure ends the run and marks the record as errored.
            logger.info('\tError: {}'.format(e))
            rrll.status = 3  # Error
            rrll.save()
            processed = True
        finally:
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    # Best-effort cleanup: a failure to close the browser
                    # must not mask the outcome of the scrape.
                    pass

    logger.info('\tFinished')
Ejemplo n.º 3
0
def scrap_document_number(graduate):
    """Scrape the SUNEDU records for *graduate* and persist them.

    A proxy is requested for the ``sunedu`` service; when none is available
    the function logs the problem and returns without touching *graduate*.
    Otherwise the status returned by ``scrap_and_recognize`` is saved on
    *graduate*, one ``GraduateRecord`` row is created per scraped record, and
    the browser driver is closed whether or not the attempt succeeded.

    Status codes written here: ``2`` (Invalid) when the scraped ``id``
    differs from ``graduate.id`` and ``3`` (Error) when the retry budget is
    exhausted; any other status comes straight from the scraped result.

    Args:
        graduate: Record to update. It must expose ``id``, ``status`` and a
            ``save()`` method.

    Returns:
        None.
    """
    max_retries = 3
    logger.info('\tStarting')
    logger.info('\tQuerying {}'.format(graduate.id))
    proxy_server = get_proxy(service='sunedu')
    if not proxy_server:
        logger.info('\tError: Out of proxies!')
        logger.info('\tFinished')
        return

    processed = False
    # NOTE(review): nothing in this function increments ``retries`` and every
    # failure ends the loop, so exactly one attempt is made and the
    # 'Giving up!' branch is currently unreachable. Confirm whether a real
    # retry policy was intended.
    retries = 0
    while not processed:
        # ``>=`` instead of ``==`` so the guard still fires if the counter
        # ever advances by more than one per iteration.
        if retries >= max_retries:
            logger.info('\tGiving up!')
            # Update graduate status record
            graduate.status = 3  # Error
            graduate.save()
            break

        # Reset per attempt so ``finally`` never touches an unbound name (or
        # a driver left over from a previous iteration) when ``get_driver``
        # itself fails.
        driver = None
        try:
            logger.info('\tUsing proxy {}'.format(proxy_server))
            driver = get_driver(proxy_server)
            record = scrap_and_recognize(driver, graduate)
            if record['id'] != graduate.id:
                record['status'] = 2  # Invalid

            # Update graduate fields and create graduate records
            graduate.status = record['status']
            graduate.save()
            # A distinct loop variable: the original reused ``record`` here,
            # rebinding the scraped result while iterating over it.
            for entry in record['records'] or ():
                GraduateRecord.create(
                    graduate=graduate,
                    name=entry['name'],
                    grade=entry['grade'],
                    institution=entry['institution'],
                )

            processed = True
            logger.info('\tProcessed')
        except Exception as e:
            # Any failure ends the run for this graduate.
            # NOTE(review): the graduate's status is left unchanged on this
            # path -- confirm that is intended.
            logger.info('\tError: {}'.format(e))
            processed = True
        finally:
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    # Best-effort cleanup: a failure to close the browser
                    # must not mask the outcome of the scrape.
                    pass

    logger.info('\tFinished')
Ejemplo n.º 4
0
def scrap_plate_number(vehicle):
    """Scrape the SUNARP record for *vehicle* and persist the result.

    A proxy is requested for the ``sunarp`` service; when none is available
    the function logs the problem and returns without touching *vehicle*.
    Each attempt opens a fresh driver, copies the record returned by
    ``scrap_and_recognize`` onto *vehicle* and always closes the driver.

    Failure handling per attempt:

    * ``NoSuchElementException``: the warning label is looked up. If the
      label is missing too, the vehicle is marked as errored and the run
      stops. If the label reports that the maximum number of queries was
      reached, the current proxy is recorded via ``Proxy.create`` and a new
      one is requested; running out of proxies stops the run. In every other
      case the attempt is retried.
    * ``JavascriptException`` / ``AttributeError``: the attempt is retried.

    Every retry consumes exactly one unit of the ``max_retries`` budget;
    once the budget is spent the vehicle is marked as errored.

    Status codes written here: ``2`` (Invalid) when the scraped
    ``plate_number`` differs from ``vehicle.id`` and ``3`` (Error) on a
    scraping problem or an exhausted retry budget.

    Args:
        vehicle: Record to update. It must expose ``id``, the attributes
            assigned below and a ``save()`` method.

    Returns:
        None.

    Raises:
        Exception: anything other than the exception types handled above
            propagates to the caller after the driver has been closed.
    """
    max_retries = 3
    logger.info('\tStarting')
    logger.info('\tQuerying {}'.format(vehicle.id))
    proxy_server = get_proxy(service='sunarp')
    if not proxy_server:
        logger.info('\tError: Out of proxies!')
        logger.info('\tFinished')
        return

    processed = False
    retries = 0
    while not processed:
        # ``>=`` rather than ``==`` so the limit cannot be stepped over.
        if retries >= max_retries:
            logger.info('\tGiving up!')
            # Update vehicle status record
            vehicle.status = 3  # Error
            vehicle.save()
            break

        # Reset per attempt so ``finally`` never quits a driver left over
        # from a previous iteration (or an unbound name) when ``get_driver``
        # itself fails.
        driver = None
        try:
            logger.info('\tUsing proxy {}'.format(proxy_server))
            driver = get_driver(proxy_server)
            record = scrap_and_recognize(driver, vehicle)
            if record['plate_number'] != vehicle.id:
                record['status'] = 2  # Invalid

            # Update vehicle fields
            vehicle.plate_number = record['plate_number']
            vehicle.serial_number = record['serial_number']
            vehicle.vin_number = record['vin_number']
            vehicle.engine_number = record['engine_number']
            vehicle.color = record['color']
            vehicle.make = record['make']
            vehicle.model = record['model']
            vehicle.valid_plate_number = record['valid_plate_number']
            vehicle.previous_plate_number = record['previous_plate_number']
            vehicle.state = record['state']
            vehicle.notes = record['notes']
            vehicle.branch = record['branch']
            vehicle.owners = record['owners']
            vehicle.image_path = record['image_path']
            vehicle.status = record['status']
            vehicle.save()

            processed = True
            logger.info('\tProcessed')
        except NoSuchElementException:
            logger.info('\tError: Element not found')
            try:
                warning = driver.find_element_by_xpath(
                    '//span[contains(@id, "MainContent_lblWarning")]').text
            except NoSuchElementException:
                # Neither the expected content nor the warning label is on
                # the page: treat it as a scraping failure and stop.
                logger.info('\tError: Scraping problem')
                vehicle.status = 3  # Error
                vehicle.save()
                processed = True
            else:
                if 'número máximo' in warning:
                    logger.info('\tError: Max queries reached for {}'.format(
                        proxy_server))
                    # Save this invalid proxy in table and ask for another one
                    Proxy.create(service='sunarp', ip=proxy_server)
                    proxy_server = get_proxy(service='sunarp')
                    if not proxy_server:
                        logger.info('\tError: Out of proxies!')
                        processed = True
                # Count the retry exactly once. The original incremented
                # both inside the branch above and in the ``else`` clause, so
                # a proxy swap advanced the counter by two and could step
                # over the ``max_retries`` check.
                if not processed:
                    retries += 1
                    logger.info('\tRetrying...')
        except JavascriptException:
            logger.info('\tError: Javascript')
            logger.info('\tRetrying...')
            retries += 1
        except AttributeError:
            # Probably something wrong with the license image
            logger.info('\tError: Invalid image')
            logger.info('\tRetrying...')
            retries += 1
        finally:
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    # Best-effort cleanup: a failure to close the browser
                    # must not mask the outcome of the scrape.
                    pass

    logger.info('\tFinished')