예제 #1
0
    def __init__(self, **kwargs):
        """Store shared settings on ``Amazon`` and build this instance's executor.

        Everything except the executor is written to the ``Amazon`` class
        itself, not to ``self``, so each construction overwrites the values
        seen through the class.

        Keyword Args:
            CACHE_ROOT: Cache location; defaults to a ``cache`` folder on
                drive ``I:``.
            CACHE_EXPIRED_DAYS: Defaults to 15.
            max_workers: Passed to ``LThreadPoolExecutor``; defaults to 1.
            string_proxies: Sequence of proxy strings; defaults to an empty
                list. Only the first entry is used here.
        """
        # The keyword names are identical to the class attribute names, so a
        # single table drives both the lookup and the assignment.
        shared_defaults = (
            ('CACHE_ROOT', 'I:\\cache'),
            ('CACHE_EXPIRED_DAYS', 15),
            ('max_workers', 1),
            ('string_proxies', []),
        )
        for attr_name, fallback in shared_defaults:
            setattr(Amazon, attr_name, kwargs.get(attr_name, fallback))

        # ``Amazon.lr`` is only (re)assigned when at least one proxy is given;
        # with no proxies this method leaves it untouched.
        proxy_count = len(Amazon.string_proxies)
        if proxy_count > 0:
            first_proxy = Amazon.string_proxies[0]
            Amazon.lr = LRequest(string_proxy=first_proxy)

        # The captcha endpoint is fixed here and not configurable via kwargs.
        Amazon.captcha = GsaCaptcha(ip='192.168.1.188', port='8000')

        self.executor = LThreadPoolExecutor(max_workers=Amazon.max_workers)
예제 #2
0
    def __init__(self, **kwargs):
        """Create the request/captcha helpers and prepare the cache directories.

        Keyword Args:
            string_proxy: Proxy string handed to ``LRequest``; defaults to ``''``.
            gsa_ip: Address handed to ``GsaCaptcha``; defaults to
                ``'192.168.1.188'``.
            gsa_port: Port handed to ``GsaCaptcha``; defaults to ``'8000'``.
            cache_root: Base cache directory; defaults to a ``cache_amazon``
                folder on drive ``I:``.
            cache_page: Page cache directory; defaults to ``<cache_root>/pages``.
            cache_image: Image cache directory; defaults to
                ``<cache_root>/images``.
            domain: Defaults to ``'amazon.com'``.
            cache_expired_days: Defaults to 15.

        Raises:
            OSError: If a cache directory is missing and cannot be created.
        """
        self.lr = LRequest(string_proxy=kwargs.get('string_proxy', ''))

        self.captcha = GsaCaptcha(
            ip=kwargs.get('gsa_ip', '192.168.1.188'),
            port=kwargs.get('gsa_port', '8000'),
        )

        self.CACHE_ROOT = kwargs.get('cache_root', 'I:\\cache_amazon')
        self.CACHE_PAGES_ROOT = kwargs.get('cache_page', os.path.join(self.CACHE_ROOT, 'pages'))
        self.CACHE_IMAGES_ROOT = kwargs.get('cache_image', os.path.join(self.CACHE_ROOT, 'images'))

        for directory in (self.CACHE_ROOT, self.CACHE_PAGES_ROOT, self.CACHE_IMAGES_ROOT):
            try:
                os.makedirs(directory)
            except OSError:
                # Checking for existence before creating is racy: another
                # worker can create the directory in between and make
                # makedirs fail. Attempt the creation first and only
                # propagate the error when the path is still missing.
                if not os.path.exists(directory):
                    raise

        self.domain = kwargs.get('domain', 'amazon.com')

        self.CACHE_EXPIRED_DAYS = kwargs.get('cache_expired_days', 15)
예제 #3
0
from lutils.lrequest import LRequest
from lutils.captcha.gsa_captcha import GsaCaptcha

# Configure logging from the 'logging.conf' file (a relative path) at import
# time, then fetch the logger named 'verbose' -- presumably declared in that
# file; confirm against logging.conf.
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('verbose')

# SOCKS4 proxy endpoints, all on host 192.168.1.188 and differing only by port.
string_proxies = [
    'socks4://192.168.1.188:1080',
    'socks4://192.168.1.188:1081',
    'socks4://192.168.1.188:1082',
    'socks4://192.168.1.188:1083',
    # NOTE(review): port 1084 is disabled; the reason is not recorded here.
    # 'socks4://192.168.1.188:1084',
    'socks4://192.168.1.188:1085',
]

# Module-level captcha client, created at import time and read by
# check_captcha() below.
captcha = GsaCaptcha(ip='192.168.1.188', port=8000)


def check_captcha(lr):
    if captcha is not None:
        captcha_img_ele = lr.xpath(
            '//form[contains(@action, "Captcha")]//img[contains(@src, "captcha")]'
        )
        if captcha_img_ele is not None:
            while 1:
                try:
                    if captcha_img_ele is not None:
                        logger.info('Need Captcha')

                        form = lr.get_forms()[0]
                        lr.load(captcha_img_ele.attrib['src'])