Esempio n. 1
0
    def _text(self, url):
        """
        Fetch the text of ``url``, retrying through proxies when enabled.

        If ``config.REQUEST_PROXY_RETRY`` is truthy and a proxy is available,
        the request is repeated through that proxy until it yields text.
        After more than ``config.REQUEST_PROXY_RETRY`` failed retries the
        proxy is deleted and a new one is fetched. If no replacement proxy is
        available, a single direct request is made and its result returned.
        Otherwise the URL is requested directly, without a proxy.

        :param url: address to request
        :return: the value returned by ``self.__request`` (``None`` counts
            as a failed attempt inside the retry loop)
        """
        # Get a proxy.
        proxy = self.__proxy()

        # Proxying disabled or no proxy available: request directly.
        if not (config.REQUEST_PROXY_RETRY and proxy):
            return self.__request(None, url)

        text = self.__request(proxy, url)

        # Retries made with the current proxy.
        retried = 0
        # Attempts made across all proxies.
        try_total = 0
        while text is None:
            retried += 1
            try_total += 1
            # Retry limit reached: delete this proxy and fetch another.
            if retried > config.REQUEST_PROXY_RETRY:
                self.__proxy_delete(proxy['ip'], proxy['port'])
                proxy = self.__proxy()
                # Reset the per-proxy retry counter.
                retried = 0
                if not proxy:
                    # No replacement proxy: fall back to one direct request
                    # rather than looping on and failing on proxy['ip'].
                    return self.__request(None, url)
            text = self.__request(proxy, url)
            config.console_log('使用代理请求页面 %s ,总尝试第 %s 次' % (url, try_total),
                               'white')

        return text
Esempio n. 2
0
    def __request(self, proxy, url):
        """
        Request ``url`` with a GET, optionally through ``proxy``.

        :param proxy: dict with ``'ip'`` and ``'port'`` keys; any falsy or
            non-dict value means the request is made without a proxy
        :param url: address to request
        :return: the response body text on HTTP 200, otherwise ``None``
        """
        # Request settings shared by proxied and direct requests.
        kwargs = {'timeout': 10, 'headers': config.get_http_header()}

        # Route both schemes through the proxy when one is supplied.
        if isinstance(proxy, dict) and proxy:
            proxy_url = 'http://%s:%s' % (proxy['ip'], proxy['port'])
            kwargs['proxies'] = {'http': proxy_url, 'https': proxy_url}

        # NOTE(review): exceptions raised by requests.get (timeouts, proxy
        # errors) are not handled here and propagate to the caller.
        response = requests.get(url, **kwargs)
        if response.status_code == 200:
            return response.text

        # Arguments follow the placeholder order: status code, URL, body.
        config.console_log(
            '请求返回的状态码: %s URL: %s 内容: %s' %
            (str(response.status_code), url, response.text), 'red')
        return None
Esempio n. 3
0
 def _text(self, url):
     """
     Download ``url`` directly and return its body text.

     :param url: address to request
     :return: the response text when the response is ok; ``None`` when the
         response is not ok or the request times out
     """
     try:
         reply = requests.get(url, timeout=10, headers=config.get_http_header())
         if not reply.ok:
             # TODO: retry the download through a proxy
             config.console_log('请求返回的状态码: ' + str(reply.status_code), 'red')
             return None
         return reply.text
     except Timeout as err:
         config.console_log('请求超时: ' + str(err), 'red')
         return None
Esempio n. 4
0
def crawl_handle(protocal, proxy, queue_persistence):
    """
    Validate ``proxy`` for one protocol and queue it if it works.

    ``connect`` is called with an httpbin.org URL of the requested scheme.
    When it reports success, ``proxy`` is updated in place with the
    ``'protocol'``, ``'anonymity'`` and ``'speed'`` keys and put on
    ``queue_persistence``. The outcome is logged either way. Any other
    protocol value is ignored.

    :param protocal: ``'http'`` or ``'https'`` (spelling kept for callers)
    :param proxy: dict describing the proxy; mutated in place on success
    :param queue_persistence: object whose ``put`` receives validated proxies
    :return: None
    """
    # Compare by value: ``is`` against a string literal tests identity, so
    # an equal protocol string built at runtime could be silently skipped.
    if protocal == 'http':
        scheme = 'http'
        test_url = 'http://httpbin.org/get'
        passed_prefix = '验证通过的 http 代理: '
        failed_prefix = '无效的 http 代理: '
    elif protocal == 'https':
        scheme = 'https'
        test_url = 'https://httpbin.org/get'
        passed_prefix = '验证通过的 https 代理: '
        failed_prefix = '无效的https代理: '
    else:
        return

    passed, anonymity, interval = connect(test_url, proxy)
    if not passed:
        config.console_log(
            failed_prefix + json.dumps(proxy, ensure_ascii=False), 'red')
        return

    # Record the validation result on the proxy before persisting it.
    proxy['protocol'] = scheme
    proxy['anonymity'] = anonymity
    proxy['speed'] = interval
    queue_persistence.put(proxy)
    config.console_log(
        passed_prefix + json.dumps(proxy, ensure_ascii=False), 'green')
Esempio n. 5
0
    def __proxy_delete(self, ip, port):
        """
        Ask the web API to delete a crawled proxy and log the outcome.

        :param ip: proxy IP, sent as the ``ip`` query parameter
        :param port: proxy port, sent as the ``port`` query parameter
        :return: None
        """
        endpoint = 'http://{}:{}/proxy/delete?ip={}&port={}'.format(
            config.WEB_API_IP, config.WEB_API_PORT, ip, port)
        reply = requests.get(endpoint, timeout=5)

        # A 204 status is treated as a successful delete; anything else fails.
        if reply.status_code == 204:
            template, colour = '删除代理成功 %s:%s', 'green'
        else:
            template, colour = '删除代理失败 %s:%s', 'red'
        config.console_log(template % (ip, port), colour)
def process_report(text_content, html_content):
    """Deliver the report by mail, or print it to the console.

    When ``config.config['method']`` is ``'mail'`` the report is sent through
    the SMTP server named in ``config.config['smtp_server']``, as HTML when
    ``config.config['mail_format']`` is ``'html'`` and as plain text
    otherwise. If connecting or sending fails, the error is logged and the
    plain-text report is printed to the console instead. For any other
    method the plain-text report is printed to the console.

    Rebinds the module globals ``server`` and ``server_connected``.

    :param text_content: plain-text report (mail body or console output)
    :param html_content: HTML report, used only for HTML mail
    :return: None
    """
    global server_connected
    global server

    def print_to_console():
        # Console fallback shared by every non-mail / failure path.
        config.console_log("\n")
        config.console_log(text_content)

    if config.config['method'] != 'mail':
        print_to_console()
        return

    if config.config['mail_format'] == 'html':
        message = MIMEText(html_content, 'html', 'utf-8')
    else:
        message = MIMEText(text_content, 'plain', 'utf-8')

    message['Subject'] = 'Movies torrents digest'
    message['From'] = config.config['from']
    message['To'] = config.config['to']

    # Reset first so a failed connection cannot reuse a stale True (and an
    # already-closed ``server``) left behind by an earlier call.
    server_connected = False
    try:
        server = smtplib.SMTP(config.config['smtp_server'])
        server_connected = True
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        config.log_message("Unexpected error while connecting to mail server:" + str(sys.exc_info()[0]), 'error')
        config.log_message("Printing report to console\n")
        print_to_console()

    if not server_connected:
        return

    try:
        server.ehlo()
        server.starttls()
        server.login(config.config['username'], config.config['password'])
        server.sendmail(config.config['from'], config.config['to'], message.as_string())
        server.quit()
        config.log_message("Report sent by mail")
    except Exception:
        config.log_message("Unexpected error while sending mail :" + str(sys.exc_info()[0]), 'error')
        config.log_message("Printing report to console")
        print_to_console()
    finally:
        # Always release the socket, whether or not sending succeeded.
        server.close()