Example #1
0
def get_conn():
    """
    Return the MongoDB client stored on ``g``, creating it on first use.

    The client is kept in the ``mongodb_conn`` attribute of ``g``; a new
    ``MongodbClient`` is only built when that attribute does not exist yet.
    """
    # Fast path: a client has already been attached to ``g``.
    if hasattr(g, 'mongodb_conn'):
        return g.mongodb_conn

    g.mongodb_conn = MongodbClient()
    return g.mongodb_conn
Example #2
0
 def valid_proxy(self, cycle=VALID_CHECK_CYCLE):
     """
     Periodically re-test a share of the proxies stored in the database.

     Runs forever. Each pass computes ``int(5 * get_nums / 6)`` from the
     client's ``get_nums`` attribute, fetches that many proxies and hands
     them to a ``ProxyTester``. When the computed number is zero the pass
     only reports that it is waiting for proxies to be added. Either way
     the loop sleeps ``cycle`` seconds before the next pass.

     :param cycle: seconds to sleep between two passes.
     """
     client = MongodbClient()
     checker = ProxyTester()
     while True:
         print('Refreshing ip...')
         batch_size = int(5 * client.get_nums / 6)
         if batch_size:
             checker.set_raw_proxies(client.get(batch_size))
             checker.test()
         else:
             # Nothing to check yet; wait for the pool to be filled.
             print('Waiting for adding...')
         time.sleep(cycle)
Example #3
0
class ProxyTester(object):
    def __init__(self):
        self._raw_proxies = None

    def set_raw_proxies(self, proxies):
        self._raw_proxies = proxies
        self._conn = MongodbClient()

    async def test_single_proxy(self, proxy):
        """
        测试一个代理,如果有效,将他放入usable-proxies
        """
        scheme = 'http://'
        test_url = HTTPS_TEST_URL
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        real_proxy = scheme + proxy

        async def test_proxy(https=True):
            name = 'https' if https else 'http'
            async with session.get(test_url, proxy=real_proxy,
                                   timeout=10) as response:
                if response.status == 200 or response.status == 429:
                    self._conn.put(proxy, https)
                    print('Valid {} proxy'.format(name), proxy)
                else:
                    print('Invalid {} status'.format(name), response.status,
                          proxy)
                    self._conn.delete(proxy)

        try:
            async with aiohttp.ClientSession() as session:
                try:
                    await test_proxy()
                except:
                    try:
                        test_url = HTTP_TEST_URL
                        await test_proxy(False)
                    except Exception as e:
                        self._conn.delete(proxy)
                        print('Invalid proxy', proxy, e)
                        # print('session error', e)
        except Exception as e:
            print(e)

    def test(self):
        """
        异步测试所有代理
        """
        print('Tester is working...')
        try:
            loop = asyncio.get_event_loop()
            tasks = [
                self.test_single_proxy(proxy) for proxy in self._raw_proxies
            ]
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError as e:
            time_format = '%Y-%m-%d %H:%M:%S'
            format_t = time.strftime(time_format)
            print(format_t, 'Async Error:', e)
Example #4
0
 def check_pool(self,
                lower_threshold=POOL_LOWER_THRESHOLD,
                upper_threshold=POOL_UPPER_THRESHOLD,
                cycle=POOL_LEN_CHECK_CYCLE):
     """
     Add proxies whenever the pool is at or below the lower threshold.

     Runs forever. Each pass takes the maximum of the values exposed by the
     client's ``get_count`` attribute; if it is less than or equal to
     ``lower_threshold`` the ``PoolAdder`` is asked to add proxies. The loop
     then sleeps ``cycle`` seconds.

     :param lower_threshold: count at or below which proxies are added.
     :param upper_threshold: value passed to ``PoolAdder``.
     :param cycle: seconds to sleep between two checks.
     """
     client = MongodbClient()
     pool_adder = PoolAdder(upper_threshold)
     while True:
         largest_count = max(client.get_count)
         if largest_count <= lower_threshold:
             pool_adder.add_to_pool()
         time.sleep(cycle)
Example #5
0
 def __init__(self, threshold):
     """
     Remember the threshold and create the helper objects.

     :param threshold: value stored as ``self._threshold``.
     """
     self._threshold = threshold

     # Helpers are created eagerly, in this order: database client,
     # proxy tester, proxy crawler.
     self._conn = MongodbClient()
     self._tester = ProxyTester()
     self._crawler = ProxyGetter()
Example #6
0
#!/usr/bin/python
# -*- coding:utf-8 -*-
# __author__ = 'yukun'

import json
from datetime import datetime

from .utils import parse_url
from db import MongodbClient


# Module-level MongodbClient instance, created once when this module is
# imported.
conn = MongodbClient()

def get_img_url():
    """
    Fetch the URL and copyright text of a Bing homepage image.

    Requests ``HPImageArchive.aspx`` through ``parse_url``, decodes the
    response text as JSON and reads the first entry of its ``images`` list.

    :return: ``{'url': ..., 'copyright': ...}`` for the first image, or
        ``None`` when the response carries no images.
    """
    # presumably idx=0 / n=1 select the single most recent image -- confirm
    # against the Bing endpoint's behaviour.
    params = {
        'format': 'js',
        'idx': 0,
        'n': 1,
        'pid': 'hp'
    }
    base_url = 'https://www.bing.com/HPImageArchive.aspx'
    resp = parse_url(base_url, params).text
    result = json.loads(resp)

    # Guard against payloads that are empty, not an object, or that have a
    # missing/empty ``images`` list, instead of failing on the index below.
    images = result.get('images') if isinstance(result, dict) else None
    if not images:
        return None

    first_image = images[0]
    bing_url = 'https://www.bing.com'
    return {
        'url': bing_url + first_image['url'],
        'copyright': first_image['copyright'],
    }
Example #7
0
 def set_raw_proxies(self, proxies):
     """
     Store the proxies to be tested and open a new database client.

     :param proxies: proxies kept as ``self._raw_proxies``.
     """
     self._raw_proxies = proxies
     # A new client is created on every call.
     client = MongodbClient()
     self._conn = client