Example #1
0
def get_proxy(http_type='http'):
    """
    Return a ``requests`` proxies mapping for a proxy that can reach Forbes.

    Keeps asking ``proxy_pool`` for a proxy matching ``http_type`` with
    'high' anonymity and probes the Forbes innovation page through it. The
    first proxy whose response body contains 'Forbes Welcome' is returned.
    The function does not return until such a proxy is found.

    :param http_type: value used for the 'type' key of the pool query.
    :return: dict of the form ``{'https': <proxy_ip['ip:port']>}``.
    """

    proxy = {'type': http_type, 'anonymity': 'high'}
    url = "https://www.forbes.com/innovation/"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
    }
    timeout = 20
    while True:
        logger.info("Start proxy_pool.get_single_proxy")
        proxy_ip = proxy_pool.get_single_proxy(proxy)

        if proxy_ip is None:
            logger.info("proxy_pool.get_single_proxy return None")
            continue
        # The probe URL is HTTPS, so the proxy is registered under 'https'.
        proxies = {
            'https': proxy_ip['ip:port']
        }
        try:
            r = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
            if 'Forbes Welcome' in r.text:
                logger.info('ip:%s for forbes', proxy_ip['ip:port'])
                return proxies
        except Exception as e:
            # Best effort: any failure just means this proxy is unusable,
            # so log it and try the next candidate.
            logger.info('Proxy Exception:%s', e)
Example #2
0
def get_proxy_http():
    """Block until the pool yields a high-anonymity 'http' proxy and return it.

    Polls ``proxy_pool.get_single_proxy`` and, whenever it returns ``None``,
    logs a notice and waits 30 seconds before asking again.

    :return: the proxy record exactly as returned by the pool.
    """
    rule = {'type': 'http', 'anonymity': 'high'}
    while True:
        candidate = proxy_pool.get_single_proxy(rule)
        if candidate is not None:
            return candidate
        logger.info("No proxy !!!!!!!!!!!!!!!!!!!")
        time.sleep(30)
Example #3
0
def get_proxy(http_type):
    """Block until the pool yields a proxy of ``http_type`` and return it.

    :param http_type: value used for the 'type' key of the pool query
        (anonymity is fixed to 'high').
    :return: a one-entry dict mapping the proxy's 'ip' to its 'port'.
    """
    # Local import so the block does not depend on a module-level import.
    import time

    proxy = {'type': http_type, 'anonymity': 'high'}
    proxy_ip = None
    while proxy_ip is None:
        print("Start proxy_pool.get_single_proxy")
        proxy_ip = proxy_pool.get_single_proxy(proxy)
        if proxy_ip is None:
            print("proxy_pool.get_single_proxy return None")
            # Back off before polling again instead of falling through and
            # subscripting None (which raised TypeError).
            time.sleep(30)
    # Only reached once a real proxy record is available.
    print(proxy_ip['ip:port'])
    return {proxy_ip['ip']: proxy_ip['port']}
Example #4
0
def get_proxy():
    """Block until the pool yields a high-anonymity 'http' proxy and return it.

    Asks ``proxy_pool.get_single_proxy`` repeatedly, sleeping 60 seconds
    after every ``None`` answer.

    :return: the proxy record exactly as returned by the pool.
    """
    rule = {'type': 'http', 'anonymity': 'high'}
    while True:
        candidate = proxy_pool.get_single_proxy(rule)
        if candidate is not None:
            return candidate
        time.sleep(60)
Example #5
0
def request(url,callback):
    """Fetch ``url`` through a high-anonymity proxy of type 'https'.

    Waits (polling the pool every 60 seconds) until a proxy is available,
    then passes the request to ``http_client.fetch`` with ``callback``.

    :param url: address to fetch.
    :param callback: forwarded unchanged to ``http_client.fetch``.
    """
    rule = {'type': 'https', 'anonymity':'high'}
    while True:
        chosen = proxy_pool.get_single_proxy(rule)
        if chosen is not None:
            break
        time.sleep(60)

    host = chosen["ip"]
    port = int(chosen["port"])
    http_client.fetch(url, callback, proxy_host=host, proxy_port=port)
Example #6
0
def request(url,callback):
    """Fetch ``url`` through a high-anonymity proxy of type 'http'.

    Waits (polling the pool every 60 seconds) until a proxy is available,
    logs the URL, then calls ``http_client.fetch`` with the module-level
    ``headers`` and 10-second request/connect timeouts.

    :param url: address to fetch.
    :param callback: forwarded unchanged to ``http_client.fetch``.
    """
    rule = {'type': 'http', 'anonymity':'high'}
    while True:
        chosen = proxy_pool.get_single_proxy(rule)
        if chosen is not None:
            break
        time.sleep(60)
    logger.info("crawler: %s",url)

    fetch_options = {
        'headers': headers,
        'proxy_host': chosen["ip"],
        'proxy_port': int(chosen["port"]),
        'request_timeout': 10,
        'connect_timeout': 10,
    }
    http_client.fetch(url, callback, **fetch_options)
Example #7
0
def request(url, callback):
    """Fetch ``url`` through a high-anonymity proxy of type 'http'.

    Waits (polling the pool every 60 seconds) until a proxy is available,
    then calls ``http_client.fetch`` with a 10-second request timeout.

    :param url: address to fetch.
    :param callback: forwarded unchanged to ``http_client.fetch``.
    """
    global total
    rule = {'type': 'http', 'anonymity': 'high'}
    while True:
        chosen = proxy_pool.get_single_proxy(rule)
        if chosen is not None:
            break
        time.sleep(60)
    host = chosen["ip"]
    port = int(chosen["port"])
    http_client.fetch(url, callback, proxy_host=host, proxy_port=port, request_timeout=10)
Example #8
0
 def get_proxy(self, http_type):
     """Block until the pool yields a high-anonymity proxy of ``http_type``.

     Each attempt is logged together with ``self.num``. After a ``None``
     answer the method waits 30 seconds, using ``gevent.sleep`` when
     ``socket.socket`` comes from ``gevent.socket`` and ``time.sleep``
     otherwise.

     :param http_type: value used for the 'type' key of the pool query.
     :return: the proxy record exactly as returned by the pool.
     """
     rule = {'type': http_type, 'anonymity':'high'}
     while True:
         logger.info("Start proxy_pool.get_single_proxy %s", self.num)
         found = proxy_pool.get_single_proxy(rule)
         if found is not None:
             return found
         logger.info("proxy_pool.get_single_proxy return None")
         # Only the selected branch is evaluated, so gevent is touched
         # solely when the socket module has been replaced by gevent's.
         pause = gevent.sleep if socket.socket.__module__ == "gevent.socket" else time.sleep
         pause(30)
Example #9
0
 def get_proxy(self):
     """Block until the pool yields a high-anonymity SOCKS4 or SOCKS5 proxy.

     The query combines the two accepted types in an ``$or`` clause. The
     pool is polled again every 30 seconds while it answers ``None``.

     :return: the proxy record exactly as returned by the pool.
     """
     accepted_types = [{'type': 'socks4'}, {'type': 'socks5'}]
     rule = {"$or": accepted_types, 'anonymity': 'high'}
     while True:
         candidate = proxy_pool.get_single_proxy(rule)
         if candidate is not None:
             return candidate
         time.sleep(30)
Example #10
0
def get_proxy():
    """Block until the pool yields a high-anonymity SOCKS4 or SOCKS5 proxy.

    The query combines the two accepted types in an ``$or`` clause. While
    the pool answers ``None`` a notice is logged and the call is retried
    after 30 seconds.

    :return: the proxy record exactly as returned by the pool.
    """
    accepted_types = [{'type': 'socks4'}, {'type': 'socks5'}]
    rule = {"$or": accepted_types, 'anonymity': 'high'}
    while True:
        candidate = proxy_pool.get_single_proxy(rule)
        if candidate is not None:
            return candidate
        logger.info("No proxy !!!!!!!!!!!!!!!!!!!")
        time.sleep(30)
Example #11
0
def request(url, callback):
    """Fetch ``url`` through a high-anonymity proxy matching its scheme.

    An 'https' proxy is requested when the URL starts with "https"
    (case-insensitive); otherwise an 'http' proxy is requested. The pool is
    polled every 60 seconds until a proxy is available, then the request is
    passed to ``http_client.fetch`` with 10-second request/connect timeouts.

    :param url: address to fetch.
    :param callback: forwarded unchanged to ``http_client.fetch``.
    """
    # Look at the scheme prefix only: a plain-HTTP URL may carry "https"
    # somewhere else (e.g. in a query parameter) and must not be treated
    # as an HTTPS request.
    if url.lower().startswith("https"):
        proxy = {'type': 'https', 'anonymity': 'high'}
    else:
        proxy = {'type': 'http', 'anonymity': 'high'}
    proxy_ip = None
    while proxy_ip is None:
        proxy_ip = proxy_pool.get_single_proxy(proxy)
        if proxy_ip is None:
            time.sleep(60)

    http_client.fetch(url,
                      callback,
                      proxy_host=proxy_ip["ip"],
                      proxy_port=int(proxy_ip["port"]),
                      request_timeout=10,
                      connect_timeout=10)
Example #12
0
def request(url, callback):
    """Fetch ``url`` through a high-anonymity proxy of type 'http'.

    Waits (polling the pool every 60 seconds) until a proxy is available,
    then calls ``http_client.fetch`` with a fixed desktop Chrome User-Agent
    and a 60-second request timeout.

    :param url: address to fetch.
    :param callback: forwarded unchanged to ``http_client.fetch``.
    """
    rule = {'type': 'http', 'anonymity': 'high'}
    while True:
        chosen = proxy_pool.get_single_proxy(rule)
        if chosen is not None:
            break
        time.sleep(60)

    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36'
    http_header = {'User-Agent': user_agent}

    host = chosen["ip"]
    port = int(chosen["port"])
    http_client.fetch(url, callback, proxy_host=host, proxy_port=port,
                      request_timeout=60, headers=http_header)
Example #13
0
    def init_http_session(self,url):
        """Point ``self.http_session`` at a fresh proxy suited to ``url``.

        The proxy type is 'https' when the URL starts with "https"
        (case-insensitive) and 'http' otherwise. The pool is polled every
        60 seconds until it returns a proxy. A ``requests.Session`` is
        created if none exists yet, its ``proxies`` mapping is replaced,
        and, when ``self.header`` is truthy, a fixed User-Agent is set.

        :param url: address the session is about to be used for.
        """
        scheme = "https" if url.lower().startswith("https") else "http"

        rule = {'type': scheme, 'anonymity':'high'}
        while True:
            chosen = proxy_pool.get_single_proxy(rule)
            if chosen is not None:
                break
            time.sleep(60)
        logger.info("Proxy IP(%s): %s" % (scheme, chosen))

        if self.http_session is None:
            self.http_session = requests.Session()
        proxy_url = "%s://%s:%s" % (scheme, chosen["ip"], chosen["port"])
        self.http_session.proxies = {scheme: proxy_url}

        if self.header:
            self.http_session.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36"
Example #14
0
def request(name, callback):
    """Fetch the so.com index overview for ``name`` through an 'http' proxy.

    When ``name`` is ``None`` no request is made: the global ``total`` is
    decremented and logged, and ``begin()`` is called once it drops to zero
    or below. Otherwise the pool is polled every 60 seconds until a
    high-anonymity 'http' proxy is available and the overview URL is fetched
    with a 10-second request timeout.

    :param name: query keyword appended to the URL as-is, or ``None``.
    :param callback: forwarded unchanged to ``http_client.fetch``.
    """
    global total

    # Handle the no-request case first: it needs no proxy, so it must not
    # block waiting for one or take one from the pool.
    if name is None:
        total -= 1
        logger.info(total)
        if total <= 0:
            begin()
        return

    proxy = {'type': 'http', 'anonymity': 'high'}
    proxy_ip = None
    while proxy_ip is None:
        proxy_ip = proxy_pool.get_single_proxy(proxy)
        if proxy_ip is None:
            time.sleep(60)

    url = 'http://index.so.com/index.php?a=overviewJson&q=' + name + '&area=%E5%85%A8%E5%9B%BD'
    http_client.fetch(url,
                      callback,
                      proxy_host=proxy_ip["ip"],
                      proxy_port=int(proxy_ip["port"]),
                      request_timeout=10)
Example #15
0
def get_proxy():
    """Return a ``requests`` proxies mapping for a proxy that can reach cyzone.

    Keeps drawing proxies matching the module-level ``Rule`` from the pool
    and probes http://www.cyzone.cn/event through each one. The first proxy
    whose response body contains 'cyzone' is returned. The function does not
    return until such a proxy is found.

    :return: dict of the form ``{'http': '<ip>:<port>'}``.
    """
    # Probe settings do not change between attempts, so build them once.
    url = "http://www.cyzone.cn/event"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
    }
    timeout = 10
    while True:
        try:
            item = proxy_pool.get_single_proxy(Rule)
            if item is None:
                # Report an empty pool explicitly rather than through the
                # TypeError raised by subscripting None.
                logger.info("proxy_pool.get_single_proxy return None")
                continue
            ip, port = item['ip'], item['port']
            ip_port = ip + ':' + str(port)
            logger.info('%s:%s', ip, port)
            proxies = {
                'http': ip_port
            }
            r = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
            if 'cyzone' in r.text:
                logger.info('ip:%s for cyzone', ip_port)
                return proxies
        except Exception as e:
            # Best effort: any failure just means this candidate is
            # unusable, so log it and try the next one.
            logger.info('Proxy Exception:%s', e)
Example #16
0
def get_proxy2():
    """Return the address of a proxy that can reach Baidu over HTTPS.

    Keeps drawing proxies matching the module-level ``Rule2`` from the pool
    and probes https://www.baidu.com/ through each one. The first proxy
    whose response body contains 'hao123' is returned. The function does not
    return until such a proxy is found.

    :return: tuple ``(ip, port)`` taken from the pool record.
    """
    # Probe settings do not change between attempts, so build them once.
    url = "https://www.baidu.com/"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
    }
    timeout = 10
    while True:
        try:
            item = proxy_pool.get_single_proxy(Rule2)
            if item is None:
                # Report an empty pool explicitly rather than through the
                # TypeError raised by subscripting None.
                print("proxy_pool.get_single_proxy return None")
                continue
            ip, port = item['ip'], item['port']
            ip_port = ip + ':' + str(port)
            print('%s:%s' % (ip, port))
            proxies = {
                'https': ip_port
            }
            r = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
            if 'hao123' in r.text:
                print('ip:%s for qimai' % ip_port)
                return ip, port
        except Exception as e:
            # Best effort: any failure just means this candidate is
            # unusable, so report it and try the next one.
            print('Proxy Exception:%s' % e)
Example #17
0
def get_session(proxy, new, agent):
    """Return the shared ``requests`` session, rebuilding it when required.

    A new session is created when ``new`` is truthy or no session exists
    yet. A new session optionally gets a User-Agent from
    ``get_a_user_agent()`` and is bound to a proxy drawn from the pool
    (polling every 60 seconds until one is available). Both 'http' and
    'https' traffic are routed through that proxy via an ``http://`` proxy
    URL. An existing session is returned untouched otherwise.

    :param proxy: query passed to ``proxy_pool.get_single_proxy``.
    :param new: truthy to discard the current session and build a new one.
    :param agent: truthy to set a User-Agent header on a new session.
    :return: the module-level ``http_session``.
    """
    global http_session
    # Identity test: None is a singleton, and ``==`` would dispatch to the
    # session object's own equality logic.
    if new or http_session is None:
        http_session = requests.Session()

        if agent:
            user_agent = get_a_user_agent()
            http_session.headers["User-Agent"] = user_agent

        proxy_ip = None
        while proxy_ip is None:
            proxy_ip = proxy_pool.get_single_proxy(proxy)
            if proxy_ip is None:
                time.sleep(60)
        logger.info(proxy_ip)

        proxy_url = "http://%s:%s" % (proxy_ip["ip"], proxy_ip["port"])
        http_session.proxies = {"http": proxy_url,
                                "https": proxy_url}

    return http_session
Example #18
0
def request(url, callback):
    """Fetch ``url`` through a SOCKS proxy using a tornado ``HTTPRequest``.

    Waits (polling the pool every 60 seconds) until a proxy matching the
    fixed query is available, builds the request with iTunes-oriented
    headers and 10-second request/connect timeouts, logs the proxy address
    and passes the request to ``http_client.fetch``.

    For "socks4" the request uses ``prepare_curl_socks4`` and disables
    certificate validation; any other type uses ``prepare_curl_socks5``.

    :param url: address to fetch.
    :param callback: forwarded unchanged to ``http_client.fetch``.
    """
    rule = {"http_type": "Socks4"}
    while True:
        chosen = proxy_pool.get_single_proxy(rule)
        if chosen is not None:
            break
        time.sleep(60)

    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36",
        "Host": "itunes.apple.com",
        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "",
    }

    # Keyword arguments shared by both SOCKS variants.
    options = dict(proxy_host=chosen["ip"],
                   proxy_port=int(chosen["port"]),
                   headers=headers,
                   request_timeout=10,
                   connect_timeout=10)
    if rule["http_type"].lower() == "socks4":
        options["prepare_curl_callback"] = prepare_curl_socks4
        options["validate_cert"] = False
    else:
        options["prepare_curl_callback"] = prepare_curl_socks5
    http_request = tornado.httpclient.HTTPRequest(url, **options)

    logger.info("Proxy: %s:%s", chosen["ip"], chosen["port"])
    http_client.fetch(http_request, callback)