コード例 #1
0
 def download(self,unfinished_item,need_mark_err=True):
     """Download a single pending item as a PDF and record the outcome.

     Args:
         unfinished_item: Indexable pair whose element 0 is the URL to
             fetch and whose element 1 is the id used to name the file
             (``<google_id>.pdf``).
         need_mark_err: When True, a failure is recorded through
             ``self.mark(google_id, ok=False, err=...)``.

     Returns:
         True when the file was already present in
         ``self.download_folder_files`` or was downloaded and is larger
         than 3 KB; False when any step raised.
     """
     # Bound before the try block so the error handler never hits an
     # unbound name when unpacking unfinished_item itself fails.
     google_id = None
     try:
         url = unfinished_item[0]
         google_id = unfinished_item[1]
         # Only download when the file name is not already known,
         # i.e. the item has not been downloaded yet.
         save_name = google_id + '.pdf'
         if save_name not in self.download_folder_files:
             resp = requests.get(
                 url=url, verify=False,
                 headers={'User-agent':get_one_random_ua()}
             )
             # Use one joined path for both writing and measuring so the
             # two cannot disagree when save_folder lacks a trailing
             # separator.
             save_path = os.path.join(self.save_folder, save_name)
             with open(save_path, 'wb') as pdf_file:
                 pdf_file.write(resp.content)
             # Measure only after the file is closed: while it is open,
             # buffered bytes may not be on disk yet and a small file
             # would be reported as 0 bytes.
             file_kb = os.path.getsize(save_path) / 1024.0
             if file_kb <= 3:
                 # Tiny responses are treated as failed downloads.
                 raise Exception('File size = {}k <= 3k'.format(file_kb))
             self.download_folder_files.append(save_name)
             print('Downloader:\n\t'+ save_name + '( {} Kb ) wrote ok...'.format(file_kb))
         # After a successful download, record it as downloaded
         # (the original comment says this is stored in the database).
         self.mark(google_id,ok=True)
         return True
     except Exception as e:
         # Without an id there is nothing to mark the failure against.
         if need_mark_err and google_id is not None:
             self.mark(google_id,ok=False,err=e)
         print(str(e))
         return False
コード例 #2
0
def request_with_random_ua(url, timeout=3):
    """GET ``url`` with a random User-Agent, trying up to six times.

    Each attempt picks a fresh User-Agent via ``get_one_random_ua()``.
    Any exception raised by an attempt is printed and the next attempt
    starts immediately.

    Returns:
        The response of the first attempt that does not raise, or None
        when all six attempts raised.
    """
    attempts_left = 6
    while attempts_left > 0:
        attempts_left -= 1
        try:
            response = requests.get(
                url=url,
                timeout=timeout,
                headers={'User-Agent': get_one_random_ua()},
            )
        except Exception as err:
            print('[Error]request_with_random_ua :%s' % str(err))
        else:
            return response
    return None
コード例 #3
0
def test_port(port_num):
    """Probe a local SOCKS5 proxy port by fetching https://api.ipify.org/.

    Args:
        port_num: Port on 127.0.0.1 where a SOCKS5 proxy is expected.

    Returns:
        The response body text on success (presumably the public IP seen
        through the proxy -- confirm against the ipify service), or None
        when the request fails for any reason.
    """
    socks_url = "socks5://127.0.0.1:{}".format(port_num)
    proxies = {
        "http": socks_url,
        "https": socks_url
    }
    try:
        r = requests.get(url="https://api.ipify.org/",
                         proxies=proxies,
                         timeout=10,
                         headers={'User-Agent': get_one_random_ua()})
        return r.text
    except Exception:
        # Best-effort probe: any ordinary failure means "port unusable".
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt / SystemExit propagate instead of being
        # swallowed during the up-to-10-second wait.
        return None
コード例 #4
0
def request_with_proxy(url,
                       timeout=14,
                       use_ss=False,
                       sleep=15,
                       no_proxy_test=False):
    """GET ``url`` with a random User-Agent through a local SOCKS5 proxy.

    Args:
        url: Address to fetch.
        timeout: Timeout passed to ``requests.get``.
        use_ss: Selects the port pool. False draws from
            ``rand_port(9054, 9155, [])``; True draws from
            ``rand_port(1080, 1108, [1094, 1098])``.
            NOTE(review): the pools look like Tor and Shadowsocks
            listeners respectively -- confirm with the deployment.
        sleep: Seconds to pause before a proxied request.
        no_proxy_test: When True, request directly with no proxy and no
            pause (certificate verification stays at the library default).

    Returns:
        The ``requests`` response. Exceptions raised by ``requests.get``
        propagate to the caller.
    """
    ua_headers = {'User-Agent': get_one_random_ua()}

    # Direct mode: skip both the delay and the proxy.
    if no_proxy_test:
        return requests.get(url, headers=ua_headers, timeout=timeout)

    time.sleep(sleep)

    # The chosen port is used as-is; it is not checked for liveness first.
    if use_ss:
        socks_port = rand_port(1080, 1108, [1094, 1098])
    else:
        socks_port = rand_port(9054, 9155, [])

    socks_url = "socks5://127.0.0.1:{}".format(socks_port)
    return requests.get(url,
                        proxies={"http": socks_url, "https": socks_url},
                        headers=ua_headers,
                        timeout=timeout,
                        verify=False)
コード例 #5
0
def request_with_proxy(url,
                       gap_time=15,
                       timeout=14,
                       use_ss=False,
                       no_proxy_test=False,
                       use_self_pool=False):
    """GET ``url`` with a random User-Agent, via one of several routes.

    Routes are checked in this order:

    1. ``use_self_pool`` -- delegate to ``req_with_proxy_pool(url, headers)``.
    2. ``no_proxy_test`` -- direct request, no proxy, no pause.
    3. Otherwise pause ``gap_time`` seconds, then go through a SOCKS5
       proxy on 127.0.0.1 whose port comes from ``rand_port``:
       ``(1080, 1108)`` excluding ``[1094, 1098]`` when ``use_ss`` is
       True, else ``(9054, 10055)`` with no exclusions.
       NOTE(review): the two pools look like Shadowsocks and Tor
       listeners -- confirm with the deployment.

    Returns:
        Whatever the selected route returns; for routes 2 and 3 that is
        the ``requests`` response. Exceptions propagate to the caller.
    """
    ua_headers = {'User-Agent': get_one_random_ua()}

    if use_self_pool:
        return req_with_proxy_pool(url, ua_headers)
    if no_proxy_test:
        return requests.get(url, headers=ua_headers, timeout=timeout)

    time.sleep(gap_time)

    # (low, high, excluded ports) handed to rand_port for the chosen pool.
    port_spec = (1080, 1108, [1094, 1098]) if use_ss else (9054, 10055, [])
    chosen_port = rand_port(*port_spec)

    endpoint = "socks5://127.0.0.1:{}".format(chosen_port)
    socks_proxies = {"http": endpoint, "https": endpoint}
    return requests.get(url,
                        proxies=socks_proxies,
                        headers=ua_headers,
                        timeout=timeout,
                        verify=False)
コード例 #6
0
def request_with_random_ua(url, timeout=3):
    """GET ``url`` once, sending a User-Agent from ``get_one_random_ua()``.

    Returns the ``requests`` response; any exception raised by the
    request propagates to the caller.
    """
    ua_headers = {'User-Agent': get_one_random_ua()}
    response = requests.get(url=url, timeout=timeout, headers=ua_headers)
    return response