Example No. 1
import requests

# `cpn` (project helper module), `pic_headers` and `pic_save_dir` are
# defined elsewhere in the project these snippets come from.
def download_pic(img):
    # Records are stored as "nameΘurl"; split the pair back apart and
    # derive the file name from the caption plus the URL's extension.
    img_url = img.split('Θ')[-1]
    pic_name = img.split('Θ')[0] + '.' + img_url.split('.')[-1]
    while True:
        # Fetch a fresh proxy on every attempt; retry until the download succeeds.
        proxy_ip = {
            'http': 'http://' + cpn.get_dx_proxy_ip(),
            'https': 'https://' + cpn.get_dx_proxy_ip()
        }
        try:
            resp = requests.get(img_url, headers=pic_headers, proxies=proxy_ip, timeout=5)
            if resp is not None:
                print("Downloading: " + resp.request.url)
                with open(pic_save_dir + pic_name, "wb") as f:
                    f.write(resp.content)
                return None
        except Exception:
            # Bad proxy or timeout: loop around and try the next one.
            pass
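download_pic consumes the "nameΘurl" records that the crawler functions below append to pic_urls_file. A minimal driver might look like the following sketch; the file path and the pool size are assumptions, not part of the original code.

from concurrent.futures import ThreadPoolExecutor

def download_all(pic_urls_file='pic_urls.txt'):
    # Hypothetical driver: read one "nameΘurl" record per line and
    # hand each record to download_pic on a small thread pool.
    with open(pic_urls_file, encoding='utf-8') as f:
        records = [line.strip() for line in f if line.strip()]
    with ThreadPoolExecutor(max_workers=8) as pool:
        pool.map(download_pic, records)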
Example No. 2
import threading
import time

# `ajax_url`, `ajax_headers` and `pic_urls_file` are module globals.
def get_ajax_data(data):
    while True:
        proxy_ip = {
            'http': 'http://' + cpn.get_dx_proxy_ip(),
            'https': 'https://' + cpn.get_dx_proxy_ip()
        }
        try:
            resp = requests.post(ajax_url, data=data, headers=ajax_headers, proxies=proxy_ip, timeout=5)
            if resp is not None:
                soup = cpn.get_bs(resp.text)
                lis = soup.findAll('li')
                for li in lis:
                    # The last four characters of the src are a site-specific suffix; trim them.
                    img = li.find('img', attrs={'class': 'cardImage'})['src'][:-4]
                    if img != '':
                        name = li.find('p', attrs={'class': 'fz14 text cut'}).get_text().strip()
                        if name == '':
                            # No caption: fall back to a timestamp-based name.
                            name = str(int(time.time()))
                        # Persist the record as "nameΘurl" for download_pic.
                        cpn.write_str_data(name + "Θ" + img, pic_urls_file)
                return None
        except Exception as e:
            print(threading.current_thread().name + "~" + str(e))
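Every example here leans on a project module cpn whose source is not shown. Judging from the call sites, it wraps BeautifulSoup, a proxy-pool lookup, and an append-to-file helper; the sketch below is only a guess at those helpers, assuming the proxy pool is a plain text file of "ip:port" lines.

import random
from bs4 import BeautifulSoup

def get_bs(markup):
    # Parse HTML the same way for every example.
    return BeautifulSoup(markup, 'html.parser')

def get_dx_proxy_ip(proxy_file='dx_proxies.txt'):
    # Pick a random "ip:port" entry from the proxy-pool file (hypothetical format).
    with open(proxy_file, encoding='utf-8') as f:
        proxies = [line.strip() for line in f if line.strip()]
    return random.choice(proxies)

def write_str_data(text, file_path):
    # Append one record per line.
    with open(file_path, 'a', encoding='utf-8') as f:
        f.write(text + '\n')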
Example No. 3
def catch_page_count():
    proxy_ip = {'https': 'https://' + cpn.get_dx_proxy_ip()}
    try:
        resp = requests.get(base_url, headers=headers, proxies=proxy_ip)
        if resp is not None:
            soup = cpn.get_bs(resp.text)
            # Get the last page number from the second-to-last pager link.
            last_page_count = soup.find('div', attrs={'class': 'pagination'}).findAll('a')[-2].get_text()
            return last_page_count
    except Exception as e:
        print(str(e))
Example No. 4
def catch_page_count():
    while True:
        proxy_ip = {'http': 'http://' + cpn.get_dx_proxy_ip()}
        try:
            resp = requests.get(base_url, headers=headers, proxies=proxy_ip, timeout=5)
            if resp is not None:
                print(proxy_ip)
                soup = cpn.get_bs(resp.text)
                # Get the last page number from the second-to-last pager link.
                last_page_count = soup.find('div', attrs={'class': 'pagination'}).findAll('a')[-2].get_text()
                return last_page_count
        except Exception:
            # Bad proxy or timeout: retry with the next proxy.
            pass
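The page count feeds the AJAX crawler from Example No. 2. The loop below sketches that flow; the 'p' payload key is hypothetical, since the real parameters expected by ajax_url are not shown in these snippets.

def crawl_all_pages():
    # Hypothetical driver: walk every page reported by the pager.
    page_count = int(catch_page_count())
    for page in range(1, page_count + 1):
        get_ajax_data({'p': page})  # adjust the key to what ajax_url expects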
Example No. 5
def get_toppost100(params):
    while True:
        proxy_ip = {
            'http': 'http://' + cpn.get_dx_proxy_ip(),
            'https': 'https://' + cpn.get_dx_proxy_ip()
        }
        try:
            resp = requests.get(toppost100_url, params=params, headers=toppost100_headers, proxies=proxy_ip, timeout=5)
            if resp is not None:
                print("Fetching: " + resp.request.url)
                soup = cpn.get_bs(resp.text)
                ul = soup.find('ul', attrs={'class': 'l-clearfix gridList workImageCards js-workTopList'})
                lis = ul.findAll('li')
                for li in lis:
                    img = li.find('img', attrs={'class': 'cardImage'})['src'][:-4]
                    if img != '':
                        name = li.find('p', attrs={'class': 'fz14 text cut'}).get_text().strip()
                        if name == '':
                            name = str(int(time.time()))
                        cpn.write_str_data(name + "Θ" + img, pic_urls_file)
                return None
        except Exception as e:
            print(threading.current_thread().name + "~" + str(e))
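get_toppost100 repeats the card-parsing loop from get_ajax_data verbatim. When adapting these snippets, the shared logic can live in one helper, sketched here under the same assumptions about the page markup:

def save_cards(container):
    # Extract (caption, image URL) pairs from card <li> elements and persist them.
    for li in container.findAll('li'):
        img = li.find('img', attrs={'class': 'cardImage'})['src'][:-4]
        if img != '':
            name = li.find('p', attrs={'class': 'fz14 text cut'}).get_text().strip()
            if name == '':
                name = str(int(time.time()))
            cpn.write_str_data(name + "Θ" + img, pic_urls_file)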
Example No. 6
def catch_ip(url):
    proxy_ip = {'https': 'https://' + cpn.get_dx_proxy_ip()}
    try:
        resp = requests.get(url, headers=headers, proxies=proxy_ip)
        if resp is not None:
            soup = cpn.get_bs(resp.text)
            # Skip the header row of the proxy-list table.
            trs = soup.find('table').findAll('tr')[1:]
            for tr in trs:
                # The bar's title carries a numeric rating with a one-character suffix.
                if float(tr.find('div', attrs={'class': 'bar'})['title'][:-1]) > 1:
                    tds = tr.findAll('td')
                    cpn.write_xc_ip_file(tds[1].get_text() + ":" + tds[2].get_text())
    except Exception as e:
        print(str(e))
Example No. 7
def catch_ip(url):
    while True:
        proxy_ip = {'http': 'http://' + cpn.get_dx_proxy_ip()}
        print(proxy_ip)
        try:
            resp = requests.get(url, headers=headers, proxies=proxy_ip, timeout=10)
            if resp is not None:
                soup = cpn.get_bs(resp.text)
                trs = soup.find('table').findAll('tr')[1:]
                for tr in trs:
                    if float(tr.find('div', attrs={'class': 'bar'})['title'][:-1]) > 1:
                        tds = tr.findAll('td')
                        cpn.write_xc_ip_file(tds[1].get_text() + ":" + tds[2].get_text())
                # Stop once a page has been parsed successfully.
                return None
        except Exception:
            pass
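The harvested "ip:port" pairs are only candidates; many public proxies are dead on arrival. A quick liveness check before relying on them keeps the pool usable. The test URL below is an assumption:

def proxy_alive(ip_port, test_url='https://httpbin.org/ip'):
    # Hypothetical check: a proxy counts as alive if it completes a small
    # request within a few seconds.
    try:
        resp = requests.get(test_url,
                            proxies={'http': 'http://' + ip_port,
                                     'https': 'https://' + ip_port},
                            timeout=5)
        return resp.status_code == 200
    except Exception:
        return False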