def downloadTs(self, thread_num):
    # Worker loop: keep pulling ts segment descriptors off the shared queue
    # until it is empty, download each one, and store the raw bytes in
    # self.data keyed by the segment index so the stream can be rebuilt in order.
    while not self.queue.empty():
        ts = self.queue.get()
        try:
            print(thread_num, "downloading ts", ts)
            ind = ts["tsInd"]
            res = Downloader.get(ts["url"])
            self.data[ind] = res.content
        except Exception as e:
            print("download error", e, ts)
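# A minimal driver sketch, assuming downloadTs is a method on a downloader
# object "dl" that exposes the queue and data attributes used above. The
# run_ts_workers name, the thread count, and the output path are assumptions
# for illustration, not part of the original code.
import threading


def run_ts_workers(dl, thread_count=8, out_path="video.ts"):
    # Start one worker per thread; each repeatedly calls downloadTs until
    # the shared queue is drained.
    workers = [threading.Thread(target=dl.downloadTs, args=(i,))
               for i in range(thread_count)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    # dl.data maps segment index -> raw bytes; write the segments back out
    # in playlist order to rebuild the full stream.
    with open(out_path, "wb") as f:
        for ind in sorted(dl.data):
            f.write(dl.data[ind])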
def getSiteInfo(siteId: int):
    # Fetch site metadata from the API and return the "site" payload.
    # GET_SITE_INFO_URL and HEADERS are module-level constants defined
    # elsewhere in this project.
    url = GET_SITE_INFO_URL.format(siteId)
    s = Setting()
    s.setParams(headers=HEADERS)
    return Downloader.get(url, setting=s).json()["data"]["site"]
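# Hypothetical call site for getSiteInfo; the site id 1 is an arbitrary
# example value, since no real ids appear in the snippet above.
site = getSiteInfo(1)
print(site)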
def main_downloader():
    # Candidate HTTP proxies as [ip, port] pairs; each one is tried against
    # icanhazip.com to see whether it is still usable.
    proxies = [
        ["222.189.191.53", "9999"], ["182.111.64.7", "41766"], ["115.151.3.16", "9999"], ["121.233.206.151", "9999"],
        ["116.209.52.143", "9999"], ["1.198.72.234", "9999"], ["121.61.1.48", "9999"], ["183.148.133.22", "9999"],
        ["115.239.24.166", "9999"], ["110.52.235.226", "9999"], ["113.122.168.246", "9999"], ["59.62.165.99", "808"],
        ["218.91.112.42", "9999"], ["111.177.161.70", "9999"], ["110.52.235.231", "9999"], ["180.116.48.122", "9999"],
        ["113.122.168.23", "9999"], ["49.77.59.235", "8118"], ["110.52.235.173", "9999"], ["111.177.187.211", "9999"],
        ["124.94.192.206", "9999"], ["125.123.137.71", "9999"], ["121.61.1.222", "9999"], ["111.72.154.47", "9999"],
        ["125.123.138.26", "9999"], ["110.52.235.244", "9999"], ["121.61.24.254", "9999"], ["111.177.170.35", "9999"],
        ["42.53.73.131", "9999"], ["111.177.180.221", "9999"], ["111.177.170.11", "9999"], ["60.173.244.133", "41306"],
        ["116.209.59.131", "9999"], ["221.235.234.199", "9999"], ["110.52.235.76", "9999"], ["121.61.24.242", "9999"],
        ["112.87.69.158", "9999"], ["59.62.166.60", "9999"], ["59.62.166.172", "9999"], ["61.184.43.129", "9999"],
        ["110.52.235.70", "808"], ["116.209.56.164", "9999"], ["171.80.152.26", "9999"], ["110.52.235.79", "9999"],
        ["116.209.55.171", "9999"], ["116.209.52.190", "9999"], ["118.187.58.34", "53281"], ["110.52.235.67", "9999"],
        ["115.212.81.84", "8118"], ["121.31.158.51", "8123"], ["116.209.56.95", "9999"], ["116.209.56.179", "9999"],
        ["183.148.145.229", "9999"], ["121.61.3.223", "9999"], ["101.236.42.63", "8866"], ["111.176.31.69", "9999"],
        ["116.209.54.22", "9999"], ["116.209.57.233", "9999"], ["125.123.136.232", "9999"], ["27.29.95.209", "9999"],
        ["116.209.57.22", "9999"], ["112.85.174.44", "9999"], ["61.183.233.6", "54896"], ["116.209.59.150", "9999"],
        ["116.209.55.191", "9999"], ["116.209.56.125", "9999"], ["125.123.142.141", "9999"], ["59.62.167.130", "53128"],
        ["175.148.77.188", "1133"], ["116.209.52.177", "9999"], ["125.123.138.171", "9999"], ["111.181.65.0", "9999"],
        ["1.192.246.197", "9999"], ["111.177.179.8", "9999"], ["110.52.235.86", "9999"], ["120.35.12.105", "3128"],
        ["116.209.57.16", "9999"], ["59.45.16.10", "59156"], ["111.181.66.158", "9999"], ["112.85.130.51", "9999"],
        ["116.208.55.173", "9999"], ["115.151.5.177", "9999"], ["113.121.147.233", "9999"], ["171.80.0.190", "9999"],
        ["110.52.235.139", "9999"], ["121.61.3.176", "9999"], ["110.52.235.71", "9999"], ["110.52.235.114", "9999"],
        ["112.85.165.66", "9999"], ["116.209.59.174", "9999"], ["121.61.1.9", "9999"], ["112.85.174.93", "9999"],
        ["123.163.115.203", "9999"], ["180.119.141.144", "9999"], ["116.209.54.168", "9999"], ["116.209.58.45", "9999"],
        ["125.123.142.215", "9999"], ["110.52.235.196", "9999"],
    ]
    for proxy in proxies:
        # Route the request through the candidate proxy with a short timeout
        # and a single attempt, then report whether it answered.
        setting = Setting()
        setting.set_proxies(proxy[0], proxy[1])
        setting.timeout = 10
        setting.repeat = 1
        print("using proxy", proxy)
        try:
            res = Downloader.get("http://icanhazip.com", setting=setting)
            print("success", proxy, res.text, res.status_code)
        except Exception as e:
            print("fail", proxy, e)
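# A variant sketch of the same check that collects the responsive proxies
# instead of only printing them. The find_live_proxies name and its return
# value are assumptions; the Setting/Downloader calls are the ones used above.
def find_live_proxies(proxies):
    live = []
    for proxy in proxies:
        setting = Setting()
        setting.set_proxies(proxy[0], proxy[1])
        setting.timeout = 10
        setting.repeat = 1
        try:
            Downloader.get("http://icanhazip.com", setting=setting)
            live.append(proxy)
        except Exception:
            pass
    return live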
from dio_core.network.downloader import Downloader
from dio_core.utils import file_util, time_util

# Read a list of job ids from disk and kill each one through the management
# API, pausing between calls so the service is not hammered.
rows = file_util.readRows(
    "/home/changshuai/PycharmProjects/dio_core/Test/Data/kill_job_urls.txt")
for row in rows:
    url = "http://api.rhino.datatub.com/common/job/kill?job_id={}&token=5fa92f2a597cc60201780504be1028a7".format(row)
    res = Downloader.get(url)
    print(row, res.text, url)
    time_util.sleep(3)
def getTsUrls(self):
    # Fetch the m3u8 playlist and capture the line following every #EXTINF
    # entry, i.e. the URL of each ts segment, paired with its playlist index.
    res = Downloader.get(self.m3u8)
    result = text_util.get_all_match("#EXTINF:.*,\n(.*)", res.text)
    return zip(range(len(result)), result)
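# A minimal sketch of a companion method (on the same class) that feeds the
# work queue consumed by downloadTs above; the fillQueue name, the use of
# queue.Queue, and the {"tsInd", "url"} dict shape are assumptions inferred
# from the two snippets, not part of the original code.
import queue


def fillQueue(self):
    # One dict per segment with its playlist index and URL, plus an empty
    # dict to collect the downloaded bytes.
    self.queue = queue.Queue()
    self.data = {}
    for ind, url in self.getTsUrls():
        self.queue.put({"tsInd": ind, "url": url})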
import jsonpath

from dio_core.network.downloader import Downloader
from dio_core.utils import json_util, url_util, time_util
from dio_core_test.utils import text_util

keyword = "女装"  # search term: "women's clothing"

for i in range(100):
    # Each result page holds 20 shops; the shop list is embedded in the page
    # as the g_page_config JSON blob.
    html = Downloader.get(
        "https://shopsearch.taobao.com/browse/shop_search.htm?q={}&s={}".format(
            keyword, i * 20)).text
    data = json_util.to_python(
        text_util.get_first_match(html, "g_page_config = (.*);"))
    for shop in jsonpath.jsonpath(data, "$.mods.shoplist.data.shopItems.*"):
        # Shops whose icon title contains "天猫" are Tmall shops; the rest are
        # ordinary Taobao shops. Print the label, shop URL, and product count.
        if "shopIcon" in shop and "title" in shop["shopIcon"] and "天猫" in shop["shopIcon"]["title"]:
            print("天猫\t{}\t{}".format(url_util.patch_url(shop["shopUrl"]), shop["procnt"]))
        else:
            print("淘宝\t{}\t{}".format(url_util.patch_url(shop["shopUrl"]), shop["procnt"]))
    time_util.sleep(5)