def closeWorker(cookie, group="full"):
    """
    Close the workers of a group.

    :param cookie: session cookie string used for authentication
    :param group: worker group to close (defaults to "full")
    :return: True when the server answers with code 0, otherwise None
    """
    headers = {
        'Cookie': cookie,
        'Origin': "http://rhino.dev.datatub.com",
        'Accept-Encoding': "gzip, deflate",
        'Accept-Language': "zh-CN,zh;q=0.9",
        'User-Agent':
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
        'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
        'Accept': "*/*",
        'Referer': "http://rhino.dev.datatub.com/",
        'X-Requested-With': "XMLHttpRequest",
        'Connection': "keep-alive"
    }
    setting = Setting()
    setting.headers = headers
    setting.request = "POST"

    res = Downloader(setting=setting).payload(CLOSE_WORKER_URL,
                                              "workerGroup={}".format(group))
    # Parse the response body once instead of calling res.json() three times.
    result = res.json()
    LOG.info("CLOSE WORKER 结果 {}".format(result))
    if result["code"] == 0:
        return True
def clearWorker(cookie):
    """
    Clear all workers.

    :param cookie: session cookie string used for authentication
    :return: True when the server answers with code 0, otherwise None
    """
    headers = {
        'Cookie': "{}".format(cookie),
        'User-Agent':
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
        'Host': "rhino.dev.datatub.com"
    }
    setting = Setting()
    setting.headers = headers
    setting.request = "POST"
    res = Downloader(setting=setting).request(CLEAR_WORKER_URL)
    # Parse the response body once instead of calling res.json() twice.
    result = res.json()
    LOG.info("Clear 结果 {}".format(result))
    if result["code"] == 0:
        return True
# Beispiel #3 (scrape artifact: example separator and vote count)
def init(url):
    """
    Parse a video page, locate its m3u8 playlist and enqueue every .ts
    segment URL into the global queue ``q``.

    :param url: page URL containing the <video> element
    """
    soup = Downloader.get_with_bs4(url).soup
    m3u8_url = soup.select_one(".yunbofang video").attrs["src"]

    s = Setting()
    s.returnFailReq = True
    text = Downloader.get(m3u8_url, setting=s).text
    # Raw string fixes the invalid escape sequence, and the dot is escaped so
    # it matches a literal '.' rather than any character.
    result = text_util.get_all_match(r"index\d+\.ts", text)
    ts = result[-1]

    # Highest segment index, e.g. "index42.ts" -> "42".
    d = text_util.get_first_match(ts, r"(\d+)")

    url_front = m3u8_url.split("index")[0]
    for i in range(int(d) + 1):
        q.put({"url": url_front + "index" + str(i) + ".ts", "ind": i})
# Beispiel #4 (scrape artifact: example separator and vote count)
def buildSite(body):
    """Create a site (创建站点).

    :param body: dict of form fields describing the new site
    :return: id of the created site
    """
    payload = url_util.urlencode(body)
    s = Setting(request="POST", headers=HEADERS)
    res = Downloader.payload(MAIN_URL, data=payload, setting=s)
    # Decode the body once; it is both printed and queried for the id.
    data = res.json()
    print(data)
    return data["data"]["id"]
def selectWorker(cookie):
    """
    Query the current workers.

    :param cookie: session cookie string used for authentication
    :return: True when the server answers with code 0, otherwise None
    """
    headers = {
        'Cookie': "{}".format(cookie),
        'User-Agent':
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
        'Host': "rhino.dev.datatub.com"
    }
    setting = Setting()
    setting.headers = headers
    json = Downloader(setting=setting).get_json(SELECT_WORKER_URL)
    # Use LOG.info like the sibling functions; LOG.log is inconsistent and is
    # not a valid single-argument call on a standard logger.
    LOG.info("SELECT 结果 {}".format(json))
    if json["code"] == 0:
        return True
def addWorker(workerNum, group, cookie):
    """
    Add workers to a group.

    :param workerNum: number of workers to add
    :param group: target worker group name
    :param cookie: session cookie string used for authentication
    :return: True when the server answers with code 0, otherwise None
    """
    setting = Setting()
    setting.headers = {
        'Cookie': "{}".format(cookie),
        'User-Agent':
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
        'Host': "rhino.dev.datatub.com"
    }
    response = Downloader(setting=setting).get_json(
        ADD_WORKER_URL.format(workerNum, group))
    LOG.info("ADD 结果 {}".format(response))
    if response["code"] == 0:
        return True
def login():
    """
    Log in to the rhino console with the built-in account.

    :return: value of the JSESSIONID cookie issued by the server
    """
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration or environment variables instead.
    payload = "userName=changshuai&passWord=123456"
    headers = {
        'Host': "rhino.dev.datatub.com",
        # Derived from the payload instead of the hard-coded "35" so the
        # header stays correct if the credentials ever change.
        'Content-Length': str(len(payload)),
        'Accept': "*/*",
        'Origin': "http://rhino.dev.datatub.com",
        'X-Requested-With': "XMLHttpRequest",
        'User-Agent':
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
        'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
        'Referer': "http://rhino.dev.datatub.com/login.html",
        'Accept-Encoding': "gzip, deflate",
        'Accept-Language': "zh-CN,zh;q=0.9",
        'Connection': "close"
    }

    setting = Setting()
    setting.headers = headers
    setting.request = "POST"
    res = Downloader(setting=setting).payload(LOGIN_URL, payload)
    return res.cookies.get("JSESSIONID")
# Beispiel #8 (scrape artifact: example separator and vote count)
def getSiteInfo(siteId: int):
    """Fetch the detail record of a site by id.

    :param siteId: numeric site identifier
    :return: the "site" object from the response payload
    """
    s = Setting()
    s.setParams(headers=HEADERS)
    response = Downloader.get(GET_SITE_INFO_URL.format(siteId), setting=s)
    return response.json()["data"]["site"]
# Beispiel #9 (scrape artifact: example separator and vote count)
def testTp(body):
    """Run a template test and return its result data.

    :param body: dict of form fields describing the template test
    :return: the "data" field of the JSON response
    """
    encoded = url_util.urlencode(body)
    cfg = Setting(request="POST", headers=HEADERS)
    response = Downloader.payload(TEST_TEMPLATE_URL, data=encoded, setting=cfg)
    return response.json()["data"]
# Beispiel #10 (scrape artifact: example separator and vote count)
def buildTask(body):
    """Create a task and return its id.

    :param body: dict of form fields describing the new task
    :return: id of the created task
    """
    payload = url_util.urlencode(body)
    s = Setting(request="POST", headers=HEADERS)
    res = Downloader.payload(BUILD_TASK_URL, data=payload, setting=s)
    # Decode the body once; it is both printed and queried for the id.
    data = res.json()
    print(data)
    return data["data"]["id"]
# Beispiel #11 (scrape artifact: example separator and vote count)
def buildTemplate(body):
    """Create a template (创建模板) and return its id.

    :param body: dict of form fields describing the new template
    :return: id of the created template
    """
    encoded = url_util.urlencode(body)
    cfg = Setting(request="POST", headers=HEADERS)
    response = Downloader.payload(BUILD_TEMPLATE_URL, data=encoded, setting=cfg)
    return response.json()["data"]["id"]
SHOP_SEARCH_URL = "http://i.waimai.meituan.com/openh5/homepage/poilist?_={}"
FOOD_URL = "http://i.waimai.meituan.com/openh5/poi/food"
COMMENT_URL = "http://i.waimai.meituan.com/openh5/poi/comments"

# mongodb configuration: one collection per crawled entity type.
client = pymongo.MongoClient(host='localhost', port=27017)
db = client['meituan']
meituanwaimai_shop_list = db['meituanwaimai_shop_list_v1']
meituanwaimai_search_list = db['meituanwaimai_search_list']
meituanwaimai_food_list = db['meituanwaimai_food_list_v1']
meituanwaimai_comment_list = db['meituanwaimai_comment_list_v1']
decrypt_collection = db["meituanwaimai_decrypt"]

# "match" mapping loaded from mongo; presumably a pre-built character
# decryption table — verify against where `decrypt` is used.
decrypt = decrypt_collection.find_one({"_id": 1})["match"]

setting = Setting()
setting.headers = {
    "Origin": "http://h5.waimai.meituan.com",
    "User-Agent":
    "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Mobile Safari/537.36",
    "Referer": "http://h5.waimai.meituan.com/waimai/mindex/home",
    "Host": "i.waimai.meituan.com",
    # A hard-coded session "Cookie" header used to live here (commented out).
    # The scraped copy wrapped that comment mid-string, leaving a bare
    # continuation line that broke the file's syntax; the dead credential
    # blob was removed entirely.
    "Accept": "application/json",
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept-Language": "zh-CN,zh;q=0.9",
}

post_data_of_food_list = {
    "geoType":
    "2",
# Beispiel #13 (scrape artifact: example separator and vote count)
def main_downloader():
    """
    Probe a list of HTTP proxies by fetching http://icanhazip.com through
    each of them and print whether the request succeeded.
    """
    # [host, port] pairs to try, one request per proxy.
    proxies = [
        ["222.189.191.53", "9999"],
        ["182.111.64.7", "41766"],
        ["115.151.3.16", "9999"],
        ["121.233.206.151", "9999"],
        ["116.209.52.143", "9999"],
        ["1.198.72.234", "9999"],
        ["121.61.1.48", "9999"],
        ["183.148.133.22", "9999"],
        ["115.239.24.166", "9999"],
        ["110.52.235.226", "9999"],
        ["113.122.168.246", "9999"],
        ["59.62.165.99", "808"],
        ["218.91.112.42", "9999"],
        ["111.177.161.70", "9999"],
        ["110.52.235.231", "9999"],
        ["180.116.48.122", "9999"],
        ["113.122.168.23", "9999"],
        ["49.77.59.235", "8118"],
        ["110.52.235.173", "9999"],
        ["111.177.187.211", "9999"],
        ["124.94.192.206", "9999"],
        ["125.123.137.71", "9999"],
        ["121.61.1.222", "9999"],
        ["111.72.154.47", "9999"],
        ["125.123.138.26", "9999"],
        ["110.52.235.244", "9999"],
        ["121.61.24.254", "9999"],
        ["111.177.170.35", "9999"],
        ["42.53.73.131", "9999"],
        ["111.177.180.221", "9999"],
        ["111.177.170.11", "9999"],
        ["60.173.244.133", "41306"],
        ["116.209.59.131", "9999"],
        ["221.235.234.199", "9999"],
        ["110.52.235.76", "9999"],
        ["121.61.24.242", "9999"],
        ["112.87.69.158", "9999"],
        ["59.62.166.60", "9999"],
        ["59.62.166.172", "9999"],
        ["61.184.43.129", "9999"],
        ["110.52.235.70", "808"],
        ["116.209.56.164", "9999"],
        ["171.80.152.26", "9999"],
        ["110.52.235.79", "9999"],
        ["116.209.55.171", "9999"],
        ["116.209.52.190", "9999"],
        ["118.187.58.34", "53281"],
        ["110.52.235.67", "9999"],
        ["115.212.81.84", "8118"],
        ["121.31.158.51", "8123"],
        ["116.209.56.95", "9999"],
        ["116.209.56.179", "9999"],
        ["183.148.145.229", "9999"],
        ["121.61.3.223", "9999"],
        ["101.236.42.63", "8866"],
        ["111.176.31.69", "9999"],
        ["116.209.54.22", "9999"],
        ["116.209.57.233", "9999"],
        ["125.123.136.232", "9999"],
        ["27.29.95.209", "9999"],
        ["116.209.57.22", "9999"],
        ["112.85.174.44", "9999"],
        ["61.183.233.6", "54896"],
        ["116.209.59.150", "9999"],
        ["116.209.55.191", "9999"],
        ["116.209.56.125", "9999"],
        ["125.123.142.141", "9999"],
        ["59.62.167.130", "53128"],
        ["175.148.77.188", "1133"],
        ["116.209.52.177", "9999"],
        ["125.123.138.171", "9999"],
        ["111.181.65.0", "9999"],
        ["1.192.246.197", "9999"],
        ["111.177.179.8", "9999"],
        ["110.52.235.86", "9999"],
        ["120.35.12.105", "3128"],
        ["116.209.57.16", "9999"],
        ["59.45.16.10", "59156"],
        ["111.181.66.158", "9999"],
        ["112.85.130.51", "9999"],
        ["116.208.55.173", "9999"],
        ["115.151.5.177", "9999"],
        ["113.121.147.233", "9999"],
        ["171.80.0.190", "9999"],
        ["110.52.235.139", "9999"],
        ["121.61.3.176", "9999"],
        ["110.52.235.71", "9999"],
        ["110.52.235.114", "9999"],
        ["112.85.165.66", "9999"],
        ["116.209.59.174", "9999"],
        ["121.61.1.9", "9999"],
        ["112.85.174.93", "9999"],
        ["123.163.115.203", "9999"],
        ["180.119.141.144", "9999"],
        ["116.209.54.168", "9999"],
        ["116.209.58.45", "9999"],
        ["125.123.142.215", "9999"],
        ["110.52.235.196", "9999"],
    ]
    for proxy in proxies:
        setting = Setting()
        setting.set_proxies(proxy[0], proxy[1])
        setting.timeout = 10
        # Single attempt per proxy; a dead proxy should fail fast.
        setting.repeat = 1
        print("使用代理", proxy)
        try:
            res = Downloader.get("http://icanhazip.com", setting=setting)
            print("success", proxy, res.text, res.status_code)
        except Exception as e:
            # Report which proxy failed and why instead of a bare "fail",
            # which discarded all diagnostics.
            print("fail", proxy, e)
# Beispiel #14 (scrape artifact: example separator and vote count)
# @Time         : 18-2-13 下午8:46
# @Author       : DioMryang
# @File         : test_downloader.py
from unittest import TestCase

# @Description  :
from dio_core.network.downloader import Downloader
from dio_core.network.downloader.downloader import Setting

# Page through Google News results for the query (date-restricted) via a
# proxy, printing every result link until a page comes back empty.
page = 0
# The URL template is loop-invariant; build it once outside the loop.
url = (
    "https://www.google.com/search?q=经贸磋商&tbs=cdr:1,cd_min:1/21/2019,cd_max:1/"
    + "28/2019&tbm=nws&start={}")
while True:
    setting = Setting()
    setting.set_proxies("116.31.102.3", "57003")

    soup = Downloader.get_with_bs4(url.format(page), setting=setting).soup

    # Stop when a results page yields no entries.
    result = soup.select(".l.lLrAF")
    if not result:
        break
    # Reuse the already-selected result set instead of querying the soup again.
    for aTag in result:
        print(aTag["href"])
    page += 10
    print()
# Beispiel #15 (scrape artifact: example separator and vote count)
import time
import traceback

from dio_core.network.downloader.downloader import Downloader
from dio_core.network.downloader.downloader import Setting
from dio_core.utils import file_util, parse_util, json_util
from dio_core_test.utils import text_util

# Shared downloader configuration for the JIRA scrape loop below.
setting = Setting()
# Authenticated JIRA session cookie (JSESSIONID plus seraph remember-me and
# atlassian XSRF tokens). NOTE(review): hard-coded session data — it will
# expire; consider loading it from configuration.
setting.headers["Cookie"] = ("DSCKID=91aecd4c-9d62-49ad-ae1b-9eb177c787ac; JSESSIONID=5AD6666BE97FEC415491055AFAFA60FE;"
                             " seraph.rememberme.cookie=13124%3A5ad60cddb478faeca22570e7f156f07e5138011a; atlassian.xsr"
                             "f.token=BP2B-R8C4-N6CQ-HZD4_9ab65e787a0932dd3b1abcc52a792e40d154415c_lin; jira.editor.use"
                             "r.mode=wysiwyg")
setting.htmlParse = True

# Issue URLs to crawl, one per row of the input file.
rows = list(file_util.readRows("/home/changshuai/PycharmProjects/dio_core/dio_core_test/Data/JIRA_LIST.txt"))

# Accumulates the issue records collected by the loop below.
allMsg = []

for url in rows:

    try:
        res = Downloader.get(url, setting)
        repeatsText = text_util.get_first_match(res.text, "WRM._unparsedData\[\"activity-panel-pipe-id\"\]=\"(.*)\";")
        repeats = repeatsText.encode("utf-8").decode("unicode-escape").encode("utf-8").decode("unicode-escape").replace("\\/", "/")
        soup = parse_util.get_bs4_soup(repeats.strip("\""))

        [_.extract() for _ in res.soup.select_one("#description-val").select(".user-hover")]
        msgInfo = {
            "title": res.soup.select_one("#summary-val").text.strip(),
            "id": text_util.get_first_match(url, "/(CP-\d+)").strip(),