Example #1
import re

from bs4 import BeautifulSoup

# getCfgDict, getReq and handleReuslt (sic) are project helpers defined
# elsewhere; a sketch of handleReuslt follows this example.
def getPCWebUrl(env, envheaders):
    result = []
    ssl = False
    pcweb = getCfgDict(env)
    headers = getCfgDict(envheaders)
    if pcweb.startswith("https"):
        ssl = True
    resp = getReq(pcweb, headers=headers, timeout=10)
    soup = BeautifulSoup(resp.text, "html.parser")
    ### Collect the banner URLs
    a_eles = soup.find_all(attrs={"class": "banner_link"})
    result = handleReuslt(result, a_eles, pcweb, ssl)

    ## Collect today's-comics URLs: https://www.dongmanmanhua.cn/dailySchedule
    dailySchedule = getReq(pcweb + "/dailySchedule",
                           headers=headers,
                           timeout=10)
    soup = BeautifulSoup(dailySchedule.text, "html.parser")
    a_eles = soup.find_all(attrs={"class": re.compile(r"daily_card_item")})
    result = handleReuslt(result, a_eles, pcweb, ssl)

    ## Collect the favorite-genre URLs: https://www.dongmanmanhua.cn/genre
    ## e.g. https://qam.dongmanmanhua.cn/METROPOLIS/?sortOrder=READ_COUNT
    genre = getReq(pcweb + "/genre", headers=headers, timeout=10)
    soup = BeautifulSoup(genre.text, "html.parser")
    a_eles = soup.find_all(attrs={"class": re.compile(r"card_item")})
    result = handleReuslt(result, a_eles, pcweb, ssl)

    ## Collect the ranking URLs: https://www.dongmanmanhua.cn/top
    # iterate genre ids 1-10 crossed with the demographic targets
    for i in range(1, 11):
        for goal in [
                "MALE10", "FEMALE10", "MALE20", "FEMALE20", "MALE30",
                "FEMALE30"
        ]:
            top = getReq(pcweb + "/top?rankingGenre=%s&target=%s" % (i, goal),
                         headers=headers,
                         timeout=10)
            soup = BeautifulSoup(top.text, "html.parser")
            a_eles = soup.find_all(
                attrs={
                    "data-sc-name": "PC_genre-rank-module_genre-rank-list-item"
                })
            result = handleReuslt(result, a_eles, pcweb, ssl)

    # result already contains the last batch (handled inside the loop above),
    # so calling handleReuslt again here would append duplicates.
    return pcweb, headers, result
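# Both crawlers lean on the helper handleReuslt (the project's own
# spelling), which is not shown in these examples. Below is a minimal
# sketch of what it plausibly does, inferred from its call sites; every
# detail is an assumption, not the real implementation.
from urllib.parse import urljoin

def handleReuslt(result, a_eles, host, ssl):
    for ele in a_eles:
        href = ele.get("href")
        if not href:
            continue
        url = urljoin(host, href)  # resolve relative links against the host
        if ssl and url.startswith("http://"):
            # the host is served over TLS, so normalize the scheme
            url = "https://" + url[len("http://"):]
        if url not in result:  # skip duplicates across page sections
            result.append(url)
    return result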
Example #2
# Same imports and helpers as Example #1.
def getMWebUrl(env, envheaders):
    result = []
    ssl = False
    mweb = getCfgDict(env)
    headers = getCfgDict(envheaders)
    if mweb.startswith("https"):
        ssl = True
    resp = getReq(mweb, headers=headers, timeout=10)
    soup = BeautifulSoup(resp.text, "html.parser")
    ### Collect the banner URLs
    a_eles = soup.find_all(attrs={"data-sc-name": "M_discover-page_banner"})
    result = handleReuslt(result, a_eles, mweb, ssl)

    ## Collect the new-release URLs: http://qam.dongmanmanhua.cn/new
    new = getReq(mweb + "/new", headers=headers, timeout=10)
    soup = BeautifulSoup(new.text, "html.parser")
    a_eles = soup.find_all(attrs={"data-sc-name": "M_new-page_new-list-item"})
    result = handleReuslt(result, a_eles, mweb, ssl)

    ## Collect today's-comics URLs: http://qam.dongmanmanhua.cn/dailySchedule
    dailySchedule = getReq(mweb + "/dailySchedule",
                           headers=headers,
                           timeout=10)
    soup = BeautifulSoup(dailySchedule.text, "html.parser")
    a_eles = soup.find_all(attrs={
        "data-sc-name": re.compile(r"M_today-title-list-page_week-.*?-list-item")
    })
    result = handleReuslt(result, a_eles, mweb, ssl)

    ## Collect the favorite-genre URLs: https://qam.dongmanmanhua.cn/genre?genre=METROPOLIS
    ## e.g. https://qam.dongmanmanhua.cn/METROPOLIS/?sortOrder=READ_COUNT
    genre = getReq(mweb + "/genre", headers=headers, timeout=10)
    soup = BeautifulSoup(genre.text, "html.parser")
    a_eles = soup.find_all(
        attrs={"data-sc-name": re.compile(r"M_genre-page_.*?-genre-list-item")})
    result = handleReuslt(result, a_eles, mweb, ssl)

    ## Collect the ranking URLs: https://qam.dongmanmanhua.cn/top
    top = getReq(mweb + "/top", headers=headers, timeout=10)
    soup = BeautifulSoup(top.text, "html.parser")
    a_eles = soup.find_all(
        attrs={"data-sc-name": "M_rank-page_rank-list-item"})
    # unlike the loop above, the final batch is folded in by this last call
    return mweb, headers, handleReuslt(result, a_eles, mweb, ssl)
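# A hypothetical invocation of the two crawlers above, assuming the
# "qamweb"/"qamheaders" config keys that appear as defaults later on:
mweb, headers, urls = getMWebUrl("qamweb", "qamheaders")
print("collected %s m-web URLs" % len(urls))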
Example #3
def getReqUrls(platform=5,
               headers="qamheaders",
               sqlkey="qamysql",
               webkey="qamweb"):
    urls = initUrls(platform, sqlkey, webkey)
    headdata = getCfgDict(headers)
    successCount = 0
    for url in urls:
        # handleByCode issues the request, retries on a 503, and returns
        # the updated success counter (see the sketch after this example)
        successCount = handleByCode(url,
                                    headdata,
                                    successCount=successCount,
                                    count=0)
    print("***** %s URLs total, %s succeeded! *****" % (len(urls), successCount))
Example #4
def initUrls(platform, sqlkey, webkey):
    cursor = getCursor(sqlkey)
    urls = []
    host = getCfgDict(webkey)
    tg = getTitleFromMysql(platform, cursor)
    for g, n, t, p in tg:
        ep = getEpisodeFromMysql(cursor, t)
        # note: the episode-level p below shadows the title-level p from tg
        for e, p in ep:
            urls = createUrl(urls, host, g, n, t, e, p)
    print("***** URLs fetched successfully! *****")
    closeCursor(cursor)
    return urls
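# createUrl is defined elsewhere; from the call site it receives the host
# plus (genre, title name, title_no, episode_no, p) and appends one URL.
# A purely hypothetical shape: the path layout below is invented for
# illustration, and the role of p is unknown.
def createUrl(urls, host, g, n, t, e, p):
    # hypothetical episode-viewer layout; the real site's paths may differ
    urls.append("%s/%s/%s/viewer?title_no=%s&episode_no=%s" % (host, g, n, t, e))
    return urls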
Example #5
from multiprocessing import Pool

def multiprocessHandle(platform=5,
                       headers="qamheaders",
                       sqlkey="qamysql",
                       webkey="qamweb"):
    headdata = getCfgDict(headers)
    urls = initUrls(platform=platform, sqlkey=sqlkey, webkey=webkey)
    pool = Pool()
    for url in urls:
        pool.apply_async(multiprocessTarget, (url, headdata))
    print("***** Finished building multiprocess requests! *****")
    print("***** Starting multiprocess requests! *****")
    pool.close()
    pool.join()
    print("***** %s URLs total! ***** Requests complete!" % len(urls))
Example #6
import pymysql

def getCursor(key):
    sqlcfg = getCfgDict(key)
    # PyMySQL >= 1.0 removed positional arguments, so the old
    # connect(host, user, passwd, db, port) form must use keywords.
    db = pymysql.connect(host=sqlcfg["host"],
                         user=sqlcfg["user"],
                         password=sqlcfg["passwd"],
                         database=sqlcfg["db"],
                         port=int(sqlcfg["port"]))
    cursor = db.cursor()
    return cursor
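# getCursor discards the connection object, so callers can close the
# cursor but never the connection itself. A sketch of a variant that
# hands back both (the name is ours; closeCursor elsewhere would need
# the matching change):
def getCursorAndConn(key):
    sqlcfg = getCfgDict(key)
    conn = pymysql.connect(host=sqlcfg["host"],
                           user=sqlcfg["user"],
                           password=sqlcfg["passwd"],
                           database=sqlcfg["db"],
                           port=int(sqlcfg["port"]))
    return conn.cursor(), conn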
Example #7
def initDataFromDB():
    ms = getCfgDict("qamysql")
    # keyword arguments, as above, for PyMySQL >= 1.0 compatibility
    conn = pymysql.connect(host=ms['host'], user=ms['user'],
                           password=ms['passwd'], database=ms['db'])
    cursor = conn.cursor()