Beispiel #1
0
def dealHome(url):
    """Collect all entries of a "homepage" listing and store them.

    The page at ``url`` is downloaded to read its title and account
    nickname, then ``getHomeJsonData`` is called page by page until a page
    holds fewer than ten entries. The accumulated entries are passed through
    ``CleanResult`` and each cleaned item is handed to ``StoreData.addUrl``.

    Args:
        url: Address of the homepage listing.

    Returns:
        The value returned by ``CleanResult`` for the collected entries.

    Raises:
        IndexError: If the page contains no ``.rich_media_title`` or
            ``.account_nickname_inner`` element.
    """
    page_size = 10  # a page with exactly this many entries is treated as "more to come"

    store = StoreData()
    res = requests.get(url)
    # Rewrite "data-src" to "src" and drop the hidden-visibility style
    # before the markup is parsed.
    html = res.text.replace("data-src", "src").replace(
        'style="visibility: hidden;"', "")
    soup = BeautifulSoup(html, features="lxml")
    title = soup.select('.rich_media_title')[0].string
    author = soup.select('.account_nickname_inner')[0].string

    page = getHomeJsonData(url, 0)
    print("****")
    # Copy so that extending the accumulator does not mutate the first page.
    entries = list(page['result'])

    while len(page['result']) == page_size:
        # Pass the number of entries collected so far. The previous code
        # passed the size of the last page, which is always 10 here, so the
        # same page was requested over and over.
        # NOTE(review): assumes the second argument is an item offset (the
        # first call passes 0) - confirm against getHomeJsonData.
        page = getHomeJsonData(url, len(entries))
        entries += page['result']

    sdata = CleanResult(entries, author, title)

    for val in sdata:
        store.addUrl(val)
    return sdata
Beispiel #2
0
def wxPdf(**kwargs):
    """Generate PDFs, either for the pending queue or for one given URL.

    Keyword Args:
        url: Required whenever any keyword argument is passed. The special
            value ``"weixin"`` runs ``getPdf()`` instead of converting a
            single URL.
        folder: Optional folder passed to the PDF generator and stored with
            the record; defaults to ``"面试精选"``.

    With no keyword arguments at all, ``getPdf()`` is run.

    Raises:
        KeyError: If keyword arguments are given but ``url`` is not one of
            them.
    """
    if not kwargs:
        getPdf()
        return

    print("******")
    url = kwargs['url']
    print(url)
    if url == "weixin":
        getPdf()
        return

    # Look the folder up by name instead of inferring its presence from the
    # number of keyword arguments: the old `len(kwargs) == 2` test raised
    # KeyError for an unrelated second keyword and ignored `folder` when
    # more than two keywords were passed.
    folder = kwargs.get('folder', "面试精选")

    # Generate the PDF for the given URL.
    pdf = GenPdf()
    title = pdf.oldDeal(url, "", folder)
    store = StoreData()
    store.addUrl({
        'link': url,
        'folder': folder,
        'title': title,
        'msgid': '0',
        'turn': 0
    })
    store.updateUrlStateByMsg()
Beispiel #3
0
def _pick_cursor(flag, msgids):
    """Return the largest or smallest id in ``msgids`` according to ``flag``."""
    if flag == 'max':
        return max(msgids)
    if flag == 'min':
        return min(msgids)
    raise ValueError("unsupported flag %r (expected 'max' or 'min')" % (flag,))


def deal(url):
    """Collect all entries of an album page and store them.

    URLs containing ``"homepage"`` are delegated to ``dealHome``. Otherwise
    the first page is read with ``getFirstPage``, the album is recorded with
    ``StoreData.addAblum``, and further pages are requested through
    ``getJsonData`` for as long as the response's ``continue`` field is
    ``"1"``. The accumulated entries are passed through ``CleanResult`` and
    each cleaned item is handed to ``StoreData.addUrl``.

    Args:
        url: Address of the album (or homepage) to crawl.

    Returns:
        The value returned by ``CleanResult`` (or by ``dealHome`` for
        homepage URLs).

    Raises:
        ValueError: If the first page reports a ``flag`` other than
            ``'max'`` or ``'min'``. Raised before anything is stored.
    """
    store = StoreData()
    # Homepage listings are handled by a separate routine.
    if "homepage" in url:
        return dealHome(url)

    first = getFirstPage(url)
    # Copy so that extending the accumulator does not mutate first['result'].
    entries = list(first['result'])
    flag = first['flag']
    author = first['author']
    title = first['title'].replace("#", '')

    # Previously an unexpected flag left the cursor unbound and surfaced as
    # an UnboundLocalError only after the album had been stored.
    cursor = _pick_cursor(flag, first['msgid'])

    # Record the album itself.
    store.addAblum(url, author, title)

    page = getJsonData(url, cursor)
    if page['result']:
        entries += page['result']
        cursor = _pick_cursor(flag, page['msgid'])

    while page['continue'] == "1":
        page = getJsonData(url, cursor)
        if not page['result']:
            # The cursor only advances when a page has entries, so trying
            # again would repeat the identical request forever. Stop paging.
            break
        entries += page['result']
        print(colored("==============================", "cyan"))
        # The next request continues from the last id of this page.
        cursor = page['msgid'][-1]

    sdata = CleanResult(entries, author, title)

    for val in sdata:
        store.addUrl(val)
    return sdata
Beispiel #4
0
            break
    # print(data)
    return


def genpdf(data):
    """Generate the PDF for one record and update its stored state.

    Args:
        data: Mapping that provides ``url``, ``turn``, ``title``, ``folder``
            and ``id``. The PDF name passed to ``GenPdf.dealHtml`` is
            ``"<turn>-<title>"``.

    Returns:
        None.
    """
    record_store = StoreData()
    # Generate the PDF from the record's values.
    renderer = GenPdf()
    renderer.dealHtml(
        data['url'],
        '-'.join((str(data['turn']), data['title'])),
        data['folder'],
    )
    record_store.updateUrlState(data['id'])


# Command-line entry: `script URL [FOLDER]` converts a single URL and records
# it; with no arguments, getPdf() is run instead.
if len(sys.argv) > 1:
    print("******")
    url = sys.argv[1]
    print(url)
    # The optional second argument selects the folder. Use `>= 3` so that
    # extra trailing arguments no longer cause the folder to be ignored.
    folder = sys.argv[2] if len(sys.argv) >= 3 else "面试精选"
    # Generate the PDF for the given URL.
    pdf = GenPdf()
    title = pdf.dealHtml(url, "", folder)
    store = StoreData()
    store.addUrl({'link': url, 'folder': folder, 'title': title, 'msgid': '0', 'turn': 0})
    store.updateUrlStateByMsg()
else:
    getPdf()