def dealHome(url):
    """Collect every article entry reachable from a "homepage" URL and store it.

    The page at ``url`` is fetched once to read the title and the account
    nickname; the article entries themselves come from ``getHomeJsonData``,
    which is called repeatedly until it returns a page that is not full.

    Args:
        url: address of the homepage to crawl.

    Returns:
        Whatever ``CleanResult`` returns for the collected entries; each
        element of it is also passed to ``StoreData.addUrl``.

    Raises:
        IndexError: if the page has no ``.rich_media_title`` or
            ``.account_nickname_inner`` element.
    """
    # A page holding exactly this many entries is treated as "more may follow".
    page_size = 10

    store = StoreData()
    res = requests.get(url)
    # Rewrite lazy-load attributes and drop the hidden-visibility style
    # before handing the markup to the parser.
    html = res.text.replace("data-src", "src").replace('style="visibility: hidden;"', "")
    soup = BeautifulSoup(html, features="lxml")
    title = soup.select('.rich_media_title')[0].string
    author = soup.select('.account_nickname_inner')[0].string

    page = getHomeJsonData(url, 0)
    print("****")
    ret = []
    ret.extend(page['result'])

    while len(page['result']) == page_size:
        # Pass the running total, not the size of the last page: the last
        # page's size is always ``page_size`` here, so using it made every
        # follow-up request identical and the loop could never finish.
        # NOTE(review): assumes the second argument is a running offset
        # (the first call passes 0) -- confirm against getHomeJsonData.
        page = getHomeJsonData(url, len(ret))
        ret.extend(page['result'])

    sdata = CleanResult(ret, author, title)
    for val in sdata:
        store.addUrl(val)
    return sdata
def getPdf():
    """Generate PDFs for the stored rows selected by ``'state=0'``.

    Each row returned by ``StoreData.getListFromParam`` is turned into a
    dict keyed by column name and handed to ``genpdf``.  At most 100 rows
    are handled per call.

    Returns:
        None.
    """
    store = StoreData()
    # Query the database for the rows to convert.
    pending_rows, column_names = store.getListFromParam('state=0')
    print(colored("weixin*-*-*-*-*---*---*--*-*-*weixin", "cyan"))

    for handled, row in enumerate(pending_rows, start=1):
        # Pair every column name with the value at the same position.
        record = {name: row[pos] for pos, name in enumerate(column_names)}
        genpdf(record)
        if handled == 100:
            break
    return None
def genpdf(data):
    """Generate the PDF for one record, then update that record's state.

    Args:
        data: mapping read for the keys ``'url'``, ``'turn'``, ``'title'``,
            ``'folder'`` and ``'id'``.

    Returns:
        None.
    """
    state_store = StoreData()
    renderer = GenPdf()

    # Pass the values needed to generate the pdf; the document name is
    # "<turn>-<title>".
    source_url = data['url']
    pdf_name = f"{data['turn']!s}-" + data['title']
    target_folder = data['folder']
    renderer.dealHtml(source_url, pdf_name, target_folder)

    state_store.updateUrlState(data['id'])
    return None
def wxPdf(**kwargs):
    """Keyword-argument entry point for PDF generation.

    Behaviour by input:
      * no keyword arguments, or ``url == "weixin"``: run ``getPdf()``;
      * any other ``url``: convert that single URL with ``GenPdf.oldDeal``,
        record it through ``StoreData.addUrl`` and then call
        ``StoreData.updateUrlStateByMsg``.

    Keyword Args:
        url: the address to convert, or the literal ``"weixin"``.
        folder: optional folder name handed to ``oldDeal`` and stored with
            the record; defaults to ``"面试精选"``.

    Returns:
        None.

    Raises:
        KeyError: if keyword arguments are given but ``url`` is not among them.
    """
    if not kwargs:
        getPdf()
        return

    print("******")
    url = kwargs['url']
    print(url)

    if url == "weixin":
        getPdf()
        return

    # Look the folder up by name.  The previous ``len(kwargs) == 2`` check
    # raised KeyError when the second keyword was not ``folder`` and ignored
    # ``folder`` whenever any additional keyword was supplied.
    folder = kwargs.get('folder', "面试精选")

    # Pass the values needed to generate the pdf.
    pdf = GenPdf()
    title = pdf.oldDeal(url, "", folder)

    store = StoreData()
    store.addUrl({
        'link': url,
        'folder': folder,
        'title': title,
        'msgid': '0',
        'turn': 0,
    })
    store.updateUrlStateByMsg()
def deal(url):
    """Collect every article entry of an album URL and store it.

    URLs containing ``"homepage"`` are delegated to ``dealHome``.  For all
    other URLs the first page comes from ``getFirstPage`` and further pages
    from ``getJsonData`` for as long as the returned ``'continue'`` field
    equals ``"1"``.

    Args:
        url: address of the album (or homepage) to crawl.

    Returns:
        Whatever ``CleanResult`` returns for the collected entries (or the
        result of ``dealHome`` for homepage URLs); each element is also
        passed to ``StoreData.addUrl``.

    Raises:
        ValueError: if the first page's ``'flag'`` is neither ``'max'`` nor
            ``'min'``.  Previously this surfaced later as an
            ``UnboundLocalError``, after the album had already been stored.
    """
    # Homepage URLs use a different routine, which creates its own store,
    # so nothing needs to be set up here before dispatching.
    if url.find("homepage") > -1:
        return dealHome(url)

    first_page = getFirstPage(url)
    ret = first_page['result']
    flag = first_page['flag']
    author = first_page['author']
    title = first_page['title'].replace("#", '')

    # The flag decides whether the largest or the smallest message id is
    # used as the cursor for the next request.
    if flag == 'max':
        pick_cursor = max
    elif flag == 'min':
        pick_cursor = min
    else:
        raise ValueError("unexpected flag %r: expected 'max' or 'min'" % (flag,))
    minid = pick_cursor(first_page['msgid'])

    # Store the album link.
    store = StoreData()
    store.addAblum(url, author, title)

    page = getJsonData(url, minid)
    if len(page['result']) > 0:
        ret += page['result']
        minid = pick_cursor(page['msgid'])

    while page['continue'] == "1":
        page = getJsonData(url, minid)
        if len(page['result']) == 0:
            # NOTE(review): an empty page that still reports continue == "1"
            # is re-requested with the same cursor, so this can spin for as
            # long as the service keeps answering that way -- confirm that
            # this retry is intended.
            continue
        ret += page['result']
        print(colored("==============================", "cyan"))
        # Inside the loop the cursor is the last id of the page,
        # regardless of the flag.
        minid = page['msgid'][-1]

    sdata = CleanResult(ret, author, title)
    for val in sdata:
        store.addUrl(val)
    return sdata
# NOTE(review): the line below is whitespace-collapsed. It appears to hold the orphaned tail of a
# loop-based function (`break ... return`), a second definition of genpdf, and a script section that
# reads sys.argv (argv[1] as url, optional argv[2] as folder) and otherwise calls getPdf().
# Presumably a leftover duplicate of code defined earlier in the file -- confirm before relying on it.
break # print(data) return def genpdf(data): mystore = StoreData() # pass values to generate the pdf mypdf = GenPdf() mypdf.dealHtml(data['url'], str(data['turn']) + '-' + data['title'], data['folder']) mystore.updateUrlState(data['id']) return if len(sys.argv) > 1: print("******") url = sys.argv[1] print(url) folder = "面试精选" if len(sys.argv) == 3: folder = sys.argv[2] # pass values to generate the pdf pdf = GenPdf() title = pdf.dealHtml(url, "", folder) store = StoreData() store.addUrl({'link': url, 'folder': folder, 'title': title, 'msgid': '0', 'turn': 0}) store.updateUrlStateByMsg() else: getPdf() # print(sys.argv[0])