def getArticleList(app, nickname, start=0, total=5):
    """Page through an account's article list and save each page locally.

    Requests pages of five entries via ``app.articles`` beginning at offset
    ``start``, hands every non-empty page to ``tools.save_json`` under the
    file name ``"<nickname>.json"``, and pauses between requests.

    Args:
        app: Object exposing ``articles(nickname, begin=..., count=...)``;
            ``begin`` and ``count`` are passed as strings.
        nickname: Account name forwarded to ``app.articles``; also used to
            build the output file name.
        start: Offset of the first entry to request.
        total: Upper bound on the offset; the loop runs while
            ``start <= total``.

    Returns:
        None. Progress is reported with ``print``.
    """
    sleepTime = 5  # first pause is fixed; later pauses are randomised
    fetched = 0
    # NOTE(review): the bound is inclusive, so with the defaults a second
    # request at begin=5 is issued after the first five entries. Kept as-is;
    # confirm whether ``start < total`` was intended.
    while start <= total:
        print("开始获取{}开始的文章列表".format(start))
        articles = app.articles(nickname, begin="{}".format(start), count="5")
        if not articles:
            # An empty page would leave ``start`` unchanged and make this
            # loop poll the same offset forever, so stop here.
            break
        tools.save_json("{}.json".format(nickname), articles)
        page_size = len(articles)
        start += page_size
        fetched += page_size
        print("公众号数据到抓取{}条,随机睡眠{}秒".format(page_size, sleepTime))
        time.sleep(sleepTime)
        # Next pause is 10-20 seconds (5 + a random integer in [5, 15]).
        sleepTime = 5 + random.randint(5, 15)
    # Report what was actually fetched rather than the requested bound.
    print("总共抓取到{}篇文章元数据,已经保存文章元数据到本地.请下载".format(fetched))
import os
from pprint import pprint
from wechatarticles.ReadOutfile import Reader
from wechatarticles import ArticlesAPI
from wechatarticles import tools

if __name__ == "__main__":
    # Placeholder credentials: every parameter is entered by hand.
    credentials = {
        "official_cookie": "official_cookie",
        "token": "token",
        "appmsg_token": "appmsg_token",
        "wechat_cookie": "wechat_cookie",
    }
    account = "nickname"

    api = ArticlesAPI(**credentials)

    # Custom crawl; per the original author's note, each crawl fetches five
    # or more articles.
    first_batch = api.complete_info(nickname=account, begin="0")
    print(len(first_batch))
    pprint(first_batch)

    # Custom crawl from a given offset that keeps going until a request
    # fails, at most 40 articles in one go (original author's note: this
    # feature is untested, feel free to try it).
    # The result is not used afterwards; the save below writes the first batch.
    continued_batch = api.continue_info(nickname=account, begin="0")

    tools.save_json("test.json", first_batch)
# pprint is called below, so import it here instead of relying on some other
# part of the file having brought it into scope.
from pprint import pprint

from wechatarticles import ArticlesUrls, tools

if __name__ == "__main__":
    # Simulate logging in to the WeChat Official Accounts platform to obtain
    # the URLs of WeChat articles.
    username = "******"
    password = "******"
    cookie = "cookie"
    token = "token"
    nickname = "nickname"
    query = "query"

    test = ArticlesUrls(username, password)
    # Alternative construction from an existing session:
    # test = ArticlesUrls(cookie=cookie, token=token)

    articles_sum = test.articles_nums(nickname)
    article_data = test.articles(nickname, begin="0", count="5")
    official_info = test.official_info(nickname)

    # Same listing call, restricted by a search query.
    articles_data_query = test.articles(
        nickname, query=query, begin="0", count="5")
    # NOTE(review): the name suggests a count, but this calls ``articles``
    # rather than ``articles_nums`` -- confirm which was intended.
    articles_sum_query = test.articles(nickname, query=query)

    print("articles_sum:", end=" ")
    print(articles_sum)

    print("artcles_data:")
    pprint(article_data)

    print("officical_info:")
    pprint(official_info)

    tools.save_json("test.json", article_data)