def _compute_emojis(messages):
    """Summarise emoji usage over ``messages``.

    The ``text`` of every message is joined with newlines and handed to
    ``str_to_dict`` together with ``_emoji_set``.

    :param messages: iterable of objects exposing a ``text`` attribute
    :return: dict with ``count`` (sum of the mapping's values) and
        ``most_used`` (``top`` of the ``(count, emoji)`` pairs, or ``None``
        when the mapping is empty)
    """
    joined = '\n'.join(message.text for message in messages)
    counts = str_to_dict(joined, _emoji_set)
    total = sum(counts.values())
    if counts:
        favourite = top((count, emoji) for emoji, count in counts.items())
    else:
        favourite = None
    return {'count': total, 'most_used': favourite}
def crawl(self, offset=0):
    """Fetch one page of history messages and keep following the pagination.

    Requests the ``action=getmsg`` endpoint at ``offset`` and saves the
    reply's ``general_msg_list`` through ``self.save``. While the reply
    reports ``can_msg_continue == 1`` it sleeps 3 seconds and calls itself
    with ``next_offset``. A reply whose ``ret`` is not 0 is logged as an
    error and the process exits.

    :param offset: value substituted into the ``offset`` query parameter
    :return: None
    """
    request_url = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz=MzA3NTY3NjUzMg==&f=json&offset={offset}&count=10&is_ok=1&scene=124&uin=MzAzMDI4NDA4OQ%3D%3D&key=9701b8bf0b69875af2605bece0b84ac5791a9cc45d7defc08d4f127acdab2c3ae739842f9f0fd569da86b2db1f91f27c9dff5bab9e5116263572442d555a91bb1cb7b502a908e8443a130fcd032293e6&pass_ticket=ZaU5UjhVDMVvsOzkK4i3B%2FjZubj7dI36zAoLaLuE52eYyRmdeP68%2BFfs6uiXclfr&wxtoken=&appmsg_token=1072_6j04Jj7bozTq%252FaFAuTDRNbD8bM4iFMuP3Lbb1g~~&x5=0&f=json".format(offset=offset)
    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    raw_headers = """Host: mp.weixin.qq.com Connection: keep-alive User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.5 WindowsWechat X-Requested-With: XMLHttpRequest Accept: */* Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MzA3NTY3NjUzMg==&scene=124&uin=MzAzMDI4NDA4OQ%3D%3D&key=9701b8bf0b69875af2605bece0b84ac5791a9cc45d7defc08d4f127acdab2c3ae739842f9f0fd569da86b2db1f91f27c9dff5bab9e5116263572442d555a91bb1cb7b502a908e8443a130fcd032293e6&devicetype=Windows+10+x64&version=62090529&lang=zh_CN&a8scene=7&pass_ticket=ZaU5UjhVDMVvsOzkK4i3B%2FjZubj7dI36zAoLaLuE52eYyRmdeP68%2BFfs6uiXclfr&winzoom=1 Accept-Encoding: gzip, deflate Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4 Cookie: rewardsn=; wxtokenkey=777; wxuin=3030284089; devicetype=android-27; version=27000f8d; lang=zh_CN; pass_ticket=ZaU5UjhVDMVvsOzkK4i3B/jZubj7dI36zAoLaLuE52eYyRmdeP68Ffs6uiXclfr; wap_sid2=CLnu+aQLElx6WkYxZHVSVFZOWm01YWhJcjdKSWVLVXozSlNIdnlDY1I5bkpGQ1hMcFNSamZNMDVIZDU2TGQ5ek1TQkpjRVVkZ0tncXZXZDVvUXZza0VTcmhwRlUwakFFQUFBfjCp5KX5BTgNQJVO"""
    reply = requests.get(request_url, headers=utils.str_to_dict(raw_headers), verify=False)
    payload = reply.json()

    if payload.get("ret") != 0:
        # Error reply
        logger.error("错误消息,请检查请求头")
        exit()
    else:
        messages = payload.get("general_msg_list")
        log_line = "抓取数据: offset=%s, data=%s" % (offset, messages)
        logger.info(log_line)
        self.save(messages)  # persist to the database
        if payload.get("can_msg_continue") == 1:
            following = payload.get("next_offset")
            time.sleep(3)
            self.crawl(following)
def crawl_more(self, offset=0):
    """Crawl further article pages, starting at ``offset``.

    Each page is requested from the ``action=getmsg`` endpoint and its
    ``general_msg_list`` is passed to ``self.save``. Paging continues with
    the reply's ``next_offset`` (after a 2 second pause) for as long as
    ``can_msg_continue == 1``. A reply whose ``ret`` is not 0 is logged as
    an error and the process exits.

    :param offset: value substituted into the ``offset`` query parameter
    :return: None
    """
    # appmsg_token is temporary as well.
    url_template = (
        "https://mp.weixin.qq.com/mp/profile_ext?"
        "action=getmsg&"
        "__biz=MjM5MzgyODQxMQ==&"
        "f=json&"
        "offset={offset}&"
        "count=10&"
        "is_ok=1&"
        "scene=&"
        "uin=777&"
        "key=777&"
        "pass_ticket=lDUqy%2FK9AFwZZVe8RDrxTER0kM1SAjDpAhfBgj2bW4QyP8V2lsi9gMbN%2FTJ0Nq7w&"
        "wxtoken=&"
        "appmsg_token=935_GxAo%2BiWnvCj80gcRR5iWbvAetsVErB5CLhSNJg~~&"
        "x5=1&"
        "f=json"
    )
    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    raw_headers = """ Host: mp.weixin.qq.com Connection: keep-alive X-Requested-With: XMLHttpRequest User-Agent: Mozilla/5.0 (Linux; Android 5.1.1; 2014813 Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN Accept: */* Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MzIwMTc4ODE0Mw==&devicetype=android-22&version=26051731&lang=zh_CN&nettype=WIFI&a8scene=7&session_us=gh_5138cebd4585&pass_ticket=wxW7ApnFNe01tZe42nkIH5EExbK%2BYA45O1NzaLk7uLZBPks8RzUA4gzD6hxU9V5n&wx_header=1 Accept-Encoding: gzip, deflate Accept-Language: zh-CN,en-US;q=0.8 Cookie: rewardsn=1872e6d4042552713dff; wxtokenkey=990d7740963fdbea6cf44ba78c4166b7f9025262edc22a0977c781f8073b54aa; wxuin=525477518; devicetype=android-22; version=26051731; lang=zh_CN; pass_ticket=wxW7ApnFNe01tZe42nkIH5EExbK+YA45O1NzaLk7uLZBPks8RzUA4gzD6hxU9V5n; wap_sid2=CI7NyPoBElxNWnRoazhJNk5CQkdYQ3NBRXZBNmVYbDVncS1yVkdsVjFQYUpCU3J6UGZ1WmxubC1wT3p4WkE4ZFY0dnRpaDdlZnJ1ank4dkdZbXpnNUd2V2hpbjNQS2NEQUFBfjCimMPRBTgMQJRO Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.5.23&TBSVC=43602&CO=BK&COVC=043632&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= 2014813 &RL=720*1280&OS=5.1.1&API=22 Q-GUID: 9d4417681f44eeb4410b613d13b788cb Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b """

    current = offset
    while True:
        page_url = url_template.format(offset=current)
        header_map = utils.str_to_dict(raw_headers)
        reply = requests.get(page_url, headers=header_map, verify=False)
        payload = reply.json()
        if payload.get("ret") == 0:
            messages = payload.get("general_msg_list")
            log_line = "抓取数据:offset=%s, data=%s" % (current, messages)
            logger.info(log_line)
            self.save(messages)
            if payload.get("can_msg_continue") == 1:
                # Move on to the next page.
                current = payload.get("next_offset")
                # TODO: make the wait time configurable
                time.sleep(2)
                continue
        else:
            # Error reply, e.g.
            # {"ret":-3,"errmsg":"no session","cookie_count":1}
            logger.error("无法正确获取内容,请重新获取请求参数和请求头")
            exit()
        break
def crawl(self, offset=0):
    """Crawl more articles, one page per call.

    Requests the ``action=getmsg`` endpoint at ``offset`` and saves
    ``general_msg_list`` through ``self.save``. When the reply reports
    ``can_msg_continue == 1`` it waits 2 seconds and calls itself with
    ``next_offset``. A reply whose ``ret`` is not 0 is logged as an error
    and the process exits.

    :param offset: value substituted into the ``offset`` query parameter
    :return: None
    """
    # appmsg_token is temporary and has to be refreshed as well.
    page_url = (
        "https://mp.weixin.qq.com/mp/profile_ext?"
        "action=getmsg&"
        "__biz=MjM5MzgyODQxMQ==&"
        "f=json&"
        "offset={offset}&"
        "count=10&"
        "is_ok=1&"
        "scene=&"
        "uin=777&"
        "key=777&"
        "pass_ticket=mXHYjLnkYux1rXx8BxNrZpgW4W+yLZxcuvpDWlxbBrjvJo3ECB+ckDAsy/TJJK6P&"
        "wxtoken=&"
        "appmsg_token=938_dFy7Mic8412%2BQG9szSTRTLb2u5DrwFqmTk4ZAg~~&"
        "x5=1&"
        "f=json"
    ).format(offset=offset)

    # Take the latest request-header values from Fiddler.
    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    header_text = """ Host: mp.weixin.qq.com Connection: keep-alive X-Requested-With: XMLHttpRequest User-Agent: Mozilla/5.0 (Linux; Android 7.0; MI 5 Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/043804 Mobile Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN Accept: */* Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MzgyODQxMQ==&devicetype=android-24&version=26051732&lang=zh_CN&nettype=WIFI&a8scene=7&session_us=gh_c744c4d09c36&pass_ticket=zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0&wx_header=1 Accept-Encoding: gzip, deflate Accept-Language: zh-CN,en-US;q=0.8 Cookie: pgv_pvi=1680185344; pgv_si=s6583349248; wxtokenkey=a40c0cde8d7c0a549e900a166819c80622ce7f12899bd6e25f5d5275ff18f7c6; rewardsn=9a0c2a83b30e5994c162; wxuin=528927841; devicetype=android-24; version=26051732; lang=zh_CN; pass_ticket=zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0; wap_sid2=COGYm/wBElxsRzJDSS1ZTjlmLVFTRlBYZ3FiV2NBUGZHLUlnMzU5V3lEV1RsSHhJSVp2aWlZc1lxRW9NTnJfb1pzbUw5Zm9vMzhuZ0plU2N2X2lLRExsWGNSVjdDcW9EQUFBfjC4grfSBTgMQJRO Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.5.23&TBSVC=43602&CO=BK&COVC=043804&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= MI5 &RL=1080*1920&OS=7.0&API=24 Q-GUID: ed3467186e1125bb3d28234d13b788cb Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b """
    reply = requests.get(page_url, headers=utils.str_to_dict(header_text), verify=False)
    data = reply.json()

    if data.get("ret") != 0:
        # Error reply, e.g.
        # {"ret":-3,"errmsg":"no session","cookie_count":1}
        logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
        exit()
    else:
        articles = data.get("general_msg_list")
        log_line = "抓取数据:offset=%s, data=%s" % (offset, articles)
        logger.info(log_line)
        self.save(articles)
        # Recurse into the next page when there is one.
        if data.get("can_msg_continue") == 1:
            upcoming = data.get("next_offset")
            time.sleep(2)
            self.crawl(upcoming)
def onMessage(self, msg):
    """Dispatch a chat message to the command named by its first word.

    Messages whose ``type`` is not ``'chat'`` or that have no ``body``
    attribute are ignored. Otherwise the body is split on single spaces:
    the first token selects the handler via ``commands.get_command`` and
    the remaining tokens are passed on as positional arguments. When the
    first token contains ``"="`` the remaining tokens are instead re-joined
    and parsed by ``utils.str_to_dict`` into keyword arguments.

    :param msg: incoming message; indexed with ``"type"`` and read via ``.body``
    :return: None
    """
    if msg["type"] != 'chat' or not hasattr(msg, "body"):
        return

    command_name, *remaining = str(msg.body).split(" ")
    # NOTE(review): the "=" test is applied to the command token itself, not
    # to the arguments - confirm that this is intended.
    if "=" in command_name:
        positional = []
        named = utils.str_to_dict(" ".join(remaining))
    else:
        positional = remaining
        named = {}

    handler = commands.get_command(command_name)
    handler(self, msg, *positional, **named)
def get_params(self, article_url):
    """Return the query parameters of an article URL.

    :param article_url: article URL, possibly HTML-escaped
    :return: result of ``utils.str_to_dict`` applied to the URL's query
        string with ``"&"`` and ``"="`` as separators
    """
    # Undo HTML escaping before the URL is taken apart.
    unescaped = html.unescape(article_url)
    # Extract the parameters carried by the article link.
    query_string = urlsplit(unescaped).query
    return utils.str_to_dict(query_string, "&", "=")
def crawl_latest_10(self):
    """Crawl the most recent 10 entries from the account's home page.

    Requests the ``action=home`` page. If the page title marks it as a
    verification page, an error is logged and the process exits. Otherwise
    the ``msgList = '...'`` payload is located with a regular expression,
    HTML-unescaped twice and handed to ``self.save``. A warning is logged
    when no payload is found.

    :return: None
    """
    home_url = (
        "https://mp.weixin.qq.com/mp/profile_ext?"
        "action=home&"
        "__biz=MjM5MzgyODQxMQ==&"
        "devicetype=android-24&"
        "version=26051633&"
        "lang=zh_CN&"
        "nettype=WIFI&"
        "a8scene=7&"
        "pass_ticket=oZQqv0KR7zhxAix1SHUFLwI7p%2FiKH2NPWIdEmZidhitAOdpf873t%2BLEZU9Hnxx%2FT&"
        "wx_header=1"
    )
    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    header_text = """ Host: mp.weixin.qq.com Connection: keep-alive Cache-Control: max-age=0 Upgrade-Insecure-Requests: 1 User-Agent: Mozilla/5.0 (Linux; Android 5.1.1; 2014813 Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN x-wechat-uin: NTI1NDc3NTE4 x-wechat-key: c37a3f1c3525d70e6537599058680a1c1d38d754815c6f101c7bf3fbc5bbcd19f8c54895c965c0d624b36d11da34033eb5109c5e40524df3109f43943505a19d0b3f68bacb0b77cbae35a251a1722f98 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/wxpic,image/sharpp,*/*;q=0.8 Accept-Encoding: gzip, deflate Accept-Language: zh-CN,en-US;q=0.8 Cookie: rewardsn=1872e6d4042552713dff; wxtokenkey=990d7740963fdbea6cf44ba78c4166b7f9025262edc22a0977c781f8073b54aa; wxuin=525477518; devicetype=android-22; version=26051731; lang=zh_CN; pass_ticket=wxW7ApnFNe01tZe42nkIH5EExbK+YA45O1NzaLk7uLZBPks8RzUA4gzD6hxU9V5n; wap_sid2=CI7NyPoBElxldXVmX1B4VVVJbXdNTnh4SDZ2YXlMcURDWENucDhvWmZoMktqRmQzYWJVRXV6b29TYmJWX2VscG4zekh2ZzVxWVhmcGpraDBLUEg3LTZZNmNXYXl2S1lEQUFBfjCQmMPRBTgMQJRO Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.5.23&TBSVC=43602&CO=BK&COVC=043632&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= 2014813 &RL=720*1280&OS=5.1.1&API=22 Q-GUID: 9d4417681f44eeb4410b613d13b788cb Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b """
    reply = requests.get(home_url, headers=utils.str_to_dict(header_text), verify=False)

    if '<title>验证</title>' in reply.text:
        logger.error("无法正确获取内容,请重新获取请求参数和请求头")
        exit()

    # re.S lets "." span line breaks inside the embedded payload.
    found = re.search("msgList = '({.*?})'", reply.text, flags=re.S)
    if not found:
        logger.warning("没有找到匹配的数据")
        return

    payload = found.group(1)
    logger.info("抓取数据: %s" % payload)
    # The payload is unescaped twice, exactly as captured.
    payload = html.unescape(html.unescape(payload))
    self.save(payload)
def test_str_to_dict(self):
    """Check ``utils.str_to_dict`` on malformed, flat and nested inputs.

    Uses ``assertEqual``/``assertIsNone`` instead of the ``assertEquals``
    alias (removed in Python 3.12) and instead of a bare ``assert`` (which
    is stripped under ``python -O``).
    """
    # Basic behavior for poorly formed inputs
    self.assertIsNone(utils.str_to_dict(None))
    self.assertEqual(None, utils.str_to_dict(""))
    self.assertEqual({}, utils.str_to_dict("no_vals"))

    # Unnested arguments lists are parsed
    expected_dict_simple = {
        "url": "http://foobar.com/justin",
        "args": "foo",
        "otherKey": "42",
    }
    self.assertEqual(expected_dict_simple,
                     utils.str_to_dict(self.simple_url_input_string))

    # Nested List Behavior
    expected_dict_nested = {
        "url": "http://foobar.com/justin",
        "args": ["foo", "justin", "bar"],
        "otherKey": "AnotherKey",
        "finalKey": "42",
    }
    self.assertEqual(expected_dict_nested,
                     utils.str_to_dict(self.nested_url_input_string))
def crawl(self, offset=0):
    """Crawl more articles, one page per call, and log each page.

    Requests the ``action=getmsg`` endpoint at ``offset`` and logs the
    reply's ``general_msg_list`` (this variant does not save it). When the
    reply reports ``can_msg_continue == 1`` it waits 2 seconds and calls
    itself with ``next_offset``. A reply whose ``ret`` is not 0 is logged
    as an error and the process exits.

    Fix: the URL used to hard-code ``offset=10`` and was never formatted,
    so every recursive call re-requested the same page. ``offset`` is now
    substituted into the query string.

    :param offset: value substituted into the ``offset`` query parameter
    :return: None
    """
    url = (
        "https://mp.weixin.qq.com/mp/profile_ext"
        "?action=getmsg"
        "&__biz=MjM5MTY3OTYyMQ=="
        "&f=json"
        "&offset={offset}"
        "&count=10"
        "&is_ok=1"
        "&scene=124"
        "&uin=777"
        "&key=777"
        "&pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs"
        "&wxtoken="
        "&appmsg_token=938_DJXrTDktnbNkCkp3oOCg-lVbWhhbemMp0EkBLA~~"
        "&x5=0"
        "&f=json"
    ).format(offset=offset)

    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    headers = """ Host: mp.weixin.qq.com Accept-Encoding: br, gzip, deflate Cookie: devicetype=iOS11.2.1; lang=zh_CN; pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs; version=16060124; wap_sid2=CLzPvfkEEnBnRTdYc3Uxa1B4NzcxdUR3T3pyNEUwYVBRb09SQlk0MWk1bTVkdVdkUlpSdTh4QUExdWNVYkVxLTN0NnVVWVZhYk5VRXJCT1hpSmI1N0FCaDNmVURBclVSSlhOcDZXZDNpTXJPZkQ0NVd1S3FBd0FBMJ3S1tIFOAxAlE4=; wxuin=1328506812; wxtokenkey=f9b3cef513031f0f50b8c9d88862ea0a8e44ae03f2dabb3e1e7d6b6f68c2233c; bk_token=fc1a7e18-e120-4f0a-a419-e1650f99b989; is_login=wx; is_wechat=1; platform=qq; pgv_pvid=6280301858; _scan_has_moon=1; eas_sid=Q1g5I1q201w0y4d0o2W5z3N4j4; _ga=GA1.2.557769603.1510924852; pac_uid=0_64f45f141b0a7; sd_cookie_crttime=1508507376988; sd_userid=58871508507376987 Connection: keep-alive Accept: */* User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C153 MicroMessenger/6.6.1 NetType/WIFI Language/zh_CN Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MTY3OTYyMQ==&scene=124&devicetype=iOS11.2.1&version=16060124&lang=zh_CN&nettype=WIFI&a8scene=3&fontScale=100&pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs&wx_header=1 Accept-Language: zh-cn X-Requested-With: XMLHttpRequest """
    headers = utils.str_to_dict(headers)
    response = requests.get(url, headers=headers, verify=False)
    result = response.json()
    if result.get("ret") == 0:
        msg_list = result.get("general_msg_list")
        logger.info("抓取数据:offset=%s, data=%s" % (offset, msg_list))
        # Recurse into the next page when there is one.
        has_next = result.get("can_msg_continue")
        if has_next == 1:
            next_offset = result.get("next_offset")
            time.sleep(2)
            self.crawl(next_offset)
    else:
        # Error reply, e.g.
        # {"ret":-3,"errmsg":"no session","cookie_count":1}
        logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
        exit()
def run(self):
    """Fetch one page of articles, record their read/like numbers, then recurse.

    Requests the page at ``self.begin_page_index * 10`` and parses it with
    ``self.article_list``. For every entry that carries
    ``app_msg_ext_info`` it looks up the numbers of the main article (when
    it has a non-empty ``content_url``) and of each item in
    ``multi_app_msg_item_list`` through ``Articles.read_like_nums``. The
    collected lines are written with ``self.write_file``, then
    ``self.is_exit_or_continue()`` is called and the method calls itself
    again.

    Fix: the page URL ended with ``&x5=0f=json``; the missing ``&`` between
    ``x5=0`` and ``f=json`` is restored.

    NOTE(review): loop nesting was reconstructed from a whitespace-collapsed
    source (the sleeps sit next to their lookups, ``self.num`` advances once
    per entry) - confirm against the original layout.

    :return: None
    """
    # Pagination URL
    page_url = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz={}&f=json&offset={}&count=10&is_ok=1&scene=&uin=777&key=777&pass_ticket={}&wxtoken=&appmsg_token=" + self.appmsg_token + "&x5=0&f=json"
    # Turn the cookie string into a dict
    wx_dict = utils.str_to_dict(self.cookie, join_symbol='; ', split_symbol='=')
    # Request the page
    response = requests.get(
        page_url.format(self.biz, self.begin_page_index * 10, wx_dict['pass_ticket']),
        headers=self.headers,
        verify=False)
    # Turn the article list into a dict
    articles = self.article_list(response.text)
    info = Articles(self.appmsg_token, self.cookie)

    result = []
    for a in articles['list']:
        if 'app_msg_ext_info' in a:
            ext_info = a.get('app_msg_ext_info')

            # Main article of the entry
            if '' != ext_info.get('content_url', ''):
                read_num, old_like_num, like_num = info.read_like_nums(
                    ext_info.get('content_url'))
                result.append(
                    str(self.num) + '条,' + ext_info.get('title') + ',' +
                    str(read_num) + ',' + str(old_like_num) + ',' + str(like_num))
                time.sleep(2)

            # Secondary articles published in the same entry
            for m in ext_info.get('multi_app_msg_item_list', []):
                read_num, old_like_num, like_num = info.read_like_nums(
                    m.get('content_url'))
                result.append(
                    str(self.num) + '条的副条,' + m.get('title') + ',' +
                    str(read_num) + ',' + str(old_like_num) + ',' + str(like_num))
                time.sleep(3)

        self.num = self.num + 1

    self.write_file(result)
    self.is_exit_or_continue()
    # Recursive call
    self.run()
def crawl(self, offset=0):
    """Crawl more articles, one page per call.

    Requests the ``action=getmsg`` endpoint at ``offset`` and saves the
    reply's ``general_msg_list`` through ``self.save``. When the reply
    reports ``can_msg_continue == 1`` it waits 2 seconds and calls itself
    with ``next_offset``. A reply whose ``ret`` is not 0 is logged as an
    error and the process exits.

    Fix: the URL used to hard-code ``offset=10`` while ``.format`` was
    called without any placeholder, so ``offset`` was silently ignored and
    every call fetched the same page. The placeholder is now present.

    :param offset: value substituted into the ``offset`` query parameter
    :return: None
    """
    # appmsg_token is temporary.
    url = (
        "https://mp.weixin.qq.com/mp/profile_ext"
        "?action=getmsg"
        "&__biz=MjM5MTY3OTYyMQ=="
        "&f=json"
        "&offset={offset}"
        "&count=10"
        "&is_ok=1"
        "&scene=124"
        "&uin=777"
        "&key=777"
        "&pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs"
        "&wxtoken="
        "&appmsg_token=938_DJXrTDktnbNkCkp3oOCg-lVbWhhbemMp0EkBLA~~"
        "&x5=0"
        "&f=json"
    ).format(offset=offset)

    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    headers = """ Host: mp.weixin.qq.com Accept-Encoding: gzip, deflate Cookie: ts_uid=3175878595; devicetype=iOS10.3.3; lang=zh_CN; pass_ticket=YZe3bIk+CZTU9OKVtY18FMsPmra+SBBXM1/JKTMgppKJ/0V3B99XDPcwlChD+3GL; version=16060120; wap_sid2=CIDUopEDElxKbHZCekJBVEJJT0FZb1VQSTZBZDlqNllaLXJpX0ptdWplVDVHTlF4UmlzdmxwNE85VWNJanJuYlhkOTJpZkxBU0ROSU1US0taNzVwZVNXNkpTY1dHYWtEQUFBfjDA6pTSBTgMQJRO; wxuin=841525760; wxtokenkey=0764bcd88cd2a131bbb205daa5f78bbd51a67bafef98327188df0288e2450363; ua_id=0QjBcaeG8vk11IhBAAAAAGHV4Cqd2-aR9dxpfQrd_L8=; pgv_pvid=7585005942; sd_cookie_crttime=1514388301744; sd_userid=79621514388301744 Connection: keep-alive Accept: */* User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 MicroMessenger/6.6.1 NetType/WIFI Language/zh_CN Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MzgyODQxMQ==&devicetype=iOS10.3.3&version=16060120&lang=zh_CN&nettype=WIFI&a8scene=7&session_us=gh_c744c4d09c36&fontScale=100&pass_ticket=YZe3bIk%2BCZTU9OKVtY18FMsPmra%2BSBBXM1%2FJKTMgppKJ%2F0V3B99XDPcwlChD%2B3GL&wx_header=1 Accept-Language: zh-cn X-Requested-With: XMLHttpRequest """
    headers = utils.str_to_dict(headers)
    response = requests.get(url, headers=headers, verify=False)
    result = response.json()
    if result.get("ret") == 0:
        msg_list = result.get("general_msg_list")
        logger.info("抓取数据:offset=%s, data=%s" % (offset, msg_list))
        self.save(msg_list)
        # Recurse into the next page when there is one.
        has_next = result.get("can_msg_continue")
        if has_next == 1:
            next_offset = result.get("next_offset")
            time.sleep(2)
            self.crawl(next_offset)
    else:
        # Error reply, e.g.
        # {"ret":-3,"errmsg":"no session","cookie_count":1}
        logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
        exit()
def __init__(self):
    """Prepare headers, music.163.com URL/payload templates and an HTTP session."""
    # Request headers parsed from the configured header text.
    self.headers = str_to_dict(config.HEADERS)

    # Player URL endpoint and its payload template (filled with a song id).
    self.url_mp3 = "https://music.163.com/weapi/song/enhance/player/url?csrf_token="
    self.params_mp3 = '{"ids":"[%s]","br":128000,"csrf_token":""}'

    # Comments endpoint and its payload template (rid, offset, limit).
    # NOTE(review): the query key is spelled "crsf_token" here while the
    # other endpoints use "csrf_token" - confirm whether that is intended.
    self.url_comments = "https://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?crsf_token="
    self.params_comments = '{"rid":"R_SO_4_%s","offset":"%s","total":"true","limit":"%s","csrf_token":""}'

    # Lyric endpoint and its payload template (filled with a song id).
    self.url_lyric = "https://music.163.com/weapi/song/lyric?csrf_token="
    self.params_lyric = '{"id":"%s","lv":-1,"tv":-1,"csrf_token":""}'

    # Song detail API and the song page, both formatted with a song id.
    self.url_detail = 'http://music.163.com/api/song/detail?ids=[{}]'
    self.song = "https://music.163.com/#/song?id={}"

    self.request = requests.Session()
def crawl(self, offset=0):
    """Crawl more articles, one page per call.

    Requests the ``action=getmsg`` endpoint at ``offset`` and saves the
    reply's ``general_msg_list`` through ``self.save``. When the reply
    reports ``can_msg_continue == 1`` it waits 2 seconds and calls itself
    with ``next_offset``. A reply whose ``ret`` is not 0 is logged as an
    error and the process exits.

    :param offset: value substituted into the ``offset`` query parameter
    :return: None
    """
    # Replace appmsg_token and pass_ticket with your own values.
    endpoint = (
        "https://mp.weixin.qq.com/mp/profile_ext?"
        "action=getmsg&"
        "__biz=MjM5MjAxNDM4MA==&"
        "f=json&"
        "offset={offset}&"
        "count=10&"
        "is_ok=1&"
        "scene=126&"
        "uin=777&"
        "key=777&"
        "pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl&"
        "wxtoken=&"
        "appmsg_token=1001_2i83IvTNI8q300WZIViVq4aBpcsB_dQcmtYBVw~~&"
        "x5=0&"
        "f=json"
    ).format(offset=offset)

    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    header_block = """ Host: mp.weixin.qq.com Accept-Encoding: br, gzip, deflate Cookie: devicetype=iOS12.1.4; lang=zh_CN; pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl; version=17000329; wap_sid2=CITbl7QCElxDcVVSblZ5Y3NnVkwxblE4NUZsNGNOVm9Dd05YVnprZWQtdFVSYVQxYk9Edmx0bEVBZWNuWXhyVmRXVjctczZfX1BCdVphZzNBZV9YRmJPNTYwTUF2ZWtEQUFBfjCDqNLkBTgNQJVO; wxuin=646311300; wxtokenkey=777; rewardsn=; pgv_pvid=8969838003; pgv_pvi=60331008; _scan_has_moon=1; ts_uid=3719156268; tvfe_boss_uuid=5682fb061e2059e3; sd_cookie_crttime=1545398450253; sd_userid=35071545398450253 Connection: keep-alive Accept: */* User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/16D57 MicroMessenger/7.0.3(0x17000321) NetType/WIFI Language/zh_CN Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MjAxNDM4MA==&scene=126&bizpsid=1553240795&sessionid=1553240795&subscene=0&devicetype=iOS12.1.4&version=17000329&lang=zh_CN&nettype=WIFI&a8scene=0&fontScale=100&pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl&wx_header=1 Accept-Language: zh-cn X-Requested-With: XMLHttpRequest """
    header_map = utils.str_to_dict(header_block)
    body = requests.get(endpoint, headers=header_map, verify=False).json()

    succeeded = body.get("ret") == 0
    if succeeded:
        batch = body.get("general_msg_list")
        summary = "抓取数据:offset=%s, data=%s" % (offset, batch)
        logger.info(summary)
        self.save(batch)
        # Recurse into the next page when there is one.
        more_pages = body.get("can_msg_continue") == 1
        if more_pages:
            following = body.get("next_offset")
            time.sleep(2)
            self.crawl(following)
    else:
        # Error reply, e.g.
        # {"ret":-3,"errmsg":"no session","cookie_count":1}
        logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
        exit()
def crawl(self, offset=0):
    """Crawl more articles, one page per call.

    Requests the ``action=getmsg`` endpoint at ``offset`` and saves the
    reply's ``general_msg_list`` through ``self.save``. When the reply
    reports ``can_msg_continue == 1`` it waits 2 seconds and calls itself
    with ``next_offset``. A reply whose ``ret`` is not 0 is logged as an
    error and the process exits.

    :param offset: value substituted into the ``offset`` query parameter
    :return: None
    """
    target = (
        "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&"
        "__biz=MzI2OTA3MTA5Mg==&"
        "f=json&"
        "offset={offset}&"
        "count=10&"
        "is_ok=1&"
        "scene=126&"
        "uin=777&"
        "key=777&"
        "pass_ticket=Hbqp97LrbNKf6hgWuXAc%2FoWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF&"
        "wxtoken=&"
        "appmsg_token=1003_%252FsjVJ0I%252F2UkPyb6pioMmJ3EukhtGKS9urruwRw~~&"
        "x5=0&"
        "f=json"
    ).format(offset=offset)

    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    header_source = """ Host:mp.weixin.qq.com Accept-Encoding:br, gzip, deflate Cookie:devicetype=iOS12.2; lang=zh_CN; pass_ticket=Hbqp97LrbNKf6hgWuXAc/oWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF; version=1700032a; wap_sid2=CKuk/6UHElxyVVlQaE9JZDczaExNRXhBai1CQVlrRGNjT0l2RTVtdVdEcldQaThkZmdldjhEVmVsNlFhZ0FNdXl5SWFCYnlsLW9FMl9CRDFSdENQcUZDOUlXSWp2ZXNEQUFBfjCe75HlBTgNQJVO; wxuin=1958728235; wxtokenkey=777; rewardsn=; pgv_pvid=9658881596 Connection:keep-alive Accept:*/* User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.3(0x17000321) NetType/WIFI Language/zh_CN Referer:https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MzI2OTA3MTA5Mg==&scene=126&bizpsid=0&subscene=0&devicetype=iOS12.2&version=1700032a&lang=zh_CN&nettype=WIFI&a8scene=0&fontScale=94&pass_ticket=Hbqp97LrbNKf6hgWuXAc%2FoWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF&wx_header=1 Accept-Language:zh-cn X-Requested-With:XMLHttpRequest """
    parsed_headers = utils.str_to_dict(header_source)
    answer = requests.get(target, headers=parsed_headers, verify=False)
    decoded = answer.json()

    if decoded.get("ret") != 0:
        # Error reply, e.g.
        # {"ret":-3,"errmsg":"no session","cookie_count":1}
        logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
        exit()
    else:
        entries = decoded.get("general_msg_list")
        note = "抓取数据:offset=%s, data=%s" % (offset, entries)
        logger.info(note)
        self.save(entries)
        # Recurse into the next page when there is one.
        if decoded.get("can_msg_continue") == 1:
            upcoming = decoded.get("next_offset")
            time.sleep(2)
            self.crawl(upcoming)
def get_hyperparam_summary(self):
    """Build a dataframe with one row of parameter settings per HPO iteration.

    The columns are the keys of ``self.hpo.best_params`` and there is one
    row per entry of ``self.hpo.hyperopt_summary``. Each row is filled from
    the values of ``str_to_dict`` applied to that iteration's ``params``
    entry. Every other column of the summary is then copied across.

    :return: the assembled ``pandas.DataFrame``
    """
    summary = self.hpo.hyperopt_summary

    # Empty frame: one column per tuned parameter, one row per iteration.
    row_labels = list(range(len(summary)))
    frame = pd.DataFrame(columns=self.hpo.best_params.keys(), index=row_labels)

    # Spread each iteration's parameters over the parameter columns.
    for row, raw_params in enumerate(summary['params']):
        parsed = str_to_dict(raw_params)
        frame.loc[row, :] = list(parsed.values())

    # Carry over the remaining summary columns unchanged.
    remaining = [name for name in summary.columns.tolist() if name != "params"]
    for name in remaining:
        frame[name] = summary[name]

    return frame
def crawl(self, offset=0):
    """Crawl more articles, starting at ``offset``.

    Each page is requested from the ``action=getmsg`` endpoint and its
    ``general_msg_list`` is passed to ``self.save``. Paging continues with
    the reply's ``next_offset`` (after a 2 second pause) for as long as
    ``can_msg_continue == 1``. A reply whose ``ret`` is not 0 is logged as
    an error and the process exits.

    :param offset: value substituted into the ``offset`` query parameter
    :return: None
    """
    # appmsg_token is temporary as well.
    url_template = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz=MjM5MTA5NjAyMA==&f=json&offset={offset}&count=10&is_ok=1&scene=126&uin=777&key=777&pass_ticket=mG5PIqAHg2jAY%2B6NQAJtsyEedBCeybGVZzm4RzZefynPWKfgdSyywrnG6soSsQ0w&wxtoken=&appmsg_token=996_7dDcPuBAmv2HxZhYfS74socUK_MniYtv4x-VIg~~&x5=0&f=json"
    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    header_text = """ Host: mp.weixin.qq.com Connection: keep-alive User-Agent: Mozilla/5.0 (Linux; Android 4.4.2; OPPO R11 Build/NMF26X) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Mobile Safari/537.36 MMWEBID/2090 MicroMessenger/7.0.3.1400(0x27000334) Process/toolsmp NetType/WIFI Language/zh_CN X-Requested-With: XMLHttpRequest Accept: */* Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MTA5NjAyMA==&scene=126&bizpsid=0&devicetype=android-19&version=27000334&lang=zh_CN&nettype=WIFI&a8scene=3&pass_ticket=mG5PIqAHg2jAY%2B6NQAJtsyEedBCeybGVZzm4RzZefynPWKfgdSyywrnG6soSsQ0w&wx_header=1 Accept-Encoding: gzip,deflate Accept-Language: zh-AU,en-US;q=0.8 Cookie: rewardsn=; wxtokenkey=777; wxuin=2711770575; devicetype=android-19; version=27000334; lang=zh_CN; pass_ticket=mG5PIqAHg2jAY+6NQAJtsyEedBCeybGVZzm4RzZefynPWKfgdSyywrnG6soSsQ0w; wap_sid2=CM+riY0KElxjX1BQWTRrcjdFb2E3ZlU0V0kxbUZvdDRhNWl4ZHZkX3JRdzBPb2RaNkhOX05QcF9jQUFKd003X09hMUZJZDJVX2pPbDZQMW12QVdEUmhmUksxLU1CLVFEQUFBfjDo5JrjBTgNQJVO """

    position = offset
    while True:
        page_url = url_template.format(offset=position)
        header_map = utils.str_to_dict(header_text)
        reply = requests.get(page_url, headers=header_map, verify=False)
        parsed = reply.json()
        if parsed.get("ret") == 0:
            batch = parsed.get("general_msg_list")
            line = "抓取数据:offset=%s, data=%s" % (position, batch)
            logger.info(line)
            self.save(batch)
            if parsed.get("can_msg_continue") == 1:
                # Move on to the next page.
                position = parsed.get("next_offset")
                time.sleep(2)
                continue
        else:
            # Error reply, e.g.
            # {"ret":-3,"errmsg":"no session","cookie_count":1}
            logger.error("无法正确获取内容,请重新从Fiddler获取请求参数和请求头")
            exit()
        break
def parse_detail(self, id, max_retries=3):
    """Look up the name and first artist of the song with the given id.

    Fixes over the previous version:

    * the bare ``except:`` (which also caught ``KeyboardInterrupt`` and
      ``SystemExit``) is narrowed to request, decoding and lookup errors;
    * failures were retried by unbounded recursion; retries are now capped
      at ``max_retries`` attempts;
    * the result of a successful retry was dropped (the caller always got
      ``None`` after a first failure); it is now returned.

    :param id: song id substituted into ``self.url_detail``
    :param max_retries: maximum number of attempts before giving up
    :return: ``{"name": ..., "singer": ...}``, or ``None`` when the reply
        lists no songs or every attempt failed
    """
    for attempt in range(1, max_retries + 1):
        try:
            logging.info("解析歌曲id为{}的信息".format(id))
            r = requests.get(self.url_detail.format(id),
                             headers=str_to_dict(config.HEADERS))
            content = json.loads(r.text)
            songs = content["songs"]
            if not songs:
                logging.info("没有歌曲id为{}信息".format(id))
                return None
            first_song = songs[0]
            data = {
                "name": first_song["name"],
                "singer": first_song["artists"][0]["name"],
            }
            logging.info("歌曲名:{}-歌手名:{}".format(data["name"], data["singer"]))
            return data
        except (requests.RequestException, ValueError, LookupError, TypeError):
            # ValueError covers JSON decoding errors; LookupError covers
            # missing keys and empty lists in the reply.
            logging.exception("parse_detail attempt %s/%s failed for id %s",
                              attempt, max_retries, id)
    return None
def update_post(post):
    """Refresh the read/like/reward counters of ``post`` and save it.

    Per the original note, ``post`` is a record pulled from MongoDB. The
    query parameters of ``post.content_url`` are merged over a captured
    parameter set and posted to the ``getappmsgext`` endpoint. When the
    reply contains ``appmsgstat`` the counters and ``u_date`` are written
    onto ``post`` and ``post.save()`` is called. Otherwise a warning with
    the raw reply is logged and the process exits.

    :param post: record with ``content_url``/``title`` attributes, item
        assignment and ``save()``
    :return: None
    """
    # Remember to use the latest appmsg_token.
    query_params = {
        '__biz': 'MjM5MzgyODQxMQ==',
        'appmsg_type': '9',
        'mid': '2650367540',
        'sn': 'ef9c6353a9255dbc00e2beac7f449dad',
        'idx': '1',
        'scene': '27',
        'title': 'Python%E5%A5%87%E6%8A%80%E6%B7%AB%E5%B7%A7%EF%BC%8C%E7%9C%8B%E7%9C%8B%E4%BD%A0%E7%9F%A5%E9%81%93%E5%87%A0%E4%B8%AA',
        'ct': '1511410410',
        'abtest_cookie': 'AwABAAoADAANAAcAJIgeAGSIHgD8iB4A7IkeAAaKHgAPih4AU4oeAAAA',
        'devicetype': 'android-24',
        'version': '/mmbizwap/zh_CN/htmledition/js/appmsg/index3a9713.js',
        'f': 'json',
        'r': '0.04959653583814139',
        'is_need_ad': '0',
        'comment_id': '1411699821',
        'is_need_reward': '1',
        'both_ad': '0',
        'reward_uin_count': '24',
        'msg_daily_idx': '1',
        'is_original': '0',
        'uin': '777',
        'key': '777',
        'pass_ticket': 'zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0',
        'wxtoken': '1922467438',
        'clientversion': '26051732',
        'appmsg_token': '938_0n0in1TAhMHhtZ7zXIOyxTxYXZEFW7ez7tXTmochNzKXa19P3wxK6-C-yM1omM_h7gSMZJmyv7glw98g',
        'x5': '1',
    }

    # Unescape the article URL, take its query string, turn it into a dict
    # and let it override the captured parameters.
    article_url = html.unescape(post.content_url)
    article_query = urlsplit(article_url).query
    query_params.update(utils.str_to_dict(article_query, "&", "="))

    form_text = "is_only_read=1&req_id=0414NBNjylwrVHDydtl3ufse&pass_ticket=zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0&is_temp_url=0"
    form = utils.str_to_dict(form_text, "&", "=")

    # Take the latest values from Fiddler.
    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    header_text = """ Host: mp.weixin.qq.com Connection: keep-alive Content-Length: 137 Origin: https://mp.weixin.qq.com X-Requested-With: XMLHttpRequest User-Agent: Mozilla/5.0 (Linux; Android 7.0; MI 5 Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/043804 Mobile Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN Content-Type: application/x-www-form-urlencoded; charset=UTF-8 Accept: */* Referer: https://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367558&idx=1&sn=b8be74571b14f78d80c062ded89b2d4a&chksm=be9cdd1289eb5404f1b423a135ce5adf5d2cf802a09014b6f407c96b40fdc88ce6e4e0ed8665&scene=27&ascene=0&devicetype=android-24&version=26051732&nettype=WIFI&abtest_cookie=AwABAAoADAANAAcAJIgeAGSIHgD8iB4A7IkeAAaKHgAPih4AU4oeAAAA&lang=zh_CN&pass_ticket=zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0&wx_header=1 Accept-Encoding: gzip, deflate Accept-Language: zh-CN,en-US;q=0.8 Cookie: pgv_pvi=1680185344; pgv_si=s6583349248; rewardsn=9a0c2a83b30e5994c162; wxtokenkey=71bdfbb7fad39d08d2eb2dece479971297781391293c3c913e74f0f1c4c16971; wxuin=528927841; devicetype=android-24; version=26051732; lang=zh_CN; pass_ticket=zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0; wap_sid2=COGYm/wBElxvYUdHZDdpUlExT2h3MnVTMS1nendPdUlZZ1BsU2h3ZUhibGNNQTRMS0t1dXhPQS1YUHNZNGdhQXk2Z0F0WkF0U3dGVUlYNnBHdlVTVEk0aHBOMktXS29EQUFBfjCyjLfSBTgNQAE= Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.5.23&TBSVC=43602&CO=BK&COVC=043804&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= MI5 &RL=1080*1920&OS=7.0&API=24 Q-GUID: ed3467186e1125bb3d28234d13b788cb Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b """
    header_map = utils.str_to_dict(header_text)

    stats_url = "https://mp.weixin.qq.com/mp/getappmsgext"
    r = requests.post(stats_url, data=form, verify=False, params=query_params, headers=header_map)
    reply = r.json()

    if not reply.get("appmsgstat"):
        logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,返回的数据为:data=%s" % r.text)
        exit()
    else:
        post['read_num'] = reply.get("appmsgstat").get("read_num")
        post['like_num'] = reply.get("appmsgstat").get("like_num")
        post['reward_num'] = reply.get("reward_total_count")
        post['u_date'] = datetime.now()
        counters = (post.title, post['read_num'], post['like_num'], post['reward_num'])
        logger.info("「%s」read_num: %s like_num: %s reward_num: %s" % counters)
        post.save()
def update_post(self, post):
    """Refresh the read/like/reward counters of ``post`` and save it.

    The query parameters of ``post.content_url`` are merged over a captured
    parameter set and posted to the ``getappmsgext`` endpoint. When the
    reply contains ``appmsgstat`` the counters and ``u_date`` are written
    onto ``post`` and ``post.save()`` is called. Otherwise a warning with
    the raw reply is logged.

    Changes: the unused local ``post_url_params`` was removed and the
    repeated ``appmsgstat`` lookup is done once.

    :param post: record with ``content_url``/``title`` attributes, item
        assignment and ``save()``
    :return: None
    """
    data_url_params = {
        '__biz': 'MzI2OTA3MTA5Mg==',
        'appmsg_type': '9',
        'mid': '2651793256',
        'sn': '91666add0a2f4dd320fa8c6065455cc4',
        'idx': '1',
        'scene': '21',
        'title': '%25E5%2588%2586%25E5%25BC%2580%25E5%25A4%259A%25E5%25B9%25B4%25E7%259A%2584%25E6%2583%2585%25E4%25BE%25A3%25E5%2586%258D%25E8%25A7%2581%25E9%259D%25A2%25EF%25BC%259A%25E8%25B6%258A%25E6%2598%25AF%25E4%25B8%258D%25E7%2594%2598%25E5%25BF%2583%25E7%259A%2584%25E7%2588%25B1%25E6%2583%2585%25EF%25BC%258C%25E8%25B6%258A%25E8%25A6%2581%25E4%25BA%25B2%25E6%2589%258B%25E4%25BA%2586%25E6%2596%25AD',
        'ct': '1553356619',
        'abtest_cookie': 'BAABAAoACwASABMABQAjlx4AVpkeAM2ZHgDZmR4A3JkeAAAA',
        'devicetype': 'iOS12.2',
        'version': '1700032a',
        'f': 'json',
        'r': '0.6048423341041006',
        'is_need_ad': '1',
        'both_ad': '0',
        'reward_uin_count': '24',
        'msg_daily_idx': '1',
        'is_original': '0',
        'uin': '777',
        'key': '777',
        'pass_ticket': 'Hbqp97LrbNKf6hgWuXAc%25252FoWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF',
        'wxtoken': '204390160',
        'clientversion': '26060030',
        'appmsg_token': '1003_%252FsjVJ0I%252F2UkPyb6pioMmJ3EukhtGKS9urruwRw~~',
        'x5': '1',
    }

    # Unescape the article URL.
    content_url = html.unescape(post.content_url)
    # Take the query-string part of content_url.
    content_url_params = urlsplit(content_url).query
    # Turn the parameters into a dict.
    content_url_params = utils.str_to_dict(content_url_params, "&", "=")
    # Let the article's own parameters override the captured ones.
    data_url_params.update(content_url_params)

    body = "is_only_read=1&req_id=2900i1sqRlQwikp0KEVJieW4&pass_ticket=Hbqp97LrbNKf6hgWuXAc%25252FoWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF&is_temp_url=0"
    data = utils.str_to_dict(body, "&", "=")

    # NOTE(review): every header sits on one line of this literal; the default
    # separators of utils.str_to_dict are not visible here - confirm it can split it.
    headers = """ Host:mp.weixin.qq.com Accept-Encoding:br, gzip, deflate Cookie:devicetype=iOS12.2; lang=zh_CN; pass_ticket=Hbqp97LrbNKf6hgWuXAc/oWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF; version=1700032a; wap_sid2=CKuk/6UHElxyVVlQaE9JZDczaExNRXhBai1CQVlrRGNjT0l2RTVtdVdEcldQaThkZmdldjhEVmVsNlFhZ0FNdXl5SWFCYnlsLW9FMl9CRDFSdENQcUZDOUlXSWp2ZXNEQUFBfjCe75HlBTgNQJVO; wxuin=1958728235; wxtokenkey=777; rewardsn=; pgv_pvid=9658881596 Connection:keep-alive Accept:*/* User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.3(0x17000321) NetType/WIFI Language/zh_CN Referer:https://mp.weixin.qq.com/s?__biz=MzI2OTA3MTA5Mg==&mid=2651793256&idx=1&sn=91666add0a2f4dd320fa8c6065455cc4&chksm=f11e60f3c669e9e5a03ee6124e103d84f2fe802d464c8ca35c9259d728fc1ab199c7713fd7e3&scene=4&subscene=126&ascene=0&devicetype=iOS12.2&version=1700032a&nettype=WIFI&abtest_cookie=BAABAAoACwASABMABQAjlx4AVpkeAM2ZHgDZmR4A3JkeAAAA&lang=zh_CN&fontScale=94&pass_ticket=Hbqp97LrbNKf6hgWuXAc%2FoWp9JnaRMgX3yq1ipOP4jxPoVnmT43AB6HTH38c4prF&wx_header=1 Accept-Language:zh-cn X-Requested-With:XMLHttpRequest """
    headers = utils.str_to_dict(headers)

    url = "https://mp.weixin.qq.com/mp/getappmsgext"
    r = requests.post(url, data=data, verify=False, params=data_url_params, headers=headers)
    result = r.json()

    stats = result.get("appmsgstat")
    if stats:
        post['read_num'] = stats.get("read_num")
        post['like_num'] = stats.get("like_num")
        post['reward_num'] = result.get("reward_total_count")
        post['u_date'] = datetime.now()
        logger.info("「%s」read_num: %s like_num: %s reward_num: %s" %
                    (post.title, post['read_num'], post['like_num'], post['reward_num']))
        post.save()
    else:
        logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,data=%s" % r.text)
def update_post(post):
    """Refresh a post's read/like/reward counters via WeChat's getappmsgext endpoint.

    A captured request (query string, form body, headers) is replayed, with
    the query parameters overridden by those found in ``post.content_url``.

    :param post: object exposing ``content_url`` and ``title`` attributes,
        item assignment and ``save()``.
    :return: None on success.
    :raises SystemExit: when the response carries no ``appmsgstat`` entry.
    """
    # Article URL parameters (the original note says the ``title`` parameter
    # has no effect on the result).
    captured_query = (
        "__biz=MjM5MzgyODQxMQ==&"
        "appmsg_type=9&"
        "mid=2650367799&"
        "sn=afd47703d8bfd41f547c3916a25a4922&"
        "idx=1&"
        "scene=38&"
        "title=%E6%8E%A8%E8%8D%90%E5%87%A0%E4%B8%AA%E5%85%AC%E4%BC%97%E5%8F%B7%EF%BC%88%E6%96%87%E6%9C%AB%E5%BD%A9%E8%9B%8B%EF%BC%89&"
        "ct=1515730434&"
        "abtest_cookie=BAABAAgACgAMAA0ACgCehh4AlYoeAKCKHgCxih4AuooeAL+KHgDGih4AyooeANiKHgDdih4AAAA=&devicetype=android-24&"
        "version=/mmbizwap/zh_CN/htmledition/js/appmsg/index3b1748.js&"
        "f=json&"
        "r=0.32110940912764097&"
        "is_need_ad=1&"
        "comment_id=4119591256&"
        "is_need_reward=0&"
        "both_ad=0&"
        "reward_uin_count=0&"
        "msg_daily_idx=1&"
        "is_original=0&"
        "uin=777&"
        "key=777&"
        "pass_ticket=IlqxjPEmgZu1FF%25252BoLPoEacmQD%25252Bpo%25252BSAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy&"
        "wxtoken=1044847233&devicetype=android-24&"
        "clientversion=26060135&"
        "appmsg_token=939_faosuyskmbgJ7K6WTQdsnO_RZ5I4iaY5qFSIpAp9fCvhtU1OKrI8rZ2vJHTu_c40JGJqPPbMvqn-UG1p&"
        "x5=1&f=json"
    )
    query_params = utils.str_to_dict(captured_query, "&", "=")

    # Un-escape HTML entities in the stored URL, extract its query string,
    # convert it to a dict and let it override the captured parameters.
    article_url = html.unescape(post.content_url)
    article_query = urlsplit(article_url).query
    query_params.update(utils.str_to_dict(article_query, "&", "="))

    body = (
        "is_only_read=1&"
        "req_id=03230SZyTR8kQlPVkKwxbt1A&"
        "pass_ticket=mXHYjLnkYux1rXx8BxNrZpgW4W%25252ByLZxcuvpDWlxbBrjvJo3ECB%25252BckDAsy%25252FTJJK6P&"
        "is_temp_url=0"
    )
    # Form body as a dict.
    data = utils.str_to_dict(body, "&", "=")

    # NOTE(review): one header per line; assumes utils.str_to_dict's defaults
    # split on newlines and on the first ':' -- confirm against utils.
    headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
Content-Length: 155
Origin: https://mp.weixin.qq.com
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 7.0; Mi Note 2 Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/043807 Mobile Safari/537.36 MicroMessenger/6.6.1.1220(0x26060135) NetType/WIFI Language/zh_CN
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Accept: */*
Referer: https://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367799&idx=1&sn=afd47703d8bfd41f547c3916a25a4922&chksm=be9cdc6389eb5575f75e424c2351969e3d6b066cb1d60466d4865381f8de5d41867ca2a526b1&scene=38&ascene=0&devicetype=android-24&version=26060135&nettype=WIFI&abtest_cookie=BAABAAgACgAMAA0ACgCehh4AlYoeAKCKHgCxih4AuooeAL%2BKHgDGih4AyooeANiKHgDdih4AAAA%3D&lang=zh_CN&pass_ticket=IlqxjPEmgZu1FF%2BoLPoEacmQD%2Bpo%2BSAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: sd_userid=18861489362350793; sd_cookie_crttime=1489362350793; tvfe_boss_uuid=2e9655075626eb61; RK=CaObR3LaR5; gid=82fdcd05-0c3d-415b-bcdd-87cbe92d7cc4; pgv_pvi=2601810944; pac_uid=0_932f1aa5f4e35; ua_id=kE9kVdDwMCCFio0aAAAAAOmF24i23FqG2pg7TGWEqiQ=; ptcz=20c41fc2cfc570cec0dcf5cf052e240af82cfe5b2839f4ae1280b7e3d71a81cb; pt2gguin=o0364580936; o_cookie=364580936; pgv_pvid=8564607811; rewardsn=; wxtokenkey=90c34bc971c6ce5a8df3de87ab70143e0d6711e65604c5feedb0b6b425311912; wxuin=2076741001; devicetype=android-24; version=26060135; lang=zh_CN; pass_ticket=IlqxjPEmgZu1FF+oLPoEacmQD+po+SAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy; wap_sid2=CImbot4HElw4RE5GUGcyWFFQV1drdFJ5cXpZMG5yMm9qWWtwZFRoQml0TFZUVnpXVGU1RThIS3hJejhsSzFZUWw3cTdGbXVjTTdWZkpVVGVNTmhCR3l3U2FBMURGYXNEQUFBfjCzw/HSBTgNQAE=
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.6.1&TBSVC=43602&CO=BK&COVC=043807&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= MiNote2 &RL=1080*1920&OS=7.0&API=24
Q-GUID: 2af3a0dfa8f770bff552a88a13b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b
"""
    headers = utils.str_to_dict(headers)

    # Statistics endpoint. verify=False disables TLS certificate verification.
    data_url = "https://mp.weixin.qq.com/mp/getappmsgext"
    r = requests.post(data_url, data=data, verify=False,
                      params=query_params, headers=headers)

    result = r.json()
    stats = result.get("appmsgstat")
    if not stats:
        logger.warning(u"出错")
        # ``exit()`` is a helper installed by the ``site`` module; raising
        # SystemExit directly has the same effect without depending on it.
        raise SystemExit

    # Read count and like count live inside ``appmsgstat``.
    post['read_num'] = stats.get("read_num")
    post['like_num'] = stats.get("like_num")
    # Reward count: read from the top level of the response, matching the
    # other updaters in this file (previously looked up inside appmsgstat).
    post['reward_num'] = result.get("reward_total_count")
    # The ``u_date`` timestamp update was disabled in the original code.
    logger.info("%s read_num: %s like_num: %s reward_num: %s",
                post.title, post['read_num'], post['like_num'], post['reward_num'])
    post.save()
def crawl(self, offset=0):
    """Fetch the account's message history page by page and save each page.

    Starting at ``offset``, each response's ``general_msg_list`` is passed to
    ``self.save``; paging continues while the response reports
    ``can_msg_continue == 1``, sleeping two seconds between requests.
    Pages are fetched in a loop rather than by one recursive call per page,
    so long histories cannot exhaust the recursion limit.

    If a run fails, the credential-like values in the URL (``pass_ticket``,
    ``appmsg_token``) and in the headers most likely need refreshing.

    :param offset: offset of the first page to request.
    :return: None when the server reports no further pages.
    :raises SystemExit: when a response's ``ret`` field is not 0.
    """
    url_template = (
        "https://mp.weixin.qq.com/mp/profile_ext?"
        "action=getmsg&"
        "__biz=MjM5MzgyODQxMQ==&"
        "f=json&"
        "offset={offset}&"
        "count=10&"
        "is_ok=1&"
        "scene=124&"
        "uin=777&"
        "key=777&"
        "pass_ticket=IlqxjPEmgZu1FF%2BoLPoEacmQD%2Bpo%2BSAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy&wxtoken=&"
        "appmsg_token=939_hihttu0x83ppBkaiAcL5WMcghKBKviEePk53yg~~&x5=1&f=json"
    )

    # NOTE(review): one header per line; assumes utils.str_to_dict's defaults
    # split on newlines and on the first ':' -- confirm against utils.
    headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 7.0; Mi Note 2 Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/043807 Mobile Safari/537.36 MicroMessenger/6.6.1.1220(0x26060135) NetType/WIFI Language/zh_CN
Accept: */*
Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MzgyODQxMQ==&scene=124&devicetype=android-24&version=26060135&lang=zh_CN&nettype=WIFI&a8scene=3&pass_ticket=IlqxjPEmgZu1FF%2BoLPoEacmQD%2Bpo%2BSAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: sd_userid=18861489362350793; sd_cookie_crttime=1489362350793; tvfe_boss_uuid=2e9655075626eb61; RK=CaObR3LaR5; gid=82fdcd05-0c3d-415b-bcdd-87cbe92d7cc4; pgv_pvi=2601810944; pac_uid=0_932f1aa5f4e35; ua_id=kE9kVdDwMCCFio0aAAAAAOmF24i23FqG2pg7TGWEqiQ=; ptcz=20c41fc2cfc570cec0dcf5cf052e240af82cfe5b2839f4ae1280b7e3d71a81cb; pt2gguin=o0364580936; o_cookie=364580936; pgv_pvid=8564607811; rewardsn=; wxtokenkey=40c4e47bfcfbdb3be81209fce95dd5ac82cb3adacf5b6b224f97bba30c96dabb; wxuin=2076741001; devicetype=android-24; version=26060135; lang=zh_CN; pass_ticket=IlqxjPEmgZu1FF+oLPoEacmQD+po+SAKlMeJmuJgvxrZqkHisNKGim9X4iizUGDy; wap_sid2=CImbot4HElxoZThTLWNjNnBYUHcxMHU1c2ROU21SalFmSHl3ajlrczNJRU5UVVJfUDNjeEtaZVVqbDBYTTlqU2xTVVpqcXB5cGdUOGlSS3c1RUpMX2lMQ21QNk1nS3NEQUFBfjCu0vHSBTgMQJRO
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.6.1&TBSVC=43602&CO=BK&COVC=043807&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= MiNote2 &RL=1080*1920&OS=7.0&API=24
Q-GUID: 2af3a0dfa8f770bff552a88a13b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b
"""
    # The headers are identical for every page, so parse them once.
    headers = utils.str_to_dict(headers)

    while True:
        url = url_template.format(offset=offset)
        # verify=False disables TLS certificate verification.
        response = requests.get(url, headers=headers, verify=False)
        result = response.json()

        if result.get("ret") != 0:
            # Error response: log through the module logger (the success
            # path already uses it) and stop the program.
            logger.error("无法正确获得内容")
            # ``exit()`` is a ``site`` helper; raise SystemExit directly.
            raise SystemExit

        msg_list = result.get("general_msg_list")
        logger.info("抓取数据:offset=%s, data=%s", offset, msg_list)
        # Persist this page of articles.
        self.save(msg_list)

        if result.get("can_msg_continue") != 1:
            return

        # Move to the next page after a short pause.
        offset = result.get("next_offset")
        time.sleep(2)
def update_post(post):
    """Refresh a post's read/like/reward counters via WeChat's getappmsgext endpoint.

    The original note describes ``post`` as one record read from MongoDB that
    is updated and saved here. The query string is embedded directly in
    ``data_url``; the form body is a captured request body overridden with
    the parameters found in ``post.content_url``.

    :param post: object exposing ``content_url`` and ``title`` attributes,
        item assignment and ``save()``.
    :return: None on success.
    :raises SystemExit: when the response carries no ``appmsgstat`` entry.
    """
    # The article URL must be loaded later to refresh appmsg_token and
    # pass_ticket (original author's note).
    # NOTE(review): "cientversion" below looks like a typo for
    # "clientversion"; left untouched because it is part of the request.
    data_url = (
        "https://mp.weixin.qq.com/mp/getappmsgext?"
        "f=json&uin=777&"
        "key=777&"
        "pass_ticket=mxvGMDk3GtQtB%25252Fz7%25252FamxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf%25252BxoQE91&"
        "wxtoken=777&"
        "devicetype=android-19&"
        "cientversion=26051732&"
        "appmsg_token=954_MOJLemLCd3Gz0Aal3PPp0Qmza2kN9ibwtA0IyosHLQlOgxiRlgtrWkHFwC4lgSeOA0FmtshLamC-6ysv&x5=1&"
        "f=json"
    )

    # Un-escape HTML entities, take the query part of the article URL and
    # convert it to a dict.
    article_url = html.unescape(post.content_url)
    article_query = urlsplit(article_url).query
    article_params = utils.str_to_dict(article_query, "&", "=")

    body = 'r=0.9146884263687687&__biz=MzUzNTcwNDkxNA%3D%3D&appmsg_type=9&mid=2247484203&sn=a0dbce888297a156a4e9c0542094e286&idx=1&scene=38&title=%25E5%25A4%259A%25E5%25B0%2591%25E4%25BA%25BA%25E5%259B%25A0%25E4%25B8%25BA%25E7%2594%25B5%25E5%25BD%25B1%25E5%258E%25BB%25E4%25BA%2586%25E8%25A5%25BF%25E8%2597%258F&ct=1524824855&abtest_cookie=BAABAAoACwAMAA0ABwA8ix4Ad4seAPKMHgBkjR4Af40eACaOHgAxjh4AAAA%3D&devicetype=android-19&version=%2Fmmbizwap%2Fzh_CN%2Fhtmledition%2Fjs%2Fappmsg%2Findex3d6703.js&is_need_ticket=0&is_need_ad=1&comment_id=0&is_need_reward=0&both_ad=0&reward_uin_count=0&send_time=&msg_daily_idx=1&is_original=0&is_only_read=1&req_id=0100A6NrRHtU5GqTevtLXpPn&pass_ticket=mxvGMDk3GtQtB%25252Fz7%25252FamxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf%25252BxoQE91&is_temp_url=0&item_show_type=undefined'
    data = utils.str_to_dict(body, "&", "=")
    # Parameters from content_url override the captured body parameters.
    data.update(article_params)

    # Header values captured with Fiddler; refresh them when they expire.
    # NOTE(review): one header per line; assumes utils.headers_to_dict splits
    # on newlines and on the first ':' -- confirm against utils.
    headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
Content-Length: 796
Origin: https://mp.weixin.qq.com
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 4.4.2; GT-I9508 Build/KOT49H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044028 Mobile Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Accept: */*
Referer: https://mp.weixin.qq.com/s?__biz=MzUzNTcwNDkxNA==&mid=2247484203&idx=1&sn=a0dbce888297a156a4e9c0542094e286&chksm=fa802716cdf7ae001b3e2b539fc790b997bee12233d1f5a6cb5add4b6a9df979fd586d6b7191&scene=38&ascene=0&devicetype=android-19&version=26051732&nettype=WIFI&abtest_cookie=BAABAAoACwAMAA0ABwA8ix4Ad4seAPKMHgBkjR4Af40eACaOHgAxjh4AAAA%3D&lang=zh_CN&pass_ticket=mxvGMDk3GtQtB%2Fz7%2FamxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf%2BxoQE91&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: rewardsn=; wxtokenkey=777; wxuin=2077110005; devicetype=android-19; version=26051732; lang=zh_CN; pass_ticket=mxvGMDk3GtQtB/z7/amxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf+xoQE91; wap_sid2=CPXduN4HElxKRHdkalRfS3hya3FLamVMa0FzMFNCeFR5eV95d0ZQZzdZQUw4TjFYMHY4RVFiQWtaRVN5TlAwTEEtTkNWd3NURnJ1V2VCUTRXemdSMjdXZzN5Z3E2Ym9EQUFBfjDhgZ3XBTgNQAE=
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.5.23&TBSVC=43602&CO=BK&COVC=044028&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= GT-I9508 &RL=1080*1920&OS=4.4.2&API=19
Q-GUID: dfbda8a63deb21e36f9d6f1113b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b
"""
    headers = utils.headers_to_dict(headers)

    # The query string is already part of ``data_url``. The previous call
    # also passed ``params=data_url_params``, a name that is never defined
    # in this function, so it raised NameError before any request was sent.
    # verify=False disables TLS certificate verification.
    r = requests.post(data_url, data=data, verify=False, headers=headers)

    result = r.json()
    stats = result.get("appmsgstat")
    if not stats:
        logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,返回的数据为:data=%s", r.text)
        # ``exit()`` is a ``site`` helper; raise SystemExit directly.
        raise SystemExit

    post['read_num'] = stats.get("read_num")
    post['like_num'] = stats.get("like_num")
    # Only present when the article has received rewards.
    post['reward_num'] = result.get("reward_total_count")
    post['u_date'] = datetime.now()
    logger.info("「%s」read_num: %s like_num: %s reward_num: %s",
                post.title, post['read_num'], post['like_num'], post['reward_num'])
    post.save()
if __name__ == '__main__':
    # Script entry point: builds a crawler plus a sample history URL and
    # header set. The actual crawl call is currently disabled (see below).
    crawler = WeiXinCrawler()
    offset = 10

    # Replace appmsg_token and pass_ticket with your own values.
    url = (
        "https://mp.weixin.qq.com/mp/profile_ext?"
        "action=getmsg&"
        "__biz=MjM5MjAxNDM4MA==&"
        "f=json&"
        "offset={offset}&"
        "count=10&"
        "is_ok=1&"
        "scene=126&"
        "uin=777&"
        "key=777&"
        "pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl&"
        "wxtoken=&"
        "appmsg_token=1001_2i83IvTNI8q300WZIViVq4aBpcsB_dQcmtYBVw~~&"
        "x5=0&"
        "f=json"
    )

    # NOTE(review): one header per line; assumes utils.str_to_dict's defaults
    # split on newlines and on the first ':' -- confirm against utils.
    raw_headers = """
Host: mp.weixin.qq.com
Accept-Encoding: br, gzip, deflate
Cookie: devicetype=iOS12.1.4; lang=zh_CN; pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl; version=17000329; wap_sid2=CITbl7QCElxDcVVSblZ5Y3NnVkwxblE4NUZsNGNOVm9Dd05YVnprZWQtdFVSYVQxYk9Edmx0bEVBZWNuWXhyVmRXVjctczZfX1BCdVphZzNBZV9YRmJPNTYwTUF2ZWtEQUFBfjCDqNLkBTgNQJVO; wxuin=646311300; wxtokenkey=777; rewardsn=; pgv_pvid=8969838003; pgv_pvi=60331008; _scan_has_moon=1; ts_uid=3719156268; tvfe_boss_uuid=5682fb061e2059e3; sd_cookie_crttime=1545398450253; sd_userid=35071545398450253
Connection: keep-alive
Accept: */*
User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/16D57 MicroMessenger/7.0.3(0x17000321) NetType/WIFI Language/zh_CN
Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MjAxNDM4MA==&scene=126&bizpsid=1553240795&sessionid=1553240795&subscene=0&devicetype=iOS12.1.4&version=17000329&lang=zh_CN&nettype=WIFI&a8scene=0&fontScale=100&pass_ticket=NqOMSpSgkqt7TbKCa5PTGIIpnzJAF62XnhxKoXaomMNOZXeRivMUoIIh4SEk2vEl&wx_header=1
Accept-Language: zh-cn
X-Requested-With: XMLHttpRequest"""
    headers = utils.str_to_dict(raw_headers)
    # Disabled in the original: crawler.crawl(url, headers, 10)
def receive_and_process_new_question(self):
    """Receive one message from the client and make it the current question.

    The received payload is parsed with ``utils.str_to_dict`` and stored in
    ``self.current_question``; its ``"question"`` entry is handed to
    ``self.set_question``. Finally an empty string is inserted into the
    answer widget at the ``INSERT`` position.
    """
    payload = self.client.recv()
    question = utils.str_to_dict(payload)
    self.current_question = question
    self.set_question(self.current_question["question"])
    answer_box = self.ui.txt_answer
    answer_box.insert(INSERT, "")
def update_post(post):
    """Refresh a post's read/like/reward counters via WeChat's getappmsgext endpoint.

    The original note describes ``post`` as one record read from MongoDB that
    is updated and saved here.

    :param post: object exposing ``content_url`` and ``title`` attributes,
        item assignment and ``save()``.
    :return: None on success.
    :raises SystemExit: when the response carries no ``appmsgstat`` entry.
    """
    # Query parameters copied from a URL captured in Fiddler and converted
    # to a dict; they are passed to requests.post below.
    # Remember to use a fresh appmsg_token.
    query_params = {
        '__biz': 'MzA3NTY3NjUzMg==',
        'appmsg_type': '9',
        'mid': '2652567742',
        'sn': 'c1e80723b9982b85e825e1070dff2cf3',
        'idx': '1',
        'scene': '38',
        'title': '%E9%80%89%E6%88%91%E6%B2%A1%E9%94%99%EF%BC%81%E5%B7%9D%E5%A4%A7%E9%94%A6%E6%B1%9F%E6%8B%8D%E4%BA%86%E6%8B%8D%E4%BD%A0',
        'ct': '1595660850',
        'abtest_cookie': '',
        'devicetype': 'Windows 10 x64',
        'version': '62090529',
        'f': 'json',
        'r': '0.815280901005569',
        'is_need_ad': '0',
        'comment_id': '1443991147038441474',
        'is_need_reward': '1',
        'both_ad': '0',
        'reward_uin_count': '24',
        'msg_daily_idx': '1',
        'is_original': '0',
        'uin': 'MzAzMDI4NDA4OQ==',
        'key': '1682b2315ca9ee496ced1069afbde4e355182c3cc85404049b57d7403fc7434524ed3579f779a4426cfbcd977a1047432db2e171f317e5c2f9ddea95c1f462e8f24e5590746562ab482cd133b73a7682',
        'pass_ticket': 'ZaU5UjhVDMVvsOzkK4i3B/jZubj7dI36zAoLaLuE52eYyRmdeP68+Ffs6uiXclfr',
        'wxtoken': '777',
        'clientversion': '62090529',
        'appmsg_token': '1072_WbRi1PaTNQtHiysKbI2hg1LUqGTnT1OboYeqO0MRHgalIVUArGz-Q8bsEnuoNL3swxRUvL-HSZUgMxFT',
        'x5': '0',
    }

    # Un-escape HTML entities, take the query part of the article URL,
    # convert it to a dict and let it override the captured parameters.
    article_url = html.unescape(post.content_url)
    article_query = urlsplit(article_url).query
    query_params.update(utils.str_to_dict(article_query, "&", "="))

    body = "r=0.815280901005569&__biz=MzA3NTY3NjUzMg%3D%3D&appmsg_type=9&mid=2652567742&sn=c1e80723b9982b85e825e1070dff2cf3&idx=1&scene=38&title=%25E9%2580%2589%25E6%2588%2591%25E6%25B2%25A1%25E9%2594%2599%25EF%25BC%2581%25E5%25B7%259D%25E5%25A4%25A7%25E9%2594%25A6%25E6%25B1%259F%25E6%258B%258D%25E4%25BA%2586%25E6%258B%258D%25E4%25BD%25A0&ct=1595660850&abtest_cookie=&devicetype=Windows%2010%20x64&version=62090529&is_need_ticket=0&is_need_ad=0&comment_id=1443991147038441474&is_need_reward=0&both_ad=0&reward_uin_count=0&send_time=&msg_daily_idx=1&is_original=0&is_only_read=1&req_id=0422J3pAP3U6ZfMyvrA9qY2z&pass_ticket=ZaU5UjhVDMVvsOzkK4i3B%2FjZubj7dI36zAoLaLuE52eYyRmdeP68%2BFfs6uiXclfr&is_temp_url=0&item_show_type=0&tmp_version=1&more_read_type=0&appmsg_like_type=2&related_video_sn=&related_video_num=4&vid=&is_pay_subscribe=0&pay_subscribe_uin_count=0&has_red_packet_cover=0&album_id=1296223588617486300&album_video_num=5"
    data = utils.str_to_dict(body, "&", "=")

    # Header values captured with Fiddler; refresh them when they expire.
    # NOTE(review): one header per line; assumes utils.str_to_dict's defaults
    # split on newlines and on the first ':' -- confirm against utils.
    headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
Content-Length: 919
Origin: https://mp.weixin.qq.com
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.5 WindowsWechat
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Accept: */*
Referer: https://mp.weixin.qq.com/s?__biz=MzA3NTY3NjUzMg==&mid=2652567742&idx=1&sn=c1e80723b9982b85e825e1070dff2cf3&chksm=8482b6a5b3f53fb3970b3b3701415037bf633b06da98d2dc4c63d476e9a5ce7bd8762d5b27a0&scene=38&key=1682b2315ca9ee496ced1069afbde4e355182c3cc85404049b57d7403fc7434524ed3579f779a4426cfbcd977a1047432db2e171f317e5c2f9ddea95c1f462e8f24e5590746562ab482cd133b73a7682&ascene=7&uin=MzAzMDI4NDA4OQ%3D%3D&devicetype=Windows+10+x64&version=62090529&lang=zh_CN&exportkey=A%2Fuw0CgT2zB13kyRA3OogXM%3D&pass_ticket=ZaU5UjhVDMVvsOzkK4i3B%2FjZubj7dI36zAoLaLuE52eYyRmdeP68%2BFfs6uiXclfr&winzoom=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4
Cookie: rewardsn=; wxtokenkey=777; wxuin=3030284089; devicetype=Windows10x64; version=62090529; lang=zh_CN; pass_ticket=ZaU5UjhVDMVvsOzkK4i3B/jZubj7dI36zAoLaLuE52eYyRmdeP68+Ffs6uiXclfr; wap_sid2=CLnu+aQLElxUNi05blptTWUxVDN3dnI0blRoUXJVcWhOSmpIMWJWWW55SThUcGpHREtQMjJNRWtZMDRtOEhxMUJyNGZfWDZXbUd5MWtReGxXWkd0NFZDNTZMOFFEekFFQUFBfjD16aX5BTgNQAE=
"""
    headers = utils.str_to_dict(headers)

    data_url = "https://mp.weixin.qq.com/mp/getappmsgext"
    # verify=False disables TLS certificate verification.
    r = requests.post(data_url, data=data, verify=False,
                      params=query_params, headers=headers)

    result = r.json()
    stats = result.get("appmsgstat")
    if not stats:
        # A reply such as {"base_resp":{"ret":301,"errmsg":"default"}} is
        # WeChat's anti-scraping response (original author's note).
        logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,返回的数据为:data=%s", r.text)
        # ``exit()`` is a ``site`` helper; raise SystemExit directly.
        raise SystemExit

    post['read_num'] = stats.get("read_num")
    post['like_num'] = stats.get("like_num")
    post['reward_num'] = result.get("reward_total_count")
    post['u_date'] = datetime.now()
    logger.info("「%s」read_num: %s like_num: %s reward_num: %s",
                post.title, post['read_num'], post['like_num'], post['reward_num'])
    post.save()
def update(self, post):
    """Refresh a post's read/like/reward counters via WeChat's getappmsgext endpoint.

    A captured request is replayed, with the query parameters overridden by
    those found in ``post.content_url``.

    :param post: object exposing ``content_url`` and ``title`` attributes,
        item assignment and ``save()``.
    :return: None. On success the post is updated and saved; otherwise a
        warning containing the raw response text is logged.
    """
    from urllib.parse import urlsplit
    import html

    # Captured query-string parameters of the getappmsgext request.
    url_params = {
        '__biz': 'MjM5MzgyODQxMQ==',
        'appmsg_type': '9',
        'mid': '2650367680',
        'sn': '2e8ef8bcf4dc176c46376508cb5a8fa7',
        'idx': '1',
        'scene': '21',
        'title': '%E5%85%B3%E4%BA%8E%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F%E7%9A%845%E4%B8%AA%E5%B0%8F%E8%B4%B4%E5%A3%AB',
        'ct': '1513900976',
        'abtest_cookie': 'AwABAAoADAANAAcAJIgeALuIHgDhiB4A/IgeAPqJHgAZih4ATYoeAAAA',
        'devicetype': 'android-24',
        'version': '/mmbizwap/zh_CN/htmledition/js/appmsg/index3a9713.js',
        'f': 'json',
        'r': '0.7675446466698528',
        'is_need_ad': '1',
        'comment_id': '3799137919',
        'is_need_reward': '1',
        'both_ad': '0',
        'reward_uin_count': '24',
        'msg_daily_idx': '1',
        'is_original': '0',
        'uin': '777',
        'key': '777',
        'pass_ticket': 'J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO',
        'wxtoken': '204390160',
        'clientversion': '26060030',
        'appmsg_token': '937_D8gMA6eZWUYVZo6QUXO6keTPdtbgwSEexQWAhnI8XvC1V1BMh3m05cmSURoPtkr5ppr0iDTw7bWgBkMr',
        'x5': '1',
    }

    # Parameters from the article's own URL override the captured ones.
    article_query = urlsplit(html.unescape(post.content_url)).query
    url_params.update(utils.str_to_dict2(article_query, "&", "="))

    body = "is_only_read=1&req_id=2900i1sqRlQwikp0KEVJieW4&pass_ticket=J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO&is_temp_url=0"
    data = utils.str_to_dict2(body, "&", "=")

    # NOTE(review): one header per line; assumes utils.str_to_dict's defaults
    # split on newlines and on the first ':' -- confirm against utils.
    headers = """
Host: mp.weixin.qq.com
Accept-Encoding: br, gzip, deflate
Cookie: devicetype=iOS11.2.1; lang=zh_CN; pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs; version=16060124; wap_sid2=CLzPvfkEEnBnRTdYc3Uxa1B4NzcxdUR3T3pyNEUwYVBRb09SQlk0MWk1bTVkdVdkUlpSdTh4QUExdWNVYkVxLTN0NnVVWVZhYk5VRXJCT1hpSmI1N0FCaDNmVURBclVSSlhOcDZXZDNpTXJPZkQ0NVd1S3FBd0FBMJ3S1tIFOAxAlE4=; wxuin=1328506812; wxtokenkey=f9b3cef513031f0f50b8c9d88862ea0a8e44ae03f2dabb3e1e7d6b6f68c2233c; bk_token=fc1a7e18-e120-4f0a-a419-e1650f99b989; is_login=wx; is_wechat=1; platform=qq; pgv_pvid=6280301858; _scan_has_moon=1; eas_sid=Q1g5I1q201w0y4d0o2W5z3N4j4; _ga=GA1.2.557769603.1510924852; pac_uid=0_64f45f141b0a7; sd_cookie_crttime=1508507376988; sd_userid=58871508507376987
Connection: keep-alive
Accept: */*
User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C153 MicroMessenger/6.6.1 NetType/WIFI Language/zh_CN
Referer: https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MTY3OTYyMQ==&scene=124&devicetype=iOS11.2.1&version=16060124&lang=zh_CN&nettype=WIFI&a8scene=3&fontScale=100&pass_ticket=wj2j8pp2XnstT6wupUoiIxRoY2JjT3FOCEPXdBYhs7CPwSzfTIogmELiy3YZ4mRs&wx_header=1
Accept-Language: zh-cn
X-Requested-With: XMLHttpRequest
"""
    headers = utils.str_to_dict(headers)

    url = "https://mp.weixin.qq.com/mp/getappmsgext"
    # verify=False disables TLS certificate verification.
    r = requests.post(url, data=data, verify=False, params=url_params, headers=headers)

    result = r.json()
    stats = result.get("appmsgstat")
    if not stats:
        logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,data=%s", r.text)
        return

    post['read_num'] = stats.get("read_num")
    post['like_num'] = stats.get("like_num")
    post['reward_num'] = result.get("reward_total_count")
    post['u_date'] = datetime.now()
    logger.info("「%s」read_num: %s like_num: %s reward_num: %s",
                post.title, post['read_num'], post['like_num'], post['reward_num'])
    post.save()
def update_post(post):
    """Refresh a post's read/like/reward counters via WeChat's getappmsgext endpoint.

    Relies on the names ``appmsg_token`` and ``cookie`` being defined outside
    this function.

    :param post: per the original note, one record read from MongoDB; it must
        expose ``content_url`` and ``title`` attributes, item assignment and
        ``save()``.
    :return: None on success.
    :raises SystemExit: when the response carries no ``appmsgstat`` entry.
    """
    # The literal used to list 'devicetype' and 'f' twice with identical
    # values; the duplicates are dropped (the resulting dict is unchanged).
    data_url_params = {
        '__biz': 'MjM5MzgyODQxMQ==',
        'appmsg_type': '9',
        'mid': '2650367961',
        'sn': 'f519e5549538ac753ff8887707421df5',
        'idx': '1',
        'scene': '38',
        'title': post.title,
        'ct': '1519553936',
        'abtest_cookie': 'AgABAAoADAAKAJmKHgCmih4AzooeAOqKHgAoix4APoseAEmLHgCNix4AoIseAKeLHgAAAA==',
        'devicetype': 'android-22',
        'version': '/mmbizwap/zh_CN/htmledition/js/appmsg/index3baf4b.js',
        'f': 'json',
        'r': '0.5196830451803642',
        'is_need_ad': '1',
        'comment_id': '1290332442',
        'is_need_reward': '0',
        'both_ad': '0',
        'reward_uin_count': '0',
        'msg_daily_idx': '1',
        'is_original': '0',
        'uin': '777',
        'key': '777',
        'pass_ticket': 'zTKHNEdnkkZnPKGrHZa9HSJG%252BFfcj38wC8ciLpAWzMsVRX2crPclnU2gSLX6h1EX',
        'wxtoken': '1574516728',
        'clientversion': '26060339',
        'appmsg_token': appmsg_token,
        'x5': '1',
    }

    # Un-escape HTML entities and split the article URL's query string into
    # key/value pairs that override the captured parameters.
    content_url = html.unescape(post.content_url)
    content_params = {}
    for pair in urlsplit(content_url).query.split('&'):
        key, sep, value = pair.partition('=')
        if not sep:
            # An empty query string or a bare key has no '='; the previous
            # ``k, v = pair.split('=', 1)`` raised ValueError here.
            continue
        content_params[key] = value
    data_url_params.update(content_params)

    data_url = 'https://mp.weixin.qq.com/mp/getappmsgext'

    # NOTE(review): one header per line; assumes utils.str_to_dict's defaults
    # split on newlines and on the first ':' -- confirm against utils.
    headers = '''
Host: mp.weixin.qq.com
Connection: keep-alive
Content-Length: 143
Origin: https://mp.weixin.qq.com
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 5.1.1; SM801 Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/043909 Mobile Safari/537.36 MicroMessenger/6.6.3.1260(0x26060339) NetType/WIFI Language/zh_CN
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Accept: */*
Referer: https://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367961&idx=1&sn=f519e5549538ac753ff8887707421df5&chksm=be9cdc8d89eb559b396a5cb61a25844a1da42c8e65ef225683c91d0f94cc2de507099ca25ec4&scene=38&ascene=0&devicetype=android-22&version=26060339&nettype=WIFI&abtest_cookie=AgABAAoADAAKAJmKHgCmih4AzooeAOqKHgAoix4APoseAEmLHgCNix4AoIseAKeLHgAAAA%3D%3D&lang=zh_CN&pass_ticket=zTKHNEdnkkZnPKGrHZa9HSJG%2BFfcj38wC8ciLpAWzMsVRX2crPclnU2gSLX6h1EX&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: {cookie}
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.6.3&TBSVC=43603&CO=BK&COVC=043909&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= SM801 &RL=1080*1920&OS=5.1.1&API=22
Q-GUID: 1cd3b3a6ff71cc42acdc78d213b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b
'''.format(cookie=cookie)
    headers = utils.str_to_dict(headers)

    # Original note: these values are hidden in the returned page text.
    # NOTE(review): 'req_id' followed by a key with a None value looks like
    # a req_id that was split by accident -- confirm before relying on it.
    body = {
        'is_only_read': '1',
        'req_id': '0414',
        'NBNjylwrVHDydtl3ufse': None,
        'pass_ticket': 'zpU4AwNXTGS5LfBXFx4NCyMo5YTpSQo9RarrPG3tjhmMaGfORzykNNviX7IlM4i0',
        'is_temp_url': 0,
    }

    # NOTE(review): the sibling updaters send the article parameters as
    # ``params`` and the small body as ``data``; here they are the other way
    # round. Kept as-is because the server-side expectation is not visible.
    # verify=False disables TLS certificate verification.
    r = requests.post(data_url, data=data_url_params, params=body,
                      headers=headers, verify=False)

    result = r.json()
    stats = result.get('appmsgstat')
    if not stats:
        logger.error(u"没有获取的真实数据,请检查请求参数是否正确,返回的数据为:data=%s", r.text)
        # ``exit()`` is a ``site`` helper; raise SystemExit directly.
        raise SystemExit

    print(stats)
    post['read_num'] = stats.get("read_num")
    post['like_num'] = stats.get("like_num")
    post['reward_num'] = result.get("reward_total_count")
    post['u_date'] = datetime.now()
    logger.info("「%s」read_num: %s like_num: %s reward_num: %s",
                post.title, post['read_num'], post['like_num'], post['reward_num'])
    post.save()
def __init__(self):
    """Populate ``self.host_info`` from the system's ``/etc/*-release`` files.

    The files are concatenated with ``cat`` through ``pexpect.run``; the
    decoded output is split into records on ``'\\r\\n'`` and into key/value
    pairs on ``'='`` by ``utils.str_to_dict``.
    """
    release_bytes = pexpect.run("/bin/sh -c 'cat /etc/*-release'")
    release_text = release_bytes.decode()
    self.host_info = utils.str_to_dict(release_text, '\r\n', '=')
def update(self, post):
    """Refresh a post's read/like/reward counters via WeChat's getappmsgext endpoint.

    A captured request is replayed, with the query parameters overridden by
    those found in ``post.content_url``.

    :param post: object exposing ``content_url`` and ``title`` attributes,
        item assignment and ``save()``.
    :return: None. On success the post is updated and saved; otherwise a
        warning containing the raw response text is logged.
    """
    from urllib.parse import urlsplit
    import html

    # Captured query-string parameters of the getappmsgext request.
    url_params = {
        '__biz': 'MjM5MzgyODQxMQ==',
        'appmsg_type': '9',
        'mid': '2650367680',
        'sn': '2e8ef8bcf4dc176c46376508cb5a8fa7',
        'idx': '1',
        'scene': '21',
        'title': '%E5%85%B3%E4%BA%8E%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F%E7%9A%845%E4%B8%AA%E5%B0%8F%E8%B4%B4%E5%A3%AB',
        'ct': '1513900976',
        'abtest_cookie': 'AwABAAoADAANAAcAJIgeALuIHgDhiB4A/IgeAPqJHgAZih4ATYoeAAAA',
        'devicetype': 'android-24',
        'version': '/mmbizwap/zh_CN/htmledition/js/appmsg/index3a9713.js',
        'f': 'json',
        'r': '0.7675446466698528',
        'is_need_ad': '1',
        'comment_id': '3799137919',
        'is_need_reward': '1',
        'both_ad': '0',
        'reward_uin_count': '24',
        'msg_daily_idx': '1',
        'is_original': '0',
        'uin': '777',
        'key': '777',
        'pass_ticket': 'J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO',
        'wxtoken': '204390160',
        'clientversion': '26060030',
        'appmsg_token': '937_D8gMA6eZWUYVZo6QUXO6keTPdtbgwSEexQWAhnI8XvC1V1BMh3m05cmSURoPtkr5ppr0iDTw7bWgBkMr',
        'x5': '1',
    }

    # Parameters from the article's own URL override the captured ones.
    article_query = urlsplit(html.unescape(post.content_url)).query
    url_params.update(utils.str_to_dict(article_query, "&", "="))

    body = "is_only_read=1&req_id=2900i1sqRlQwikp0KEVJieW4&pass_ticket=J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO&is_temp_url=0"
    data = utils.str_to_dict(body, "&", "=")

    # NOTE(review): one header per line; assumes utils.str_to_dict's defaults
    # split on newlines and on the first ':' -- confirm against utils.
    headers = """
Host: mp.weixin.qq.com
Connection: keep-alive
Content-Length: 137
Origin: https://mp.weixin.qq.com
X-Requested-With: XMLHttpRequest
User-Agent: Mozilla/5.0 (Linux; Android 7.0; M1 E Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.6.1200(0x26060030) NetType/WIFI Language/zh_CN
Content-Type: application/x-www-form-urlencoded; charset=UTF-8
Accept: */*
Referer: https://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367680&idx=1&sn=2e8ef8bcf4dc176c46376508cb5a8fa7&chksm=be9cdd9489eb54822dc5993ff71050ca9011aff07fdf642b3eccdee7e20dc2efad9f21fb1a63&scene=21&ascene=7&devicetype=android-24&version=26060030&nettype=WIFI&abtest_cookie=AwABAAoADAANAAcAJIgeALuIHgDhiB4A%2FIgeAPqJHgAZih4ATYoeAAAA&lang=zh_CN&pass_ticket=J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO&wx_header=1
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,en-US;q=0.8
Cookie: pgv_info=ssid=s2190841734; pgv_pvid=9172712625; rewardsn=a520f9c4f8c2c14d9ab0; wxtokenkey=902a8ac15e846d9a567021f9652cec8ddd60662aee0c86db3cb47e638f2a4bf5; wxuin=525477518; devicetype=android-24; version=26060030; lang=zh_CN; pass_ticket=J1PFXucN0v4vmF19Pkngffyo4CvzTAkiJNdFJN9uQNIMVLMBFeSl6P8zbfwBJ4sO; wap_sid2=CI7NyPoBElw4bFowVktRcDNGZFBkSzRxeDNRS1BZclFQYTZXa1hWNWg4THVFN21tVnVJQ1YtZjk5Qml2RjkxTThqcFZJZUFCenZ2cnpiUXhiN2dXcVI4X1pWYUJCS2tEQUFBfjCHspTSBTgNQAE=
Q-UA2: QV=3&PL=ADR&PR=WX&PP=com.tencent.mm&PPVN=6.6.0&TBSVC=43602&CO=BK&COVC=043632&PB=GE&VE=GA&DE=PHONE&CHID=0&LCID=9422&MO= M1E &RL=1080*1920&OS=7.0&API=24
Q-GUID: 0fd685fa8c515a30dd9f7caf13b788cb
Q-Auth: 31045b957cf33acf31e40be2f3e71c5217597676a9729f1b
"""
    headers = utils.str_to_dict(headers)

    url = "https://mp.weixin.qq.com/mp/getappmsgext"
    # verify=False disables TLS certificate verification.
    r = requests.post(url, data=data, verify=False, params=url_params, headers=headers)

    result = r.json()
    stats = result.get("appmsgstat")
    if not stats:
        logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,data=%s", r.text)
        return

    post['read_num'] = stats.get("read_num")
    post['like_num'] = stats.get("like_num")
    post['reward_num'] = result.get("reward_total_count")
    post['u_date'] = datetime.now()
    logger.info("「%s」read_num: %s like_num: %s reward_num: %s",
                post.title, post['read_num'], post['like_num'], post['reward_num'])
    post.save()
def update_post(post):
    """Refresh a post's read/like/reward counters via WeChat's getappmsgext endpoint.

    The original note describes ``post`` as one record read from MongoDB that
    is updated and saved here. The query string is embedded directly in
    ``data_url``; the form body is a captured request body overridden with
    the parameters found in ``post.content_url``.

    :param post: object exposing ``content_url`` and ``title`` attributes,
        item assignment and ``save()``.
    :return: None on success.
    :raises SystemExit: when the response carries no ``appmsgstat`` entry.
    """
    # The article URL must be loaded later to refresh appmsg_token and
    # pass_ticket (original author's note).
    # NOTE(review): "cientversion" below looks like a typo for
    # "clientversion"; left untouched because it is part of the request.
    data_url = (
        "https://mp.weixin.qq.com/mp/getappmsgext?"
        "f=json&uin=777&"
        "key=777&"
        "pass_ticket=mxvGMDk3GtQtB%25252Fz7%25252FamxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf%25252BxoQE91&"
        "wxtoken=777&"
        "devicetype=android-19&"
        "cientversion=26051732&"
        "appmsg_token=954_MOJLemLCd3Gz0Aal3PPp0Qmza2kN9ibwtA0IyosHLQlOgxiRlgtrWkHFwC4lgSeOA0FmtshLamC-6ysv&x5=1&"
        "f=json"
    )

    # Un-escape HTML entities, take the query part of the article URL and
    # convert it to a dict.
    article_url = html.unescape(post.content_url)
    article_query = urlsplit(article_url).query
    article_params = utils.str_to_dict(article_query, "&", "=")

    body = 'r=0.9146884263687687&__biz=MzUzNTcwNDkxNA%3D%3D&appmsg_type=9&mid=2247484203&sn=a0dbce888297a156a4e9c0542094e286&idx=1&scene=38&title=%25E5%25A4%259A%25E5%25B0%2591%25E4%25BA%25BA%25E5%259B%25A0%25E4%25B8%25BA%25E7%2594%25B5%25E5%25BD%25B1%25E5%258E%25BB%25E4%25BA%2586%25E8%25A5%25BF%25E8%2597%258F&ct=1524824855&abtest_cookie=BAABAAoACwAMAA0ABwA8ix4Ad4seAPKMHgBkjR4Af40eACaOHgAxjh4AAAA%3D&devicetype=android-19&version=%2Fmmbizwap%2Fzh_CN%2Fhtmledition%2Fjs%2Fappmsg%2Findex3d6703.js&is_need_ticket=0&is_need_ad=1&comment_id=0&is_need_reward=0&both_ad=0&reward_uin_count=0&send_time=&msg_daily_idx=1&is_original=0&is_only_read=1&req_id=0100A6NrRHtU5GqTevtLXpPn&pass_ticket=mxvGMDk3GtQtB%25252Fz7%25252FamxY8wfvoTGUfjVEBxYjf4M2oAEyk15qbbpyu6tf%25252BxoQE91&is_temp_url=0&item_show_type=undefined'
    data = utils.str_to_dict(body, "&", "=")
    # Parameters from content_url override the captured body parameters.
    data.update(article_params)

    # Header values captured with Fiddler; refresh them when they expire.
    headers = {
        'Host': 'mp.weixin.qq.com',
        'Accept-Encoding': 'br, gzip, deflate',
        'Cookie': 'devicetype=iOS11.4.1; lang=en; pass_ticket=aGWzzp8+zyir2DKPLDnrceAi21LIqICuCOJi4d46Qnc3H4YWQtybMQQwha0k6Vv5; version=16070227; wap_sid2=CI/I96QLElxaSEFaaVcwTVc5N3I0d1Uwa2k1d19ibWlCX3pvV1pRWFVQa3I3WXl4SWxsbzJxeDBLTGR0VlRRS01sRkdjbklKUHh2VzRjemgwb3poallYdy1fU3pfY3dEQUFBfjDE5KjcBTgNQJVO; wxuin=3030246415; wxtokenkey=777; rewardsn=; pgv_pvid=7650693240',
        'Connection': 'keep-alive',
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15G77 MicroMessenger/6.7.2 NetType/WIFI Language/en',
        # The entry used to be keyed 'Referer https' with a value starting
        # at '//mp.weixin...': the header line had been split on the wrong
        # colon, so no valid Referer header was sent.
        'Referer': 'https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5MjA5MTQyMA==&scene=126&devicetype=iOS11.4.1&version=16070227&lang=en&nettype=WIFI&a8scene=0&fontScale=94&pass_ticket=aGWzzp8%2Bzyir2DKPLDnrceAi21LIqICuCOJi4d46Qnc3H4YWQtybMQQwha0k6Vv5&wx_header=1',
        'Accept-Language': 'en-us',
        'X-Requested-With': 'XMLHttpRequest',
    }

    # The query string is already part of ``data_url``. The previous call
    # also passed ``params=data_url_params``, a name that is never defined
    # in this function, so it raised NameError before any request was sent.
    # verify=False disables TLS certificate verification.
    r = requests.post(data_url, data=data, verify=False, headers=headers)

    result = r.json()
    stats = result.get("appmsgstat")
    if not stats:
        logger.warning(u"没有获取的真实数据,请检查请求参数是否正确,返回的数据为:data=%s", r.text)
        # ``exit()`` is a ``site`` helper; raise SystemExit directly.
        raise SystemExit

    post['read_num'] = stats.get("read_num")
    post['like_num'] = stats.get("like_num")
    # Only present when the article has received rewards.
    post['reward_num'] = result.get("reward_total_count")
    post['u_date'] = datetime.now()
    logger.info("「%s」read_num: %s like_num: %s reward_num: %s",
                post.title, post['read_num'], post['like_num'], post['reward_num'])
    post.save()
def detail(self, article_url):
    """Request the ``getappmsgext`` endpoint for an article and print the JSON reply.

    The query parameters sent are a set of hard-coded values merged with the
    query-string parameters of ``article_url`` (the article's own parameters
    win on conflict).

    :param article_url: article link; when empty, a built-in sample article
        URL is used instead.
    :return: None (the decoded JSON response is printed).
    """
    # Previously the argument was unconditionally overwritten by this sample
    # link; it is now only a fallback so callers can query their own article.
    if not article_url:
        article_url = "http://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367644&idx=1&sn=9951edf4e9bfebcdaa7dd66a639befea&chksm=be9cddc889eb54de36a00865dcd15f9cf906d1868430dd62f1fe550b55e713b333344811e717&scene=27#wechat_redirect"

    data_url = "https://mp.weixin.qq.com/mp/getappmsgext"
    # NOTE(review): pass_ticket / appmsg_token are presumably captured from a
    # live session and will expire -- confirm how they should be refreshed.
    data_param = {
        'comment_id': '4200886237',
        'is_need_reward': '1',
        'reward_uin_count': '27',
        'msg_daily_idx': '1',
        'is_original': '0',
        'uin': '777',
        'key': '777',
        'pass_ticket': 'bh2aZVyo%25252ByUXTHI%25252B3G7VDrZTFJH7e41TRdHFcHjOjyqrCJe2rpXirBD4QSKU2maB',
        'wxtoken': '1082715157',
        'appmsg_token': '935_ale1QIchYa5yhL23YfFID3P9orUrZgludl8x0DM-Kzji1H3GmguOr5xpUVCpsKh1G5EmVd2msa4m2dRq',
    }

    # Parameters from the article URL's query string override the defaults.
    article_param = utils.str_to_dict(urlsplit(article_url).query, "&", "=")
    data_param.update(article_param)

    # The form body reuses whichever pass_ticket survived the merge above.
    body = "is_only_read=1&req_id=1412MureIWcNGlE3ILXVOGp2&" \
           "pass_ticket={pass_ticket}".format(pass_ticket=data_param.get("pass_ticket"))

    # One "Name: value" header per line.
    # NOTE(review): assumes utils.str_to_dict splits on newlines and strips
    # surrounding whitespace by default -- confirm against utils.
    headers = """
    Host: mp.weixin.qq.com
    Accept: */*
    X-Requested-With: XMLHttpRequest
    Accept-Language: zh-cn
    Accept-Encoding: gzip, deflate
    Content-Type: application/x-www-form-urlencoded; charset=UTF-8
    Origin: https://mp.weixin.qq.com
    User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 MicroMessenger/6.6.0 NetType/WIFI Language/zh_CN
    Connection: keep-alive
    Referer: https://mp.weixin.qq.com/s?__biz=MjM5MzgyODQxMQ==&mid=2650367634&idx=1&sn=ec23c954f7adad842d706c2ec687a35e&chksm=be9cddc689eb54d07cd27f260f8ce7bf48d5702dc59a7f9b32725a7fd02d8b4a34f58382e4f5&scene=27&ascene=7&devicetype=iOS10.3.3&version=16060021&nettype=WIFI&abtest_cookie=AwABAAoADAANAAoAJIgeAEyIHgBiiB4A2ogeAPyIHgAOiR4Ab4keAPCJHgD4iR4AB4oeAAAA&lang=zh_CN&fontScale=100&pass_ticket=bh2aZVyo%2ByUXTHI%2B3G7VDrZTFJH7e41TRdHFcHjOjyqrCJe2rpXirBD4QSKU2maB&wx_header=1
    Content-Length: 149
    Cookie: devicetype=iOS10.3.3; lang=zh_CN; pass_ticket=bh2aZVyo+yUXTHI+3G7VDrZTFJH7e41TRdHFcHjOjyqrCJe2rpXirBD4QSKU2maB; version=16060021; wap_sid2=CIDUopEDElxKMk5jQVZpM3lWd3RVQXVZYmV1QVA3TUdNVktTLVVrQTV4Q3dRWkI4Vkh5aXJKbDBsZlM1ZEFlWS1IWnpqWl8wQzhoeUtZR0xRQXh3ZHB1d3IwRDVMS2NEQUFBfjDihMnRBTgNQAE=; wxtokenkey=a733e2774b3089c814379cad23098030c1568e435e09146d5595c8a044f1770b; wxuin=841525760; ua_id=seRYVLVNcjYoZPzpAAAAACA8ySAXhkrd89FL3uvLbt8=; _scan_has_moon=1; pt2gguin=o0253421576; ptcz=3d9558280f480d9453cc13b78b32059793c778a1e8aa723ce7b2f5e9744f606b; pgv_pvid=7330882815; pgv_pvi=8857346048; sd_cookie_crttime=1510571099034; sd_userid=34241510571099034; pvid=6161617834; RK=7JMfU7Y+Gq
    """
    headers = utils.str_to_dict(headers)

    # verify=False disables TLS certificate verification.
    response = requests.post(data_url, headers=headers, data=body, params=data_param, verify=False)
    result = response.json()
    print(result)