def upload_image(self, image_name, image_data):
    """Upload an image to the Dayu platform and return its hosted URL.

    The dashboard page is fetched first because the upload endpoint needs
    three values that are read out of it: ``wmid``, ``nsImageUploadSign``
    and ``weMediaName``.

    :param image_name: file name sent with the upload.
    :param image_data: image content handed to the multipart ``files`` field.
    :return: the ``url`` reported in the upload response.
    :raises CustomException: if one of the three values cannot be found in
        the dashboard page, or the upload response code is not ``0``.
    """
    page = self.session.get('https://mp.dayu.com/dashboard/index').text

    def first_match(key):
        # Return None (not IndexError) when the key is absent, so the
        # caller can raise the intended CustomException.
        found = re.findall(r'"%s":"(.*?)",' % key, page, re.S)
        return found[0] if found else None

    wmid = first_match('wmid')
    if not wmid:
        raise CustomException('检查账号密码')
    # sign
    sign = first_match('nsImageUploadSign')
    if not sign:
        self.logger.error("Failed to get sign")
        raise CustomException('检查账号密码')
    user_name = first_match('weMediaName')
    if user_name is None:
        raise CustomException('检查账号密码')
    wmname = parse.urlencode({'wmname': user_name})
    url = 'https://ns.dayu.com/article/imageUpload?appid=website&fromMaterial=0&wmid={0}&{1}&sign={2}'.format(
        wmid, wmname, sign
    )
    file_meta = {
        'id': 'WU_FILE_0',
        'name': image_name,
        'type': 'image/jpeg',
        "lastModifiedDate": 'Tue Jun 12 2018 19:22:30 GMT+0800 (中国标准时间)',
        'size': '',
    }
    res = self.session.post(
        url,
        files={'upfile': (image_name, image_data, 'image/jpeg', file_meta)},
    ).json()
    if res['code'] == 0:
        return res['data']['imgInfo']['url']
    raise CustomException('上传图片失败, 检查账号密码')
def login(user, pswd, **kw):
    """Log in to Sohu passport and return the session cookies and nickname.

    The password is MD5-hashed before it is posted, and a picture captcha is
    downloaded to a local file and solved through ``verify_captcha``.

    :param user: account id; also used as ``userid`` in the captcha request.
    :param pswd: plain-text password.
    :param kw: extra keyword arguments; not used in this function.
    :return: ``(cookies, nick_name)`` when the ``lastdomain`` cookie is set.
        NOTE(review): when that cookie is absent the function falls off the
        end and returns ``None`` -- confirm callers handle this.
    :raises CustomException: wraps any exception raised inside the flow.
    """
    SoHu.logger.info("")
    session = requests.session()
    try:
        millis = int(round(time.time() * 1000))
        # The login form is sent the hex MD5 digest, not the raw password.
        hl = hashlib.md5()
        hl.update(pswd.encode("utf-8"))
        pswd = hl.hexdigest()
        url = "https://passport.sohu.com/user/tologin"
        session.get(url)
        baseurl = "https://passport.sohu.com/apiv2/login"

        def get_cpptcha(user):
            # Download a captcha picture for this user into a fixed file name.
            # NOTE(review): the fixed name would collide if two logins run in
            # the same working directory at once -- confirm this cannot happen.
            co_url = "https://passport.sohu.com/apiv2/picture_captcha?userid=" + user
            response = session.get(co_url, verify=False)
            with open('captcah1.jpeg', 'wb') as f:
                f.write(response.content)

        get_cpptcha(user)
        cid, result = verify_captcha('captcah1.jpeg', 1005)
        # Presumably "看不清" is the solver's answer for an unreadable picture;
        # one fresh captcha is fetched and solved again in that case.
        if result == "看不清":
            get_cpptcha(user)
            cid, result = verify_captcha('captcah1.jpeg', 1005)
        data = {
            "domain": "sohu.com",
            "callback": "passport200029113275484305756_cb" + str(millis),
            "appid": "9998",
            "userid": user,
            "password": pswd,
            "persistentcookie": "0",
            "captcha": result
        }
        # The captcha file is removed once its answer is in the form data.
        if os.path.exists("captcah1.jpeg"):
            os.remove("captcah1.jpeg")
        resp = session.post(
            baseurl,
            data=data,
            verify=False,
            headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"})
        pat = re.compile(r'"msg":"(.*?)"')
        msg = pat.findall(resp.text)
        # Raises when the first captured "msg" is empty. If nothing matched,
        # msg[0] raises IndexError, which the handler below wraps.
        if not msg[0]:
            raise CustomException('检查账号密码')
        session.get("https://passport.sohu.com/mypassport/index", verify=False)
        cookies = session.cookies.get_dict()
        cookies["date"] = str(int(time.time()))
        if 'lastdomain' in cookies:
            times = str(int(time.time() * 1000))
            res = session.get(
                "https://mp.sohu.com/mp-accounts/accounts/list?_=%s" % times,
                verify=False
            ).json()
            nick_name = res['list'][0]['accountslist'][0]['nickName']
            return cookies, nick_name
    except Exception as e:
        # Every failure, including the CustomException above, is re-wrapped.
        raise CustomException(e)
    finally:
        session.close()
def upload_image(self, image_name, image_data):
    """Upload an image through the Toutiao editor endpoint.

    :param image_name: file name sent with the upload.
    :param image_data: image content handed to the multipart ``files`` field.
    :return: the ``url`` field of the response when ``state`` is ``SUCCESS``.
    :raises CustomException: ``上传图片失败`` when the server answers with a
        different state; ``上传图片失败,检查账号或密码`` when the request or
        the parsing of its response fails.
    """
    self.logger.info("")
    try:
        resp = self.session.post(
            'https://mp.toutiao.com/tools/upload_picture/?type=ueditor&pgc_watermark=1&action=uploadimage&encode=utf-8',
            files={'upfile': (
                image_name, image_data, 'image/jpeg', {'type': 'image/jpeg'})}
        ).json()
        if resp['state'] == 'SUCCESS':
            return resp['url']
        raise CustomException('上传图片失败')
    except CustomException:
        # Let the specific rejection through; the former bare ``except``
        # replaced it with the generic message below.
        raise
    except Exception as exc:
        raise CustomException('上传图片失败,检查账号或密码') from exc
def query_article_data(self, title):
    """Fetch the read statistics of a single Sina article, looked up by title.

    The endpoint answers with JSONP (``Zepto...( {json} )``), so the JSON
    payload is cut out of the callback wrapper before it is parsed.

    :param title: exact article title to look for in the first result page.
    :return: dict of counters for the matching article, or ``''`` when no
        article on the page has that title. Counters the listing does not
        provide are reported as ``-1``.
    :raises CustomException: when the response cannot be parsed.
    """
    resp = self.session.get(
        'http://mp.sina.com.cn/aj/article/list?shownew=1&showtype=all&list=1&page=1&pagesize=10&_=1541991946453&callback=Zepto1541991931895',
        verify=False)
    resp.encoding = 'utf8'
    # Greedy capture up to the LAST ')': a non-greedy group stops at the
    # first ')' inside the JSON itself (e.g. a title with parentheses).
    json_re = re.compile(r'Zepto.*?\((.*)\)', re.S)
    try:
        article = json.loads(json_re.findall(resp.text)[0])['data']
    except Exception as e:
        logging.error(e)
        raise CustomException("cookies is fail ")
    for art in article:
        if title != art['articleTitle']:
            continue
        # NOTE(review): recomment_num reuses 'articleReadCount', exactly as
        # the original did -- confirm whether a separate field exists.
        return dict(publish_time=art['articlePubulisTime'],
                    read_num=art['articleReadCount'],
                    recomment_num=art['articleReadCount'],
                    comment_num=art['articleCommentCount'],
                    share_num=-1,
                    like_num=-1,
                    follow_num=-1,
                    collect_num=-1)
    return ''
def read_count(self, stime=None, etime=None):
    """Collect per-day article statistics for the logged-in ifeng account.

    :param stime: first day of the range (``datetime``); defaults to seven
        days before the call.
    :param etime: last day of the range (``datetime``); defaults to one day
        before the call.
    :return: list of dicts with ``day_time``, ``user_name``,
        ``recomment_num``, ``read_num``, ``comment_num``, ``share_num`` and
        ``collect_num``.
    :raises CustomException: when the statistics response is not successful.
    """
    # Resolve the defaults per call: a ``datetime.now()`` expression in the
    # signature is evaluated once, when the function is defined, and would
    # keep pointing at that day for the lifetime of the process.
    now = datetime.datetime.now()
    if stime is None:
        stime = now - datetime.timedelta(days=7)
    if etime is None:
        etime = now - datetime.timedelta(days=1)
    self.logger.info("")
    res = self.session.get(
        "http://fhh.ifeng.com/hapi/account/query").json()
    self.logger.info(res)
    user_name = res['data']['weMediaName']
    res = self.session.get(
        "http://fhh.ifeng.com/api/statistics/findstatlist",
        params="stime=%s&etime=%s&type=article&channel=0" % (
            stime.strftime("%Y-%m-%d"), etime.strftime("%Y-%m-%d"))).json()
    if not res['success']:
        raise CustomException('获取阅读数失败')
    return [
        {
            "day_time": data["date"],
            "user_name": user_name,
            "recomment_num": data["ev"],
            "read_num": data["pv"],
            "comment_num": data["commentNum"],
            "share_num": data["shareNum"],
            "collect_num": data["collectNum"],
        }
        for data in res['data']['rows']
    ]
def read_count(self, sd=None, ed=None):
    """Collect per-day statistics for the logged-in Yidian account.

    The account name is read from the home page; the counters come from the
    ``source-data`` endpoint, queried from ``sd`` for ``(ed - sd).days`` days.

    :param sd: first day of the range (``datetime``); defaults to seven days
        before the call.
    :param ed: last day of the range (``datetime``); defaults to one day
        before the call.
    :return: list of dicts with ``user_name``, ``day_time``,
        ``recomment_num``, ``read_num``, ``share_num`` and ``collect_num``.
    :raises CustomException: when no ``media_name`` is found in the home page.
    """
    # Resolve the defaults per call; defaults written as ``datetime.now()``
    # expressions are frozen at definition time.
    now = datetime.datetime.now()
    if sd is None:
        sd = now - datetime.timedelta(days=7)
    if ed is None:
        ed = now - datetime.timedelta(days=1)
    self.logger.info("")
    response = self.session.get('https://mp.yidianzixun.com/#/Home')
    response.encoding = 'utf8'
    names = re.findall(r'"media_name":"(.*?)"', response.text, re.S)
    if not names:
        raise CustomException('重新登陆')
    user_name = names[0]
    resp = self.session.get(
        "http://mp.yidianzixun.com/api/source-data",
        params="date=%s&retdays=%s&_rk=dfb3e71523797641095" % (
            sd.strftime("%Y-%m-%d"), (ed - sd).days)).json()
    # A missing or null 'result' is treated as "no data".
    datas = resp.get('result', {}) or {}
    return [
        dict(
            user_name=user_name,
            day_time=dt,
            recomment_num=data['viewDoc'],  # recommendations
            read_num=data['clickDoc'],  # reads
            share_num=data['shareDoc'],  # shares
            collect_num=data['likeDoc'])  # favourites
        for dt, data in datas.items()
    ]
def read_count(self, start_date=None, end_date=None):
    """Collect per-day statistics for the first Sohu account of the session.

    :param start_date: first day of the range (``datetime``); defaults to
        seven days before the call.
    :param end_date: last day of the range (``datetime``); defaults to one
        day before the call.
    :return: list of dicts with ``day_time``, ``recomment_num``,
        ``user_name`` and ``read_num``.
    :raises CustomException: when the statistics response status is not
        ``ok``.
    """
    # Resolve the defaults per call; defaults written as ``datetime.now()``
    # expressions are frozen at definition time.
    now = datetime.datetime.now()
    if start_date is None:
        start_date = now - datetime.timedelta(days=7)
    if end_date is None:
        end_date = now - datetime.timedelta(days=1)
    self.logger.info("")
    times = str(int(time.time() * 1000))
    res = self.session.get(
        "https://mp.sohu.com/mp-accounts/accounts/list?_=%s" % times,
        verify=False,
        timeout=30
    ).json()
    account = res['list'][0]['accountslist'][0]
    user_name = account['nickName']
    accountId = account['id']
    read_url = "https://mp.sohu.com/v3/users/stat/overall?type=1&startDate={}&endDate={}&accountId={}&_={}"\
        .format(start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"), accountId, times)
    resp = self.session.get(read_url, timeout=30, verify=False)
    self.logger.info(resp.text)
    resp = json.loads(resp.text)
    if resp['status'] != 'ok':
        raise CustomException("账号异常")
    # NOTE(review): both counters are filled from "diff", as in the original.
    return [
        {
            "day_time": data["date"],
            "recomment_num": data["diff"],
            "user_name": user_name,
            "read_num": data["diff"],
        }
        for data in resp['result']['list']
    ]
def submit_article(self, da_id, utoken):
    """Submit a saved Dayu draft for publication.

    :param da_id: id of the draft, sent as ``dataList[0][_id]``.
    :param utoken: token sent both as a form field and as a request header.
    :return: ``True`` when the response carries a truthy ``data._id``;
        ``False`` (after logging the response) when that id is empty.
    :raises CustomException: when the response has no ``data._id`` at all.
    """
    resp = self.session.post(
        'https://mp.dayu.com/dashboard/submit-article',
        data={
            "dataList[0][_id]": da_id,
            "dataList[0][isDraft]": "1",
            "dataList[0][reproduce]": "",
            "curDaySubmit": "false",
            "utoken": utoken,
        },
        headers={
            "Host": "mp.dayu.com",
            "Connection": "keep-alive",
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "utoken": utoken,
            "Origin": "https://mp.dayu.com",
            "X-Requested-With": "XMLHttpRequest",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
    ).json()
    try:
        article_id = resp['data']['_id']
    except (KeyError, TypeError, IndexError) as exc:
        # Only lookup failures on the decoded JSON are expected here; a bare
        # ``except`` would also trap KeyboardInterrupt/SystemExit.
        raise CustomException('提交失败,检查文章内容') from exc
    if article_id:
        return True
    self.logger.error(resp)
    return False
def read_count(self, start_day=None, end_day=None):
    """Collect per-day statistics for the logged-in Baijiahao account.

    :param start_day: first day of the range (``datetime``); defaults to
        seven days before the call.
    :param end_day: last day of the range (``datetime``); defaults to one
        day before the call.
    :return: list of dicts with ``day_time`` (``YYYY-MM-DD``), ``user_name``
        and the like/recommend/read/comment/share/collect counters. Rows
        whose ``recommend_count`` is ``-1`` are skipped.
    :raises CustomException: with the server's ``errmsg`` when the account
        info request reports a non-zero ``errno``.
    """
    # Resolve the defaults per call; defaults written as ``datetime.now()``
    # expressions are frozen at definition time.
    now = datetime.datetime.now()
    if start_day is None:
        start_day = now - datetime.timedelta(days=7)
    if end_day is None:
        end_day = now - datetime.timedelta(days=1)
    self.logger.info("account: %s" % self.account.account)
    res = self.session.get(
        "https://baijiahao.baidu.com/builder/app/appinfo").json()
    if res['errno'] != 0:
        raise CustomException(res['errmsg'])
    username = res['data']['user']['name']
    res = self.session.post(
        "https://baijiahao.baidu.com/builder/author/statistic/appStatistic",
        data={
            "type": "news",
            "is_yesterday": "false",
            "start_day": start_day.strftime("%Y%m%d"),
            "end_day": end_day.strftime("%Y%m%d"),
            "stat": "0"}).json()
    read_list = []
    for data in res['data']['list']:
        if int(data['recommend_count']) == -1:
            continue
        read_list.append({
            # The API reports days as YYYYMMDD; emit YYYY-MM-DD.
            "day_time": time.strftime('%Y-%m-%d', time.strptime(data['event_day'], "%Y%m%d")),
            "user_name": username,
            "like_num": data.get("likes_count", 0),
            "recomment_num": data.get("recommend_count", 0),
            "read_num": data.get("view_count", 0),
            "comment_num": data.get("comment_count", 0),
            "share_num": data.get("share_count", 0),
            "collect_num": data.get("collect_count", 0),
        })
    return read_list
def rep_video(self, content, video):
    """Replace a video snippet in ``content`` with a Weibo video card.

    Every ``src="..."`` URL found in ``video`` is registered with the Weibo
    editor plugin; the returned id is turned into a ``<card>`` tag that
    replaces the whole ``video`` snippet inside ``content``.

    :param content: article body containing the snippet.
    :param video: the video markup to replace.
    :return: ``content`` with the snippet replaced (unchanged when ``video``
        has no ``src`` attribute).
    :raises CustomException: ``正文中:<msg>`` when the plugin rejects a URL;
        ``重新登录,cookie 失效`` when the request or its parsing fails.
    """
    urls = re.findall(r'src="(.*?)"', video)
    try:
        for url in urls:
            resp = self.session.post(
                'https://card.weibo.com/article/v3/aj/editor/plugins/video',
                data=dict(url=url)).json()
            if resp['code'] != 100000:
                raise CustomException('正文中:%s' % resp['msg'])
            card = '<card data-card-id="{}" data-card-type="video"></card>'.format(
                resp['data']['id'])
            content = content.replace(video, card)
        return content
    except CustomException:
        # Keep the server's own rejection message; the former bare
        # ``except`` replaced it with the cookie-expired message.
        raise
    except Exception as exc:
        raise CustomException('重新登录,cookie 失效') from exc
def upload_image(self, image_name, image_data):
    """Upload an image to Baijiahao and return its un-watermarked URL.

    :param image_name: file name sent with the upload.
    :param image_data: image content handed to the multipart ``files`` field.
    :return: ``ret.no_waterlog_bos_url`` from the response.
    :raises CustomException: when ``error_code`` in the response is not 0.
    """
    part_headers = {
        'is_waterlog': '1',
        'save_material': '1',
        'type': 'image',
        "no_compress": '0',
        'app_id': '1594805746441778',
    }
    reply = self.session.post(
        'https://baijiahao.baidu.com/builderinner/api/content/file/upload',
        files={'media': (image_name, image_data, 'image/jpeg', part_headers)},
    )
    payload = reply.json()
    if payload['error_code'] == 0:
        return payload['ret']['no_waterlog_bos_url']
    raise CustomException('上传失败')
def upload_image(self, image_name, image_data):
    """Upload an image to Sogou's temporary image store.

    :param image_name: file name sent with the upload.
    :param image_data: image content handed to the multipart ``files`` field.
    :return: the ``url`` value of the JSON response.
    :raises CustomException: when the response text does not contain ``url``.
    """
    self.logger.info('')
    reply = self.session.post(
        'http://mp.sogou.com/api/articles/temp-image',
        files={'file': (image_name, image_data, 'image/jpeg')},
    )
    # Cheap textual check first: only a body mentioning "url" is parsed.
    if 'url' not in reply.text:
        raise CustomException('上传图片失败,请检查账号密码')
    return reply.json().get('url')
def read_count(self, startday=None, endday=None):
    """Collect per-day statistics for the logged-in Sina account.

    The account name is scraped from the home page; the counters come from
    the ``statistic/general`` endpoint, whose body is JSON wrapped in one
    leading and one trailing character that are stripped before parsing.

    :param startday: first day of the range (``datetime``); defaults to
        seven days before the call.
    :param endday: last day of the range (``datetime``); defaults to one day
        before the call.
    :return: list of dicts with ``user_name``, ``day_time``,
        ``recomment_num``, ``read_num``, ``comment_num`` and ``collect_num``.
    :raises CustomException: when the user name cannot be found or the
        statistics ``status`` is not ``1``.
    """
    # Resolve the defaults per call; defaults written as ``datetime.now()``
    # expressions are frozen at definition time.
    now = datetime.datetime.now()
    if startday is None:
        startday = now - datetime.timedelta(days=7)
    if endday is None:
        endday = now - datetime.timedelta(days=1)
    self.logger.info("")
    resp = self.session.get(
        "http://mp.sina.com.cn/",
        headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
        },
        verify=False).text
    pat = re.compile('<span class="user_box_a_s">(.*?)</span>', re.S)
    res = pat.findall(resp)
    if not res:
        raise CustomException('统计失败')
    user_name = res[0].strip()
    res = self.session.get(
        "http://mp.sina.com.cn/aj/statistic/general",
        params="date_start=%s&date_end=%s" % (
            startday.strftime("%Y-%m-%d"), endday.strftime("%Y-%m-%d")),
        verify=False)
    # Drop the first and last character around the JSON document.
    res = json.loads(res.text[1:-1])
    if res['status'] != 1:
        raise CustomException('统计失败,请检查账号密码')
    return [
        dict(user_name=user_name,
             day_time=count["date"],
             recomment_num=count["info"]["exposure"],
             read_num=count["info"]["read"],
             comment_num=count["info"]["comment"],
             collect_num=count["info"]["collect"])
        for count in res['data']['time']
    ]
def login(user, pswd, **kw):
    """Log in to Sogou MP with e-mail and password.

    :param user: account e-mail, posted as ``email``.
    :param pswd: password, posted as ``pwd``.
    :param kw: extra keyword arguments; not used in this function.
    :return: ``(cookies, user_name)`` -- the session cookies as a dict and
        the ``name`` field of the ``if-login`` response.
    :raises CustomException: when the login response carries no ``nextUrl``;
        any other failure is logged and re-wrapped in a CustomException.
    """
    # plain (password) login
    SouGou.logger.info(user)
    session = requests.session()
    try:
        credentials = {'email': user, 'pwd': pswd}
        reply = session.post(
            'http://mp.sogou.com/api/login',
            headers=SouGou.headers,
            data=credentials,
        )
        outcome = json.loads(reply.text)
        if not outcome.get('nextUrl'):
            raise CustomException('检查账号密码')
        cookies = session.cookies.get_dict()
        profile = session.get(
            "http://mp.sogou.com/api/if-login?status=81",
            headers=SouGou.headers,
        ).json()
        user_name = profile.get('name')
        SouGou.logger.info('login success')
        return cookies, user_name
    except Exception as err:
        SouGou.logger.error(err)
        raise CustomException(err)
    finally:
        session.close()
def parse_code(session, fp, user):
    """Fetch a Toutiao slider captcha and compute the reply for it.

    Up to three attempts are made to obtain a captcha. For the first one
    that succeeds, both images are written to per-user files, matched with
    ``TouTiao.match`` and turned into a track by ``TouTiao.sliding_track``.

    :param session: HTTP session; its Referer/Origin headers are updated.
    :param fp: value sent as the ``fp`` query parameter.
    :param user: used as the prefix of the two temporary image files.
    :return: ``(reply, ids, mode)`` -- the computed track plus the captcha
        ``id`` and ``mode`` from the response.
    :raises CustomException: when no attempt returns ``ret == 200``.
    """
    session.headers.update({
        'Referer': 'https://sso.toutiao.com/',
        'Origin': 'https://sso.toutiao.com'
    })
    url = 'https://verify.snssdk.com/get'
    params = {
        "external": "",
        "fp": fp,
        "aid": "24",
        "lang": "zh",
        "app_name": "sso",
        "iid": "0",
        "vc": "1.0",
        "did": "0",
        "ch": "pc_slide",
        "os": "2",
        "challenge_code": "1105",
    }
    for _ in range(3):
        resp = session.get(url, params=params)
        TouTiao.logger.error(resp.text)
        resp_json = resp.json()
        if resp_json['ret'] != 200:
            continue
        ids = resp_json['data']['id']
        big_img_url = resp_json['data']['question']['url1']
        s_img_url = resp_json['data']['question']['url2']
        tip_y = resp_json['data']['question']['tip_y']
        mode = resp_json['data']['mode']
        resp1 = session.get(big_img_url)
        resp2 = session.get(s_img_url)
        target = f'{user}bg.jpg'
        template = f'{user}gap.png'
        try:
            with open(target, 'wb') as f:
                f.write(resp1.content)
            with open(template, 'wb') as f:
                f.write(resp2.content)
            com_x = TouTiao.match(target, template)
            reply = TouTiao.sliding_track(com_x, tip_y)
        finally:
            # Remove the image files even when matching fails, so a failed
            # attempt does not leave them behind.
            for path in (target, template):
                if os.path.exists(path):
                    os.remove(path)
        return reply, ids, mode
    raise CustomException('获取验证码图片失败')
def upload_image(self, image_name, image_data):
    """Upload an image to Sohu and return its absolute ``https:`` URL.

    :param image_name: file name sent with the upload (also logged).
    :param image_data: image content handed to the multipart ``files`` field.
    :return: ``'https:'`` followed by the ``url`` value of the response.
    :raises CustomException: when the HTTP status is not 200 (previously
        this case silently returned ``None``), or when the response carries
        no ``url``; the server's ``msg`` is used as the message if present.
    """
    self.logger.info(image_name)
    files = {
        'file': (image_name, image_data, 'image/jpeg')
    }
    resp = self.session.post(
        'https://mp.sohu.com/commons/upload/file',
        files=files,
        verify=False
    )
    if resp.status_code != 200:
        raise CustomException('上传图片失败, HTTP %s' % resp.status_code)
    resp_json = resp.json()
    img_url = resp_json.get('url')
    if not img_url:
        # A reply without 'msg' must not turn into a KeyError.
        raise CustomException("%s" % resp_json.get('msg', '上传图片失败'))
    # The response 'url' is prefixed with the scheme to make it absolute.
    return 'https:' + img_url
def upload_image(self, image_name, image_data):
    """Upload an image to the NetEase (163) media platform.

    :param image_name: file name sent with the upload.
    :param image_data: image content placed in the multipart ``field`` part.
    :return: ``data.ourl`` from the response.
    :raises CustomException: when the response ``code`` is not ``1``.
    """
    url = 'http://mp.163.com/api/v2/upload/nos/water'
    m = MultipartEncoder(
        fields={
            'uploadtype': 'cms',
            'from': 'neteasecode_mp',
            'bucketName': 'dingyue',
            'watername': '0',
            'field': (image_name, image_data, 'image/jpeg')
        })
    # Send the multipart Content-Type with this request only. Writing it
    # into ``session.headers`` would leave it on the session for every
    # later request.
    resp = self.session.post(
        url, data=m, headers={'Content-Type': m.content_type})
    resp_json = resp.json()
    if resp_json.get('code') == 1:
        return resp_json.get('data').get('ourl')
    raise CustomException('上传图片失败')
def login(user, pswd, **kw):
    """Log in to Dayu through the UC account service.

    The credentials are posted to the UC login endpoint; the ``data`` value
    of its response is then exchanged at ``mp.dayu.com/login-url`` for a
    redirect URL, which is visited to obtain the session cookies.

    :param user: login name, posted as ``login_name``.
    :param pswd: password, posted as ``password``.
    :param kw: extra keyword arguments; not used in this function.
    :return: ``(cookies, user_name)`` -- the session cookies as a dict and
        the ``weMediaName`` found in the dashboard page.
    :raises CustomException: when the UC login status is not ``20000`` or
        the dashboard page carries no ``weMediaName``.
    """
    mobile_ua = "Mozilla/5.0 (iPhone 84; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.0 MQQBrowser/7.8.0 Mobile/14G60 Safari/8536.25 MttCustomUA/2 QBWebViewType/1 WKType/1"
    proxies = Proxy.proxy()
    print(proxies)
    session = requests.session()
    try:
        resp = session.post(
            'https://api.open.uc.cn/cas/custom/login/commit?custom_login_type=common',
            data={
                "client_id": "328",
                "redirect_uri": "https://mp.dayu.com/login/callback",
                "display": "mobile",
                "change_uid": "1",
                "transmission_mode": "pm",
                "": "登录",
                "login_name_new": "",
                "has_captcha": "false",
                "sig": "",
                "session_id": "",
                "token": "",
                "key": "",
                "nc_appkey": "CF_APP_uc_login_h5",
                "origin": "*",
                "login_name": user,
                "password": pswd,
                "captcha_val": "",
                "captcha_id": ""},
            headers={"user-agent": mobile_ua},
            proxies=proxies,
            verify=False,
            timeout=5
        ).json()
        if resp['status'] != 20000:
            DaYu.logger.error("Faield to login dayu")
            raise CustomException('登陆失败')
        # Request the login URL that leads to the dashboard.
        session.headers.update(
            {"Connection": "close"})
        # ``headers``/``proxies``/``verify`` are arguments of ``session.get``.
        # They used to sit inside the ``str.format(...)`` call, where format
        # ignored them, so this request went out without them.
        res = session.get(
            'https://mp.dayu.com/login-url?redirect_url=%2Fmobile%2Fdashboard%2Findex%3Fuc_biz_str%3DS%3Acustom%257CC%3Aweb_default%257CK%3Atrue&st={}&display=mobile&_=0.8983112526997874 '.format(
                resp['data']),
            headers={
                'Referer': 'https://mp.dayu.com/mobile/index?uc_biz_str=S:custom%7CC:web_default%7CK:true',
                "user-agent": mobile_ua,
            },
            proxies=proxies,
            verify=False,
        ).json()
        logging.info(res)
        home_url = res['data']['redirectUrl']
        session.get(home_url)
        cookies = session.cookies.get_dict()
        home = session.get('https://mp.dayu.com/dashboard/index').text
        us_re = re.compile(r'"weMediaName":"(.*?)",', re.S)
        user_name = us_re.findall(home)
        if not user_name:
            # Report a login failure instead of an IndexError.
            raise CustomException('登陆失败')
        return cookies, user_name[0]
    finally:
        session.close()
def fetch_code(user, pswd, loop=3):
    """Request a Toutiao mobile code and cache the session cookies in Redis.

    A first ``TouTiao.get_mobile_code`` call is made. If it answers with
    ``error_code`` 1105, ``TouTiao.break_code`` followed by another
    ``get_mobile_code`` is retried up to ``loop`` times. On success the
    session cookies are stored as JSON under the key ``user`` with
    ``ex=300``.

    :param user: account; used for the requests and as the Redis key.
    :param pswd: not used in this function.
    :param loop: maximum number of ``break_code`` attempts.
    :return: ``True`` once the cookies have been stored; ``False`` when the
        initial cookies carry no ``s_v_web_id`` or all attempts fail.
    :raises CustomException: when the server answers with ``error_code``
        1206; its ``description`` is used as the message.
    """
    session = requests.session()
    cookies = TouTiao.init_cookie()
    fp = cookies.get('s_v_web_id', '')
    if not fp:
        TouTiao.logger.error('获取初始化cookies失败')
        return False
    session.cookies = requests.cookies.cookiejar_from_dict(cookies)
    session.headers.update({
        "User-Agent": Base.UA
    })
    resp = TouTiao.get_mobile_code(session, user, fp)  # initial request for the mobile code
    # NOTE(review): 1105 presumably means a slider captcha must be solved
    # first (parse_code sends challenge_code 1105) -- confirm.
    if resp.json()['error_code'] == 1105:
        while loop:
            TouTiao.logger.error(f'break_code:{loop}次')
            code_resp = TouTiao.break_code(session, fp, user)  # attempt the captcha
            if code_resp['ret'] == 200:
                resp = TouTiao.get_mobile_code(session, user, fp)
                resp_json = resp.json()
                TouTiao.logger.error(resp_json)
                if resp_json['error_code'] == 0:
                    TouTiao.logger.info(f'{user}获取mobile——code成功')
                    rs = RedisApi()
                    cookies = session.cookies.get_dict()
                    rs.set_value(user, json.dumps(cookies), ex=300)
                    return True
                elif resp_json['error_code'] == 1206:  # verification requested too frequently
                    TouTiao.logger.error(resp_json['description'])
                    raise CustomException(resp_json['description'])
                else:
                    time.sleep(1)
                    TouTiao.logger.error(f'重试:{loop}')
                    TouTiao.logger.error('刷新滑块')
                    loop -= 1
            else:
                time.sleep(1)
                TouTiao.logger.error(f'滑动失败,重试{loop}')
                loop -= 1
        return False
    else:
        # Any other error_code: the cookies are cached straight away.
        rs = RedisApi()
        cookies = session.cookies.get_dict()
        rs.set_value(user, json.dumps(cookies), ex=300)
        return True
def upload_image(self, image_name, image_data):
    """Upload an image to the Sina media platform.

    :param image_name: file name sent with the upload.
    :param image_data: image content handed to the multipart ``files`` field.
    :return: the ``data`` value of the response.
    :raises CustomException: when ``errno`` in the response is not ``1``.
    """
    self.logger.info("")
    part_headers = {
        'id': 'WU_FILE_0',
        'name': image_name,
        'type': 'image/jpeg',
        "lastModifiedDate": 'Tue Jun 12 2018 19:22:30 GMT+0800 (中国标准时间)',
        'size': '',
    }
    reply = self.session.post(
        'http://mp.sina.com.cn/aj/upload?type=article&json=1',
        files={'img': (image_name, image_data, 'image/jpeg', part_headers)},
        verify=False,
    )
    payload = reply.json()
    if payload['errno'] != 1:
        raise CustomException('上传图片失败,请检查账号密码')
    return payload['data']
def query_article_data(self, title):
    """Fetch the read statistics of a single Sogou article, by title.

    :param title: exact article title to look for in the listing.
    :return: dict of counters for the first article with that title;
        ``like_num`` and ``follow_num`` are always ``-1``.
    :raises CustomException: when the listing is empty or no article in it
        has the given title.
    """
    listing = self.session.get(
        'http://mp.sogou.com/api/1/articles?status=1',
        headers=self.headers,
    ).json()
    for item in listing.get('list') or []:
        if item['title'] != title:
            continue
        return dict(
            read_num=item['readingNum'],
            recomment_num=item['recommendedNum'],
            comment_num=item['commentsNum'],
            share_num=item['forwardingNum'],
            collect_num=item['collectionNum'],
            publish_time=self.time_stamp(item['updatedAt']),
            like_num=-1,
            follow_num=-1,
        )
    raise CustomException('Cookies过期')
def fetch_article_status(self, title):
    """Look up the review status of a Baijiahao article by title.

    Only the first page (10 articles) of the listing is inspected. When
    several articles share the title, the last one with a recognised status
    determines the result.

    :param title: exact article title.
    :return: a 3-item sequence ``(status, cause, url)``: ``5`` with the
        audit message (HTML tags and spaces removed) for a rejected
        article, ``4`` with an empty cause for a published one, and
        ``[2, '没查询到该文章', '']`` otherwise.
    :raises CustomException: when the listing reports a non-zero ``errno``.
    """
    url = ''
    # Raw string: '\/' in a normal string literal is an invalid escape.
    re_a = re.compile(r'<[a-zA-Z\/!].*?>', re.S)
    # The query string used to read '...pageSize=10¤tPage=1': the text
    # '&curren' had been turned into the '¤' sign, which glued the two
    # parameters together.
    resp = self.session.get(
        'https://baijiahao.baidu.com/builder/article/lists?type=&collection=&pageSize=10&currentPage=1&search='
    ).json()
    if resp['errno'] != 0:
        raise CustomException('账号异常')
    articles = resp['data']['list']
    res = [2, '没查询到该文章', url]
    for art in articles:
        if title != art['title']:
            continue
        if art['status'] == 'rejected':
            url = art['url']
            res = 5, re_a.sub('', art['audit_msg']).replace(' ', ''), url
        elif art['status'] == 'publish':
            url = art['url']
            res = 4, '', url
    return res
def query_article_data(self, title):
    """Fetch the read statistics of a single ifeng article, by title.

    :param title: exact article title to look for in the first 30 rows.
    :return: dict of counters for the first row with that title;
        ``like_num`` and ``follow_num`` are always ``-1``.
    :raises CustomException: when the request is not successful or no row
        has the given title.
    """
    payload = self.session.get(
        'http://fhh.ifeng.com/api/article/list?isOriginal=1&operationStatus=4&pageSize=30&pageNumber=1&_=1541678014839'
    ).json()
    rows = payload['data']['rows'] if payload['success'] else ()
    for row in rows:
        if row['title'] == title:
            return dict(read_num=row['readNum'],
                        recomment_num=row['recommendNum'],
                        comment_num=row['commentNum'],
                        share_num=row['shareNum'],
                        collect_num=row['collectNum'],
                        publish_time=row['updateTime'],
                        like_num=-1,
                        follow_num=-1)
    raise CustomException('失败')
def break_code(ss, session, post_data):
    """Retry a Baidu passport login that requires a picture captcha.

    Up to five attempts are made. On each one the ``callback`` and
    ``codeString`` values are extracted from the previous response text,
    the captcha image is downloaded and solved with ``verify_captcha``, and
    the login form is posted again with the answer filled in.

    :param ss: text of the previous login response; it is searched for
        ``callback=`` and ``codeString=``.
    :param session: HTTP session used for the captcha and login requests.
    :param post_data: login form dict; ``verifycode``, ``codestring`` and
        ``callback`` are overwritten in place on each attempt.
    :return: the session cookies as a dict, as soon as a response contains
        one of the accepted ``err_no`` markers.
    :raises CustomException: after five failed attempts, with a message
        chosen from the last ``err_no`` value.
    """
    flag = 5
    while flag:
        time.sleep(1)
        BaiJia.logger.info('重试{}次'.format(flag))
        callback = re.findall(r'callback=(.*?)&codeString', ss)[0]
        get_code = re.findall(r'codeString=(.*?)&userName', ss)[0]
        if not get_code:
            # No captcha id in the response: submit an empty answer.
            result = ''
        else:
            code = session.get("https://passport.baidu.com/cgi-bin/genimage", params=get_code, stream=True)
            if code.status_code == 200:
                with open("code.png", 'wb') as f:
                    f.write(code.content)
                # NOTE(review): if the image request is not 200, `result`
                # keeps its value from the previous attempt, or is unbound
                # on the first attempt -- confirm the intended handling.
                cid, result = verify_captcha('code.png', 5000)
        if os.path.exists('code.png'):
            os.remove('code.png')
        post_data['verifycode'] = result
        post_data['codestring'] = get_code
        post_data['callback'] = callback
        time.sleep(0.5)
        ss = session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data)
        ss = ss.text
        BaiJia.logger.info(flag)
        BaiJia.logger.info(ss)
        # These are substring tests, so 'err_no=5' also matches any code
        # that merely starts with 5 (for example err_no=500010).
        if 'err_no=0' in ss or 'err_no=23' in ss or 'err_no=400037' in ss or 'err_no=5' in ss:
            cookies = session.cookies.get_dict()
            return cookies
        else:
            flag -= 1
    # All attempts failed: map the last error number to a message.
    err = re.compile(r'"err_no=(.*?)&').findall(ss)[0]
    BaiJia.logger.info(err)
    if err == "120021":
        msg = '请用手机短信登录'
    elif err == "257":
        msg = '请输入验证码'
    else:
        msg = '账号密码不对'
    raise CustomException(msg)
def fetch_article_status(self, title):
    """Look up the review status of a Sogou article by title.

    :param title: exact article title.
    :return: ``(status, cause, url)`` for the first article with that
        title: ``4`` when its label is ``已发布``, ``5`` with the article's
        ``remark`` when it is ``未通过``, ``2`` otherwise; for a title that
        is not in the listing, ``(2, '没查询到该文章', '')``.
    :raises CustomException: when the listing is empty or missing.
    """
    listing = self.session.get(
        'http://mp.sogou.com/api/1/articles?status=',
        headers=self.headers,
    ).json()
    if not listing.get('list'):
        raise CustomException('Cookies过期')
    status, cause, url = 2, '没查询到该文章', ''
    for item in listing['list']:
        if item['title'] != title:
            continue
        if item['statusLabel'] == '已发布':
            status, cause = 4, ''
        elif item['statusLabel'] == '审核中':
            cause = '审核中'
        elif item['statusLabel'] == '未通过':
            status = 5
            cause = item['remark']
        url = item['url']
        return status, cause, url
    return status, cause, url
def publish(self, title, content, category, flag=1):
    """Publish an article on Sohu.

    The first ``<img src="...">`` found in ``content`` is used as the cover.
    The endpoint's reply counts as success when the whole response body
    parses as an integer.

    :param title: article title.
    :param content: article HTML.
    :param category: key into ``self.channelList``; the entry's first item
        is sent as ``channelId`` and its second as ``categoryId``.
    :param flag: not used in this method.
    :return: ``(2, '')`` on success.
    :raises CustomException: when the response body is not an integer; the
        ``msg`` of a JSON error body is used as the cause when present.
    """
    self.logger.info('')
    covers = re.findall(r'<img src="(.*?)".*?>', content, re.S)
    cover = covers[0] if covers else ''
    url = "https://mp.sohu.com/v3/news/publish"
    channel = self.channelList[category]
    data = {
        "title": title,
        "brief": "",
        "content": content,
        "channelId": channel[0],
        "categoryId": channel[1],
        "id": 0,
        "userColumnId": 0,
        "isOriginal": 0,
        "cover": cover
    }
    body = self.session.post(
        url=url,
        verify=False,
        data=data,
        headers=self.headers).text
    self.logger.info(body)
    try:
        int(body)
    except Exception as err:
        reason = err
        if '{"msg":"' in body:
            reason = json.loads(body)['msg']
        raise CustomException(reason)
    return 2, ''
def read_count(self, start=None, end=None):
    """Collect per-day page-view statistics for the NetEase (163) account.

    :param start: first day of the range (``datetime``); defaults to seven
        days before the call.
    :param end: last day of the range (``datetime``); defaults to one day
        before the call.
    :return: list of dicts with ``day_time``, ``user_name``, ``read_num``,
        ``share_num``, ``follow_num`` and ``recomment_num``.
    :raises CustomException: when the response ``code`` is not ``1``.
    """
    # Resolve the defaults per call; defaults written as ``datetime.now()``
    # expressions are frozen at definition time.
    now = datetime.datetime.now()
    if start is None:
        start = now - datetime.timedelta(days=7)
    if end is None:
        end = now - datetime.timedelta(days=1)
    self.logger.info("")
    pv_resp = self.session.get(
        'http://mp.163.com/wemedia/data/content/article/pv/list.do?',
        params='_=1524046010181&start=%s&end=%s' % (
            start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d'))).json()
    if pv_resp['code'] != 1:
        raise CustomException('账号或密码不正确')
    return [
        dict(
            day_time=read['statDate'],  # date
            user_name=read['tname'],  # account name
            read_num=read['readCount'],  # reads
            share_num=read['shareCount'],  # shares
            follow_num=read['commentCount'],  # follow-up posts (comments)
            recomment_num=read['recommendCount'],  # recommendations
        )
        for read in pv_resp['data']['list']
    ]
def read_count(self, start_day=None, end_day=None):
    """Collect per-day statistics and star scores for the Dayu account.

    The account name is read from the dashboard page. Daily counters come
    from the ``daylist`` endpoint and the score from the ``starinfo``
    endpoint; the two lists are paired by position.

    :param start_day: first day of the range (``datetime``); defaults to
        seven days before the call.
    :param end_day: last day of the range (``datetime``); defaults to one
        day before the call.
    :return: list of dicts with ``user_name``, ``day_time``, the
        recommend/read/comment/share/collect/like counters and ``log``
        (the ``total_score`` of the matching ``starinfo`` entry).
    :raises CustomException: when no ``weMediaName`` is found in the
        dashboard page.
    """
    # Resolve the defaults per call; defaults written as ``datetime.now()``
    # expressions are frozen at definition time.
    now = datetime.datetime.now()
    if start_day is None:
        start_day = now - datetime.timedelta(days=7)
    if end_day is None:
        end_day = now - datetime.timedelta(days=1)
    home = self.session.get('https://mp.dayu.com/dashboard/index').text
    us_re = re.compile(r'"weMediaName":"(.*?)",', re.S)
    user_name = us_re.findall(home)
    if not user_name:
        self.logger.error('获取用户名失败,检查cookie')
        raise CustomException('检查账号密码')
    user_name = user_name[0]
    read_resp = self.session.get(
        'https://mp.dayu.com/api/stat/article/all/daylist',
        params='beginDate=%s&endDate=%s&origin=manual&_=1527488958270' % (
            start_day.strftime('%Y-%m-%d'), end_day.strftime('%Y-%m-%d'))
    ).json()
    log_resp = self.session.get(
        'https://mp.dayu.com/dashboard/feature/star/starinfo',
        params='beginDate={}&endDate={}'.format(
            start_day.strftime('%Y%m%d'), end_day.strftime('%Y%m%d')),
    ).json()
    read_list = []
    read_obj_list = read_resp['data']['list']
    log_obj_list = log_resp['data']
    for i, read in enumerate(read_obj_list):
        # Indexed on purpose: a score list shorter than the counter list
        # raises instead of silently dropping days.
        log = log_obj_list[i]
        read_list.append(dict(
            user_name=user_name,
            day_time=read['date'],
            recomment_num=read['show_pv'],
            read_num=read['click_pv'],
            comment_num=read['cmt_pv'],
            share_num=read['share_pv'],
            collect_num=read['fav_pv'],
            like_num=read['like_pv'],
            log=log['total_score']
        ))
    return read_list
def read_count(self, starttime=None, endtime=None):
    """Collect per-day content statistics for the logged-in Toutiao account.

    The session headers are replaced with a fixed browser-like set before
    the account info and the daily statistics are requested. Days are
    returned in the reverse of the order the API lists them, and days with
    neither impressions nor reads are skipped.

    :param starttime: first day of the range (``datetime``); defaults to
        seven days before the call.
    :param endtime: last day of the range (``datetime``); defaults to one
        day before the call.
    :return: list of dicts with ``day_time``, ``user_name``,
        ``recomment_num``, ``read_num``, ``collect_num`` and ``follow_num``.
    :raises CustomException: when the account info has no ``screen_name``.
    """
    # Resolve the defaults per call; defaults written as ``datetime.now()``
    # expressions are frozen at definition time.
    now = datetime.datetime.now()
    if starttime is None:
        starttime = now - datetime.timedelta(days=7)
    if endtime is None:
        endtime = now - datetime.timedelta(days=1)
    self.logger.info("")
    self.session.headers = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Host": "mp.toutiao.com",
        "Connection": "keep-alive",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    }
    home = self.session.get(
        'https://mp.toutiao.com/get_media_info/',
        headers=self.headers,
    ).json()
    self.logger.info(home)
    user_name = home['data']['user']['screen_name']
    if not user_name:
        raise CustomException('账号异常 %s' % self.account.account)
    self.session.headers['Referer'] = 'https://mp.toutiao.com/profile_v3/index/content-analysis/overview'
    res = self.session.get(
        'https://mp.toutiao.com/statistic/content_daily_stat/?start_date={}&end_date={}&pagenum=1'.format(
            starttime.strftime("%Y-%m-%d"), endtime.strftime("%Y-%m-%d")),
        headers=self.headers,
    ).json()
    self.logger.info(res)
    read_list = []
    for row in reversed(res['data']['data_list']):
        if int(row['impression_count']) == 0 and int(row['go_detail_count']) == 0:
            # Both counters are zero: treated as "not updated yet".
            continue
        # NOTE(review): collect_num is filled from share_count and
        # follow_num from repin_count, as in the original -- confirm this
        # mapping is intended.
        read_list.append(dict(day_time=row['date'],
                              user_name=user_name,
                              recomment_num=row['impression_count'],
                              read_num=row['go_detail_count'],
                              collect_num=row['share_count'],
                              follow_num=row['repin_count']))
    return read_list
def query_article_data(self, title):
    """Fetch the read statistics of a single Baijiahao article, by title.

    Only the first page (10 articles) of the listing is inspected.

    :param title: exact article title.
    :return: dict of counters for the first article with that title
        (``comment_num`` and ``like_num`` converted to ``int``,
        ``follow_num`` always ``-1``), or ``''`` when none matches.
    :raises CustomException: with the server's ``errmsg`` when the listing
        reports a non-zero ``errno``.
    """
    # The query string used to read '...pageSize=10¤tPage=1': the text
    # '&curren' had been turned into the '¤' sign, which glued the two
    # parameters together.
    resp = self.session.get(
        'https://baijiahao.baidu.com/builder/article/lists?type=&collection=&pageSize=10&currentPage=1&search='
    ).json()
    if resp['errno'] != 0:
        raise CustomException(resp['errmsg'])
    for art in resp['data']['list']:
        if title != art['title']:
            continue
        return dict(
            read_num=art['read_amount'],
            recomment_num=art['rec_amount'],
            comment_num=int(art['comment_amount']),
            share_num=art['share_amount'],
            like_num=int(art['like_amount']),
            collect_num=art['collection_amount'],
            publish_time=art['updated_at'],
            follow_num=-1
        )
    return ''