def is_login(self, cookie):
    """Check whether the given cookies still belong to a logged-in session.

    Posts a ``basicinfo`` request to baitong.baidu.com with the cookies
    serialised into a ``Cookie`` header and inspects the JSON ``status``.

    :param cookie: iterable of dict-like cookies exposing ``name``/``value``
    :return: ``{'succ': bool, 'msg': ...}``; when ``post`` reports a failure
        ``msg`` is whatever ``post`` returned, otherwise a fixed status string
    """
    # Millisecond timestamp; used for both the ``reqid`` query arg and eventId.
    event_id = '%s_0' % int(time.time() * 1000)
    endpoint = ('http://baitong.baidu.com/request.ajax'
                '?path=appads/GET/basicinfo&reqid=%s' % event_id)

    cookie_header = '; '.join(
        '%s=%s' % (item.get('name'), item.get('value')) for item in cookie)
    headers = {
        'Accept': "*/*",
        'Content-Type': "application/x-www-form-urlencoded",
        'Cookie': cookie_header,
        'Host': "baitong.baidu.com",
        'Origin': "http://baitong.baidu.com",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    }
    body = {
        'path': 'appads/GET/basicinfo',
        'params': {},
        'eventId': event_id,
    }

    reply = post(endpoint, data=json.dumps(body), headers=headers)
    if not reply.get('is_success'):
        # Request itself failed: hand the reported error back to the caller.
        return {'succ': False, 'msg': reply.get('msg')}

    parsed = json.loads(reply.get('msg').content)
    if parsed.get('status') == 200:
        return {'succ': True, 'msg': 'login success'}
    # Any other status is treated as "not logged in" (the original note says
    # the payload then carries two keys: retdesc and retcode).
    return {'succ': False, 'msg': 'login failed'}
def is_login(self, cookie):
    """Check whether the given cookies still belong to a logged-in session.

    Calls the ``OpenUP.Developer.getInfo`` API on developer.huawei.com and
    treats ``retCode == 0`` in the JSON reply as "logged in".

    :param cookie: iterable of dict-like cookies exposing ``name``/``value``
    :return: ``{'succ': bool, 'msg': ...}``; when ``post`` reports a failure
        ``msg`` is whatever ``post`` returned, otherwise a fixed status string
    """
    endpoint = 'https://developer.huawei.com/consumer/cn/service/apcs/app/gwService'
    cookie_header = '; '.join(
        '%s=%s' % (item.get('name'), item.get('value')) for item in cookie)
    headers = {
        'accept': "application/json, text/javascript, */*; q=0.01",
        'Content-Type': "application/json",
        'cookie': cookie_header,
        'origin': "https://developer.huawei.com",
        'referer': "https://developer.huawei.com/consumer/cn/service/apcs/app/home.html",
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    }
    body = {
        'apiName': 'OpenUP.Developer.getInfo',
        # ``params`` is sent as an already-serialised JSON string.
        'params': '{"queryRangeFlag":"000110"}',
    }

    reply = post(endpoint, data=json.dumps(body), headers=headers)
    if not reply.get('is_success'):
        # Request itself failed: hand the reported error back to the caller.
        return {'succ': False, 'msg': reply.get('msg')}

    parsed = json.loads(reply.get('msg').content)
    if parsed.get('retCode') != 0:
        # Login is no longer valid.
        return {'succ': False, 'msg': 'login failed'}
    return {'succ': True, 'msg': 'login success'}
def get_data(self, sd, ed, data, tid, tname):
    """Download one product's report from e.qq.com and store it as JSON.

    The instance-level ``params``, ``pdata`` and ``_headers`` dicts are
    updated in place with this request's values before the call is made.

    :param sd: start date, sent as ``sdate`` and used in the file name
    :param ed: end date, sent as ``edate`` and used in the file name
    :param data: dict-like product record providing ``pid`` and ``pname``
    :param tid: product type id, sent as ``product_type``
    :param tname: product type name, used only in the file name
    :return: ``{'succ': True}`` once the file is written, otherwise
        ``{'succ': False, 'msg': ...}`` (request failure or zero total cost)
    """
    logger.info('get into (self.get_data_common_version)function')
    endpoint = "https://e.qq.com/ec/api.php"
    product_id = data.get('pid')
    fname = '%s_%s_%s_%s_%s.json' % (
        tname, product_id, data.get('pname'), sd, ed)

    # Per-request form fields and headers, merged into the shared dicts below.
    form_fields = {
        "sdate": sd,
        "edate": ed,
        "product_type": tid,
        'g_tk': self.g_tk,
        "product_id": data.get("pid"),
        "owner": self.uid,
    }
    referer = ("http://e.qq.com/atlas/%s/report/order_old?pid=%s&ptype=%s"
               % (self.uid, product_id, tid))
    request_headers = {"cookie": self.cookie_str, "referer": referer}

    self.params['gtk'] = self.g_tk
    self.pdata.update(form_fields)
    self._headers.update(request_headers)

    reply = post(endpoint, data=self.pdata, params=self.params,
                 headers=self._headers)
    if not reply['is_success']:
        return {'succ': False, 'msg': reply.get('msg')}

    file_name = join(self.dir_path, fname)
    report = reply['msg'].json()
    # The cost arrives as text with thousands separators, e.g. "1,234.5".
    raw_cost = report.get('data').get('total').get('cost')
    if float(raw_cost.replace(',', '')) == 0:
        return {'succ': False, 'msg': 'no data'}

    with open(file_name, 'w', encoding='utf-8') as out:
        json.dump(report, out)
    logger.info('crawled data: %s' % report)
    return {'succ': True}
def get_task():
    """Poll the task server once for a task of the current ``task_type``.

    Side effects: sleeps before returning on every non-task outcome, and
    flips the module-level ``task_type`` between ``'CPA'`` and ``'MSG'``
    whenever the server reports that no task is available (errorCode 8).

    :return: the task body (``um['body']``) when a task was handed out,
        otherwise ``False``
    """
    global task_type
    # Ask for a task.
    payload = {'platformType': task_type, 'flag': pc_name}
    ret = post(get_task_url,
               data=json.dumps(payload),
               headers={'Content-Type': 'application/json'},
               timeout=60)
    if not ret['is_success']:
        # Request failed.
        logger.error('get task failed')
        sleep(60)
        return False

    # Request succeeded; the body must still be decodable.
    try:
        um = ret['msg'].json()
        logger.debug('from task_api:\n%s' % um)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit; only ordinary errors are treated as a server error now.
        logger.error('server error')
        sleep(30)
        return False

    # A reply without ``errorCode`` (or one that is not a JSON object) falls
    # through to the generic server-error branch instead of raising.
    error_code = um.get('errorCode') if isinstance(um, dict) else None

    if error_code == 8:
        # No task available: switch to the other task type for the next poll.
        print('none task')
        # Equality, not ``in``: ``x in 'CPA'`` is a substring test and is
        # also true for '', 'C', 'PA', ...
        task_type = 'MSG' if task_type == 'CPA' else 'CPA'
        sleep(60)
        return False
    if error_code == 0:
        # Got a task.
        return um.get('body')

    logger.error('task server error')
    sleep(5)
    return False
def send_params_to_parse(platform, account):
    """Notify the ERP analysis endpoint about a platform/account pair.

    Prints the reply text prefixed with ``parser | ``; on a failed request
    the text is additionally printed on its own line first.

    :param platform: platform identifier, sent as ``platform``
    :param account: account identifier, sent as ``account``
    :return: None
    """
    data = {"platform": platform, 'account': account}
    test_url = 'http://erp.btomorrow.cn/adminjson/ERP_AnalysisCPA'
    ret = post(test_url,
               data=json.dumps(data),
               headers={'Content-Type': 'application/json'},
               timeout=60)

    msg = ret.get('msg')
    # On failure ``msg`` may be an error object rather than a response
    # (other callers log it with exc_info), so it may have no ``.text``;
    # fall back to printing the object itself instead of raising.
    text = getattr(msg, 'text', msg)
    if not ret.get('is_success'):
        print(text)
    print(f'parser | {text}')
def post_res(self, data):
    """Report a result to ``post_res_url`` as a JSON body.

    :param data: JSON-serialisable payload
    :return: ``True`` when ``post`` reports success, otherwise ``False``
    """
    body = json.dumps(data)
    res = post(post_res_url,
               data=body,
               headers={'Content-Type': 'application/json'})
    if res['is_success']:
        # Report accepted: log what the server answered.
        logger.info('Post success! ret_msg: ' +
                    res['msg'].content.decode('utf-8'))
        return True
    # Report failed.
    return False
def get_task():
    """Poll the task server forever and dispatch accepted tasks.

    Each iteration posts ``{'platformType': 'python'}`` to ``get_task_url``.
    A task is only run when its platform is a key of ``spider_type`` AND is
    listed in the hard-coded allow-list below; it is then handed to
    ``run_process``. The function never returns.

    NOTE(review): this block was recovered from a whitespace-collapsed
    source; the nesting of the trailing ``sleep(5)`` / ``else: sleep(2)``
    statements is the most plausible reading and should be confirmed against
    the original file.
    """
    global icbc_last_time  # declared but not used anywhere in this function
    while True:
        # Ask for a task.
        data = {'platformType': 'python'}
        ret = post(get_task_url,
                   data=json.dumps(data),
                   headers={'Content-Type': 'application/json'},
                   timeout=60)
        if not ret['is_success']:
            # Request failed: back off for a minute and poll again.
            logger.warning('get task failed')
            sleep(60)
            continue
        else:
            # Request succeeded.
            logger.debug('from task_api:\n' + ret['msg'].content.decode())
            try:
                um = json.loads(ret['msg'].content)
            except:
                sleep(30)  # server error (reply was not decodable)
                continue
            if um['errorCode'] == 8:
                # No task available.
                sleep(5)
                continue
            elif um['errorCode'] == 0:
                # Got a task.
                if um['body']['platform'] in spider_type.keys():
                    # Disabled filter that restricted execution to specific
                    # accounts:
                    # acc = ['*****@*****.**', '*****@*****.**', '*****@*****.**', '2745358874']
                    # if um['body']['account'] not in acc:
                    #     continue
                    # Hard-coded allow-list of platforms this worker runs
                    # ('WIFIKEY' was previously listed and is commented out).
                    if um.get('body').get('platform') not in [
                            'qq_finance'
                    ]:  #, 'WIFIKEY']:
                        sleep(0.5)
                        continue
                    logger.info('task info:')
                    logger.info(um)
                    # Run the task.
                    try:
                        # Previous in-process variant:
                        # spider_type[um['body']['platform']].run_task(um['body'])
                        run_process(task_name=spider_type.get(
                            um.get('body').get('platform')),
                                    args=um.get('body'))
                    except Exception as e:
                        logger.error(e, exc_info=1)
                        # Raise an alert (logged at CRITICAL level).
                        logger.critical(e, exc_info=1)
                        sleep(5)
                else:
                    # Platform unknown to this worker: skip the task.
                    sleep(2)
                    continue
            else:
                logger.error('task server error')
                sleep(5)
def get_main_content(self, p_cf, cookie, sd, ed):
    """Fetch the main account's campaign report from e.yunos.com.

    Step 1 lists the campaigns and keeps those whose title contains '东方'.
    Step 2 requests the report for those campaign ids and writes the JSON
    reply to ``mainContent_#<sd>_#<ed>.json`` under ``self.dir_path``.

    :param p_cf: value sent as the ``p_cf`` form field of the report request
    :param cookie: ready-made ``cookie`` header value
    :param sd: start date (``start_ds``), also used in the file name
    :param ed: end date (``end_ds``), also used in the file name
    :return: on success ``{'succ': True, 'msg': <report>, 'ids': [(id,
        title), ...], 'sd': sd, 'ed': ed}``; otherwise a dict with
        ``'succ': False`` and a ``'msg'`` describing the reason
    """
    # Step 1: campaign ids.
    url = 'https://e.yunos.com/api/campaign/list/layout'
    headers = {
        'accept': "application/json, text/javascript",
        'content-type': "application/x-www-form-urlencoded",
        'cookie': cookie,
        'referer': "https://e.yunos.com/",
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    }
    data = get(url, headers=headers)
    if not data.get('is_success'):
        # Previously the raw transport dict was returned, which lacks the
        # 'succ' key every other exit provides. Keep its keys for backward
        # compatibility and add an explicit 'succ': False.
        return dict(data, succ=False)

    id_list = json.loads(data.get('msg').content.decode('utf-8'))
    if id_list.get('total_count') == 0:
        return {'succ': False, 'msg': 'no data', 'account': self.acc}
    # Tolerate a missing campaign list and campaigns without a title instead
    # of raising TypeError on ``in None``.
    ids = [(e.get('id'), e.get('title'))
           for e in id_list.get('data') or []
           if '东方' in (e.get('title') or '')]

    # Step 2: report data for the selected campaigns.
    filename = join(self.dir_path, 'mainContent_#%s_#%s.json' % (sd, ed))
    url2 = 'https://e.yunos.com/api/rpt/list'
    payload = {
        'param': json.dumps({
            "page": 1,
            "page_size": 99999,
            "date_range_type": 0,
            "campaign_id": [i[0] for i in ids],
            "report_level": 1,
            "report_data_type": 1,
            "start_ds": sd,
            "end_ds": ed
        }),
        'p_cf': p_cf
    }
    data = post(url2, data=payload, headers=headers)
    if not data.get('is_success'):
        return {'succ': False, 'msg': data.get('msg')}

    data = json.loads(data.get('msg').content.decode('utf-8'))
    if data.get('total_count') == 0:
        return {'succ': False, 'msg': 'no data', 'account': self.acc}

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f)
    return {'succ': True, 'msg': data, 'ids': ids, 'sd': sd, 'ed': ed}
def get_data(self, c_cf, sd, ed, cid, identity, company, total_count=10, reget=True):
    """Download a child account's campaign report and store it as JSON.

    The first call (``reget=True``) only learns the real row count; it then
    calls itself once more with ``page_size`` set to that count and
    ``reget=False`` so that a single page holds every row.

    :param c_cf: value sent as the ``p_cf`` form field
    :param sd: start date (``start_ds``), also used in the file name
    :param ed: end date (``end_ds``), also used in the file name
    :param cid: campaign ids; when empty a "no data" file is written instead
    :param identity: child account identity (URL-quoted before sending)
    :param company: company name, stripped and used in the file name
    :param total_count: page size requested from the API
    :param reget: whether to re-request with the reported total as page size
    :return: ``{'succ': True}`` after the file is written, otherwise
        ``{'succ': False, 'msg': ...}``
    """
    # Build the output file name.
    fname = '%s_#%s_#%s_#%s.json' % (identity, company.strip(), sd, ed)
    file_name = os.path.join(self.dir_path, fname)

    def _no_data():
        # Leave a marker file so the date range is recorded as empty.
        with open(file_name, 'w') as marker:
            marker.write('{"msg": "no data"}')
        return {'succ': False, 'msg': 'no data'}

    if not cid:
        # No campaigns, hence nothing to fetch.
        return _no_data()

    # Build the request.
    endpoint = "https://e.yunos.com/api/rpt/list"
    query = {
        "page": 1,
        "page_size": total_count,
        "date_range_type": 0,
        "campaign_id": cid,
        "report_level": 1,
        "report_data_type": 1,
        "start_ds": sd,
        "end_ds": ed,
    }
    form = {
        'identity': quote(identity),
        'param': json.dumps(query),
        'p_cf': c_cf,
    }

    # Send it.
    reply = post(endpoint, data=form, headers=self.headers)
    if not reply['is_success']:
        return {'succ': False, 'msg': reply['msg']}

    # Total number of rows available on the server.
    report = json.loads(reply['msg'].content.decode('utf-8'))
    row_total = int(report.get('total_count'))
    if row_total == 0:
        # Nothing was spent in this range.
        return _no_data()

    if reget:
        time.sleep(0.25)
        return self.get_data(c_cf, sd, ed, cid, identity, company,
                             total_count=row_total, reget=False)

    # Drop characters GBK cannot represent from the campaign names.
    for row in report['data']:
        name = row.get('campaign_name')
        row['campaign_name'] = name.encode('gbk', 'ignore').decode('gbk')
    serialised = json.dumps(report)

    # Write the file; a write error is logged, not raised.
    with open(file_name, 'w', encoding='gbk') as out:
        try:
            out.write(serialised)
        except Exception as e:
            logger.error(e, exc_info=1)
    logger.info('crawled data: --------' + serialised)
    return {'succ': True}
def get_data(self, data, sd, ed):
    """Download one product's report (product type '20') from e.qq.com.

    The instance-level ``params``, ``pdata`` and ``_headers`` dicts are
    updated in place with this request's values. The reply is tagged with
    ``self.acc`` under ``'account'`` and written as JSON to ``self.dir_path``.

    :param data: product record; ``pid`` and ``pname`` must be present
    :param sd: start date, sent as ``sdate`` and used in the file name
    :param ed: end date, sent as ``edate`` and used in the file name
    :return: ``{'succ': True}`` once the file is written, otherwise
        ``{'succ': False, 'msg': ...}`` (request failure or zero total cost)
    """
    logger.info('get into (self.get_data_common_version)function')
    endpoint = "https://e.qq.com/ec/api.php"
    # Subscript access on purpose: a record without pid/pname is an error.
    fname = '%s_%s_%s_%s.json' % (data['pid'], data['pname'], sd, ed)

    query = {"g_tk": str(self.gtk)}
    form_fields = {
        "sdate": sd,
        "edate": ed,
        "product_type": '20',
        "product_id": data.get('pid'),
        "owner": self.uid,
    }
    referer = ("http://e.qq.com/atlas/%s/report/order_old?pid=%s&ptype=%s"
               % (self.uid, data.get('pid'), '20'))
    request_headers = {"cookie": self.cookies_str, "referer": referer}

    self.params.update(query)
    self.pdata.update(form_fields)
    self._headers.update(request_headers)

    reply = post(endpoint, data=self.pdata, params=self.params,
                 headers=self._headers)
    if not reply['is_success']:
        return {'succ': False, 'msg': reply['msg']}

    file_name = os.path.join(self.dir_path, fname)
    report = json.loads(reply['msg'].content.decode('utf-8'))
    # The cost arrives as text with thousands separators, e.g. "1,234.5".
    raw_cost = report.get('data').get('total').get('cost')
    if float(raw_cost.replace(',', '')) == 0:
        return {'succ': False, 'msg': 'no data'}

    report['account'] = self.acc
    with open(file_name, 'w') as out:
        json.dump(report, out)
    logger.info('crawled data: ' + json.dumps(report))
    return {'succ': True}
def get_data(self, sd, ed):
    """Download the baitong.baidu.com report for a date range.

    The raw reply text is written unchanged to ``<sd>_<ed>.json`` under
    ``self.dir_path``.

    :param sd: start date (``startDate``), also used in the file name
    :param ed: end date (``endDate``), also used in the file name
    :return: ``{'succ': True}`` after writing, otherwise
        ``{'succ': False, 'msg': 'net error' | 'no data'}``
    """
    endpoint = "http://baitong.baidu.com/request.ajax?path=appads/GET/report/all/list"

    # Output file.
    file_name = os.path.join(self.dir_path, '%s_%s.json' % (sd, ed))

    # Request headers and query.
    headers = {
        'Accept': "*/*",
        'Content-Type': "application/x-www-form-urlencoded",
        'Cookie': self.cookie_str,
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    }
    query = {
        "startDate": sd,
        "endDate": ed,
        "pageSize": 50,
        "page": 1,
        "sortField": "date",
        "sortOrder": "desc",
        "timeUnit": 1,
    }
    # The query travels as one JSON string inside the ``params`` form field.
    reply = post(endpoint, data={'params': json.dumps(query)}, headers=headers)
    if not reply.get('is_success'):
        return {'succ': False, 'msg': 'net error'}

    raw = reply.get('msg').content.decode('utf-8')

    # Nothing reported for this range.
    row_count = json.loads(raw).get('data').get('count')
    if row_count == 0:
        logger.info({'msg': 'no data', 'date': '%s-%s' % (sd, ed)})
        return {'succ': False, 'msg': 'no data'}

    # Write the file; a write error is logged, not raised.
    with open(file_name, 'w', encoding='utf-8') as out:
        try:
            out.write(raw)
        except Exception as e:
            logger.error(e, exc_info=1)
    logger.info('crawled data: --------%s' % raw)
    return {'succ': True}
def get_data(self, sd, ed, cookie, pnum=1, content=None, child_acc=None):
    """Download every page of the cpd.vivo.com.cn data report.

    Pages are fetched recursively: each call requests page ``pnum``, appends
    its rows to ``content`` and calls itself for the next page until the
    reported current page equals the reported page count. The accumulated
    rows are then written as ``{'data': [...]}``.

    :param sd: query start date, also used in the file name
    :param ed: query end date, also used in the file name
    :param cookie: ``Cookie`` header value (stored into ``self._headers``)
    :param pnum: page number to request
    :param content: rows collected so far (internal, used by the recursion)
    :param child_acc: child account name for the file name; falls back to
        ``self.acc`` when empty
    :return: ``{'succ': True}`` after writing, or
        ``{'succ': False, 'msg': 'no data'}``
    :raises Exception: when ``post`` reports a failed request
    """
    endpoint = 'https://cpd.vivo.com.cn/bplantdata/ajaxPageDataReport.action'

    # Output file.
    owner = child_acc if child_acc else self.acc
    file_name = os.path.join(self.dir_path, '%s_%s_%s.json' % (owner, sd, ed))

    form = {
        "dataReportRequest.queryStartDate": sd,
        "dataReportRequest.queryEndDate": ed,
        "page.currentPageNum": pnum,
    }
    self._headers.update({'Cookie': cookie})

    # Fetch and decode the page.
    reply = post(endpoint, data=form, headers=self._headers)
    if not reply.get('is_success'):
        raise Exception(reply.get('msg'))
    data = json.loads(reply.get('msg').content.decode('utf-8'))

    paging = data.get('page')
    if paging.get('recordCount') == 0:
        logger.warning(
            f'no data | child_acc:{child_acc} | range: {sd} ~ {ed}')
        return {'succ': False, 'msg': 'no data'}

    rows = content if isinstance(content, list) else []
    rows.extend(data.get('dataReportResponseList'))

    if paging.get('currentPageNum') != paging.get('pageCount'):
        # More pages remain: continue with the next one.
        return self.get_data(sd, ed, cookie, pnum=pnum + 1, content=rows,
                             child_acc=child_acc)

    # Write the file; a serialisation/write error is logged, not raised.
    with open(file_name, 'w', encoding='utf-8') as out:
        try:
            json.dump({'data': rows}, out)
        except Exception as e:
            logger.error(e, exc_info=1)
    # Only the last page's reply is logged here.
    logger.info('crawled data: -------- %s' % data)
    return {'succ': True}
def get_pids(self, tid, sd, ed):
    """List the products of one product type from e.qq.com.

    :param tid: product type id, sent as ``product_type``
    :param sd: start date, sent as ``sdate``
    :param ed: end date, sent as ``edate``
    :return: ``{'succ': True, 'msg': [{'pname': ..., 'pid': ...}, ...]}`` or
        ``{'succ': False, 'msg': ...}`` on request failure / empty list
    """
    endpoint = 'https://e.qq.com/ec/api.php'
    headers = {
        'accept': "application/json, text/javascript, */*; q=0.01",
        'cookie': self.cookie_str,
        'referer': "https://e.qq.com/atlas/%s/report/producttype" % self.uid,
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3569.0 Safari/537.36",
    }
    form = {
        "mod": "report",
        "act": "getproduct",
        "g_tk": self.g_tk,
        "owner": self.uid,
        "sdate": sd,
        "edate": ed,
        "searchtype": "product",
        "product_type": tid,
    }
    # ``verify=False`` is forwarded to ``post`` exactly as before.
    reply = post(endpoint, data=form, headers=headers, verify=False)
    if not reply.get('is_success'):
        return {'succ': False, 'msg': reply.get('msg')}

    body = reply.get('msg').json()
    section = body.get('data')
    products = section.get('list') if section else None
    if not products:
        return {'succ': False, 'msg': 'no data'}

    pdatas = []
    for product in products:
        pdatas.append({
            'pname': product.get('pname'),
            'pid': product.get('product_id'),
        })
    return {'succ': True, 'msg': pdatas}
def get_pid(self, child_account, special=None):
    """Fetch the task (product) ids of a child account, grouped by app name.

    Calls ``Inapp.Developer.queryTaskList`` on developer.huawei.com. Tasks
    that share a ``contentAppName`` are merged into one entry. The referer
    header is stored into ``self._headers`` as a side effect.

    :param child_account: child account id (``customerAccountId``)
    :param special: optional dict-like filter; when given, only tasks whose
        ``taskName`` is ``in special.get('cname')`` are kept
    :return: ``{'succ': True, 'msg': [{'pname': <app name>,
        'pid': [<taskID>, ...]}, ...]}``, entries in first-seen order
    :raises Exception: when ``post`` reports a failed request
    """
    url = 'https://developer.huawei.com/consumer/cn/service/apcs/app/gwService'
    headers = {
        "referer": "https://developer.huawei.com/consumer/cn/service/apcs/app/memberCenter.html?customerAccountId=%s" % child_account
    }
    self._headers.update(headers)
    params = {
        "fromRecCount": 1,
        "maxReqCount": 99999,
        "status": "AUDITED,RUN,SUSPENDED,DONE,CANCELED,TERMINATE",
        "sortType": 0,
        "accountRegionType": "CHINA",
        "customerAccountId": child_account
    }
    pdata = {
        "apiName": "Inapp.Developer.queryTaskList",
        # ``params`` is sent as a JSON string nested inside the JSON body.
        "params": json.dumps(params)
    }
    data = post(url, data=json.dumps(pdata), headers=self._headers)
    if not data['is_success']:
        raise Exception(data.get('msg'))
    data = json.loads(data['msg'].content.decode())

    task_list = data.get('datas').get('taskList')
    if data.get('datas').get('totalCount') > 0:
        pids = [(x.get('taskID'), x.get('contentAppName'), x.get('taskName'))
                for x in task_list]
    else:
        pids = []
    if special:
        pids = [x for x in pids if x[2] in special.get('cname')]

    # Group the ids of same-named products. A single pass over an
    # insertion-ordered dict replaces the former set() + rescan-per-name,
    # which was O(names * tasks) and produced entries in arbitrary order.
    grouped = {}
    for task_id, app_name, _task_name in pids:
        grouped.setdefault(app_name, []).append(task_id)
    pgids = [{'pname': app_name, 'pid': task_ids}
             for app_name, task_ids in grouped.items()]

    logger.info(pgids)
    return {'succ': True, 'msg': pgids}
def get_cid(self, get_bal=False):
    """Fetch the child accounts of this account from id.vivo.com.cn.

    (Original note: used for "second-generation" accounts.)

    :param get_bal: when true, do not return the child accounts; instead
        store balance records for the status-2 children whose name is in
        ``self.none_cost_list`` into ``self.balance_data`` and return ``None``
    :return: ``{'succ': True, 'msg': [(name, uuid), ...]}`` for the children
        with ``status == 2`` (empty list when there are no children), or
        ``None`` when ``get_bal`` is true and children exist
    :raises Exception: when ``post`` reports a failed request
    """
    endpoint = "https://id.vivo.com.cn/api/account/querySubAccountList"
    headers = {
        'Accept': "application/json, text/javascript, */*; q=0.01",
        'Content-Type': "application/x-www-form-urlencoded",
        'Cookie': self.cookies,
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    }
    form = {
        'param': '{"page": 1,"size": 20,"pageIndex": 1,"pageSize": 20}'
    }

    reply = post(endpoint, data=form, headers=headers)
    if not reply.get('is_success'):
        raise Exception(reply.get('msg'))

    raw = reply.get('msg').content.decode('utf-8')
    children = json.loads(raw).get('object').get('list')
    if not children:
        logger.info('none child account')
        return {'succ': True, 'msg': []}

    if get_bal:
        # Record the balances of the children listed in none_cost_list.
        balances = []
        for child in children:
            if child.get('status') == 2 and child.get('name') in self.none_cost_list:
                balances.append({
                    '账号': child.get('companyName'),
                    '现金余额': child.get('cpdCashBalance'),
                    '虚拟金余额': child.get('cpdDiscountBalance'),
                    'name': child.get('name'),
                })
        self.balance_data = balances
        return

    uids = []
    for child in children:
        if child.get('status') == 2:
            uids.append((child.get('name'), child.get('uuid')))
    return {'succ': True, 'msg': uids}
def get_data(self, cookie, osd, oed):
    """Download the s.qq.com data-centre detail rows for a time range.

    Three requests are made: the game list, the channel list, and then the
    paged ``details`` endpoint queried for every game and channel id. Each
    detail row gets a ``gameappname`` looked up from the game list.

    :param cookie: iterable of dict-like cookies exposing ``name``/``value``
    :param osd: start time, sent as ``tStartTime``
    :param oed: end time, sent as ``tEndTime``
    :return: on success ``{'succ': True, 'data': <JSON string of all rows>,
        'options': {'gameList': [...], 'channelList': [...]}}``; on failure
        ``{'succ': False, 'data': <error or message string>}``
    """
    cookie = '; '.join(
        '%s=%s' % (e.get('name'), e.get('value')) for e in cookie)
    headers = {
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'cookie': cookie,
        'origin': 'https://s.qq.com',
        'referer': 'https://s.qq.com/tx/data/center.html',
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
    }

    # Request 1: game list.
    gamelist_url = 'https://s.qq.com/service/datacenter/gamelist'
    gameres = post(gamelist_url, None, headers=headers)
    if not gameres.get('is_success'):
        logger.error(gameres.get('msg'), exc_info=1)
        return {'succ': False, 'data': gameres.get('msg')}
    if gameres['msg'].status_code != 200:
        return {
            'succ': False,
            'data': "{'msg': 'game status_code not 200'}"
        }
    gameres = json.loads(gameres['msg'].content.decode('utf-8'))
    if gameres['ret'] != 0:
        return {'succ': False, 'data': "{'msg':'game data error'}"}

    # Request 2: channel list.
    channellist_url = 'https://s.qq.com/service/datacenter/channellist'
    channelres = post(channellist_url, None, headers=headers)
    if not channelres.get('is_success'):
        logger.error(channelres.get('msg'), exc_info=1)
        return {'succ': False, 'data': channelres.get('msg')}
    if channelres['msg'].status_code != 200:
        return {
            'succ': False,
            'data': "{'msg': 'channellist status_code not 200'}"
        }
    channelres = json.loads(channelres['msg'].content.decode('utf-8'))
    # Fixed: this used to re-test gameres['ret'] (copy/paste slip), so a
    # failed channel-list reply was never detected.
    if channelres['ret'] != 0:
        return {'succ': False, 'data': "{'msg':'channellist data error'}"}

    # Request 3: paged details. The id lists do not change between pages,
    # so they are built and serialised once instead of on every iteration.
    details_url = 'https://s.qq.com/service/datacenter/details'
    gamemap = {game['id']: game['name'] for game in gameres['result']}
    game_ids_json = json.dumps([str(game['id']) for game in gameres['result']])
    channel_ids_json = json.dumps(
        [str(channel['id']) for channel in channelres['result']])

    total = -1  # unknown until the first successful page
    pagesize = 500
    pageno = 0
    blind_failures = 0  # failed requests while ``total`` is still unknown
    max_blind_failures = 3
    alldetails = []
    # Fixed: the page bound was ceil(total % pagesize), which is the
    # remainder, not the page count (e.g. total=1000 stopped after one page,
    # total=1200 asked for 200 pages).
    while total == -1 or pageno < math.ceil(total / float(pagesize)):
        details_data = {
            'tStartTime': osd,
            'tEndTime': oed,
            'vChannelId': channel_ids_json,
            'vGameId': game_ids_json,
            'iPageNo': pageno,
            'iPageSize': pagesize,  # the API supports at most 500
            'iType': 0
        }
        logger.info('req details, pageno:%d, total:%d' % (pageno, total))
        detailsres = post(details_url, details_data, headers=headers)
        if not detailsres['is_success']:
            logger.error('%s\n%s' % (details_url, details_data))
            if total == -1:
                # Without a known total the loop has no upper bound; give up
                # after a few attempts instead of spinning forever.
                blind_failures += 1
                if blind_failures >= max_blind_failures:
                    return {
                        'succ': False,
                        'data': "{'msg':'detail request failed'}"
                    }
            # As before, a failed page is skipped rather than retried.
            pageno += 1
            continue

        detailsres = json.loads(detailsres['msg'].content.decode('utf-8'))
        if detailsres['ret'] != 0:
            logger.info('details_data:%s, header:%s, detailres:%s' %
                        (details_data, headers, detailsres))
            return {'succ': False, 'data': "{'msg':'detail data error'}"}

        for detailitem in detailsres['result']['rows']:
            detailitem['gameappname'] = gamemap.get(detailitem['gameappid'])
            alldetails.append(detailitem)
        total = detailsres['result']['total']
        pageno += 1

    return {
        'succ': True,
        'data': json.dumps(alldetails, ensure_ascii=False),
        'options': {
            'gameList': gameres.get('result'),
            'channelList': channelres.get('result')
        }
    }
def post_res(task_id, account, platform, file_path, status, has_data=1, has_pic=1):
    """Report a task result to ``post_res_url``.

    :param task_id: task id, sent as ``taskId``
    :param account: account the task ran for
    :param platform: task type / platform name
    :param file_path: server-relative path of the upload directory; falls
        back to ``settings.GlobalVal.DST_DIR`` when empty
    :param status: truthy for success, falsy for an invalid account; the
        value ``5`` reports a crawler logic error
    :param has_data: int flag, whether data was produced (``isData``)
    :param has_pic: int flag, whether a screenshot exists (``isScreenshots``)
    :return: ``True`` when the report was accepted by ``post``, else ``False``
    """
    logger = logging.getLogger('%s.post_res' %
                               settings.GlobalVal.CUR_MAIN_LOG_NAME)
    post_data = {
        'taskId': task_id,
        'errorCode': None,
        'status': None,
        'statusDesc': None,
        'account': account,
        'platform': platform,
        'filePathCatalog': '',
        'flag': settings.GlobalVal.CUR_TASK_TYPE,
        'isScreenshots': has_pic,
        'isData': has_data,
    }
    dst_file_path = file_path or settings.GlobalVal.DST_DIR

    # Pick the fields that depend on the outcome.
    if not status:
        # Failure: reported as an invalid account, with no data/screenshots.
        overrides = {
            'status': 4,
            'statusDesc': '账号无效',
            'errorCode': 10000,
            'filePathCatalog': dst_file_path,
            'isData': 0,
            'isScreenshots': 0,
        }
    elif status == 5:
        # Crawler logic error.
        overrides = {
            'status': 5,
            'statusDesc': '爬虫逻辑错误',
            'filePathCatalog': dst_file_path,
        }
    else:
        # Success.
        overrides = {'status': 3, 'statusDesc': '成功'}
    post_data.update(overrides)

    logger.info('Post Data: %s' % post_data)
    res = post(post_res_url,
               data=json.dumps(post_data),
               headers={'Content-Type': 'application/json'})
    if res['is_success']:
        # Report accepted.
        logger.info('Post success! ret_msg: %s' %
                    res['msg'].content.decode('utf-8'))
        return True

    # Report failed.
    logger.warning('上报失败')
    logger.error(res.get('msg'), exc_info=1)
    return False