def get_info(session):
    '''
    :param session: logged-in session
    :return: report data
    :name: baidu union (platform 1)
    '''
    logger = log.getLog("union_baidu")
    datas = {"platformId": 1, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')
    datas["date"] = datetime.datetime.now().strftime('%Y%m%d')
    url = ("http://union.baidu.com/v2/client/report/query?begin={begin}&end={end}"
           "&timeGranularity=sum&metrics=adPositionView%2Cpageview%2Cclick"
           "%2CclickRatio%2Cecpm%2Cincome&pageNo=1&order=desc&orderBy=adPositionName"
           "&dimensions=adPositionId%2CadPositionName&filterFields=unionBizTypeId"
           "&filterValues=1&pageSize=500").format(begin=yesterday, end=yesterday)
    header = {
        # "Cookie": "__cas__st__6={}; __cas__id__6=19548885".format(cas),
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/68.0.3440.106 Safari/537.36",
    }
    rs = session.get(url, headers=header)
    json_data = json.loads(rs.text)
    results = json_data.get("data", {}).get("results", [])
    data_list = []
    for data in results:
        data_dict = {}
        sourceId = data.get("adPositionId", "")
        sourceName = data.get("adPositionName", "")
        # The report API serialises empty metrics as null, so coerce each
        # value to 0 (the original checked every field against None one by one,
        # with the clickRate check duplicated).
        sourcePv = data.get("adPositionView") or 0
        pagePv = data.get("pageview") or 0
        clickCount = data.get("click") or 0
        clickRate = data.get("clickRatio") or 0
        income = data.get("income") or 0
        cpm = data.get("ecpm") or 0
        data_dict["logTime"] = yesterday
        data_dict["sourceId"] = sourceId
        data_dict["sourceName"] = sourceName
        data_dict["sourcePv"] = sourcePv
        data_dict["pagePv"] = pagePv
        data_dict["clickCount"] = clickCount
        data_dict["clickRate"] = clickRate
        data_dict["income"] = income
        data_dict["cpm"] = cpm
        data_list.append(data_dict)
        print sourceId, sourceName, sourcePv, pagePv, clickCount, clickRate, income, cpm
    datas["data"] = data_list
    logger.info(datas)
    print datas
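
# The query above is capped at pageSize=500 and pinned to pageNo=1. A hedged
# sketch of paging, should an account ever hold more ad positions; the helper
# name fetch_all_pages and the loop bound are ours, not part of the original.
def fetch_all_pages(session, header, base_url, max_pages=10):
    """Collect report rows across pages until a page comes back empty."""
    results = []
    for page_no in range(1, max_pages + 1):
        page_url = base_url.replace("pageNo=1", "pageNo={}".format(page_no))
        rs = session.get(page_url, headers=header)
        batch = json.loads(rs.text).get("data", {}).get("results", [])
        if not batch:
            break
        results.extend(batch)
    return results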
def get_info(session, token):
    '''
    :param session: logged-in session
    :param token: login token
    :return: report data
    :name: Doumob (platform 5)
    '''
    logger = log.getLog("doumob")
    data = {"platformId": 5, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    data["date"] = datetime.datetime.now().strftime('%Y%m%d')
    header = {
        "Referer": "https://www.doumob.com/front/",
        "token": token,
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/67.0.3396.99 Safari/537.36",
    }
    data_list = []
    name_list = session.get(list_url, headers=header)
    media_items = json.loads(name_list.text).get("list", [])
    for li in media_items:
        media_name = li.get("medianame", "")
        media_id = li.get("id", "")
        url = "https://www.doumob.com/end/app/getHdggAdSpaceList?mediaId={}".format(
            media_id)
        response = session.get(url, headers=header)
        json_data = json.loads(response.text)
        # One stats request per ad space under this media.
        for ad_space in json_data['list']:
            adspacename = ad_space['adspacename']
            space_id = ad_space['id']
            _url = DATA_URL.format(startDate=yesterday, endDate=yesterday,
                                   mediaId=media_id, hdggadspaceId=space_id)
            content = session.get(_url, headers=header)
            content_json = json.loads(content.text)
            if content_json['list']:
                data_dict = {}
                data_dict["logTime"] = yesterday.replace("-", "")
                data_dict["mediaName"] = media_name
                data_dict['sourceName'] = adspacename
                data_dict['uv'] = content_json['list'][0]['uv']
                data_dict['income'] = content_json['list'][0]['hdggMoney']
                data_list.append(data_dict)
                print media_name, adspacename, data_dict['uv'], data_dict['income']
    data["data"] = data_list
    logger.info(data)
    print data
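
# The nested content_json['list'][0][...] lookups above raise KeyError or
# IndexError on a partial Doumob payload. A hedged sketch of a tolerant
# accessor; the name safe_path is our illustration, not original code.
def safe_path(obj, *keys):
    """Walk dicts/lists by key or index, returning None on any missing step."""
    for key in keys:
        try:
            obj = obj[key]
        except (KeyError, IndexError, TypeError):
            return None
    return obj

# Usage sketch: uv = safe_path(content_json, 'list', 0, 'uv')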
def get_info(session):
    '''
    :param session: logged-in session
    :return: report data
    :name: Gionee SSP ad data (platform 6)
    '''
    logger = log.getLog("sspdev")
    data = {"platformId": 6, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    data["date"] = datetime.datetime.now().strftime('%Y%m%d')
    # Hit the report page first so the session carries a fresh PHPSESSID.
    session.get("http://sspdev.gionee.com/report/index")
    PHPSESSID = session.cookies.items()[-1][1]
    header = {
        "Cookie": "PHPSESSID={}".format(PHPSESSID),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,"
                  "image/webp,image/apng,*/*;q=0.8",
        "Referer": "http://sspdev.gionee.com/report/index",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/67.0.3396.99 Safari/537.36",
    }
    data_list = []
    response = session.get(DATA_URL.format(yesterday, yesterday), headers=header)
    soup = BeautifulSoup(response.text, "lxml")
    trs = soup.find("table", {"class": "table"}).find("tbody").find_all("tr")
    # Skip the header row, then read one ad slot per row.
    for tr in trs[1:]:
        data_dict = {}
        tds = tr.find_all("td")
        name = tds[0].get_text().strip()
        shownum = tds[4].get_text().strip()
        clicknum = tds[5].get_text().strip()
        money = tds[-1].get_text().strip()
        data_dict["logTime"] = yesterday.replace("-", "")
        data_dict["sourceName"] = name
        data_dict["sourcePv"] = shownum
        data_dict["clickCount"] = clicknum
        data_dict["income"] = money
        data_list.append(data_dict)
        print yesterday, name, shownum, clicknum, money
    data["data"] = data_list
    logger.info(data)
    print data
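
# The table cells above are logged as raw strings, often with thousands
# separators. A small sketch that normalises them first; _to_number is a
# hypothetical helper, not part of the original script.
def _to_number(text):
    """Convert scraped cell text like '1,234' or '12.5' to int/float, else 0."""
    cleaned = text.replace(",", "").strip()
    try:
        return int(cleaned)
    except ValueError:
        try:
            return float(cleaned)
        except ValueError:
            return 0

# Usage sketch: data_dict["sourcePv"] = _to_number(shownum)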
def get_session(session, user_name, password):
    '''
    :param session: login session
    :param user_name: account name
    :param password: account password
    :return: report data
    :name: Sogou (platform 7)
    '''
    logger = log.getLog("sogou")
    data = {"platformId": 7, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')
    data["date"] = datetime.datetime.now().strftime('%Y%m%d')
    header = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/67.0.3396.99 Safari/537.36",
    }
    # Solve the captcha, then log in before pulling the stats page.
    pic = get_picsnum(session)
    datas = {
        "systemType": 1,
        "loginFromPage": "homePage",
        "username": user_name,
        "password": password,
        "activecode": pic
    }
    session.post("http://union.sogou.com/loginauth.action",
                 headers=header, data=datas)
    response = session.get(
        "http://union.sogou.com/stat/product_stat!query.action?unionid=17getfun",
        headers=header)
    soup = BeautifulSoup(response.text, "lxml")
    data_list = []
    content_data = soup.find_all("span", {"class": "pronumauto"})
    data_dict = {}
    data_dict["logTime"] = yesterday
    data_dict["sourcePv"] = content_data[0].get_text().strip().replace(",", "")
    data_dict["clickCount"] = content_data[1].get_text().strip()
    data_dict["income"] = content_data[2].get_text().strip()
    data_list.append(data_dict)
    data["data"] = data_list
    logger.info(data)
    print data
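
# loginauth.action is posted blind above: a failed captcha still returns a
# 200 page, and the scrape then indexes the wrong spans. A hedged sanity
# check; the helper name login_ok is ours, and the "pronumauto" marker is
# simply the span class this script already parses.
def login_ok(session, header):
    """Re-fetch the stats page and confirm the metric spans are present."""
    page = session.get(
        "http://union.sogou.com/stat/product_stat!query.action?unionid=17getfun",
        headers=header)
    return "pronumauto" in page.text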
def get_info(session):
    '''
    :param session: logged-in session
    :return: report data
    :name: 点冠 (aiclk, platform 8)
    '''
    logger = log.getLog("aiclk")
    datas = {"platformId": 8, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')
    datas["date"] = datetime.datetime.now().strftime('%Y%m%d')
    cpc_ssp = session.cookies.items()[0][-1]
    header = {
        "Cookie": "cpc-ssp={}".format(cpc_ssp),
        "Referer": "http://union.aiclk.com/",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/67.0.3396.99 Safari/537.36",
    }
    response = session.get(DATA_URL.format(yesterday, yesterday), headers=header)
    soup = BeautifulSoup(response.text, "lxml")
    if soup:
        data_list = []
        json_data = json.loads(soup.text)
        for data in json_data:
            data_dict = {}
            adslot_name = data.get("adslot_name", "")  # ad slot
            click = data.get("click", 0)               # clicks
            impression = data.get("impression", 0)     # impressions
            income = data.get("income", 0)             # income
            ctr = data.get("ctr", 0.0)                 # click-through rate
            data_dict["logTime"] = yesterday
            data_dict["sourceName"] = adslot_name
            data_dict["sourcePv"] = impression
            data_dict["clickCount"] = click
            data_dict["income"] = income
            data_list.append(data_dict)
            print yesterday, adslot_name, click, impression, income, ctr
        datas["data"] = data_list
        logger.info(datas)
        print datas
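
# The function above routes the JSON body through BeautifulSoup before
# json.loads. A hedged alternative, assuming the endpoint returns raw JSON
# (not verified here): parse directly and fall back to an empty report.
def parse_report(response):
    """Parse the aiclk report body, returning [] when it is not valid JSON."""
    try:
        return json.loads(response.text)
    except ValueError:  # json.loads raises ValueError on bad payloads in Python 2
        return []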
def get_info(session):
    '''
    :param session: logged-in session
    :return: report data
    :name: 好看 (platform 10)
    '''
    logger = log.getLog("mmp")
    data = {"platformId": 10, "accountId": 1}
    data["date"] = datetime.datetime.now().strftime('%Y%m%d')
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    url = "https://mmp.levect.com/image/report?pageSize=10000&pageNo=1"
    header = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/67.0.3396.99 Safari/537.36",
        "Referer": "https://mmp.levect.com/page/imageReport",
    }
    datas = {"startReleaseTime": "2019-02-01", "endReleaseTime": yesterday}
    rs = session.post(url, headers=header, data=datas)
    json_data = json.loads(rs.text)
    data_list = json_data.get("data", {}).get("list", [])
    list_data = []
    for d in data_list:
        dict_data = {}
        clickCount = d.get("imgPv", 0)
        tm = d.get("releaseTime", 0)
        title = d.get("title", "")
        # releaseTime is epoch milliseconds; log each item under its own
        # release day rather than under yesterday.
        dict_data["logTime"] = time.strftime("%Y%m%d",
                                             time.localtime(tm / 1000))
        dict_data["title"] = title
        dict_data["isJingpin"] = 1
        dict_data["clickCount"] = clickCount
        list_data.append(dict_data)
        print title, 1, clickCount
    data["data"] = list_data
    logger.info(data)
    print data
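
# A worked example of the millisecond-epoch conversion used above. The sample
# timestamp is ours, and the result depends on the host timezone (shown for a
# UTC+8 host, where these reports originate):
#   >>> time.strftime("%Y%m%d", time.localtime(1548979200000 / 1000))
#   '20190201'   # 2019-02-01 00:00:00 UTC rendered in local time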
def get_list(session):
    '''
    :param session: logged-in session
    :return: report data
    :name: umeng (platform 9)
    '''
    logger = log.getLog("umeng")
    data = {"platformId": 9, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    data["date"] = datetime.datetime.now().strftime('%Y%m%d')
    rs = session.get(LIST_URL)
    json_data = rs.json()
    sites = json_data.get("data", {}).get("getsitelist", {}).get(
        "items", {}).get("main", [])
    data_list = []
    # One summary request per registered site.
    for site in sites:
        data_dict = {}
        siteid = site.get("siteid", "")
        name = site.get("name", "")
        domain = site.get("domain", "")
        url = INFO_URL.format(siteid, yesterday, yesterday)
        info = session.get(url)
        info_json = info.json()
        items = info_json.get("data", {}).get("summary", {}).get("items", {})
        if items:
            pv = items.get("pv", 0)
            uv = items.get("uv", 0)
            data_dict["logTime"] = yesterday.replace("-", "")
            data_dict["url"] = name + "-" + domain
            data_dict["pagePv"] = pv
            data_dict["uv"] = uv
            data_list.append(data_dict)
            print name, pv, uv
    data["data"] = data_list
    logger.info(data)
    print data
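
# The site loop above fires one stats request per site back to back. A hedged
# sketch that paces the calls and retries once on a connection error, in case
# the stats API rate-limits; the helper name paced_get and the 0.5s delay are
# our guesses (assumes the module-level time/requests imports).
def paced_get(session, url, pause=0.5):
    """GET with a short pause beforehand and a single retry on failure."""
    time.sleep(pause)
    try:
        return session.get(url)
    except requests.exceptions.RequestException:
        time.sleep(pause * 2)
        return session.get(url)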
def download_img(name, pwd):
    '''
    Fetch the Baidu CAS captcha image, solve it via fateadm, then log in.
    '''
    session = requests.Session()
    tm = int(time.time())
    url = "https://cas.baidu.com/?action=image2&appid=6&key={}".format(tm)
    # url = "http://cas.baidu.com/?action=image"
    imgresponse = session.get(url, stream=True)  # stream the captcha image
    image = imgresponse.content
    with open("img.jpg", "wb") as jpg:
        jpg.write(image)
    captcha = fateadm_api.TestFunc()
    print captcha
    get_session(session, captcha, name, pwd)


if __name__ == "__main__":
    # Iterate the configured accounts; each gets a fresh session and captcha.
    for account in login_name:
        name = account.get("name", "")
        pwd = account.get("pwd", "")
        print "start crawl {}".format(name)
        download_img(name, pwd)
        # get_info("")
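
# A hedged variant of the account loop above: isolate failures so one bad
# captcha or expired password does not abort the whole run. The helper name
# crawl_all is ours; login_name and download_img come from this script.
def crawl_all(accounts):
    for account in accounts:
        name = account.get("name", "")
        try:
            download_img(name, account.get("pwd", ""))
        except Exception as exc:
            print "crawl failed for {}: {}".format(name, exc)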