Ejemplo n.º 1
0
def get_info(session):
    logger = log.getLog("union_baidu")
    datas = {"platformId": 1, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')
    datas["date"] = datetime.datetime.now().strftime('%Y%m%d')
    url = "http://union.baidu.com/v2/client/report/query?begin={begin}&end={end}&timeGranularity=sum&metrics=adPositionView%2Cpageview%2Cclick%2CclickRatio%2Cecpm%2Cincome&pageNo=1&order=desc&orderBy=adPositionName&dimensions=adPositionId%2CadPositionName&filterFields=unionBizTypeId&filterValues=1&pageSize=500".format(
        begin=yesterday, end=yesterday)
    header = {
        # "Cookie":"__cas__st__6={}; __cas__id__6=19548885".format(cas),
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    }
    rs = session.get(url, headers=header)
    json_data = json.loads(rs.text)
    results = json_data.get("data", {}).get("results", [])
    data_list = []
    for data in results:
        data_dict = {}
        sourceId = data.get("adPositionId", "")
        sourceName = data.get("adPositionName", "")
        sourcePv = data.get("adPositionView", 0)
        pagePv = data.get("pageview", 0)
        if pagePv == None:
            pagePv = 0
        clickCount = data.get("click", 0)
        clickRate = data.get("clickRatio", 0)
        income = data.get("income", 0)
        cpm = data.get("ecpm", 0)
        if cpm == None:
            cpm = 0
        if sourcePv == None:
            sourcePv = 0
        if clickCount == None:
            clickCount = 0
        if clickRate == None:
            clickRate = 0
        if income == None:
            income = 0
        if clickRate == None:
            clickRate = 0

        data_dict["logTime"] = yesterday
        data_dict["sourceId"] = sourceId
        data_dict["sourceName"] = sourceName
        data_dict["sourcePv"] = sourcePv
        data_dict["pagePv"] = pagePv
        data_dict["clickCount"] = clickCount
        data_dict["clickRate"] = clickRate
        data_dict["income"] = income
        data_dict["cpm"] = cpm

        data_list.append(data_dict)
        print sourceId, sourceName, sourcePv, pagePv, clickCount, clickRate, income, cpm

    datas["data"] = data_list
    logger.info(datas)
    print datas
Ejemplo n.º 2
0
def get_info(session, token):
    '''
    :param session: 登陆session
    :param token: 登陆token
    :return: 内容数据
    :name:豆盟   5
    '''
    logger = log.getLog("doumob")
    data = {"platformId": 5, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    data["date"] = datetime.datetime.now().strftime('%Y%m%d')

    header = {
        "Referer":
        "https://www.doumob.com/front/",
        "token":
        token,
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    }
    data_list = []
    name_list = session.get(list_url, headers=header)
    lists = json.loads(name_list.text)
    list = lists.get("list", [])
    # print list
    # for k,i in media_dict.items():
    for li in list:
        k = li.get("medianame", "")
        i = li.get("id", "")
        url = "https://www.doumob.com/end/app/getHdggAdSpaceList?mediaId={}".format(
            i)
        response = session.get(url, headers=header)
        json_data = json.loads(response.text)
        lists = json_data['list']
        for list in lists:
            adspacename = list['adspacename']
            id = list['id']
            _url = DATA_URL.format(startDate=yesterday,
                                   endDate=yesterday,
                                   mediaId=i,
                                   hdggadspaceId=id)
            content = session.get(_url, headers=header)
            content_json = json.loads(content.text)
            if (content_json['list']):
                data_dict = {}
                data_dict["logTime"] = yesterday.replace("-", "")
                data_dict["mediaName"] = k
                data_dict['sourceName'] = adspacename
                data_dict['uv'] = content_json['list'][0]['uv']
                data_dict['income'] = content_json['list'][0]['hdggMoney']
                data_list.append(data_dict)
                print k, adspacename, content_json['list'][0][
                    'uv'], content_json['list'][0]['hdggMoney']
    data["data"] = data_list
    logger.info(data)
    print data
Ejemplo n.º 3
0
def get_info(session):
    '''
    :param session: 登陆session
    :return: 内容数据
    :name: 金立ssp广告数据
    '''
    logger = log.getLog("sspdev")
    data = {"platformId": 6, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    data["date"] = datetime.datetime.now().strftime('%Y%m%d')
    session.get("http://sspdev.gionee.com/report/index")
    PHPSESSID = session.cookies.items()[-1][1]
    header = {
        "Cookie":
        "PHPSESSID={}".format(PHPSESSID),
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Referer":
        "http://sspdev.gionee.com/report/index",
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    }
    data_list = []
    response = session.get(DATA_URL.format(yesterday, yesterday),
                           headers=header)
    soup = BeautifulSoup(response.text, "lxml")
    trs = soup.find("table", {"class": "table"}).find("tbody").find_all("tr")
    for tr in trs[1:]:
        data_dict = {}
        tds = tr.find_all("td")
        name = tds[0].get_text().strip()
        shownum = tds[4].get_text().strip()
        clicknum = tds[5].get_text().strip()
        money = tds[-1].get_text().strip()
        data_dict["logTime"] = yesterday.replace("-", "")
        data_dict["sourceName"] = name
        data_dict["sourcePv"] = shownum
        data_dict["clickCount"] = clicknum
        data_dict["income"] = money
        data_list.append(data_dict)
        print yesterday, name, shownum, clicknum, money
    data["data"] = data_list
    logger.info(data)
    print data
Ejemplo n.º 4
0
def get_session(session, user_name, password):
    '''
    :param session: 登陆session
    :param user_name: 用户名
    :param password: 密码
    :return: 内容数据
    :name: 搜狗
    '''

    logger = log.getLog("sogou")
    data = {"platformId": 7, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')
    data["date"] = datetime.datetime.now().strftime('%Y%m%d')
    header = {
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    }
    pic = get_picsnum(session)

    datas = {
        "systemType": 1,
        "loginFromPage": "homePage",
        "username": user_name,
        "password": password,
        "activecode": pic
    }
    session.post("http://union.sogou.com/loginauth.action",
                 headers=header,
                 data=datas)
    xx = session.get(
        "http://union.sogou.com/stat/product_stat!query.action?unionid=17getfun",
        headers=header)
    soup = BeautifulSoup(xx.text, "lxml")
    data_list = []
    content_data = soup.find_all("span", {"class": "pronumauto"})
    data_dict = {}
    data_dict["logTime"] = yesterday
    data_dict["sourcePv"] = content_data[0].get_text().strip().replace(",", "")
    data_dict["clickCount"] = content_data[1].get_text().strip()
    data_dict["income"] = content_data[2].get_text().strip()
    data_list.append(data_dict)
    data["data"] = data_list
    logger.info(data)
    print data
Ejemplo n.º 5
0
def get_info(session):
    '''
   :param param: 登陆session
   :return: 内容数据
   :name:点冠   8
    '''
    logger = log.getLog("aiclk")
    datas = {"platformId": 8, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')
    datas["date"] = datetime.datetime.now().strftime('%Y%m%d')
    cpc_ssp = session.cookies.items()[0][-1]
    header = {
        "Cookie":
        "cpc-ssp={}".format(cpc_ssp),
        "Referer":
        "http://union.aiclk.com/",
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    }
    response = session.get(DATA_URL.format(yesterday, yesterday),
                           headers=header)
    soup = BeautifulSoup(response.text, "lxml")
    if soup:
        data_list = []
        json_data = json.loads(soup.text)
        for data in json_data:
            data_dict = {}
            adslot_name = data.get("adslot_name", "")  #广告位
            click = data.get("click", 0)  #点击数
            impression = data.get("impression", 0)  #展现数
            income = data.get("income", 0)  #收入
            ctr = data.get("ctr", 0.0)
            #点击率
            data_dict["logTime"] = yesterday
            data_dict["sourceName"] = adslot_name
            data_dict["sourcePv"] = impression
            data_dict["clickCount"] = click
            data_dict["income"] = income
            data_list.append(data_dict)
            print yesterday, adslot_name, click, impression, income, ctr
    datas["data"] = data_list
    print datas
Ejemplo n.º 6
0
def get_info(session):
    '''
    :param session: 登陆session
    :return: 内容数据
    :name: 好看
    '''
    logger = log.getLog("mmp")
    data = {"platformId": 10, "accountId": 1}
    data["date"] = datetime.datetime.now().strftime('%Y%m%d')
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    url = "https://mmp.levect.com/image/report?pageSize=10000&pageNo=1"
    header = {
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
        "Referer": "https://mmp.levect.com/page/imageReport",
    }
    datas = {"startReleaseTime": "2019-02-01", "endReleaseTime": yesterday}
    rs = session.post(url, headers=header, data=datas)
    json_data = json.loads(rs.text)
    data_list = json_data.get("data", {}).get("list", [])
    list_data = []
    for d in data_list:
        dict_data = {}
        clickCount = d.get("imgPv", 0)
        tm = d.get("releaseTime", 0)
        title = d.get("title", "")
        # dict_data["logTime"]=yesterday.replace("-","")
        dict_data["logttime"] = time.strftime("%Y%m%d",
                                              time.localtime(tm / 1000))
        dict_data["title"] = title
        dict_data["isJingpin"] = 1
        dict_data["clickCount"] = clickCount
        list_data.append(dict_data)
        print title, 1, clickCount
    data["data"] = list_data
    logger.info(data)
    print data
Ejemplo n.º 7
0
def get_list(token):
    logger = log.getLog("umeng")
    data = {"platformId": 9, "accountId": 1}
    yesterday = (datetime.datetime.now() -
                 datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    data["date"] = datetime.datetime.now().strftime('%Y%m%d')
    # header = {
    #     "Cookie":"umplus_uc_token={}".format(token),
    #     "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    # }
    rs = token.get(LIST_URL)

    json_data = rs.json()
    lists = json_data.get("data", {}).get("getsitelist",
                                          {}).get("items", {}).get("main", [])
    data_list = []
    for list in lists:
        data_dict = {}
        siteid = list.get("siteid", "")
        name = list.get("name", "")
        domain = list.get("domain", "")
        url = INFO_URL.format(siteid, yesterday, yesterday)
        info = token.get(url)
        info_json = info.json()
        items = info_json.get("data", {}).get("summary", {}).get("items", {})
        if items:
            pv = items.get("pv", 0)
            uv = items.get("uv", 0)
            data_dict["logTime"] = yesterday.replace("-", "")
            data_dict["url"] = name + "-" + domain
            data_dict["pagePv"] = pv
            data_dict["uv"] = uv
            data_list.append(data_dict)
            print name, pv, uv
    data["data"] = data_list
    logger.info(data)
    print data
Ejemplo n.º 8
0
        data_list.append(data_dict)
        print sourceId, sourceName, sourcePv, pagePv, clickCount, clickRate, income, cpm

    datas["data"] = data_list
    logger.info(datas)
    print datas


def dowloadimg(name, pwd):
    session = requests.Session()
    tm = int(time.time())
    url = "https://cas.baidu.com/?action=image2&appid=6&key={}".format(tm)
    # url="http://cas.baidu.com/?action=image"
    imgresponse = session.get(url, stream=True)  # 以流的方式打开
    image = imgresponse.content
    with open("img.jpg", "wb") as jpg:
        jpg.write(image)

    x = fateadm_api.TestFunc()
    print x
    get_session(session, x, name, pwd)


if __name__ == "__main__":
    for log in login_name:
        name = log.get("name", "")
        pwd = log.get("pwd", "")
        print "start crawl {}".format(name)
        dowloadimg(name, pwd)
    # get_info("")