Exemple #1
0
    def __init__(self):
        """Create the log directory and file name, then attach a file handler to the root logger.

        Sets ``nowDate``, ``logPath``, ``logName``, ``logFile``, ``logFormat``,
        ``formatter``, ``handler`` and ``logger`` on the instance.
        """
        # Two separate clock reads: the first selects the log directory, the
        # second is stored on the instance.
        dirStamp = common.getNowTime()
        self.nowDate = common.getNowTime()

        # Make sure the log directory exists.
        self.logPath = common.getDirPath(dirStamp, "log")
        common.mkdir(self.logPath)

        # Build the log file name from the current local time.
        # NOTE(review): '%M' (minutes) appears twice in the pattern; the last
        # field may have been meant to be something finer-grained -- confirm.
        fileStamp = time.strftime('%Y%m%d_%H_%M_%S_%M', time.localtime())
        self.logName = 'jsTag_' + fileStamp + '.log'
        self.logFile = self.logPath + self.logName

        # 1. Formatter: the layout of every log record.
        self.logFormat = '%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s'
        self.formatter = logging.Formatter(self.logFormat)

        # 2. Handler: appends records to the log file and accepts every level
        #    from DEBUG up (CRITICAL > ERROR > WARNING > INFO > DEBUG).
        fileHandler = logging.FileHandler(self.logFile, mode='a')
        fileHandler.setLevel(logging.DEBUG)
        fileHandler.setFormatter(self.formatter)
        self.handler = fileHandler

        # 3. Logger: the root logger receives the handler.
        # NOTE(review): the logger's own level is not set here, so the logging
        # module's default level still filters records before the handler.
        rootLogger = logging.getLogger()
        rootLogger.addHandler(fileHandler)
        self.logger = rootLogger
Exemple #2
0
def test_AOL():
    """Scrape the AOL ONE Mobile seller report and save it as an Excel sheet.

    Each attempt starts a headless Chrome, logs in with the credentials
    returned by ``common_mysql.selectFromTb("AOL")``, sets the report date
    range, switches the report to the ad-tag dimension, parses the result
    table with BeautifulSoup and writes it to an ``xlwt`` workbook saved under
    ``common.getDirPath(...)``. Up to five attempts are made; a failed attempt
    has its traceback printed and passed to
    ``comm_logging.myLogger.write_logger``.
    """
    print("............AOL................")

    def getDate():
        """Return the report date as 'YYYY-MM-DD'.

        The date is two days before now; when that moment's hour is 03 or
        earlier, one more day is subtracted.
        """
        d = datetime.now() + timedelta(days=-2)
        if d.hour <= 3:
            d = d + timedelta(days=-1)
        return d.strftime('%Y-%m-%d')

    for try_num in range(5):
        print("AOL第" + str(try_num + 1) + "次尝试-----------")
        # Bound before the try so the cleanup below is safe even when Chrome
        # itself fails to start.
        browser = None
        succeeded = False
        try:
            chrome_options = webdriver.ChromeOptions()
            # Run Chrome headless (no visible window).
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            # The user agent has to be passed through the --user-agent switch;
            # a bare string is not interpreted as a user agent by Chrome.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36')
            browser = webdriver.Chrome(chrome_options=chrome_options)
            loginURL = "https://onemobile.aol.com/"
            dataURL = "https://onemobile.aol.com/#/seller/39625/reports"
            browser.get(loginURL)
            print("点击登录---------------------------------")
            browser.find_element_by_id("native-login").click()
            time.sleep(15)
            username, password = common_mysql.selectFromTb("AOL")
            print("输入用户名---------------------------------")
            browser.find_element_by_xpath("//input[@placeholder='Username']").send_keys(username)
            time.sleep(2)
            print("确认用户名,点击下一步---------------------------------")
            browser.find_element_by_xpath("//input[@name='callback_2']").click()
            time.sleep(10)
            print("输入密码---------------------------------")
            browser.find_element_by_xpath("//input[@placeholder='Password']").send_keys(password)
            time.sleep(2)
            print("点击登录按钮---------------------------------")
            browser.find_element_by_xpath("//input[@name='callback_2']").click()
            time.sleep(10)
            browser.get(dataURL)

            # Give the report page 15 seconds to load, printing a countdown.
            for remaining in range(15, 0, -1):
                print("网页加载中……倒计时" + str(remaining) + "秒后开始操作……")
                time.sleep(1)

            # A modal dialog shows up only some of the time; close it when it
            # is there. Failing to find it is logged but is not fatal.
            try:
                print("点击X")
                browser.find_element_by_xpath("//a[@data-dismiss='modal']").click()
                print("关闭弹窗成功")
            except Exception:
                errorInfo = traceback.format_exc()
                print(errorInfo)
                comm_logging.myLogger.write_logger(errorInfo)

            # Compute the date once so the printed value, the start date and
            # the end date cannot drift apart while the attempt is running.
            reportDate = getDate()
            print("选择开始日期%s---------------------------------" % reportDate)
            browser.find_element_by_xpath("//input[@placeholder='YYYY-MM-DD'][1]").clear()
            time.sleep(3)
            browser.find_element_by_xpath("//input[@placeholder='YYYY-MM-DD'][1]").send_keys(reportDate)
            browser.find_element_by_xpath("//input[@placeholder='YYYY-MM-DD'][2]").clear()
            time.sleep(3)
            print("选择结束日期---------------------------------")
            browser.find_element_by_xpath("//input[@placeholder='YYYY-MM-DD'][2]").send_keys(reportDate)
            time.sleep(3)
            print("点击确认日期---------------------------------")
            # Original author's note: three <a> elements carry the class
            # 'datepicker-apply-button pendo-id-datepicker-apply-button', so
            # the button is located through its parent panel instead.
            browser.find_element_by_xpath("//div[@class='datepicker-button-panel']/a").click()
            print("点击查询---------------------------------")
            browser.find_element_by_xpath("//button[@class='e-btn button-primary pendo-id-generate-report']").click()
            print("点击第一行数据---------------------------------")
            time.sleep(5)
            browser.find_element_by_xpath("//tbody[@aria-live='polite']/tr[1]").click()
            print("点击维度选择---------------------------------")
            time.sleep(2)
            browser.find_elements_by_xpath("//span[@class='title']")[2].click()
            time.sleep(1)
            print("点击tag维度---------------------------------")
            browser.find_element_by_xpath("//li[@data-sid='report-dimension-adTagId']").click()
            print("等待加载数据---------------------------------")
            time.sleep(10)
            dimensionsOfdata = ["Ad Tag", "Requests", "Served", "Delivered", "Fill Rate", "Clicks", "CTR", "Revenue",
                                "eCPM", "RPM"]
            workbook = xlwt.Workbook(encoding='utf-8')
            worksheet = workbook.add_sheet('AOL')

            # Header row.
            for col, title in enumerate(dimensionsOfdata):
                worksheet.write(0, col, title)

            print("开始爬数据")
            soup = BeautifulSoup(browser.page_source, "lxml")
            table = soup.find("table",
                              {"class": "table table-body table-nexage tablesorter tablesorter-default hasResizable"})
            tbody = table.find("tbody")
            # One sheet row per <tr>. The column index restarts for every row
            # and still advances over empty cells, which are not written.
            for row, tr in enumerate(tbody.findAll("tr"), start=1):
                for col, td in enumerate(tr.findAll("td")):
                    text = td.text
                    if text.strip() == '':
                        continue
                    if col == 0:
                        # First column is written as text.
                        worksheet.write(row, col, text)
                    else:
                        # Other columns are numeric once the thousands
                        # separators, '%' and '$' are removed.
                        worksheet.write(row, col, float(text.replace(",", "").replace("%", "").replace("$", "")))

            nowTime = common.getNowTime()
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "AOL")
            common.mkdir(path)
            workbook.save(path + excelName)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||AOL抓取完毕||||||||||||||")
            succeeded = True
        except Exception:
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            if browser is not None:
                browser.quit()
        # Decided outside the finally clause so an in-flight exception such as
        # KeyboardInterrupt is never discarded by a break.
        if succeeded:
            break
Exemple #3
0
def test_Mobfox():
    """Fetch yesterday's Mobfox exchange report as JSON and save it to Excel.

    Each attempt starts a headless Chrome, logs in with the credentials
    returned by ``common_mysql.selectFromTb("Mobfox")``, opens the reporting
    pages and then the report API URL, parses the JSON found in the page's
    ``<pre>`` element and writes one sheet row per result. Up to three
    attempts are made; a failed attempt has its traceback printed and passed
    to ``comm_logging.myLogger.write_logger``.
    """
    print("............Mobfox................")
    for try_num in range(3):
        print("Mobfox第" + str(try_num + 1) + "次尝试-----------")
        yesterday = common.getNowTime(delta=-1, type="-")[0:10]
        # Bound before the try so the cleanup below is safe even when Chrome
        # itself fails to start.
        browser = None
        succeeded = False
        try:
            chrome_options = webdriver.ChromeOptions()
            # Run Chrome headless (no visible window).
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            # The user agent has to be passed through the --user-agent switch;
            # a bare string is not interpreted as a user agent by Chrome.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )

            browser = webdriver.Chrome(chrome_options=chrome_options)
            # NOTE(review): the URLs below embed an API key and an account
            # hash in source; consider loading them from configuration.
            loginURL = "https://account.mobfox.com/www/cp/login.php"
            dataURL = "https://account.mobfox.com/www/cp/exchange_reporting.php"
            dataInnerURL = "https://account.mobfox.com/react/exchange-reporting?apikey=065f325c0728e09132ebf4cedfe10ed3&accountid=72511&hash=a305f8299d4f99e855a6a8aa26d3a221&env=prod&siteRoot=https://account.mobfox.com:443/&apiRoot=https://api-v3.mobfox.com"
            dataLoadURL = "https://api-v3.mobfox.com/publisher/report?apikey=065f325c0728e09132ebf4cedfe10ed3&from=" + yesterday + "&to=" + yesterday + "&tz=Asia%2FHong_Kong&group=inventory_id&timegroup=day&totals=total_impressions%2Ctotal_served%2Ctotal_ad_source_opportunities%2Ctotal_clicks%2Ctotal_earnings&f%3Aad_source=exchange&o%3Ainclude_entities=true"
            browser.get(loginURL)
            time.sleep(2)
            print("输入用户名密码---------------------------------")
            username, password = common_mysql.selectFromTb("Mobfox")
            browser.find_element_by_id("email").send_keys(username)
            browser.find_element_by_id("password").send_keys(password)

            # A consent pop-up shows up only some of the time; accept it when
            # it is there. Failing to find it is logged but is not fatal.
            try:
                print("点击accept")
                browser.find_element_by_xpath(
                    '//a[@class="optanon-allow-all"]').click()
                print("关闭弹窗成功")
            except Exception:
                errorInfo = traceback.format_exc()
                comm_logging.myLogger.write_logger(errorInfo)
                print(errorInfo)
                print("无弹窗 或 弹窗关闭失败")

            time.sleep(5)
            browser.find_element_by_xpath(".//*[@type='submit']").click()
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(2)
            browser.get(dataInnerURL)
            print("自动选择数据维度内容---------------------------------")
            browser.get(dataLoadURL)
            time.sleep(2)

            workbook = xlwt.Workbook(encoding='utf-8')
            worksheet = workbook.add_sheet('Mobfox')
            print("开始爬数据")

            # The API response is rendered inside a <pre> element as JSON.
            soup = BeautifulSoup(browser.page_source, "lxml")
            dataJson = json.loads(soup.find("pre").text)
            # Not used below; the lookup is kept so a payload without a
            # "columns" key still fails the attempt as it did before.
            dataCol = dataJson["columns"]
            dataResults = dataJson["results"]
            inventoryNames = dataJson["entities"]["inventory_id"] if dataResults else {}

            dimensions = [
                "day", "inventory", "source_opportunities", "total_served",
                "total_impressions", "total_clicks", "ctr", "fillrate",
                "total_earnings", "ecpm"
            ]
            for col, title in enumerate(dimensions):
                worksheet.write(0, col, title)

            # NOTE(review): the source indices and the three ratios below are
            # reproduced unchanged. Which metric each result index holds is
            # not visible here (dataCol would tell) -- confirm the formulas.
            for row, rec in enumerate(dataResults, start=1):
                inventoryID = str(rec[1])
                inventoryName = inventoryNames[inventoryID]["name"]
                if rec[4] != 0:
                    ctr = rec[5] / rec[4] * 100
                    ecpm = rec[6] / rec[4] * 100
                else:
                    ctr = "#DIV/!"
                    ecpm = "#DIV/!"
                if rec[2] != 0:
                    fillRate = rec[4] / rec[2] * 100
                else:
                    fillRate = "#DIV/!"
                cells = [
                    rec[0],
                    inventoryName + " (" + inventoryID + ")",
                    rec[4],
                    rec[3],
                    rec[2],
                    rec[5],
                    ctr,
                    fillRate,
                    rec[6],
                    ecpm,
                ]
                for col, value in enumerate(cells):
                    worksheet.write(row, col, value)

            nowTime = common.getNowTime()
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Mobfox")
            common.mkdir(path)
            workbook.save(path + excelName)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Mobfox抓取完毕||||||||||||||")
            succeeded = True
        except Exception:
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            if browser is not None:
                browser.quit()
        # Decided outside the finally clause so an in-flight exception such as
        # KeyboardInterrupt is never discarded by a break.
        if succeeded:
            break
Exemple #4
0
def test_Mobfox():
    """Scrape yesterday's Mobfox exchange report table and save it to Excel.

    Each attempt starts Chrome with a visible window, logs in, opens the
    reporting pages, selects "Yesterday", triggers the Excel download dialog,
    then parses the on-page data table with BeautifulSoup and writes it to an
    ``xlwt`` workbook. Up to three attempts are made; a failed attempt has its
    traceback printed and passed to ``comm_logging.myLogger.write_logger``.
    """
    # Credentials come from the same lookup the sibling scrapers use instead
    # of being hard-coded in source.
    from public import common_mysql

    print("............Mobfox................")
    for try_num in range(3):
        print("Mobfox第" + str(try_num + 1) + "次尝试-----------")
        # Bound before the try so the cleanup below is safe even when Chrome
        # itself fails to start.
        browser = None
        succeeded = False
        try:
            chrome_options = webdriver.ChromeOptions()
            # Headless mode is intentionally left off for this scraper.
            # The user agent has to be passed through the --user-agent switch;
            # a bare string is not interpreted as a user agent by Chrome.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )

            browser = webdriver.Chrome(chrome_options=chrome_options)
            # NOTE(review): dataInnerURL embeds an API key and an account hash
            # in source; consider loading them from configuration.
            loginURL = "https://account.mobfox.com/www/cp/login.php"
            dataURL = "https://account.mobfox.com/www/cp/exchange_reporting.php"
            dataInnerURL = "https://account.mobfox.com/react/exchange-reporting?apikey=065f325c0728e09132ebf4cedfe10ed3&accountid=72511&hash=a305f8299d4f99e855a6a8aa26d3a221&env=prod&siteRoot=https://account.mobfox.com:443/&apiRoot=https://api-v3.mobfox.com"
            browser.get(loginURL)
            time.sleep(2)
            print("输入用户名密码---------------------------------")
            username, password = common_mysql.selectFromTb("Mobfox")
            browser.find_element_by_id("email").send_keys(username)
            browser.find_element_by_id("password").send_keys(password)

            # A consent pop-up shows up only some of the time; accept it when
            # it is there. Failing to find it is logged but is not fatal.
            try:
                print("点击accept")
                browser.find_element_by_xpath(
                    '//a[@class="optanon-allow-all"]').click()
                print("关闭弹窗成功")
            except Exception:
                errorInfo = traceback.format_exc()
                comm_logging.myLogger.write_logger(errorInfo)
                print(errorInfo)
                print("无弹窗 或 弹窗关闭失败")

            time.sleep(5)
            browser.find_element_by_xpath(".//*[@type='submit']").click()
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(2)
            browser.get(dataInnerURL)
            print("自动选择数据维度内容---------------------------------")
            time.sleep(10)

            # The two lookups below have no use for their results; they are
            # kept because a missing element still aborts the attempt.
            browser.find_element_by_xpath('//div[@id="content"]')
            browser.find_element_by_xpath(
                '//html[@class=" supports cssfilters"]')
            browser.find_element_by_xpath('//li[text()="Yesterday"]').click()
            browser.find_element_by_xpath(
                '//button[@title="Download Excel"]').click()

            time.sleep(2)
            browser.find_element_by_xpath('//button[@type="submit"]').click()

            dimensionsOfdata = [
                "col-day sorting_1", "col-inventory_id",
                "col-total_ad_source_opportunities", "col-total_served",
                "col-total_impressions", "col-total_clicks", "col-ctr",
                "col-fillrate", "col-total_earnings", "col-ecpm"
            ]
            workbook = xlwt.Workbook(encoding='utf-8')
            worksheet = workbook.add_sheet('Mobfox')
            print("开始爬数据")
            soup = BeautifulSoup(browser.page_source, "lxml")
            tbody = soup.find("div", {
                "class": "dataTables_scrollBody"
            }).find("tbody")

            # Header row.
            for col, title in enumerate(dimensionsOfdata):
                worksheet.write(0, col, title)

            # One sheet row per <tr>; the column index restarts for every row.
            for row, tr in enumerate(tbody.findAll("tr"), start=1):
                for col, td in enumerate(tr.findAll("td")):
                    text = td.text.strip()
                    # Store a number when the cell parses as one after the
                    # thousands separators, '%' and '$' are removed; otherwise
                    # store the stripped text.
                    try:
                        value = float(
                            text.replace(",", "").replace("%", "").replace("$", ""))
                    except ValueError:
                        value = text
                    worksheet.write(row, col, value)

            nowTime = common.getNowTime()
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Mobfox")
            common.mkdir(path)
            workbook.save(path + excelName)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Mobfox抓取完毕||||||||||||||")
            succeeded = True
        except Exception:
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            if browser is not None:
                browser.quit()
        # Decided outside the finally clause so an in-flight exception such as
        # KeyboardInterrupt is never discarded by a break.
        if succeeded:
            break
Exemple #5
0
def test_Mopub():
    """Scrape the Mopub banner and native dashboards into one Excel workbook.

    Each attempt starts a headless Chrome, opens the dashboard URL for every
    entry of ``dataRes`` (logging in on the first one with the credentials
    returned by ``common_mysql.selectFromTb("Mopub")``), parses the page with
    BeautifulSoup and writes one sheet per entry. Up to three attempts are
    made; a failed attempt has its traceback printed and passed to
    ``comm_logging.myLogger.write_logger``.
    """
    print("............Mopub................")

    # The two dashboard links differ only in the ad-size filter.
    adSizeFilters = {
        "Mopub_Banner": "fs.0.v.0=300x250&fs.0.v.1=320x50",
        "Mopub_Native": "fs.0.v.0=0x0&fs.0.v.1=320x480",
    }

    def getURL(resource):
        """Return the dashboard URL for *resource*.

        The range end is yesterday's date and the start is the day before it;
        when yesterday's moment has hour 03 or earlier, both move one day
        further back.

        Raises:
            ValueError: *resource* is not a known report name.
        """
        if resource not in adSizeFilters:
            raise ValueError("unknown Mopub report: %r" % (resource,))
        d = datetime.now() + timedelta(days=-1)
        endDay = d + timedelta(days=-1) if d.hour <= 3 else d
        startDay = endDay + timedelta(days=-1)
        return (
            "https://dash.metamarkets.com/mopub-360_mobile_security/explore#ed=app_name&fs.0.k=ad_size&"
            + adSizeFilters[resource]
            + "&fs.1.k=timestamp&fs.1.t.0.tr.end=" + endDay.strftime('%Y-%m-%d')
            + "T16&fs.1.t.0.tr.start=" + startDay.strftime('%Y-%m-%d')
            + "T16&gm.0=rev_adj&gm.1=auctions&gm.2=win_rate_v2&gm.3=cleared_done&gm.4=ctr&gm.5=ecpm&gm.6=uniques&od.0=ad_size&od.1=adgroup_priority&od.2=adunit_name&od.3=app_version&od.4=app_name&od.5=country&od.6=creative_id&od.7=pub_id&sbd=0&sortBy=rev_adj&sortDim=0&sortDir=descending&td=time_day&tm.0=rev_adj&tm.1=auctions&tm.2=win_rate_v2&tm.3=cleared_done&tm.4=ctr&tm.5=ecpm&tm.6=uniques&tz=Asia~2fShanghai&zz=4"
        )

    # Each entry is the colid attribute of the <div>s holding that metric.
    dimensionsOfdata = [
        "auctions", "cleared_done", "uniques", "rev_adj",
        "win_rate_v2", "ctr", "ecpm"
    ]
    dataRes = ["Mopub_Banner", "Mopub_Native"]

    for try_num in range(3):
        print("Mopub第" + str(try_num + 1) + "次尝试-----------")
        # Bound before the try so the cleanup below is safe even when Chrome
        # itself fails to start.
        browser = None
        succeeded = False
        try:
            chrome_options = webdriver.ChromeOptions()
            # Run Chrome headless (no visible window).
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            # The user agent has to be passed through the --user-agent switch;
            # a bare string is not interpreted as a user agent by Chrome.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )

            browser = webdriver.Chrome(chrome_options=chrome_options)
            workbook = xlwt.Workbook(encoding='utf-8')

            for resouce in dataRes:
                worksheet1 = workbook.add_sheet(resouce)
                # Build the link once so the printed and the visited URL are
                # guaranteed to be the same.
                url = getURL(resouce)
                browser.get(url)
                print("现在开始抓取" + resouce + "的数据。" + "\n链接:" + url)
                # Mopub_Banner runs first, so logging in is only done there.
                if resouce == "Mopub_Banner":
                    username, password = common_mysql.selectFromTb("Mopub")
                    browser.find_element_by_id("form-id1").send_keys(username)
                    browser.find_element_by_id("form-id2").send_keys(password)
                    browser.find_element_by_xpath(
                        "//button[@class='primary login']").click()

                time.sleep(10)

                soup = BeautifulSoup(browser.page_source, "lxml")

                # Column 0: the app names, one sheet row per matching <div>.
                # NOTE(review): the original author notes that the header
                # <div> also carries colid="app_name" but has no <span>
                # directly beneath it -- confirm `.span` resolves for it.
                for m, appName in enumerate(soup.findAll("div", colid="app_name")):
                    worksheet1.write(m, 0, appName.span.text)

                # Remaining columns: header in row 0, then one value per data
                # <div>; the first <div> of each column is skipped.
                for j, col in enumerate(dimensionsOfdata, start=1):
                    worksheet1.write(0, j, col)
                    colData = soup.findAll("div", colid=col)
                    # The row index follows the cell's position, so a cell
                    # that fails to parse leaves a gap instead of shifting the
                    # rest of the column out of line with the app names.
                    for i, cell in enumerate(colData[1:], start=1):
                        try:
                            worksheet1.write(i, j, float(cell.span["title"]))
                        except Exception as err:
                            print("Exception:", err)
                print(resouce + " 抓取完成----------------")

            nowTime = common.getNowTime()
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Mopub")
            common.mkdir(path)
            workbook.save(path + excelName)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Mopub抓取完毕||||||||||||||")
            succeeded = True
        except Exception:
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            # Single place where the browser is closed.
            if browser is not None:
                browser.quit()
        # Decided outside the finally clause so an in-flight exception such as
        # KeyboardInterrupt is never discarded by a break.
        if succeeded:
            break
Exemple #6
0
def test_Adview():
    """Download the Adview income CSV and convert it to an Excel file.

    Each attempt starts Chrome with a visible window and the download
    directory set to ``common.getDirPath(...)``, fills in the credentials
    returned by ``common_mysql.selectFromTb("Adview")``, shows a Windows
    message box asking the operator to type the captcha, logs in, exports the
    detail CSV, converts the newest matching download with
    ``common.turnToXls_ByPandas`` and removes the downloaded files. Up to five
    attempts are made; a failed attempt has its traceback printed and passed
    to ``comm_logging.myLogger.write_logger``.
    """
    print("............Adview................")
    for try_num in range(5):
        print("Adview第" + str(try_num + 1) + "次尝试-----------")
        # Bound before the try so the cleanup below is safe when anything
        # fails before Chrome has been started.
        browser = None
        succeeded = False
        try:
            # Adview data is taken for the day before yesterday.
            reportDay = common.getNowTime(-2)
            nowTime = common.getNowTime()
            # Date passed to the converter as the rows to keep, yyyy-mm-dd.
            match_Str = reportDay[0:4] + "-" + reportDay[5:7] + "-" + reportDay[8:10]
            # Prefix of the file name Adview gives the download.
            prefix = "AdView_" + nowTime[0:4] + nowTime[5:7] + nowTime[8:10]
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Adview")
            # Create the download directory before Chrome is pointed at it.
            common.mkdir(path)

            chrome_options = webdriver.ChromeOptions()
            # Headless mode is intentionally left off: the operator has to
            # type the captcha into the visible window.
            # The user agent has to be passed through the --user-agent switch;
            # a bare string is not interpreted as a user agent by Chrome.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36')
            # Redirect downloads to the report directory.
            prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': path}
            chrome_options.add_experimental_option('prefs', prefs)

            browser = webdriver.Chrome(chrome_options=chrome_options)
            loginURL = "http://www.adview.cn/web/overseas/login"
            dataURL = "http://www.adview.cn/user/bid/income"
            browser.get(loginURL)
            time.sleep(5)
            print("输入用户名密码---------------------------------")
            browser.find_element_by_xpath('//span[text()="Publishers"]').click()
            username, password = common_mysql.selectFromTb("Adview")
            browser.find_element_by_id("email").send_keys(username)
            browser.find_element_by_id("pwd").send_keys(password)
            # Windows-only message box (ctypes.windll); the call returns once
            # the operator dismisses it.
            ctypes.windll.user32.MessageBoxA(0, u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                                             u' 信息'.encode('gb2312'), 0)
            print("请输入验证码,等待 15秒")
            # Wait the full 15 seconds that the message box promises.
            for remaining in range(15, 0, -1):
                print("倒计时:", remaining)
                time.sleep(1)
            print("点击登录按钮------")
            browser.find_element_by_xpath('//button[@class="form-control btn btn-blue blue submitBtn"]').click()
            time.sleep(5)
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(5)
            # Switch the page to English; a failure here is logged but is not
            # fatal.
            try:
                browser.find_element_by_xpath('//a[text()="English"]').click()
            except Exception:
                errorInfo = traceback.format_exc()
                print(errorInfo)
                comm_logging.myLogger.write_logger(errorInfo)
            time.sleep(2)
            print("点击下载")
            browser.find_element_by_xpath('//input[@value="Export detail CSV"]').click()
            time.sleep(2)

            # Find the downloaded Adview files in the report directory.
            print("正在扫描下载的 csv 文件---------")
            fileList = common.scan_File(path, prefix)
            # Convert the last file of the scan result, then remove the
            # downloads.
            print("正在剔除其他日期的数据 并另存为 excel ---------")
            common.turnToXls_ByPandas(path + fileList[-1], path + excelName, 'Adview', match_Str)
            common.remove_File(path, prefix)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Adview抓取完毕||||||||||||||")
            succeeded = True
        except Exception:
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            if browser is not None:
                browser.quit()
        # Decided outside the finally clause so an in-flight exception such as
        # KeyboardInterrupt is never discarded by a break.
        if succeeded:
            break
Exemple #7
0
def test_Pubnative():
    """Fetch the Pubnative report through the web dashboard and save it as Excel.

    Makes up to 3 attempts. Each attempt starts a Chrome session whose download
    directory is the path returned by ``common.getDirPath``, logs in with the
    credentials returned by ``common_mysql.selectFromTb("Pubnative")``, drives
    the report page, clicks the download link and passes the downloaded CSV to
    ``common.turnToXls_ByPandas``. A failed attempt is printed, written to
    ``comm_logging.myLogger`` and retried; the loop stops after the first
    successful attempt. Returns nothing.
    """
    print("............Pubnative................")
    flag = False
    for try_num in range(3):
        print("Pubnative第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            yesterday = common.getNowTime(-1)
            yyyy = yesterday[0:4]
            mm = yesterday[5:7]
            dd = yesterday[8:10]
            date_tag = dd + "." + mm + "." + yyyy
            # Prefix used to delete the matching downloaded files afterwards.
            prefix = "Publisher App_" + date_tag + "-" + date_tag
            # Name of the file the dashboard is expected to download.
            downloadFile = prefix + ".csv"
            nowTime = common.getNowTime()
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Pubnative")

            chrome_options = webdriver.ChromeOptions()
            # Headless mode is intentionally left off for this dashboard.
            # The user agent must be passed as a --user-agent=<value> switch;
            # a bare string is not a switch Chrome recognises.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            # Redirect downloads into the report directory.
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path
            }
            chrome_options.add_experimental_option('prefs', prefs)

            # Start the browser and open the login page.
            browser = webdriver.Chrome(chrome_options=chrome_options)
            loginURL = "https://dashboard.pubnative.net"
            dataURL = "https://dashboard.pubnative.net/partner/#!/api"
            browser.get(loginURL)
            time.sleep(5)
            print("输入用户名密码---------------------------------")
            from public import common_mysql
            username, password = common_mysql.selectFromTb("Pubnative")
            browser.find_element_by_id("email").send_keys(username)
            browser.find_element_by_id("password").send_keys(password)
            time.sleep(2)
            browser.find_element_by_xpath('//input[@value="LOGIN"]').click()
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(15)
            print("自动选择数据维度内容---------------------------------")
            browser.find_element_by_xpath('//div[@class="col-md-7"]').click()
            browser.find_element_by_xpath(
                '//div[@class="daterange"]/ul/li[2]').click()
            dimension_select = browser.find_element_by_tag_name("select")
            Select(dimension_select).select_by_index(12)
            browser.find_element_by_xpath(
                '//button[@class="btn btn-secondary ng-star-inserted"]'
            ).click()
            time.sleep(2)
            browser.find_element_by_xpath(
                '//div[@class="reports-filters row"]/div[3]').click()
            print("准备下载...")
            time.sleep(2)

            print("点击下载")
            browser.find_element_by_xpath(
                '//div[@class="col-md order-disabled optional-buttons"]/a[2]'
            ).click()
            time.sleep(2)

            common.mkdir(path)
            print("读取 csv 另存为 excel")
            common.turnToXls_ByPandas(path + downloadFile, path + excelName,
                                      'Pubnative')
            print("删除多余的 csv")
            common.remove_File(path, prefix)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Pubnative抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # Exception (not a bare except) so Ctrl+C / SystemExit still stop
            # the run instead of triggering another attempt.
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            # The driver may never have been created if the failure happened
            # before or during webdriver.Chrome().
            if browser is not None:
                browser.quit()
        if flag:
            break
Exemple #8
0
def test_OpenX():
    """Export the OpenX report for a single day and save it as Excel.

    Makes up to 2 attempts. Each attempt starts a maximized Chrome session,
    logs in with the credentials from ``common_mysql.selectFromTb("OpenX")``
    (falling back to a manual-captcha login when the plain login fails), sets
    both custom date inputs to the date returned by ``common.getNowTime(-2)``,
    exports the report and passes the last file returned by
    ``common.scan_File`` to ``common.turnToXls_ByPandas``. A failed attempt is
    printed, written to ``comm_logging.myLogger`` and retried. Returns nothing.

    The captcha fallback shows a Windows message box via ``ctypes.windll``.
    """
    print("............OpenX................")
    nowTime = common.getNowTime()
    yearNow = nowTime[0:4]
    monthNow = nowTime[5:7]
    dayNow = nowTime[8:10]
    path = common.getDirPath(nowTime)
    # Prefix used to find and later delete the downloaded export.
    prefix = "Workspace 1-" + monthNow + "-" + dayNow + "-" + yearNow
    excelName = common.getExcelName(nowTime, "openx")

    # Up to 2 attempts; a failed attempt moves on to the next one.
    flag = False
    for try_num in range(2):
        print("OpenX第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            print("打开浏览器")
            chrome_options = webdriver.ChromeOptions()
            # Headless mode is intentionally left off for this site.
            # The user agent must be passed as a --user-agent=<value> switch;
            # a bare string is not a switch Chrome recognises.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path
            }
            chrome_options.add_experimental_option('prefs', prefs)
            browser = webdriver.Chrome(chrome_options=chrome_options)
            # Without maximizing, some buttons cannot be clicked and raise.
            browser.maximize_window()
            time.sleep(2)
            logInURL = "https://sso.openx.com/login/login"
            dataURL = "http://mobimagic-ui.openx.net/app.html#/reports/pie"

            # Report date: two days back, typed into the page as MM/DD/YYYY.
            yesterday = common.getNowTime(-2)
            yyyy = yesterday[0:4]
            mm = yesterday[5:7]
            dd = yesterday[8:10]
            report_date = mm + "/" + dd + "/" + yyyy

            print("打开登录网址")
            browser.get(logInURL)
            time.sleep(5)

            # First try the plain login. Any failure in it is treated as
            # "captcha required" and handled by the manual path below.
            username, password = common_mysql.selectFromTb("OpenX")
            try:
                browser.find_element_by_id("email").send_keys(username)
                time.sleep(3)
                browser.find_element_by_id('password').send_keys(password)
                time.sleep(2)
                browser.find_element_by_id("submit").click()
                time.sleep(5)
                browser.find_element_by_xpath(
                    '//a[text()="http:// mobimagic-ui.openx.net/"]').click()
            except Exception:
                errorInfo = traceback.format_exc()
                print(errorInfo)
                comm_logging.myLogger.write_logger(errorInfo)
                # Do not reload the page here: after a reload the captcha does
                # not appear and the login keeps failing.
                time.sleep(2)
                browser.find_element_by_id("email").clear()
                browser.find_element_by_id("email").send_keys(username)
                time.sleep(3)
                browser.find_element_by_id('password').clear()
                browser.find_element_by_id('password').send_keys(password)
                ctypes.windll.user32.MessageBoxA(
                    0, u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                    u' 信息'.encode('gb2312'), 0)
                print("请输入验证码,等待 15秒")
                # Wait the full 15 seconds announced to the operator.
                for remaining in range(15, 0, -1):
                    print("倒计时:", remaining)
                    time.sleep(1)
                browser.find_element_by_id("submit").click()

            print("模拟登录成功")
            print("打开数据网址")
            browser.get(dataURL)
            time.sleep(10)
            browser.find_element_by_xpath(
                '//div[@class="date-range-filter__input"]').click()
            browser.find_element_by_xpath(
                '//li[@class="date-range-filter__filter date-range-filter__filter--custom date-range-filter__filter__label"]'
            ).click()
            time.sleep(5)
            # The two "Enter Date" inputs are located as a list:
            # index 0 is the start date, index 1 is the end date.
            print("开始选择日期和维度")
            inputElements = browser.find_elements_by_css_selector(
                '[placeholder="Enter Date"]')
            for date_input in (inputElements[0], inputElements[1]):
                date_input.clear()
                date_input.send_keys(report_date)
            browser.find_element_by_xpath(
                '//div[@class="date-range-filter__custom-date-range-menu__buttons"]/div/button[2]'
            ).click()
            time.sleep(5)

            # Collapse the date bar and the chart; otherwise the download
            # button cannot be clicked.
            browser.find_element_by_xpath(
                '//div[@class="reports-pie-data-collection-container__collapse-trigger"]'
            ).click()
            time.sleep(1)
            browser.find_element_by_xpath(
                '//div[@class="reports-pie-collapse-toggle"]').click()
            time.sleep(1)
            # Open the export dropdown and choose the Excel entry.
            browser.find_element_by_xpath(
                '//div[@class="reports-pie-chart-toolkit__export"]/ox-dropdown'
            ).click()
            time.sleep(1)
            browser.find_element_by_xpath('//span[text()="Excel "]').click()
            time.sleep(10)

            common.mkdir(path)
            # Scan the folder for the downloaded export and convert the last
            # entry of the returned list.
            print("正在扫描下载的 xlsx 文件---------")
            fileList = common.scan_File(path, prefix)
            common.turnToXls_ByPandas(path + fileList[-1], path + excelName,
                                      'openx')
            common.remove_File(path, prefix)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||OpenX抓取完毕||||||||||||||")
            flag = True
        except Exception:
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            # The driver may never have been created if the failure happened
            # before or during webdriver.Chrome().
            if browser is not None:
                browser.quit()
        if flag:
            break
Exemple #9
0
def test_cm():
    """Scrape the cmcm placement report table and save it as an Excel file.

    Makes up to 5 attempts. Each attempt starts a headless Chrome session,
    logs in with the credentials from ``common_mysql.selectFromTb("cm")``,
    opens the per-day placement report, walks every pagination entry and
    writes the table rows into an ``xlwt`` workbook. The first two columns are
    stored as text and all others as floats with thousands separators
    removed. An attempt that finds no pagination entries, or that raises, is
    retried; exceptions are written to ``comm_logging.myLogger``. Returns
    nothing.
    """
    print("............cm................")

    # Up to 5 attempts; a failed attempt moves on to the next one.
    flag = False
    for try_num in range(5):
        print("cmcm第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            print("打开浏览器")
            chrome_options = webdriver.ChromeOptions()
            # Run Chrome headless.
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            # The user agent must be passed as a --user-agent=<value> switch;
            # a bare string is not a switch Chrome recognises.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            browser = webdriver.Chrome(chrome_options=chrome_options)
            logInURL = "http://console.cmcm.com"

            # Report day: one day back, or two days back when the current hour
            # is 03 or earlier.
            report_day = datetime.now() + timedelta(days=-1)
            if report_day.hour <= 3:
                report_day = report_day + timedelta(days=-1)
            day_str = report_day.strftime('%Y-%m-%d')
            dataURL = "http://console.cmcm.com/report/placement?by=day&from=" + day_str + "&to=" + day_str

            print("打开登录网址")
            browser.get(logInURL)
            time.sleep(2)
            print("模拟输入用户名密码中")
            username, password = common_mysql.selectFromTb("cm")
            browser.find_element_by_name('email').send_keys(username)
            browser.find_element_by_name('password').send_keys(password)
            browser.find_element_by_xpath(
                '//*[@id="login-form"]/div[3]/div/button').click()
            print("模拟登录成功")
            print("打开数据网址")
            browser.get(dataURL)
            time.sleep(20)

            pageNum = len(browser.find_elements_by_xpath('//li[@data-page]'))
            # Without this check an empty sheet would be saved; retry instead.
            if pageNum < 1:
                continue

            dimensionsOfdata = [
                "datekey", "item_placement", "backfill", "wins", "impressions",
                "requests", "fillrate", "winrate", "clicks", "ctr", "ecpm",
                "money"
            ]
            workbook = xlwt.Workbook(encoding='utf-8')
            worksheet = workbook.add_sheet('cmcm')

            # Row 0 is reserved for the header, so data starts at row 1.
            row = 1
            for page in range(pageNum):
                print("-------读取第" + str(page + 1) + "页----------")
                browser.find_element_by_xpath('//li[@data-page=' +
                                              str(page + 1) + ']').click()
                soup = BeautifulSoup(browser.page_source, "lxml")
                tbody = soup.find(
                    "table", {
                        "class": "bordered highlighted scrolling-table"
                    }).find("tbody")
                for tr in tbody.findAll("tr"):
                    for col, td in enumerate(tr.findAll("td")):
                        if col in (0, 1):
                            worksheet.write(row, col, td.text)
                        else:
                            worksheet.write(
                                row, col, float(td.text.replace(",", "")))
                    row = row + 1
                time.sleep(2)

            # Header row.
            for col, title in enumerate(dimensionsOfdata):
                worksheet.write(0, col, title)

            nowTime = common.getNowTime()
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "cmcm")
            common.mkdir(path)
            workbook.save(path + excelName)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||cmcm抓取完毕||||||||||||||")
            flag = True
        except Exception:
            flag = False
            errorInfo = traceback.format_exc()
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            # The driver may never have been created if the failure happened
            # before or during webdriver.Chrome().
            if browser is not None:
                browser.quit()
        if flag:
            break
Exemple #10
0
def test_Smaato():
    """Scrape the Smaato reporting summary table and save it as an Excel file.

    Makes up to 5 attempts. Each attempt starts a headless Chrome session,
    logs in with the credentials from ``common_mysql.selectFromTb("Smaato")``,
    picks the second date shortcut and the "Adspace" display option, parses
    the summary table from the page source and writes it into an ``xlwt``
    workbook with numeric columns converted to floats. A cell that fails to
    convert is left unwritten and its traceback is printed. A failed attempt
    is printed, written to ``comm_logging.myLogger`` and retried. Returns
    nothing.
    """
    print("............Smaato................")

    # Up to 5 attempts; a failed attempt moves on to the next one.
    flag = False
    for try_num in range(5):
        print("Smaato第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            chrome_options = webdriver.ChromeOptions()
            # Run Chrome headless.
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            # The user agent must be passed as a --user-agent=<value> switch;
            # a bare string is not a switch Chrome recognises.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            browser = webdriver.Chrome(chrome_options=chrome_options)
            loginURL = "https://spx.smaato.com/publisherportal/pages/login.xhtml"
            username, password = common_mysql.selectFromTb("Smaato")
            dataURL = "https://spx.smaato.com/publisherportal/pages/reporting/reporting.xhtml"
            print("进入登陆页---------------------------------")
            browser.get(loginURL)
            print("  输入用户名密码-------------------------")
            browser.find_element_by_id("j_username").send_keys(username)
            browser.find_element_by_id("j_password").send_keys(password)
            browser.find_element_by_xpath("//button[@type='submit']").click()
            time.sleep(5)
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(10)
            print("  点击日历表------------------")
            browser.find_element_by_xpath(
                "//span[@id='reporting:popup']").click()
            print("  选择yesterday----------------")
            browser.find_element_by_xpath(
                "//div[@class='drp_shortcuts-block1']/span[2]").click()
            print("  点击update----------------")
            browser.find_element_by_xpath(
                "//input[@class='apply-btn']").click()
            time.sleep(2)
            print("  点击display by:----------------")
            browser.find_element_by_xpath(
                "//label[@id='reporting:displayByMenu_label']").click()
            print("  点击Adspace----------------")
            # data-label holds the option value of the <li>.
            browser.find_element_by_xpath(
                "//li[@data-label='Adspace']").click()
            # Give the table time to refresh.
            time.sleep(5)
            # Header titles, one per column.
            dimensionsOfdata = [
                "Adspace", "Adspace ID", "Net Revenue", "Gross Revenue",
                "Ad Requests", "Served Ads", "Fillrate", "Impressions",
                "Viewrate", "Net eCPM", "Gross eCPM", "Clicks"
            ]
            workbook = xlwt.Workbook(encoding='utf-8')
            worksheet = workbook.add_sheet('Smaato')

            soup = BeautifulSoup(browser.page_source, "lxml")
            print("开始爬数据------------------")
            table = soup.find("tbody",
                              {"id": "reporting:reportingSummaryTable_data"})
            trSum = table.findAll("tr")
            time.sleep(2)
            # Row 0 is reserved for the header, so data starts at row 1.
            for row, tr in enumerate(trSum, 1):
                for col, td in enumerate(tr.findAll("td")):
                    try:
                        if col in (2, 3, 9, 10):
                            # Currency columns: drop "$" and thousands commas.
                            worksheet.write(
                                row, col,
                                float(
                                    td.text.replace("$", "").replace(",", "")))
                        elif col in (4, 5, 7, 11):
                            # Count columns: drop thousands commas.
                            worksheet.write(row, col,
                                            float(td.text.replace(",", "")))
                        elif col in (6, 8):
                            # Rate columns: drop the percent sign.
                            worksheet.write(row, col,
                                            float(td.text.replace("%", "")))
                        elif col == 1:
                            worksheet.write(row, col, float(td.text))
                        else:
                            worksheet.write(row, col, td.text)
                    except Exception:
                        # Best effort: report the bad cell and keep going.
                        print(traceback.format_exc())
            # Header row.
            for col, title in enumerate(dimensionsOfdata):
                worksheet.write(0, col, title)

            nowTime = common.getNowTime()
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Smaato")
            common.mkdir(path)
            workbook.save(path + excelName)
            print("  excel保存成功,路径:" + path + "-----------")

            print("||||||||||||Smaato抓取完毕||||||||||||||")

            flag = True
        except Exception:
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            # The driver may never have been created if the failure happened
            # before or during webdriver.Chrome().
            if browser is not None:
                browser.quit()
        if flag:
            break
Exemple #11
0
def test_NewCM():
    """Download the NewCM (peg.cmcm.com) report and save it as an Excel file.

    Makes up to 5 attempts. Each attempt starts a Chrome session whose
    download directory is the path returned by ``common.getDirPath``, fills in
    the credentials from ``common_mysql.selectFromTb("NewCM")``, pauses so the
    operator can type the captcha by hand, selects the "Ad Unit" dimension and
    the "Yesterday" range, downloads the report and passes the last file
    returned by ``common.scan_File`` to ``common.turnToXls_ByPandas``. A failed
    attempt is printed, written to ``comm_logging.myLogger`` and retried.
    Returns nothing.

    The captcha prompt is a Windows message box shown via ``ctypes.windll``.
    """
    print("............NewCM................")
    flag = False
    for try_num in range(5):
        print("NewCM第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            nowTime = common.getNowTime()
            yearNow = nowTime[0:4]
            monthNow = nowTime[5:7]
            dayNow = nowTime[8:10]

            # Prefix of the file name NewCM gives the download.
            prefix = "Brand+Details_" + yearNow + monthNow + dayNow
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "NewCM")

            chrome_options = webdriver.ChromeOptions()
            # Headless mode is intentionally left off: the captcha is typed
            # into the visible window.
            # The user agent must be passed as a --user-agent=<value> switch;
            # a bare string is not a switch Chrome recognises.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            # Redirect downloads into the report directory.
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path
            }
            chrome_options.add_experimental_option('prefs', prefs)

            browser = webdriver.Chrome(chrome_options=chrome_options)
            loginURL = "https://peg.cmcm.com/login"
            dataURL = "https://peg.cmcm.com/frontReport/default"
            browser.get(loginURL)
            time.sleep(5)
            print("输入用户名密码---------------------------------")
            username, password = common_mysql.selectFromTb("NewCM")
            browser.find_element_by_name("username").send_keys(username)
            browser.find_element_by_name("password").send_keys(password)
            ctypes.windll.user32.MessageBoxA(
                0, u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                u' 信息'.encode('gb2312'), 0)
            print("请输入验证码,等待 15秒")
            # Wait the full 15 seconds announced to the operator.
            for remaining in range(15, 0, -1):
                print("倒计时:", remaining)
                time.sleep(1)
            print("点击登录按钮------")
            browser.find_element_by_xpath('//button[@type="submit"]').click()
            time.sleep(5)
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(5)
            # Select the "Ad Unit" dimension.
            print("选择 AD unit")
            browser.find_element_by_xpath(
                '//button[text()="Dimension & Metric"]').click()
            browser.find_element_by_xpath('//span[text()="Ad Unit"]').click()
            time.sleep(2)
            browser.find_element_by_xpath('//button[text()="Submit"]').click()
            time.sleep(2)
            # Select the date range.
            print("选择日期")
            browser.find_element_by_xpath(
                '//button[@class="btn btn-default ng-binding"]').click()
            browser.find_element_by_xpath('//span[text()="Yesterday"]').click()
            time.sleep(2)
            browser.find_element_by_xpath('//button[text()="Submit"]').click()
            time.sleep(2)
            # Run the search.
            print("# 点击search")
            browser.find_element_by_xpath('//input[@value="Search"]').click()
            time.sleep(5)
            print("点击下载")
            browser.find_element_by_xpath(
                '//a[@class="btn btn-outline"]').click()
            time.sleep(5)

            # Make sure the folder exists before scanning it for the download.
            common.mkdir(path)
            print("正在扫描下载的 csv 文件---------")
            fileList = common.scan_File(path, prefix)
            # Convert the last entry of the returned list, then clean up.
            print("正在剔除其他日期的数据 并另存为 excel ---------")
            common.turnToXls_ByPandas(path + fileList[-1], path + excelName,
                                      'NewCM')
            common.remove_File(path, prefix)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||NewCM抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # Exception (not a bare except) so Ctrl+C / SystemExit still stop
            # the run instead of triggering another attempt.
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            # The driver may never have been created if the failure happened
            # before or during webdriver.Chrome().
            if browser is not None:
                browser.quit()
        if flag:
            break
Exemple #12
0
def test_Tappx():
    """Scrape the Tappx monetize table and save it as an Excel file.

    Makes up to 3 attempts. Each attempt starts a headless Chrome session,
    logs in with the credentials from ``common_mysql.selectFromTb("Tappx")``,
    clicks through the currency, dimension and date controls, parses the
    ``data-raw-table`` body from the page source and writes it into an
    ``xlwt`` workbook. Column 1 is stored as stripped text; other columns are
    converted to floats, falling back to the raw cell text when conversion
    raises ``ValueError``. A failed attempt is printed, written to
    ``comm_logging.myLogger`` and retried. Returns nothing.
    """
    print("............Tappx................")
    flag = False
    for try_num in range(3):
        print("Tappx第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            chrome_options = webdriver.ChromeOptions()
            # Run Chrome headless.
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            # The user agent must be passed as a --user-agent=<value> switch;
            # a bare string is not a switch Chrome recognises.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )

            browser = webdriver.Chrome(chrome_options=chrome_options)
            loginURL = "https://www.tappx.com/en/admin/login/"
            dataURL = "https://www.tappx.com/en/admin/monetize/"
            browser.get(loginURL)

            print("输入用户名密码---------------------------------")
            username, password = common_mysql.selectFromTb("Tappx")
            browser.find_element_by_id("username").send_keys(username)
            browser.find_element_by_id("password").send_keys(password)
            browser.find_element_by_xpath("//button[@type='submit']").click()
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            print("自动选择数据维度内容---------------------------------")
            time.sleep(5)
            print("选择货币形式为美元---------------------------------")
            browser.find_element_by_xpath(
                "//div[@id='currency-selector']/a[2]").click()
            time.sleep(1)
            print("取消APP维度---------------------------------")
            browser.find_element_by_xpath(
                "//i[@class='tappxicon tappxicon-close']").click()
            time.sleep(1)
            print("展开维度---------------------------------")
            browser.find_element_by_xpath(
                "//button[@class='btn-block btn btn-default dropdown-toggle']"
            ).click()
            time.sleep(1)
            print("点击显示下拉维度---------------------------------")
            browser.find_element_by_xpath(
                "//a[@class='dropdown-opener']").click()
            time.sleep(1)
            print("点击day维度---------------------------------")
            browser.find_element_by_xpath(
                "//li[@class='model-g_time_day no-image']").click()
            time.sleep(1)
            print("展开维度---------------------------------")
            browser.find_element_by_xpath(
                "//button[@class='btn-block btn btn-default dropdown-toggle']"
            ).click()
            time.sleep(1)
            print("点击app维度---------------------------------")
            browser.find_element_by_xpath(
                "//ul[@class='list-unstyled']/li[5]").click()
            time.sleep(1)
            print("展开维度---------------------------------")
            browser.find_element_by_xpath(
                "//button[@class='btn-block btn btn-default dropdown-toggle']"
            ).click()
            time.sleep(1)
            print("点击format维度---------------------------------")
            browser.find_element_by_xpath(
                "//ul[@class='list-unstyled']/li[2]").click()
            time.sleep(1)
            print("展开维度---------------------------------")
            browser.find_element_by_xpath(
                "//button[@class='btn btn-default btn-plus dropdown-toggle']"
            ).click()
            time.sleep(1)
            print("点击时间下拉维度---------------------------------")
            browser.find_element_by_xpath(
                "//ul[@id='default-options']/li[4]").click()
            time.sleep(1)
            print("点击昨天维度---------------------------------")
            browser.find_element_by_xpath(
                "//ul[@id='default-options']/li[4]/ul/li[2]").click()
            time.sleep(3)
            dimensionsOfdata = [
                "Date", "App", "Format", "Requests", "Deliveries",
                "Impressions", "Clicks", "CPM", "CTR", "Fill Rate",
                "Render Rate", "Benefits"
            ]
            workbook = xlwt.Workbook(encoding='utf-8')
            worksheet = workbook.add_sheet('Tappx')
            soup = BeautifulSoup(browser.page_source, "lxml")
            print("开始爬数据")
            tbody = soup.find("tbody", {"id": "data-raw-table"})
            # Row 0 is reserved for the header, so data starts at row 1.
            for row, tr in enumerate(tbody.findAll("tr"), 1):
                for col, td in enumerate(tr.findAll("td")):
                    try:
                        if col >= 3:
                            # Metric columns: drop separators and unit signs.
                            worksheet.write(
                                row, col,
                                float(
                                    td.text.replace(",", "").replace(
                                        "%", "").replace("$", "")))
                        elif col == 1:
                            worksheet.write(row, col,
                                            td.text.replace("\n", "").strip())
                        else:
                            worksheet.write(row, col, float(td.text))
                    except ValueError:
                        # Not numeric: keep the raw cell text instead.
                        worksheet.write(row, col, td.text)
            # Header row.
            for col, title in enumerate(dimensionsOfdata):
                worksheet.write(0, col, title)

            nowTime = common.getNowTime()
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Tappx")
            common.mkdir(path)
            workbook.save(path + excelName)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Tappx抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # Exception (not a bare except) so Ctrl+C / SystemExit still stop
            # the run instead of triggering another attempt.
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            # Single place where the driver is closed; it may never have been
            # created if the failure happened before webdriver.Chrome().
            if browser is not None:
                browser.quit()
        if flag:
            break
Exemple #13
0
def test_Solo():
    """Scrape the Solo (newborntown portal) report and save it as an Excel file.

    Each attempt opens Chrome, logs in with the credentials returned by
    ``common_mysql.selectFromTb("Solo")``, pauses so the operator can type
    the captcha by hand, selects a single-day custom range on the "Stats"
    page, switches to the "slot" view and clicks export.  The downloaded
    file is then converted with ``common.turnToXls_ByPandas`` and the raw
    download is removed with ``common.remove_File``.

    Up to three attempts are made.  A failed attempt prints its traceback,
    writes it through ``comm_logging.myLogger.write_logger`` and moves on to
    the next attempt.  The function returns ``None`` in every case.

    NOTE(review): the captcha prompt uses ``ctypes.windll``, so this only
    runs on Windows.
    """
    print("............Solo................")
    max_attempts = 3
    # Seconds the operator is given to type the captcha; the message box
    # text below promises 15 seconds.
    captcha_wait_seconds = 15
    flag = False
    for try_num in range(max_attempts):
        print("Solo第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches a browser
        # that was not started (or was already closed) in this attempt.
        browser = None
        try:
            # Report date = two days ago.  The slices assume a
            # "YYYY-MM-DD..." string from common.getNowTime -- TODO confirm.
            dayBeforeYester = common.getNowTime(-2)
            yyyy = dayBeforeYester[0:4]
            mm = dayBeforeYester[5:7]
            dd = dayBeforeYester[8:10]
            # The date picker is filled with MM/DD/YYYY text.
            targetDate = mm + "/" + dd + "/" + yyyy
            nowTime = common.getNowTime()

            # Prefix of the file name Solo uses for its download.
            prefix = "ReportTable"
            # Output directory and Excel name are derived from nowTime.
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Solo")
            # Make sure the download directory exists before Chrome is told
            # to save into it and before it is scanned.
            common.mkdir(path)

            chrome_options = webdriver.ChromeOptions()
            # Headless mode stays off: the operator has to see the page to
            # type the captcha.
            # The user-agent override must be passed as a switch; a bare
            # string is treated by Chrome as a positional argument.
            chrome_options.add_argument(
                '--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            # Redirect downloads into the output directory.
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path
            }
            chrome_options.add_experimental_option('prefs', prefs)

            # Start the browser and open the login page.
            browser = webdriver.Chrome(chrome_options=chrome_options)
            # NOTE(review): the "login" URL is the /logout endpoint --
            # presumably it drops any session and shows the login form.
            loginURL = "https://portal.newborntown.com/logout"
            dataURL = "https://portal.newborntown.com/report"
            browser.get(loginURL)
            time.sleep(5)
            print("输入用户名密码---------------------------------")
            username, password = common_mysql.selectFromTb("Solo")
            browser.find_element_by_name("email").send_keys(username)
            browser.find_element_by_name("password").send_keys(password)
            # Blocking message box: tells the operator to type the captcha
            # (and not to press login) once the box is dismissed.
            ctypes.windll.user32.MessageBoxA(
                0, u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                u' 信息'.encode('gb2312'), 0)
            print("请输入验证码,等待 15秒")
            # Count down the full advertised wait, one second per tick.
            for remaining in range(captcha_wait_seconds, 0, -1):
                print("倒计时:", remaining)
                time.sleep(1)
            print("点击登录按钮------")
            browser.find_element_by_xpath('//button[@id="btn_login"]').click()
            time.sleep(5)
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(5)
            # Open the "Stats" tab.
            print("选择stats页")
            browser.find_element_by_xpath('//h4[text()="Stats"]').click()
            time.sleep(5)

            # Pick a custom range whose start and end are both the target day.
            print("开始选择日期")
            browser.find_element_by_id("reportrange").click()
            time.sleep(2)
            browser.find_element_by_xpath(
                '//li[text()="Custom Range"]').click()
            time.sleep(2)
            browser.find_element_by_name("daterangepicker_start").clear()
            browser.find_element_by_name("daterangepicker_start").send_keys(
                targetDate)
            time.sleep(2)
            browser.find_element_by_name("daterangepicker_end").clear()
            browser.find_element_by_name("daterangepicker_end").send_keys(
                targetDate)
            time.sleep(2)
            browser.find_element_by_xpath('//button[text()="Apply"]').click()
            time.sleep(5)

            # Switch to the "slot" view.
            print("点击选择 slot 页")
            browser.find_element_by_id("slot").click()
            time.sleep(5)

            print("点击下载")
            browser.find_element_by_id("export").click()
            time.sleep(2)

            # Look for the downloaded file(s) by prefix in the output directory.
            print("正在扫描下载的 csv 文件---------")
            fileList = common.scan_File(path, prefix)
            # Convert the first match to Excel, then delete the raw download.
            # If nothing was downloaded yet, fileList[0] raises and the
            # attempt is retried.
            print("正在剔除其他日期的数据 并另存为 excel ---------")
            common.turnToXls_ByPandas(path + fileList[0], path + excelName,
                                      'Solo')
            common.remove_File(path, prefix)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Solo抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # Only ordinary errors trigger a retry; KeyboardInterrupt and
            # SystemExit propagate so the operator can abort the run.
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            # Close the browser only if this attempt actually started one.
            if browser is not None:
                browser.quit()
        if flag:
            break