def test_Adview():
    """Download the Adview income report and re-save it as an Excel file.

    Logs in to adview.cn with the credentials returned by
    ``common_mysql.selectFromTb("Adview")``, pauses so a human can type the
    captcha, clicks "Export detail CSV", then hands the last file returned by
    ``common.scan_File`` to ``common.turnToXls_ByPandas`` and calls
    ``common.remove_File`` for the download prefix.

    The whole flow is attempted up to 5 times.  Each failure's traceback is
    printed and written through ``comm_logging.myLogger``; nothing is raised
    or returned to the caller.

    The captcha prompt uses ``ctypes.windll``, so this only runs on Windows.
    """
    print("............Adview................")
    flag = False
    for try_num in range(5):
        print("Adview第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so ``finally`` never touches an unbound name or a
        # driver that an earlier attempt has already shut down.
        browser = None
        try:
            # Adview data is taken for the day before yesterday (offset -2).
            # The slicing below assumes a "yyyy?mm?dd..." layout -- TODO
            # confirm against common.getNowTime.
            report_day = common.getNowTime(-2)
            yyyy = report_day[0:4]
            mm = report_day[5:7]
            dd = report_day[8:10]

            now_time = common.getNowTime()
            year_now = now_time[0:4]
            month_now = now_time[5:7]
            day_now = now_time[8:10]

            # Date to keep when filtering; per the original author's note,
            # pandas renders dates as yyyy-mm-dd.
            match_str = yyyy + "-" + mm + "-" + dd
            # Prefix of the file name Adview gives its download.
            prefix = "AdView_" + year_now + month_now + day_now
            # The directory is derived from the date part of now_time.
            path = common.getDirPath(now_time)
            excel_name = common.getExcelName(now_time, "Adview")

            chrome_options = webdriver.ChromeOptions()
            # NOTE(review): this is passed as a bare argument with no
            # "user-agent=" switch prefix; presumably it was meant as a UA
            # override -- confirm before changing, sites may react to it.
            chrome_options.add_argument(
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36')
            # Redirect downloads into the per-day directory.
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path,
            }
            chrome_options.add_experimental_option('prefs', prefs)

            browser = webdriver.Chrome(chrome_options=chrome_options)
            login_url = "http://www.adview.cn/web/overseas/login"
            data_url = "http://www.adview.cn/user/bid/income"
            browser.get(login_url)
            time.sleep(5)

            print("输入用户名密码---------------------------------")
            browser.find_element_by_xpath('//span[text()="Publishers"]').click()
            username, password = common_mysql.selectFromTb("Adview")
            browser.find_element_by_id("email").send_keys(username)
            browser.find_element_by_id("pwd").send_keys(password)
            ctypes.windll.user32.MessageBoxA(
                0,
                u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                u' 信息'.encode('gb2312'),
                0)
            print("请输入验证码,等待 15秒")
            # Wait the full 15 seconds that the dialog promises the operator.
            for remaining in range(15, 0, -1):
                print("倒计时:", remaining)
                time.sleep(1)

            print("点击登录按钮------")
            browser.find_element_by_xpath(
                '//button[@class="form-control btn btn-blue blue submitBtn"]').click()
            time.sleep(5)

            print("跳转到数据页面---------------------------------")
            browser.get(data_url)
            time.sleep(5)
            # Switching the page to English is best-effort: a failure is
            # logged and the attempt carries on.
            try:
                browser.find_element_by_xpath('//a[text()="English"]').click()
            except Exception:
                error_info = traceback.format_exc()
                print(error_info)
                comm_logging.myLogger.write_logger(error_info)
            time.sleep(2)

            print("点击下载")
            browser.find_element_by_xpath(
                '//input[@value="Export detail CSV"]').click()
            time.sleep(2)

            # Scan the download directory for Adview files.
            print("正在扫描下载的 csv 文件---------")
            file_list = common.scan_File(path, prefix)
            common.mkdir(path)
            # Read the downloaded file, drop unwanted rows and re-save it.
            print("正在剔除其他日期的数据 并另存为 excel ---------")
            common.turnToXls_ByPandas(path + file_list[-1], path + excel_name,
                                      'Adview', match_str)
            common.remove_File(path, prefix)
            print(" excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Adview抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C / SystemExit
            # stop the run instead of triggering another retry.
            flag = False
            error_info = traceback.format_exc()
            print(error_info)
            comm_logging.myLogger.write_logger(error_info)
        finally:
            if browser is not None:
                browser.quit()
        if flag:
            break
def test_Pubnative():
    """Download the Pubnative report CSV and re-save it as an Excel file.

    Logs in to dashboard.pubnative.net with the credentials returned by
    ``common_mysql.selectFromTb("Pubnative")``, drives the report page's date
    and filter widgets, clicks the download link, then converts the expected
    CSV with ``common.turnToXls_ByPandas`` and calls ``common.remove_File``
    for the download prefix.

    The whole flow is attempted up to 3 times.  Each failure's traceback is
    printed and written through ``comm_logging.myLogger``; nothing is raised
    or returned to the caller.
    """
    print("............Pubnative................")
    flag = False
    for try_num in range(3):
        print("Pubnative第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so ``finally`` never touches an unbound name or a
        # driver that an earlier attempt has already shut down.
        browser = None
        try:
            # Report date is offset -1 from now.  The slicing assumes a
            # "yyyy?mm?dd..." layout -- TODO confirm against
            # common.getNowTime.
            yesterday = common.getNowTime(-1)
            yyyy = yesterday[0:4]
            mm = yesterday[5:7]
            dd = yesterday[8:10]

            day_stamp = dd + "." + mm + "." + yyyy
            # String used to match the downloaded files for deletion.
            prefix = "Publisher App_" + day_stamp + "-" + day_stamp
            # Name of the downloaded file.
            download_file = prefix + ".csv"

            now_time = common.getNowTime()
            # The directory is derived from the date part of now_time.
            path = common.getDirPath(now_time)
            excel_name = common.getExcelName(now_time, "Pubnative")

            chrome_options = webdriver.ChromeOptions()
            # NOTE(review): this is passed as a bare argument with no
            # "user-agent=" switch prefix; presumably it was meant as a UA
            # override -- confirm before changing, sites may react to it.
            chrome_options.add_argument(
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            # Redirect downloads into the per-day directory.
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path,
            }
            chrome_options.add_experimental_option('prefs', prefs)

            browser = webdriver.Chrome(chrome_options=chrome_options)
            login_url = "https://dashboard.pubnative.net"
            data_url = "https://dashboard.pubnative.net/partner/#!/api"
            browser.get(login_url)
            time.sleep(5)

            print("输入用户名密码---------------------------------")
            from public import common_mysql
            username, password = common_mysql.selectFromTb("Pubnative")
            browser.find_element_by_id("email").send_keys(username)
            browser.find_element_by_id("password").send_keys(password)
            time.sleep(2)
            browser.find_element_by_xpath('//input[@value="LOGIN"]').click()

            print("跳转到数据页面---------------------------------")
            browser.get(data_url)
            time.sleep(15)

            print("自动选择数据维度内容---------------------------------")
            browser.find_element_by_xpath('//div[@class="col-md-7"]').click()
            browser.find_element_by_xpath(
                '//div[@class="daterange"]/ul/li[2]').click()
            dropdown = browser.find_element_by_tag_name("select")
            # Option index 12 of the first <select> on the page; its meaning
            # is not visible here -- TODO confirm against the dashboard.
            Select(dropdown).select_by_index(12)
            browser.find_element_by_xpath(
                '//button[@class="btn btn-secondary ng-star-inserted"]').click()
            time.sleep(2)
            browser.find_element_by_xpath(
                '//div[@class="reports-filters row"]/div[3]').click()
            print("准备下载...")
            time.sleep(2)

            print("点击下载")
            browser.find_element_by_xpath(
                '//div[@class="col-md order-disabled optional-buttons"]/a[2]'
            ).click()
            time.sleep(2)

            common.mkdir(path)
            print("读取 csv 另存为 excel")
            common.turnToXls_ByPandas(path + download_file, path + excel_name,
                                      'Pubnative')
            print("删除多余的 csv")
            common.remove_File(path, prefix)
            print(" excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Pubnative抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C / SystemExit
            # stop the run instead of triggering another retry.
            flag = False
            error_info = traceback.format_exc()
            print(error_info)
            comm_logging.myLogger.write_logger(error_info)
        finally:
            if browser is not None:
                browser.quit()
        if flag:
            break
def test_NewCM():
    """Download the NewCM (peg.cmcm.com) report and re-save it as Excel.

    Logs in with the credentials returned by
    ``common_mysql.selectFromTb("NewCM")``, pauses so a human can type the
    captcha, selects the "Ad Unit" dimension and the "Yesterday" range,
    runs the search and clicks the download link.  The last file returned by
    ``common.scan_File`` is converted with ``common.turnToXls_ByPandas`` and
    ``common.remove_File`` is called for the download prefix.

    The whole flow is attempted up to 5 times.  Each failure's traceback is
    printed and written through ``comm_logging.myLogger``; nothing is raised
    or returned to the caller.

    The captcha prompt uses ``ctypes.windll``, so this only runs on Windows.
    """
    print("............NewCM................")
    flag = False
    for try_num in range(5):
        print("NewCM第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so ``finally`` never touches an unbound name or a
        # driver that an earlier attempt has already shut down.
        browser = None
        try:
            # The slicing assumes a "yyyy?mm?dd..." layout -- TODO confirm
            # against common.getNowTime.
            now_time = common.getNowTime()
            year_now = now_time[0:4]
            month_now = now_time[5:7]
            day_now = now_time[8:10]

            # Prefix of the file name NewCM gives its download.
            prefix = "Brand+Details_" + year_now + month_now + day_now
            # The directory is derived from the date part of now_time.
            path = common.getDirPath(now_time)
            excel_name = common.getExcelName(now_time, "NewCM")

            chrome_options = webdriver.ChromeOptions()
            # NOTE(review): this is passed as a bare argument with no
            # "user-agent=" switch prefix; presumably it was meant as a UA
            # override -- confirm before changing, sites may react to it.
            chrome_options.add_argument(
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            # Redirect downloads into the per-day directory.
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path,
            }
            chrome_options.add_experimental_option('prefs', prefs)

            browser = webdriver.Chrome(chrome_options=chrome_options)
            login_url = "https://peg.cmcm.com/login"
            data_url = "https://peg.cmcm.com/frontReport/default"
            browser.get(login_url)
            time.sleep(5)

            print("输入用户名密码---------------------------------")
            username, password = common_mysql.selectFromTb("NewCM")
            browser.find_element_by_name("username").send_keys(username)
            browser.find_element_by_name("password").send_keys(password)
            ctypes.windll.user32.MessageBoxA(
                0,
                u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                u' 信息'.encode('gb2312'),
                0)
            print("请输入验证码,等待 15秒")
            # Wait the full 15 seconds that the dialog promises the operator.
            for remaining in range(15, 0, -1):
                print("倒计时:", remaining)
                time.sleep(1)

            print("点击登录按钮------")
            browser.find_element_by_xpath('//button[@type="submit"]').click()
            time.sleep(5)

            print("跳转到数据页面---------------------------------")
            browser.get(data_url)
            time.sleep(5)

            # Choose the "Ad Unit" dimension.
            print("选择 AD unit")
            browser.find_element_by_xpath(
                '//button[text()="Dimension & Metric"]').click()
            browser.find_element_by_xpath('//span[text()="Ad Unit"]').click()
            time.sleep(2)
            browser.find_element_by_xpath('//button[text()="Submit"]').click()
            time.sleep(2)

            # Choose the date range.
            print("选择日期")
            browser.find_element_by_xpath(
                '//button[@class="btn btn-default ng-binding"]').click()
            browser.find_element_by_xpath('//span[text()="Yesterday"]').click()
            time.sleep(2)
            browser.find_element_by_xpath('//button[text()="Submit"]').click()
            time.sleep(2)

            # Run the search.
            print("# 点击search")
            browser.find_element_by_xpath('//input[@value="Search"]').click()
            time.sleep(5)

            print("点击下载")
            browser.find_element_by_xpath(
                '//a[@class="btn btn-outline"]').click()
            time.sleep(5)

            # Scan the download directory for NewCM files.
            print("正在扫描下载的 csv 文件---------")
            file_list = common.scan_File(path, prefix)
            common.mkdir(path)
            # Read the downloaded file and re-save it.
            print("正在剔除其他日期的数据 并另存为 excel ---------")
            common.turnToXls_ByPandas(path + file_list[-1], path + excel_name,
                                      'NewCM')
            common.remove_File(path, prefix)
            print(" excel保存成功,路径:" + path + "-----------")
            print("||||||||||||NewCM抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C / SystemExit
            # stop the run instead of triggering another retry.
            flag = False
            error_info = traceback.format_exc()
            print(error_info)
            comm_logging.myLogger.write_logger(error_info)
        finally:
            if browser is not None:
                browser.quit()
        if flag:
            break
def test_OpenX():
    """Download the OpenX report as xlsx and re-save it via pandas.

    Logs in to sso.openx.com with the credentials returned by
    ``common_mysql.selectFromTb("OpenX")``.  A plain login is tried first; if
    any step of it fails, the form is refilled and a human is asked to type
    the captcha before the form is submitted again.  The report page is then
    set to a custom single-day range, the "Excel " export is triggered, the
    last file returned by ``common.scan_File`` is converted with
    ``common.turnToXls_ByPandas`` and ``common.remove_File`` is called for
    the download prefix.

    The whole flow is attempted up to 2 times.  Each failure's traceback is
    printed and written through ``comm_logging.myLogger``; nothing is raised
    or returned to the caller.

    The captcha prompt uses ``ctypes.windll``, so this only runs on Windows.
    """
    print("............OpenX................")
    # The slicing assumes a "yyyy?mm?dd..." layout -- TODO confirm against
    # common.getNowTime.
    now_time = common.getNowTime()
    year_now = now_time[0:4]
    month_now = now_time[5:7]
    day_now = now_time[8:10]
    # The directory is derived from the date part of now_time.
    path = common.getDirPath(now_time)
    # String used to match the downloaded files for deletion.
    prefix = "Workspace 1-" + month_now + "-" + day_now + "-" + year_now
    excel_name = common.getExcelName(now_time, "openx")

    flag = False
    for try_num in range(2):
        print("OpenX第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so ``finally`` never touches an unbound name or a
        # driver that an earlier attempt has already shut down.
        browser = None
        try:
            print("打开浏览器")
            chrome_options = webdriver.ChromeOptions()
            # NOTE(review): this is passed as a bare argument with no
            # "user-agent=" switch prefix; presumably it was meant as a UA
            # override -- confirm before changing, sites may react to it.
            chrome_options.add_argument(
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path,
            }
            chrome_options.add_experimental_option('prefs', prefs)

            browser = webdriver.Chrome(chrome_options=chrome_options)
            # Without maximizing, some buttons cannot be clicked and raise.
            browser.maximize_window()
            time.sleep(2)
            login_url = "https://sso.openx.com/login/login"
            data_url = "http://mobimagic-ui.openx.net/app.html#/reports/pie"

            # Report date: the day before yesterday (offset -2).
            report_day = common.getNowTime(-2)
            yyyy = report_day[0:4]
            mm = report_day[5:7]
            dd = report_day[8:10]
            report_date = mm + "/" + dd + "/" + yyyy

            print("打开登录网址")
            browser.get(login_url)
            time.sleep(5)

            username, password = common_mysql.selectFromTb("OpenX")
            # First try the normal login that needs no captcha.  If anything
            # in it fails, treat that as "login failed, captcha required" and
            # fall back to the captcha flow in the handler.
            try:
                browser.find_element_by_id("email").send_keys(username)
                time.sleep(3)
                browser.find_element_by_id('password').send_keys(password)
                time.sleep(2)
                browser.find_element_by_id("submit").click()
                time.sleep(5)
                browser.find_element_by_xpath(
                    '//a[text()="http:// mobimagic-ui.openx.net/"]').click()
            except Exception:
                error_info = traceback.format_exc()
                print(error_info)
                comm_logging.myLogger.write_logger(error_info)
                # Do not reload the page here: after a reload the captcha
                # does not appear and the login keeps failing.
                time.sleep(2)
                browser.find_element_by_id("email").clear()
                browser.find_element_by_id("email").send_keys(username)
                time.sleep(3)
                browser.find_element_by_id('password').clear()
                browser.find_element_by_id('password').send_keys(password)
                ctypes.windll.user32.MessageBoxA(
                    0,
                    u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                    u' 信息'.encode('gb2312'),
                    0)
                print("请输入验证码,等待 15秒")
                # Wait the full 15 seconds that the dialog promises.
                for remaining in range(15, 0, -1):
                    print("倒计时:", remaining)
                    time.sleep(1)
                browser.find_element_by_id("submit").click()
            print("模拟登录成功")

            print("打开数据网址")
            browser.get(data_url)
            time.sleep(10)
            browser.find_element_by_xpath(
                '//div[@class="date-range-filter__input"]').click()
            browser.find_element_by_xpath(
                '//li[@class="date-range-filter__filter date-range-filter__filter--custom date-range-filter__filter__label"]'
            ).click()
            time.sleep(5)

            # Of the "Enter Date" inputs, element 0 is the start date and
            # element 1 the end date (per the original author's note).
            print("开始选择日期和维度")
            date_inputs = browser.find_elements_by_css_selector(
                '[placeholder="Enter Date"]')
            date_inputs[0].clear()
            date_inputs[0].send_keys(report_date)
            date_inputs[1].clear()
            date_inputs[1].send_keys(report_date)
            browser.find_element_by_xpath(
                '//div[@class="date-range-filter__custom-date-range-menu__buttons"]/div/button[2]'
            ).click()
            time.sleep(5)

            # Collapse the date bar and the line chart; otherwise the
            # download button cannot be reached.
            browser.find_element_by_xpath(
                '//div[@class="reports-pie-data-collection-container__collapse-trigger"]'
            ).click()
            time.sleep(1)
            browser.find_element_by_xpath(
                '//div[@class="reports-pie-collapse-toggle"]').click()
            time.sleep(1)

            # Open the export dropdown and choose the Excel entry.
            browser.find_element_by_xpath(
                '//div[@class="reports-pie-chart-toolkit__export"]/ox-dropdown'
            ).click()
            time.sleep(1)
            browser.find_element_by_xpath('//span[text()="Excel "]').click()
            time.sleep(10)

            common.mkdir(path)
            # Scan the download directory for OpenX files.
            print("正在扫描下载的 xlsx 文件---------")
            file_list = common.scan_File(path, prefix)
            common.turnToXls_ByPandas(path + file_list[-1], path + excel_name,
                                      'openx')
            common.remove_File(path, prefix)
            print(" excel保存成功,路径:" + path + "-----------")
            print("||||||||||||OpenX抓取完毕||||||||||||||")
            flag = True
        except Exception:
            flag = False
            error_info = traceback.format_exc()
            print(error_info)
            comm_logging.myLogger.write_logger(error_info)
        finally:
            if browser is not None:
                browser.quit()
        if flag:
            break
def test_Solo():
    """Download the Solo (portal.newborntown.com) report and save as Excel.

    Logs in with the credentials returned by
    ``common_mysql.selectFromTb("Solo")``, pauses so a human can type the
    captcha, opens the "Stats" page, sets a custom single-day range, switches
    to the slot tab and clicks export.  The first file returned by
    ``common.scan_File`` is converted with ``common.turnToXls_ByPandas`` and
    ``common.remove_File`` is called for the download prefix.

    The whole flow is attempted up to 3 times.  Each failure's traceback is
    printed and written through ``comm_logging.myLogger``; nothing is raised
    or returned to the caller.

    The captcha prompt uses ``ctypes.windll``, so this only runs on Windows.
    """
    print("............Solo................")
    flag = False
    for try_num in range(3):
        print("Solo第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so ``finally`` never touches an unbound name or a
        # driver that an earlier attempt has already shut down.
        browser = None
        try:
            # Report date is offset -2 from now.  The slicing assumes a
            # "yyyy?mm?dd..." layout -- TODO confirm against
            # common.getNowTime.
            report_day = common.getNowTime(-2)
            yyyy = report_day[0:4]
            mm = report_day[5:7]
            dd = report_day[8:10]
            report_date = mm + "/" + dd + "/" + yyyy

            now_time = common.getNowTime()
            # Prefix of the file name Solo gives its download.
            prefix = "ReportTable"
            # The directory is derived from the date part of now_time.
            path = common.getDirPath(now_time)
            excel_name = common.getExcelName(now_time, "Solo")

            chrome_options = webdriver.ChromeOptions()
            # NOTE(review): this is passed as a bare argument with no
            # "user-agent=" switch prefix; presumably it was meant as a UA
            # override -- confirm before changing, sites may react to it.
            chrome_options.add_argument(
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            # Redirect downloads into the per-day directory.
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path,
            }
            chrome_options.add_experimental_option('prefs', prefs)

            browser = webdriver.Chrome(chrome_options=chrome_options)
            login_url = "https://portal.newborntown.com/logout"
            data_url = "https://portal.newborntown.com/report"
            browser.get(login_url)
            time.sleep(5)

            print("输入用户名密码---------------------------------")
            username, password = common_mysql.selectFromTb("Solo")
            browser.find_element_by_name("email").send_keys(username)
            browser.find_element_by_name("password").send_keys(password)
            ctypes.windll.user32.MessageBoxA(
                0,
                u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                u' 信息'.encode('gb2312'),
                0)
            print("请输入验证码,等待 15秒")
            # Wait the full 15 seconds that the dialog promises the operator.
            for remaining in range(15, 0, -1):
                print("倒计时:", remaining)
                time.sleep(1)

            print("点击登录按钮------")
            browser.find_element_by_xpath('//button[@id="btn_login"]').click()
            time.sleep(5)

            print("跳转到数据页面---------------------------------")
            browser.get(data_url)
            time.sleep(5)

            # Open the Stats page.
            print("选择stats页")
            browser.find_element_by_xpath('//h4[text()="Stats"]').click()
            time.sleep(5)

            # Pick a custom range whose start and end are the report date.
            print("开始选择日期")
            browser.find_element_by_id("reportrange").click()
            time.sleep(2)
            browser.find_element_by_xpath(
                '//li[text()="Custom Range"]').click()
            time.sleep(2)
            browser.find_element_by_name("daterangepicker_start").clear()
            browser.find_element_by_name("daterangepicker_start").send_keys(
                report_date)
            time.sleep(2)
            browser.find_element_by_name("daterangepicker_end").clear()
            browser.find_element_by_name("daterangepicker_end").send_keys(
                report_date)
            time.sleep(2)
            browser.find_element_by_xpath('//button[text()="Apply"]').click()
            time.sleep(5)

            # Switch to the slot tab.
            print("点击选择 slot 页")
            browser.find_element_by_id("slot").click()
            time.sleep(5)

            print("点击下载")
            browser.find_element_by_id("export").click()
            time.sleep(2)

            # Scan the download directory for Solo files.
            print("正在扫描下载的 csv 文件---------")
            file_list = common.scan_File(path, prefix)
            common.mkdir(path)
            # Read the downloaded file and re-save it.
            print("正在剔除其他日期的数据 并另存为 excel ---------")
            common.turnToXls_ByPandas(path + file_list[0], path + excel_name,
                                      'Solo')
            common.remove_File(path, prefix)
            print(" excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Solo抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C / SystemExit
            # stop the run instead of triggering another retry.
            flag = False
            error_info = traceback.format_exc()
            print(error_info)
            comm_logging.myLogger.write_logger(error_info)
        finally:
            if browser is not None:
                browser.quit()
        if flag:
            break