def VisitGoodsPage(mongo_collection, driver, key, brand):
    """Open a brand's listing page, scroll through it and store its goods.

    The page at ``brand['original_url']`` is loaded, scrolled down in three
    steps (with randomised pauses between the steps), parsed with
    BeautifulSoup and handed to ``dealWith``.

    :param mongo_collection: collection object passed through to ``dealWith``.
    :param driver: Selenium driver used to load the page.
    :param key: category key passed through to ``dealWith``.
    :param brand: dict describing the brand; must contain ``'original_url'``.
    :return: the driver that was actually used.  When loading the page fails
        with ``WebDriverException`` the driver passed in is quit and replaced
        by a fresh session from ``loginTmall.login_tmall()``; callers should
        continue with the returned driver, because the one they passed in is
        no longer usable in that case.
    """
    url = brand['original_url']
    try:
        driver.get(url)
    except WebDriverException as e:
        # The session is considered broken: back off, log, and start over
        # with a freshly logged-in browser.
        time.sleep(10)
        Logger.info('Error!' + str(e))
        driver.quit()
        driver = loginTmall.login_tmall()
        driver.get(url)
        time.sleep(random.uniform(2, 4))
    time.sleep(random.uniform(0.5, 1))

    print('准备访问商品页面')
    print('商品详细信息')
    time.sleep(random.uniform(2, 4))

    # Scroll down in stages, pausing after each step before the page source
    # is read.
    for script in ("scrollTo(0,1000)", "scrollTo(0,5000)", "scrollTo(0,10000)"):
        driver.execute_script(script)
        time.sleep(random.uniform(1, 2))

    bs_obj = BeautifulSoup(driver.page_source, 'lxml')
    dealWith(mongo_collection, bs_obj, key, brand)
    print("done..")
    return driver
class Login(object):
    """Start a browser session and sign in to the admin console."""

    logger = Logger().logger
    driver = None

    # Locators for the login form and for the logo checked after login.
    username = Location("用户名输入框", "input[type=text]")
    passwd = Location("密码输入框", "input[type=password]")
    submit = Location("登录按钮", "button[type=submit]")
    img = Location(
        "后台管理系统果壳logo",
        '//*[@id="root"]/div/section/header/div[1]/div/span[1]/img',
        "XPATH",
    )

    def __init__(self):
        """Create the driver, open ``Config.url`` and configure timeouts.

        :raises Exception: when any start-up step fails; the failure is
            logged and the driver (if it was created) is quit first.
        """
        try:
            browser = ChromeDriver()
            self.driver = browser
            browser.maximize_window()
            browser.get(Config.url)
            browser.set_page_load_timeout(Config.TIMEOUT)
            browser.set_script_timeout(10)
        except Exception as err:
            message = "driver初始化失败....\n系统信息: {} \n浏览器类型: {}\n详细信息: {}".format(
                Config.system, Config.BROWSER, str(err))
            Log.error(message)
            # self.driver is still the class-level None if the constructor
            # itself failed, in which case there is nothing to quit.
            if self.driver:
                self.driver.quit()
            raise Exception(err)

    def login(self):
        """Fill in the credentials, submit, and verify the landing page.

        :return: the logged-in driver.
        :raises AssertionError: when the logo is not found or the page title
            is not the expected one.
        """
        browser = self.driver
        browser.send(self.username, Config.USER)
        browser.send(self.passwd, Config.PWD)
        browser.click(self.submit)

        logo_found = browser.exists(self.img)
        assert logo_found, "登录失败, 未找到大后台左上角logo"

        title_ok = browser.title == "GOOCKR CHARGING"
        assert title_ok, \
            "打开GOOCKR CHARGING失败, 浏览器title不为'GOOCKR CHARGING',可能未进入GOOCKR CHARGING首页"
        return browser
class Login(object):
    """Start a browser session and verify that the Bing home page opened.

    The credential-entry steps (username, password, submit, logo check) are
    intentionally disabled in this variant; only the page title is checked.
    """

    logger = Logger().logger
    driver = None

    def __init__(self):
        """Create the driver, open ``Config.url`` and configure timeouts.

        :raises Exception: when any start-up step fails; the failure is
            logged and the driver (if it was created) is quit first.
        """
        try:
            browser = ChromeDriver()
            self.driver = browser
            browser.maximize_window()
            browser.get(Config.url)
            browser.set_page_load_timeout(Config.TIMEOUT)
            browser.set_script_timeout(10)
        except Exception as err:
            message = "driver初始化失败....\n系统信息: {} \n浏览器类型: {}\n详细信息: {}".format(
                Config.system, Config.BROWSER, str(err))
            Log.error(message)
            # self.driver is still the class-level None if the constructor
            # itself failed, in which case there is nothing to quit.
            if self.driver:
                self.driver.quit()
            raise Exception(err)

    def login(self):
        """Check the page title and hand back the driver.

        :return: the driver.
        :raises AssertionError: when the page title is not the expected one.
        """
        browser = self.driver
        title_ok = browser.title == "微软 Bing 搜索 - 国内版"
        assert title_ok, \
            "打开bing失败, 浏览器title不为'微软 Bing 搜索 - 国内版',可能未进入bing首页"
        return browser
auc = _eval(sess, model, test_len, fd, auc_op) logger.get_log().info( 'Epoch %d DONE\tCost time: %.2f Train_loss: %.5f\tTest_loss: %.5f\tEval_AUC: %.5f' % (i, time.time() - start_time, loss_sum / batch_per_epoch, auc['loss'], auc['auc'])) start_time = time.time() sys.stdout.flush() print('*' * 16) print('best test_auc:', best_auc) res = best_auc best_auc = 0.0 print('*' * 16) sys.stdout.flush() return res if __name__ == '__main__': logger = Logger('fm_auc_diff') sample = '../Data/adult_uci/raw_adult_sample.tfrecord' config = '../Config/feature_adult.yaml' fdtmp, idxtmp, train_lentmp, test_lentmp = input_fn(sample, config, rand_col=None) auc_value = train_epochs(fdtmp, idxtmp, train_lentmp, test_lentmp) print('auc: ', auc_value)
#!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2020-07-13 20:35 # @Author : 刘开 import os import xlrd from Tools.logger import Logger logger = Logger(logger='execl').getlog() class ExeclReader(): def __init__(self, fielname, sheet): self.filename = fielname self.sheet = sheet def get_execl_name(self): #路径参数化 base_dir = str(os.path.dirname(os.path.dirname(__file__))) #返回路径名称 base_dir = base_dir.replace('\\', '/') # 拼凑文件路径 file_paht = base_dir + '/Data/' + self.filename logger.info('开始查找文件') #读取execl execl = xlrd.open_workbook(file_paht) #获取sheet页 worksheet = execl.sheet_by_name(self.sheet) logger.info('通过sheet名称读取文件内容') # 读表头
# Set up the run: save_context() supplies a text logger plus the folders used
# below for checkpoints (MODELS_FOLDER) and summaries (SUMMARIES_FOLDER).
text_logger, MODELS_FOLDER, SUMMARIES_FOLDER = save_context(
    __file__, KEY_ARGUMENTS)

# Fixed seeds for the torch CPU RNG, the torch CUDA RNG and NumPy.
torch.manual_seed(1234)
torch.cuda.manual_seed(1235)
np.random.seed(1236)
torch.backends.cudnn.deterministic = True
# NOTE(review): benchmark=True lets cuDNN auto-tune its algorithm choice;
# PyTorch's reproducibility notes recommend benchmark=False when run-to-run
# reproducibility is wanted. Confirm this combination is intentional.
torch.backends.cudnn.benchmark = True

# Use the GPU when one is available, otherwise fall back to the CPU, and
# publish the choice on FLAGS.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
FLAGS.device = device

nlabels = FLAGS.y_dist.dim
batch_size = FLAGS.training.batch_size

# Checkpoints go to MODELS_FOLDER, logged summaries to SUMMARIES_FOLDER.
checkpoint_io = CheckpointIO(checkpoint_dir=MODELS_FOLDER)
logger = Logger(log_dir=SUMMARIES_FOLDER)

# Data iterator and the generator / discriminator networks with their
# optimizers.
itr = inputs.get_data_iter(batch_size)
GNet, GOptim = inputs.get_generator_optimizer()
DNet, DOptim = inputs.get_discriminator_optimizer()

# Separate copy of the generator, initialised through update_average() with
# a factor of 0.0.
# NOTE(review): presumably an averaged "test" generator that starts as an
# exact copy of GNet - confirm against update_average().
GNet_test = copy.deepcopy(GNet)
update_average(GNet_test, GNet, 0.0)

# Label and latent distributions, built from the FLAGS sub-configurations.
ydist = get_ydist(**vars(FLAGS.y_dist))
zdist = get_zdist(**vars(FLAGS.z_dist))

# Register everything that should be part of a checkpoint.
checkpoint_io.register_modules(GNet=GNet,
                               GOptim=GOptim,
                               DNet=DNet,
                               DOptim=DOptim)
checkpoint_io.register_modules(GNet_test=GNet_test)
# Database that holds one collection per configured category.
dataBase = mongo_conn['db_ysld']

# Snapshot of the configured category keys, taken before crawling starts.
key_list = list(urls_collections_config.keys())

for key in key_list:
    # Each configuration entry is indexed as:
    #   [0] listing URL, [1] collection name, [2] category id, [3] display name
    entry = urls_collections_config[key]
    collection_name = entry[1]
    collection = dataBase[collection_name]
    name = entry[3]
    url = entry[0]
    category_id = entry[2]
    category = key

    brand = {
        'category_id': category_id,
        'id': '',
        'original_url': url,
        'name': name,
        'category': category,
        'store_id': 1,
    }

    Logger.info(name)
    Logger.info(url)
    VisitGoodsPage(collection, driver, key, brand)
    # Random pause between categories.
    time.sleep(random.uniform(8, 15))

# Release the browser, the database connection and the output handle.
driver.quit()
mongo_conn.close()
output.close()
import base64 from functools import wraps from selenium.common.exceptions import ElementNotVisibleException from selenium.webdriver.common.by import By from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from Tools.logger import Logger from Tools.web_tool import Tools from config import Config Log = Logger().logger def wait(func): @wraps(func) def wrapper(*args, **kwargs): Log.info("当前Page: {} 操作: {} 控件名: {}".format( args[1].file, func.__name__, "->".join([str(x) for x in args[1:]]))) try: WebDriverWait(args[0], Config.TIMEOUT).until( EC.element_to_be_clickable( (getattr(By, args[1].method), args[1].value))) # WebDriverWait(args[0], Config.TIMEOUT).until( # lambda x: x.find_element(getattr(By, args[1].method), args[1].value).send_keys("1") # ) except Exception: Log.error("等待元素超时! \n文件名: {}\n函数名: {}\n控件名: {}".format( args[1].file, func.__name__, args[1].name)) assert 0, "等待元素超时! \n文件名: {}\n函数名: {}\n控件名: {}".format(
# @Time : 2020-07-13 21:46 # @Author : 刘开 import ast from Tools.logger import Logger import requests import json from Tools.config import Config from Tools.execlReader import ExeclReader from Tools.config import Config from Tools.TimeGlobal import * config = Config() print(year_time()) cookies = ast.literal_eval(config.get_value('cookie.conf', 'cookies', 'cookie')) print(type(cookies)) loger = Logger('SendRequest').getlog() class SendRequest(): # 去掉初始化 @staticmethod def request_api(host, url, method, data, cookie): test_url = host + url # 做一个没加http://的判断 if not test_url.startswith('http://'): test_url = 'http://' + test_url # 封装请求方法 try: if method == 'GET':
class ChromeDriver(base()):
    """Browser driver with convenience helpers that take locator objects.

    A locator (``ele``) is expected to expose ``method`` (the name of a
    ``By`` attribute, e.g. ``"XPATH"``) and ``value`` (the selector string).
    Methods decorated with ``@wait`` are wrapped by the project's ``wait``
    decorator before they run.
    """

    logger = Logger().logger

    def __init__(self):
        """Start the browser; on Linux a virtual display is started first."""
        # Virtual display owned by this driver (Linux only); released in quit().
        self._display = None
        if Config.SYS == "linux":
            Browser.set_browser()
            # Create the virtual desktop the browser will render into.
            display = Display(visible=0, size=(1920, 1080))
            display.start()
            self._display = display
            try:
                super(ChromeDriver, self).__init__()
            except Exception:
                # Do not leave the display running when the browser failed
                # to start.
                self._display = None
                display.stop()
                raise
        else:
            # Headless mode is available but currently disabled:
            # op = chrome_op()
            # op.add_argument("--headless")
            if getattr(Config, "DRIVER_PATH", None) is None:
                Browser.set_browser()
            # DRIVER_PATH is looked up again here rather than reused.
            # NOTE(review): presumably Browser.set_browser() fills it in - confirm.
            super(ChromeDriver, self).__init__(
                executable_path=getattr(Config, "DRIVER_PATH", None))

    def quit(self):
        """Quit the browser and stop the virtual display this driver started."""
        try:
            super(ChromeDriver, self).quit()
        finally:
            display = getattr(self, "_display", None)
            if display is not None:
                self._display = None
                display.stop()

    # More methods: see Selenium.jpg in the project root.
    def get_element(self, ele):
        """
        Find a single element.
        :param ele: locator object
        :return: the matching element
        """
        return self.find_element(by=getattr(By, ele.method), value=ele.value)

    def get_elements(self, ele):
        """
        Find all matching elements.
        :param ele: locator object
        :return: list of matching elements
        """
        return self.find_elements(by=getattr(By, ele.method), value=ele.value)

    @wait
    def send(self, ele, text):
        """
        Type text into an input.
        :param ele: locator object
        :param text: text to type
        :return: None
        """
        self.get_element(ele).send_keys(text)

    @wait
    def bounds(self, ele):
        """
        Get the bounding box of an element.
        :param ele: locator object
        :return: tuple ``(left, top, right, bottom)``
        """
        element = self.get_element(ele)
        location = element.location
        size = element.size
        left, top = location["x"], location["y"]
        return (left, top, left + size["width"], top + size["height"])

    @wait
    def select(self, ele, text):
        """
        Choose an option of a drop-down by its visible text.
        :param ele: locator object of the ``<select>``
        :param text: visible text of the option
        :return: None
        """
        Select(self.get_element(ele)).select_by_visible_text(text)

    @wait
    def click(self, ele):
        """
        Click an element (after a fixed one-second pause).
        :param ele: locator object
        :return: result of the element's ``click()``
        """
        time.sleep(1)
        return self.get_element(ele).click()

    @wait
    def clear(self, ele):
        """
        Clear the content of an input.
        :param ele: locator object
        :return: result of the element's ``clear()``
        """
        return self.get_element(ele).clear()

    @wait
    def click_text(self, ele_list, name):
        """
        Click the first element whose text equals ``name`` (e.g. in a
        navigation bar or menu).
        :param ele_list: locator object matching the candidate elements
        :param name: exact text to look for
        :return: None
        :raises AssertionError: when no element carries that text
        """
        for element in self.get_elements(ele_list):
            if element.text == name:
                element.click()
                return
        # Raised explicitly (instead of ``assert 0``) so the check also fires
        # when Python runs with -O.
        raise AssertionError(
            "未找到{}中的文本: {}".format(ele_list.name, name))

    def switch_handle(self):
        """
        Switch to another window, if there is one.

        The last handle reported by the driver that differs from the current
        one is selected; when there is none, an info message is logged.
        :return: None
        """
        current = self.current_window_handle
        others = [hand for hand in self.window_handles if hand != current]
        if others:
            self.switch_to.window(others[-1])
        else:
            self.logger.info("没有可用的新窗口!")

    def switch_frame(self, ele):
        """
        Enter an iframe.
        :param ele: locator object of the iframe
        :return: None
        """
        frame = self.get_element(ele)
        self.switch_to.frame(frame)

    def switch_back(self):
        """
        Leave the iframe and return to the top-level document.
        :return: None
        """
        self.switch_to.default_content()

    def alert_confirm(self):
        """
        Accept the currently open alert.
        :return: None
        """
        # ``switch_to.alert`` is a property, not a method.
        self.switch_to.alert.accept()

    def alert_refuse(self):
        """
        Dismiss the currently open alert.
        :return: None
        """
        # ``switch_to.alert`` is a property, not a method.
        self.switch_to.alert.dismiss()

    @wait
    def get_text(self, ele):
        """
        Get the text of an element.
        :param ele: locator object
        :return: the element's text
        """
        return self.get_element(ele).text

    @wait
    def attr(self, ele, attribute):
        """
        Get an attribute of an element.
        :param ele: locator object
        :param attribute: attribute name
        :return: the attribute value
        """
        return self.get_element(ele).get_attribute(attribute)

    def set_attr(self, ele, attribute, value):
        """
        Set a property of an element through JavaScript.
        :param ele: locator object
        :param attribute: property path appended to the element in the script
        :param value: new value; it is assigned as a string
        :return: None
        """
        element = self.get_element(ele)
        # The value travels as a script argument instead of being spliced
        # into the JavaScript source, so quotes in it cannot break the script.
        self.execute_script(
            "arguments[0].{}=arguments[1];".format(attribute),
            element, str(value))

    @wait
    def move(self, pos):
        """
        Move the mouse pointer onto an element.
        :param pos: locator object of the target element
        :return: None
        """
        target = self.get_element(pos)
        ActionChains(self).move_to_element(target).perform()

    def exists(self, ele):
        """
        Wait up to ``Config.TIMEOUT`` for an element to become visible.
        :param ele: locator object
        :return: True when the element became visible, False otherwise
        """
        locator = (getattr(By, ele.method), ele.value)
        try:
            WebDriverWait(self, Config.TIMEOUT).until(
                EC.visibility_of_element_located(locator))
        except Exception:
            # Best effort: any failure while waiting counts as "not there",
            # but KeyboardInterrupt / SystemExit are no longer swallowed.
            return False
        return True

    def is_show(self, ele):
        """
        Tell whether an element is displayed.
        :param ele: locator object
        :return: True when the element is displayed, False otherwise
        """
        # The previous script read ``arguments[0].display``, which is not a
        # DOM property and therefore always produced None.
        return self.get_element(ele).is_displayed()
def VisitGoodsPage(mongo_collection, driver, key, brand):
    """Crawl every result page of a brand's listing and store its goods.

    The first page at ``brand['original_url']`` is loaded, scrolled, parsed
    with BeautifulSoup and handed to ``dealWith``; the "next page" link is
    then followed until the page count reported by ``getMaxPage`` has been
    reached.

    :param mongo_collection: collection object passed through to ``dealWith``;
        its ``name`` is recorded when a captcha page is hit.
    :param driver: Selenium driver used to load the pages.
    :param key: category key passed through to ``dealWith``.
    :param brand: dict describing the brand; ``'original_url'`` is always
        read, ``'original_name'`` is read once the last page is finished.
    :return: the driver that was actually used.  When loading the first page
        fails with ``WebDriverException`` the driver passed in is quit and
        replaced by a fresh session from ``loginTmall.login_tmall()``;
        callers should continue with the returned driver, because the one
        they passed in is no longer usable in that case.
    """
    scroll_scripts = ("scrollTo(0,1000)", "scrollTo(0,5000)", "scrollTo(0,10000)")

    url = brand['original_url']
    try:
        driver.get(url)
    except WebDriverException as e:
        # The session is considered broken: back off, log, and start over
        # with a freshly logged-in browser.
        time.sleep(10)
        Logger.info('Error!' + str(e))
        driver.quit()
        driver = loginTmall.login_tmall()
        driver.get(url)
        time.sleep(random.uniform(2, 4))
    time.sleep(random.uniform(0.5, 1))

    def _scroll_down(pause):
        """Scroll down in stages, sleeping ``pause()`` seconds after each step."""
        for script in scroll_scripts:
            driver.execute_script(script)
            time.sleep(pause())

    # Redirected to the captcha host: record the URL for a later retry and
    # give up on this brand for now.
    current_url = driver.current_url
    if 'https://sec.taobao.com' in current_url:
        collection_name = mongo_collection.name
        writeToCsv(url, brand, collection_name)
        time.sleep(random.uniform(2, 4))
        return driver

    # Nothing to crawl when the listing has no items or reports zero pages.
    if isNoItem(driver):
        return driver
    max_page = getMaxPage(driver)
    if max_page == 0:
        return driver
    Logger.info('最大页数:' + str(max_page))

    print('准备访问商品页面')
    print('商品详细信息')
    time.sleep(random.uniform(2, 4))
    _scroll_down(lambda: random.uniform(1, 2))
    bs_obj = BeautifulSoup(driver.page_source, 'lxml')
    dealWith(mongo_collection, bs_obj, key, brand)

    N = 2
    while N <= int(max_page):
        time.sleep(2)
        # find_element("xpath", ...) is used because the find_element_by_*
        # helpers were removed in Selenium 4; this form works on 3 and 4.
        element = WebDriverWait(driver, 60).until(
            lambda d: d.find_element("xpath", "//a[@class='ui-page-s-next']"))
        element.click()
        time.sleep(2)
        _scroll_down(lambda: 1)
        Logger.info(driver.current_url)
        bs_obj = BeautifulSoup(driver.page_source, 'lxml')
        dealWith(mongo_collection, bs_obj, key, brand)

        current_page = getCurrentPage(driver)
        Logger.info('完成当前页爬取:' + str(current_page))
        if int(current_page) == int(max_page):
            Logger.info(brand['original_name'])
            Logger.info(''' ######################################################################### | 最大页数爬取完毕 | 
######################################################################### ''')
        # Continue from the page the site reports, not from a local counter.
        N = int(current_page) + 1
    print("done..")
    return driver
cursors.close() return res if __name__ == '__main__': driver = loginTmall.login_tmall() time.sleep(1) mongo_conn = connect_mongo(mongodb_host, mongodb_port, mongodb_username, mongodb_password) dataBase = mongo_conn['power'] # Database url_list = [] with open('fail_url.txt', 'r', encoding='utf-8') as fc: for i in fc: temp = i.split('/*/') url_list.append(temp) for url_item in url_list: collection_name = url_item[0].strip() collection = dataBase[collection_name] brand_str = url_item[1].replace('ObjectId', '') brand = eval(brand_str) Logger.info(brand['original_name']) Logger.info(brand['original_url']) VisitGoodsPage(collection, driver, brand['category'], brand) time.sleep(random.uniform(5, 20)) driver.quit() mongo_conn.close() output.close()
class Suite(unittest.TestSuite):
    """Test suite whose ``run`` can execute a test more than once.

    Each test is attempted up to ``retry + 1`` times, where ``retry`` is the
    test's own ``retry`` attribute or ``Config.RETRY`` when it has none.
    """

    logger = Logger().logger

    def run(self, result, debug=False):
        """Run the tests of this suite, with retries.

        :param result: result collector; its ``failures`` and ``errors``
            entries are read with ``.get("case_id")`` and ``['msg']``.
        :param debug: when true, tests are run through ``test.debug()``
            instead of ``test(result)``.
        :return: ``result``
        """
        topLevel = False
        # Mark the outermost run() call so module/class tear-down happens
        # exactly once, at the end of that call.
        if getattr(result, '_testRunEntered', False) is False:
            result._testRunEntered = topLevel = True
        for index, test in enumerate(self):
            retry = getattr(test, "retry", Config.RETRY)
            if result.shouldStop:
                break
            # Attempts are numbered 1 .. retry + 1.
            for i in range(1, retry + 2):
                if _isnotsuite(test):
                    self._tearDownPreviousClass(test, result)
                    self._handleModuleFixture(test, result)
                    self._handleClassSetUp(test, result)
                    # result._previousTestClass = test.__class__
                    # NOTE(review): this ``continue`` moves on to the next
                    # attempt of the same test, not to the next test - confirm
                    # that is intended.
                    if (getattr(test.__class__, '_classSetupFailed', False) or
                            getattr(result, '_moduleSetUpFailed', False)):
                        continue
                    self.logger.info("用例: {}正在尝试第{}次运行!".format(
                        test.__class__.__name__, i))
                if not debug:
                    test(result)
                else:
                    test.debug()
                if i < retry + 1:
                    # Retry decision (the original author flags this section
                    # as ugly and welcomes a clean-up).
                    error, fail = None, None
                    fail_id = [x.get("case_id") for x in result.failures]
                    error_id = [x.get("case_id") for x in result.errors]
                    # Position of this test's record in each list, if any.
                    if test.case_id in fail_id:
                        fail = fail_id.index(test.case_id)
                    if test.case_id in error_id:
                        error = error_id.index(test.case_id)
                    # NOTE(review): retrying stops here only when the test is
                    # recorded as BOTH an error and a failure. When it is in
                    # neither list (i.e. it passed), control still reaches the
                    # ``continue`` below, so a passing test appears to be run
                    # again on every remaining attempt - confirm whether the
                    # condition was meant to be
                    # ``error is None and fail is None``.
                    if error is not None and fail is not None:
                        break
                    elif error is not None:
                        self.logger.warning("用例: {} 第{}次失败 原因: {}".format(
                            test.__class__.__name__, i,
                            str(result.errors[error]['msg'])))
                        # Drop the record so only the final attempt is reported.
                        del result.errors[error]
                    elif fail is not None:
                        self.logger.warning("用例: {} 第{}次失败 原因: {}".format(
                            test.__class__.__name__, i,
                            str(result.failures[fail]['msg'])))
                        # Drop the record so only the final attempt is reported.
                        del result.failures[fail]
                    # NOTE(review): _previousTestClass is only assigned on
                    # this retry path, never after the final attempt - confirm.
                    result._previousTestClass = test.__class__
                    continue
            if self._cleanup:
                self._removeTestAtIndex(index)
        if topLevel:
            self._tearDownPreviousClass(None, result)
            self._handleModuleTearDown(result)
            result._testRunEntered = False
        return result