# Salary strings that map straight to a code without any parsing.
_SALARY_FIXED_CODES = {
    "薪资面议": -1,
    "校招": -1,
    "1K以下": 105,
}

# (inclusive upper bound of the range midpoint, salary_code).
# Checked in order; the first band whose bound is >= the midpoint wins.
# NOTE(review): the codes look like "<low><high>" in K (e.g. 510 = 5-10K) —
# confirm against whatever consumes salary_code.
_SALARY_BANDS = (
    (5, 105),
    (10, 510),
    (15, 1015),
    (20, 1520),
    (25, 2025),
    (30, 2530),
    (35, 3035),
    (40, 3540),
)

# Code used when the midpoint is above every band in _SALARY_BANDS.
_SALARY_TOP_CODE = 4099


def _salary_code(salary):
    """Map one raw salary string to its salary_code.

    Args:
        salary: Raw value of the ``salary`` column, e.g. ``"6K-8K"``.

    Returns:
        The integer code, or ``None`` when the value is neither one of the
        fixed strings nor a ``"<low>K-<high>K"`` style range.
    """
    if not isinstance(salary, str):
        return None

    fixed_code = _SALARY_FIXED_CODES.get(salary)
    if fixed_code is not None:
        return fixed_code

    bounds = salary.replace("K", "").split("-")
    try:
        # Floor division is intentional: it keeps the band boundaries exactly
        # where they have always been (e.g. "10K-11K" -> 10.0 -> 510).
        midpoint = (float(bounds[0]) + float(bounds[1])) // 2
    except (IndexError, ValueError):
        return None

    for upper_bound, code in _SALARY_BANDS:
        if midpoint <= upper_bound:
            return code
    return _SALARY_TOP_CODE


def clean_salary():
    """Fill in ``salary_code`` for zhilian_job rows that do not have one yet.

    Selects ``job_id`` and ``salary`` for every row whose ``salary_code`` is
    null, derives the code from the salary text, and hands the updated rows
    to ``hu_utils.insert_update_many``. Rows whose salary text cannot be
    interpreted are reported on stdout and left out of the write instead of
    aborting the whole run.
    """
    sql = "select job_id,salary from zhilian_job where salary_code is null"
    conn = hu_utils.open_line_db()
    salary_infos = hu_utils.select_one(conn, sql)
    print(len(salary_infos))

    salary_info_z = []
    for salary_info in salary_infos:
        code = _salary_code(salary_info["salary"])
        if code is None:
            print("skip unparsable salary:", salary_info)
            continue
        salary_info["salary_code"] = code
        salary_info_z.append(salary_info)

    # A fresh connection is opened for the write, as before.
    # NOTE(review): it is not visible here whether select_one leaves the
    # first connection usable — confirm before merging the two.
    conn = hu_utils.open_line_db()
    hu_utils.insert_update_many(conn, salary_info_z, "zhilian_job")
def main():
    """Scrape the public-account ranking table from qimai.cn and store it.

    Loads https://www.qimai.cn/weixin in Chrome, waits for the ranking rows
    to appear, reads the name, ``et_name`` and strength value from each table
    row, and passes the collected dicts to ``hu_utils.insert_update_many``
    for the ``public_accounts_info`` table of the local ``app_info`` database.
    The database write only happens when scraping finished without an error.
    """
    # Create the driver outside the try block: if get_chrome() itself fails
    # there is nothing to shut down, and the cleanup below must not hide the
    # real error behind an UnboundLocalError.
    browser = get_chrome(True)  # obtain the Chrome driver
    try:
        browser.maximize_window()  # maximize the browser window
        url = "https://www.qimai.cn/weixin"
        browser.get(url)
        browser.implicitly_wait(10)
        wait = WebDriverWait(browser, 10)
        wait.until(
            EC.presence_of_all_elements_located(
                ("xpath", "//p[@class='medium-txt']")
            )
        )
        # The original author's note says the page lists 100 entries.
        trs = browser.find_elements("xpath", "//tr[@class='ivu-table-row']")

        public_accounts_infos = []
        for tr in trs:
            public_accounts_info = {}
            public_accounts_name = tr.find_element(
                "xpath", "td//p[@class='medium-txt']"
            ).text
            et_name = tr.find_element("xpath", "td[3]/div/span").text
            strength_value = tr.find_element("xpath", "td[4]/div/span").text
            public_accounts_info["name"] = public_accounts_name
            public_accounts_info["et_name"] = et_name
            public_accounts_info["strength_value"] = strength_value
            print(public_accounts_info)
            public_accounts_infos.append(public_accounts_info)
    finally:
        logger.info("关闭浏览器")
        # quit() ends the whole driver session; close() would only close the
        # current window and leave the driver running.
        browser.quit()

    conn = hu_utils.open_local_db(db="app_info")
    hu_utils.insert_update_many(conn, public_accounts_infos, "public_accounts_info")
def clean_jobname():
    """Lower-case the ``jobname`` of every zhilian_job row and write it back.

    Selects ``job_id`` and ``jobname`` for all rows, prints the fetched rows,
    lower-cases each ``jobname`` in place, and hands the rows to
    ``hu_utils.insert_update_many`` over a newly opened connection.
    """
    query = "select job_id,jobname from zhilian_job"
    read_conn = hu_utils.open_line_db()
    rows = hu_utils.select_one(read_conn, query)
    print(rows)

    def lowered(row):
        # Mutates the fetched row itself, so the same dict objects are written.
        row["jobname"] = row["jobname"].lower()
        return row

    normalized_rows = [lowered(row) for row in rows]

    write_conn = hu_utils.open_line_db()
    hu_utils.insert_update_many(write_conn, normalized_rows, "zhilian_job")