# Drive Edge through a product-listing page, sort the listing by sales, and
# collect one `results` dict per listed item.
# NOTE(review): `url`, `Edge` and `time` are not defined in the visible part of
# the file — presumably imported/assigned earlier; confirm.
web = Edge(executable_path="M.exe")
# NOTE(review): per the Selenium docs WebDriver.get() returns None, so `html`
# does not hold the page source — use web.page_source if the markup is needed.
html = web.get(url)
time.sleep(1)
web.find_element_by_xpath(
    '//*[@id="J_filter"]/div[1]/div[1]/a[2]').click()  # click "sort by sales"
# web.find_elements_by_class_name('fs-tit').click
time.sleep(1)
# Disabled: step the page scroll position from 1000 to 19000 in increments of
# 1000, pausing one second per step, then jump back to the top.
# for i in range(1000,20000,1000):
#     js = "var q=document.documentElement.scrollTop={}".format(i)
#     web.execute_script(js)
#     time.sleep(1)
#
# web.execute_script("document.documentElement.scrollTop=0")

# 'gl-warp > li' is a compound CSS selector, not a single class name, so it is
# looked up with the CSS locator (and the leading '.' written out) instead of
# relying on the class-name locator being rewritten into CSS by the driver.
goods = web.find_elements_by_css_selector('.gl-warp > li')
# NOTE(review): `j` is not incremented anywhere in the visible part of the
# loop — confirm it is advanced further down, otherwise every ID will be 1.
j = 1
for i in goods:
    results = {}
    results['ID'] = j
    # Book title.
    results['书名'] = i.find_element_by_xpath('.//div[@class="p-name"]//em').text
    time.sleep(1)
    # Price, with the currency sign appended after the number.
    results['价格'] = i.find_element_by_xpath(
        './/div[@class="p-price"]//i').text + "¥"
    time.sleep(1)
    # Publisher (taken from the shop link of the item).
    results['出版社'] = i.find_element_by_xpath(
        './/div[@class="p-shopnum"]//a').text
    table = 'books'
    # Comma-separated field names, in insertion order of `results`.
    keys = ', '.join(results.keys())
from selenium.webdriver import Edge import time from bs4 import BeautifulSoup driver = Edge( 'C:\\Users\\cakarst\\IdeaProjects\\Documentation Web Scrapping\\msedgedriver.exe' ) driver.get( 'https://docs.microsoft.com//en-us//sql//t-sql//data-types//date-transact-sql?view=sql-server-ver15' ) left_nav_buttons = driver.find_elements_by_class_name('tree-item') def click_buttons(buttonList): for x in range(len(buttonList)): if buttonList[x].is_displayed() and buttonList[x].get_attribute( 'aria-expanded') == "false": driver.execute_script( "arguments[0].click();", buttonList[x].find_element_by_class_name('tree-expander')) child_list = buttonList[x].find_elements_by_class_name('tree-item') click_buttons(child_list) time.sleep(.01) ####################################################### click_buttons(left_nav_buttons) bs = BeautifulSoup(driver.page_source, 'html.parser') for link in bs.find_all(class_="tree-item is-leaf"):