# Locator constants for the find_element()/find_elements() API.
from selenium.webdriver.common.by import By


def _value_after(lines, label):
    """Return the entry that directly follows *label* in *lines*.

    Raises ValueError if *label* is not present.
    """
    return lines[lines.index(label) + 1]


options.add_argument('headless')
browser = webdriver.Chrome(options=options)
# To watch the browser window while it runs, create the driver without
# options instead: webdriver.Chrome()
try:
    browser.get(new_url)

    # '.tit > a' matches the links to the new (detail) pages; the number of
    # matches seems to vary, roughly 16-20.
    # Collect the hrefs up front: the loop below navigates away from the
    # list page, so the element handles cannot be relied on afterwards.
    elmt_list = browser.find_elements(By.CSS_SELECTOR, '.tit > a')
    hrefs = (elmt.get_attribute('href') for elmt in elmt_list)
    # Skip anchors that carry no href; link.split() below needs a string.
    link_list = [href for href in hrefs if href]

    for idx, link in enumerate(link_list, start=1):
        print(f'{idx}번째 크롤링 시작: {link.split("/")[-1]}')
        browser.get(link)

        dto = DTO()
        dto.title = browser.find_element(By.CSS_SELECTOR, '.tit').text

        # Keep only the text after the last ':' and drop thousands
        # separators; the value is stored as a string.
        read_text = browser.find_element(By.CSS_SELECTOR, '.read').text
        dto.view_count = read_text.split(':')[-1].strip().replace(',', '')

        # The info list is read as plain text, one entry per line; each
        # value is looked up as the line right after its label.
        raw_list = browser.find_element(
            By.CSS_SELECTOR, '.cd-info-list').text.split('\n')

        # Comma-separated fields; '/' inside a field name becomes '-'.
        field_text = _value_after(raw_list, '분야').replace('/', '-')
        dto.contest_field = [part.strip() for part in field_text.split(',')]

        dto.target = _value_after(raw_list, '응모대상')
        dto.host = _value_after(raw_list, '주최/주관')

        # If the entry after the sponsor label is '접수기간' (presumably the
        # next label, i.e. the sponsor value is blank), store None.
        sponsor = _value_after(raw_list, '후원/협찬')
        dto.sponsor = None if sponsor == '접수기간' else sponsor
finally:
    # Always shut the driver down so no headless Chrome/chromedriver
    # process is left running, even when a lookup above raises.
    browser.quit()
# Extracting data
# 3. No need to open a browser window (run headless)
# 4. Title
# 5. Build the DTO
from selenium import webdriver
from selenium.webdriver.common.by import By

from dto import DTO


# Sample entry number: 43959
def url(no):
    """Return the wevity.com view-page URL for entry number *no*."""
    return f'https://www.wevity.com/?c=find&s=1&gbn=viewok&gp=1&ix={no}'


dto = DTO()

options = webdriver.ChromeOptions()
options.add_argument('headless')
browser = webdriver.Chrome(options=options)
try:
    browser.get(url(43959))
    # find_element(By.CSS_SELECTOR, ...) replaces the
    # find_element_by_css_selector helper, which newer Selenium releases
    # no longer provide.
    dto.title = browser.find_element(By.CSS_SELECTOR, '.tit').text
finally:
    # Always shut the driver down so no headless Chrome/chromedriver
    # process is left running, even when the lookup above raises.
    browser.quit()

print(dto.__dict__)
# Extracting data
# 3. No need to open a browser window (run headless)
# 4. Title
# 5. Build the DTO
# 6. View count
from selenium import webdriver
from selenium.webdriver.common.by import By

from dto import DTO


# Sample entry number: 43959
def url(no):
    """Return the wevity.com view-page URL for entry number *no*."""
    return f'https://www.wevity.com/?c=find&s=1&gbn=viewok&gp=1&ix={no}'


dto = DTO()

options = webdriver.ChromeOptions()
options.add_argument('headless')
browser = webdriver.Chrome(options=options)
try:
    browser.get(url(43959))
    # find_element(By.CSS_SELECTOR, ...) replaces the
    # find_element_by_css_selector helper, which newer Selenium releases
    # no longer provide.
    dto.title = browser.find_element(By.CSS_SELECTOR, '.tit').text

    # Keep only the text after the last ':' and drop thousands separators;
    # the value is stored as a string.
    read_text = browser.find_element(By.CSS_SELECTOR, '.read').text
    dto.view_count = read_text.split(':')[-1].strip().replace(',', '')
finally:
    # Always shut the driver down so no headless Chrome/chromedriver
    # process is left running, even when a lookup above raises.
    browser.quit()

print(dto.__dict__)