Beispiel #1
0
# The find_element(s)_by_* helpers were removed in Selenium 4.3; the By-based
# locator calls used below are available in Selenium 3 as well.
from selenium.webdriver.common.by import By

# Headless mode: scraping does not need a visible browser window.
options.add_argument('headless')
browser = webdriver.Chrome(options=options)
# For debugging with a visible window, create the driver without options:
# browser = webdriver.Chrome()
browser.get(new_url)

# '.tit > a' selects the links to the newly listed pages; the number of
# matches seems to vary (about 16-20).
elmt_list = browser.find_elements(By.CSS_SELECTOR, '.tit > a')
# Collect the hrefs up front, because the loop below navigates this same
# browser away from the listing page.
link_list = [elmt.get_attribute('href') for elmt in elmt_list]

for idx, link in enumerate(link_list):
    # Progress log: 1-based position plus the last path segment of the link.
    print(f'{idx+1}번째 크롤링 시작: {link.split("/")[-1]}')
    browser.get(link)

    dto = DTO()
    dto.title = browser.find_element(By.CSS_SELECTOR, '.tit').text

    # Keep only the text after the last ':' and drop thousands separators.
    read_text = browser.find_element(By.CSS_SELECTOR, '.read').text
    dto.view_count = read_text.split(':')[-1].strip().replace(',', '')

    # The info list is read line by line; each value is taken from the line
    # that follows its label.
    raw_list = browser.find_element(
        By.CSS_SELECTOR, '.cd-info-list').text.split('\n')

    # '분야' (field): comma-separated entries, with '/' normalised to '-'.
    field_line = raw_list[raw_list.index('분야') + 1].replace('/', '-')
    dto.contest_field = [field.strip() for field in field_line.split(',')]

    # '응모대상' (eligible applicants) and '주최/주관' (host/organiser).
    dto.target = raw_list[raw_list.index('응모대상') + 1]
    dto.host = raw_list[raw_list.index('주최/주관') + 1]

    # '후원/협찬' (sponsor): if the line after the label is already the next
    # label '접수기간' (application period), no sponsor value is present.
    sponsor = raw_list[raw_list.index('후원/협찬') + 1]
    dto.sponsor = None if sponsor == '접수기간' else sponsor
Beispiel #2
0
# 데이터 빼보기
# 3. 브라우저 창 띄울 필요없음
# 4. 제목
# 5. DTO만들기

from selenium import webdriver
from dto import DTO


# 43959
def url(no):
    """Return the wevity.com view URL whose ``ix`` query parameter is *no*.

    Args:
        no: Value interpolated into the ``ix`` parameter (presumably the
            contest index, e.g. 43959).

    Returns:
        The full URL as a string.
    """
    page_url = f'https://www.wevity.com/?c=find&s=1&gbn=viewok&gp=1&ix={no}'
    return page_url


# By-based locators replace find_element_by_css_selector, which was removed
# in Selenium 4.3; they are available in Selenium 3 as well.
from selenium.webdriver.common.by import By

dto = DTO()

# Headless mode: no visible browser window is needed to read the page.
options = webdriver.ChromeOptions()
options.add_argument('headless')
browser = webdriver.Chrome(options=options)
try:
    browser.get(url(43959))

    # The text of the first '.tit' element is stored as the title.
    dto.title = browser.find_element(By.CSS_SELECTOR, '.tit').text
finally:
    # Always shut down Chrome and its driver process, even if the page load
    # or the element lookup raises.
    browser.quit()

print(dto.__dict__)
Beispiel #3
0
# 데이터 빼보기
# 3. 브라우저 창 띄울 필요없음
# 4. 제목
# 5. DTO만들기
# 6. 조회수

from selenium import webdriver
from dto import DTO


# 43959
def url(no):
    """Build the wevity.com view URL for the given ``ix`` value.

    Args:
        no: Value placed in the ``ix`` query parameter (presumably the
            contest index, e.g. 43959).

    Returns:
        The complete URL string.
    """
    target = f'https://www.wevity.com/?c=find&s=1&gbn=viewok&gp=1&ix={no}'
    return target


# By-based locators replace find_element_by_css_selector, which was removed
# in Selenium 4.3; they are available in Selenium 3 as well.
from selenium.webdriver.common.by import By

dto = DTO()

# Headless mode: no visible browser window is needed to read the page.
options = webdriver.ChromeOptions()
options.add_argument('headless')
browser = webdriver.Chrome(options=options)
try:
    browser.get(url(43959))

    # The text of the first '.tit' element is stored as the title.
    dto.title = browser.find_element(By.CSS_SELECTOR, '.tit').text

    # View count: keep only the text after the last ':' and drop thousands
    # separators; the value stays a string.
    read_text = browser.find_element(By.CSS_SELECTOR, '.read').text
    dto.view_count = read_text.split(':')[-1].strip().replace(',', '')
finally:
    # Always shut down Chrome and its driver process, even if the page load
    # or an element lookup raises.
    browser.quit()

print(dto.__dict__)