Esempio n. 1
0
def Parsing_list_url(URL, page_url):
    """Open a fresh, logged-in browser on *page_url* and collect post links.

    Args:
        URL: Mapping whose ``'url'`` entry is handed to ``Domain_check``; the
            result is prepended to every post link found on the page.
        page_url: Address of the board listing page to load.

    Returns:
        A ``(driver, urls)`` tuple. ``driver`` is the browser created by this
        call, which the caller is expected to quit. ``urls`` is the list of
        prefixed post links. It is empty when loading *page_url* failed or
        when the listing contains exactly one ``<article>`` element.

    Raises:
        Any exception raised by login, by the wait for ``a.article`` or by
        the HTML parsing is re-raised after the browser has been quit.
    """
    post_urls = []
    domain = Domain_check(URL['url'])

    # The original tried ``driver.quit()`` at this point to close a previous
    # browser. ``driver`` is a local name that is not bound yet, so that call
    # could only raise UnboundLocalError (silently swallowed). The dead code
    # has been removed; behaviour is unchanged.
    driver = chromedriver()
    try:
        driver = everytime.login(driver)

        # Guard against the Everytime board having been removed.
        try:
            driver.get(page_url)
            driver.implicitly_wait(3)
        except Exception:
            return (driver, post_urls)

        WebDriverWait(driver, 100).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "a.article")))
        bs = BeautifulSoup(driver.page_source, 'html.parser')

        posts = bs.find("div", {"class": 'wrap articles'}).findAll("article")
        # Per the original author's note, a single <article> means the board
        # has no posts, so nothing is collected in that case.
        if len(posts) != 1:
            post_urls = [domain + post.find("a")['href'] for post in posts]
    except BaseException:
        # On failure the caller never receives the driver, so release the
        # browser here instead of leaking it, then let the error propagate.
        driver.quit()
        raise

    return (driver, post_urls)
Esempio n. 2
0
def _launch_logged_in_driver():
    """Create a driver with ``chromedriver()`` and return it after login."""
    fresh = chromedriver()
    try:
        return everytime.login(fresh)
    except Exception:
        # Do not leave an orphaned browser behind when login fails.
        fresh.quit()
        raise


def everytime_all_board(URL, end_date, db):
    """Crawl every discoverable Everytime board and store recent posts.

    Boards are discovered from the ``#submenu`` groups of the page shown
    after login and from the community search results for the keywords
    '게시판' and '갤러리'. Each board is then walked page by page until
    ``db_manager`` reports that no post was added or an error occurs.

    Args:
        URL: Mapping with at least ``'url'`` (prefix prepended to board
            links) and ``'info'`` (label used in progress output). It is
            also forwarded to the helper functions.
        end_date: Posts whose ``str(date)`` compares ``<=`` this value are
            skipped.
        db: Database handle forwarded to ``error_handler``,
            ``Parsing_list_url``, ``Parsing_post_data`` and ``db_manager``.

    Returns:
        None. If the initial browser launch or login fails, the error is
        reported through ``error_handler`` and the function returns early.
    """
    main_url = URL['url']
    board_search_url = "https://everytime.kr/community/search?keyword="
    board_search_word = ['게시판', '갤러리']
    board_list = []

    # Connect the driver.
    try:
        driver = _launch_logged_in_driver()
    except Exception as e:
        error_handler(e, URL, main_url, db)
        return

    # Everything below runs under try/finally so the browser is released
    # even when an unexpected error escapes (e.g. the first wait timing out).
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "a.article")))
        bs = BeautifulSoup(driver.page_source, 'html.parser')

        # Scrape the dynamic boards listed in the Everytime top menu. =======
        board_group_list = bs.find("div", {
            "id": "submenu"
        }).findAll('div', {"class": "group"})
        for board_group in board_group_list:
            try:
                for board_li in board_group.find("ul").findAll("li"):
                    anchor = board_li.find("a")
                    tag = anchor.text
                    # Skip the "more" (더 보기) expander entry.
                    if tag.strip() == "더 보기":
                        continue
                    url = main_url + anchor['href']
                    # Skip entries whose label contains "찾기".
                    if "찾기" in tag:
                        continue
                    board_list.append({'tag': tag, 'url': url})
            except Exception:
                # A malformed group abandons the rest of that group only.
                continue

        # Scrape additional dynamic boards through the community search.
        for search_word in board_search_word:
            try:
                driver.get(board_search_url + search_word)
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR, "a.result")))
                bs = BeautifulSoup(driver.page_source, 'html.parser')
                board_a_list = bs.find("div", {
                    "class": "searchresults"
                }).findAll('a')
                for board_a in board_a_list:
                    board_list.append({
                        'tag': board_a.find("h3").text,
                        'url': main_url + board_a.get('href'),
                    })
            except Exception:
                continue
        # ====================================================================

        # Loop over the discovered boards.
        for board in board_list:
            page = 1
            page_flag = 0
            board_url = board['url']
            # URL of the listing page currently being crawled.
            page_url = Change_page(board_url, page)
            print("\nTarget : ", URL['info'], " :: ", board['tag'])
            continue_handler(URL['info'] + " :: " + board['tag'], URL,
                             page_url)

            # Loop over the pages of this board.
            while True:
                # Restart the browser after 50 consecutive pages.
                if page_flag == 50:
                    page_flag = 0
                    driver.quit()
                    driver = None  # nothing to release if relaunch fails
                    time.sleep(3)
                    driver = _launch_logged_in_driver()
                try:
                    print("page_url :::: ", page_url)  # current URL
                    print("Page : ", page)  # current page number
                    post_urls = Parsing_list_url(main_url, page_url, driver,
                                                 db)
                    # Everytime intermittently returns nothing; retry once.
                    if len(post_urls) == 0:
                        time.sleep(2)
                        post_urls = Parsing_list_url(main_url, page_url,
                                                     driver, db)
                    post_data_prepare = []

                    # Loop over the posts on this page.
                    for post_url in post_urls:
                        get_post_data = Parsing_post_data(
                            driver, post_url, URL, board['tag'], db)
                        if get_post_data == "error":
                            break
                        title = get_post_data[1]
                        date = get_post_data[2]
                        # Show the date and title of the crawled post.
                        print(date, "::::", title)
                        # Keep only posts newer than end_date.
                        if str(date) > end_date:
                            post_data_prepare.append(get_post_data[0])

                    add_cnt = db_manager(URL, post_data_prepare, db)
                    print("add_OK : ", add_cnt)  # number of posts stored
                    # Stop this board when nothing was added; otherwise move
                    # on to the next page.
                    if add_cnt == 0:
                        page_flag = 0
                        break
                    page_flag += 1
                    page += 1
                    page_url = Change_page(board_url, page)
                except Exception as e:
                    # Report the failure, restart the browser and give up on
                    # this board.
                    error_handler(e, URL, page_url, db)
                    driver.quit()
                    driver = None  # nothing to release if relaunch fails
                    time.sleep(3)
                    driver = _launch_logged_in_driver()
                    break
    finally:
        # Disconnect the driver.
        if driver is not None:
            driver.quit()
Esempio n. 3
0
import sys
sys.path.append("..")

import everytime

# Log in first; `ses` is the handle passed to every later everytime call
# (`res` is not used by this script).
ses, res = everytime.login("YOUR_ID", "YOUR_PASSWORD")

# Fetch one timetable per friend, in the order the friend list returns them.
friends = everytime.get_friend_list(ses)
friend_timetables = [
    everytime.get_timetable_user_id(ses, friend["userid"])
    for friend in friends
]

# Combine the timetables both ways and show the raw results.
union = everytime.union_time_table(friend_timetables)
empty = everytime.empty_time_table(friend_timetables)

print(union)
print(empty)

# Imported late on purpose: keeps the original execution order.
import util

print(util.int2datetime(empty))