def Parsing_list_url(URL, page_url):
    """Open a logged-in browser on *page_url* and collect the post links on it.

    Args:
        URL: Mapping whose ``'url'`` entry is handed to ``Domain_check``; the
            value that comes back is prepended to every post link.
        page_url: Address of the board page to load.

    Returns:
        A ``(driver, urls)`` tuple. ``driver`` is the browser created here and
        is left open for the caller. ``urls`` is the list of post URLs; it is
        empty when the page could not be loaded, or when the article container
        holds exactly one ``<article>`` element (treated as "no posts").

    Raises:
        Whatever ``chromedriver``/``everytime.login`` raise, and any exception
        raised while waiting for or parsing the page. In the latter case the
        browser is closed before the exception is re-raised, because the
        caller never receives a handle to it.
    """
    post_urls = []
    domain = Domain_check(URL['url'])

    # NOTE: this function used to start with `try: driver.quit()` to close a
    # previously opened browser. `driver` is a local name that is not bound
    # yet at this point, so that call could never run; it has been dropped.
    driver = chromedriver()
    driver = everytime.login(driver)

    # Guard against the board having been removed from the site.
    try:
        driver.get(page_url)
        driver.implicitly_wait(3)
    except Exception:
        return (driver, post_urls)

    try:
        WebDriverWait(driver, 100).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "a.article")))
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        posts = soup.find("div", {"class": 'wrap articles'}).findAll("article")

        # A single <article> element means the board has no posts at all.
        if len(posts) != 1:
            for post in posts:
                post_urls.append(domain + post.find("a")['href'])
    except Exception:
        # The driver is only reachable through the return value, so it must
        # be released here or the browser process would be leaked.
        try:
            driver.quit()
        except Exception:
            # Best-effort cleanup: keep the original error as the one raised.
            pass
        raise

    return (driver, post_urls)
def _restart_everytime_driver(driver):
    """Quit *driver*, pause three seconds, and return a new logged-in browser."""
    driver.quit()
    time.sleep(3)
    driver = chromedriver()
    return everytime.login(driver)


def _collect_menu_boards(soup, main_url):
    """Return ``{'tag', 'url'}`` entries for the boards linked under ``div#submenu``."""
    boards = []
    groups = soup.find("div", {"id": "submenu"}).findAll('div', {"class": "group"})
    for group in groups:
        try:
            for item in group.find("ul").findAll("li"):
                anchor = item.find("a")
                tag = anchor.text
                # Skip the "more" expander link.
                if tag.strip() == "더 보기":
                    continue
                url = main_url + anchor['href']
                # Skip entries whose label contains "찾기".
                if "찾기" in tag:
                    continue
                boards.append({'tag': tag, 'url': url})
        except Exception:
            # A malformed group is skipped; the remaining groups are still read.
            continue
    return boards


def _collect_search_boards(driver, main_url, search_url, search_words):
    """Return ``{'tag', 'url'}`` entries found by searching each of *search_words*."""
    boards = []
    for word in search_words:
        try:
            driver.get(search_url + word)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "a.result")))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            links = soup.find("div", {"class": "searchresults"}).findAll('a')
            for link in links:
                boards.append({
                    'tag': link.find("h3").text,
                    'url': main_url + link.get('href'),
                })
        except Exception:
            # A failed search keeps what was gathered so far and moves on.
            continue
    return boards


def everytime_all_board(URL, end_date, db):
    """Crawl every discoverable everytime board and store its recent posts.

    The boards are gathered from the ``div#submenu`` block of the page shown
    after login and from the site's board search. Each board is then walked
    page by page; a board is finished as soon as ``db_manager`` reports that
    a page added nothing, or when an error occurs on it.

    Args:
        URL: Mapping read for its ``'url'`` and ``'info'`` entries and passed
            through to the helper functions.
        end_date: Cut-off compared against ``str(date)`` of each post; posts
            that compare less than or equal to it are not stored.
        db: Passed through unchanged to the helper functions.

    Returns:
        None. When the browser cannot be started or logged in, the error is
        reported through ``error_handler`` and the function returns early.
    """
    main_url = URL['url']
    board_search_url = "https://everytime.kr/community/search?keyword="
    board_search_word = ['게시판', '갤러리']

    # Connect the driver.
    try:
        driver = chromedriver()
        driver = everytime.login(driver)
    except Exception as e:
        error_handler(e, URL, main_url, db)
        return

    # try/finally so the browser is released on every exit path, including
    # an exception escaping from board discovery or a driver restart.
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "a.article")))
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Boards listed in the top menu first, then boards found by search.
        board_list = _collect_menu_boards(soup, main_url)
        board_list.extend(_collect_search_boards(
            driver, main_url, board_search_url, board_search_word))

        # Loop over every discovered board.
        for board in board_list:
            page = 1
            page_flag = 0
            board_url = board['url']
            page_url = Change_page(board_url, page)
            print("\nTarget : ", URL['info'], " :: ", board['tag'])
            continue_handler(URL['info'] + " :: " + board['tag'], URL, page_url)

            # Loop over the pages of this board.
            while True:
                # Recycle the browser after 50 consecutive pages.
                if page_flag == 50:
                    page_flag = 0
                    driver = _restart_everytime_driver(driver)
                try:
                    print("page_url :::: ", page_url)
                    print("Page : ", page)
                    # NOTE(review): called with four arguments and the result
                    # is used as a list of URLs -- confirm this matches the
                    # Parsing_list_url definition that is in scope here.
                    post_urls = Parsing_list_url(main_url, page_url, driver, db)
                    # The site intermittently returns an empty list; retry once.
                    if len(post_urls) == 0:
                        time.sleep(2)
                        post_urls = Parsing_list_url(main_url, page_url, driver, db)

                    post_data_prepare = []
                    # Loop over the posts of this page.
                    for post_url in post_urls:
                        get_post_data = Parsing_post_data(
                            driver, post_url, URL, board['tag'], db)
                        if get_post_data == "error":
                            break
                        title = get_post_data[1]
                        date = get_post_data[2]
                        print(date, "::::", title)
                        # Posts at or before end_date are skipped; newer
                        # ones are queued for storage.
                        if str(date) <= end_date:
                            continue
                        post_data_prepare.append(get_post_data[0])

                    add_cnt = db_manager(URL, post_data_prepare, db)
                    print("add_OK : ", add_cnt)
                    # Nothing stored from this page: the board is done.
                    if add_cnt == 0:
                        page_flag = 0
                        break
                    page_flag += 1
                    page += 1
                    page_url = Change_page(board_url, page)
                except Exception as e:
                    error_handler(e, URL, page_url, db)
                    driver = _restart_everytime_driver(driver)
                    break
    finally:
        # Disconnect the driver.
        driver.quit()
# Example script: fetch the timetable of every friend of the logged-in
# account, then print the combined and the empty time tables.
import sys

# Extend the module search path with the parent directory before importing
# ``everytime`` (presumably that is where the package lives -- confirm).
sys.path.append("..")

import everytime

# Replace the placeholders with real credentials before running.
ses, res = everytime.login("YOUR_ID", "YOUR_PASSWORD")
friends = everytime.get_friend_list(ses)

# One timetable per friend, in the order the friend list was returned.
friend_timetables = [
    everytime.get_timetable_user_id(ses, friend["userid"])
    for friend in friends
]

union = everytime.union_time_table(friend_timetables)
empty = everytime.empty_time_table(friend_timetables)

print(union)
print(empty)

import util

# Print the empty table again after passing it through util.int2datetime.
print(util.int2datetime(empty))