Example #1
def Parsing_list_url(URL, page_url):
	List = []
	domain = Domain_check(URL['url'])

	# If a driver is already running, quit it; otherwise just continue
	try:
		driver.quit()
	except:
		pass

	driver = chromedriver()
	driver.get(page_url)

	try:
		WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "td.aL"))) # assume the AJAX load is complete once td.aL appears
	except:
		try:
			WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "td.aL")))
		except:
			return (driver, List)
	html = driver.page_source
	bs = BeautifulSoup(html, 'html.parser')

	posts = bs.find("div", {"class": "tbl_container"}).find("tbody").findAll("tr")
	for post in posts:
		if post.find("th") != None:
			continue
		if len(post.find("td").text) <= 1:
			continue
		url_done = domain + "/" + post.find("td", {"class": "aL"}).find("a")['href']
		List.append(url_done)

	data = (driver, List)
	return data
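All of these examples lean on the same imports and a set of shared helpers (chromedriver, Domain_check, date_cut, post_wash, img_size, Change_page, ...) that are defined elsewhere in the project. A minimal sketch of typical imports plus the two helpers used in Example #1, assuming chromedriver() returns a headless Chrome session and Domain_check() reduces a URL to its scheme and host; the real implementations may differ:

import time
import datetime
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def chromedriver():
    # Assumed behavior: return a ready-to-use headless Chrome driver.
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    return webdriver.Chrome(options=options)

def Domain_check(url):
    # Assumed behavior: reduce a full URL to scheme + host,
    # e.g. "https://example.com/board/list" -> "https://example.com".
    parts = urlparse(url)
    return parts.scheme + "://" + parts.netloc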
Example #2
def Parsing_list_url(URL, page_url):
    List = []
    domain = Domain_check(URL['url'])

    # If a driver is already running, quit it; otherwise just continue
    try:
        driver.quit()
    except:
        pass

    driver = chromedriver()
    driver.get(page_url)
    WebDriverWait(driver, 100).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "span.li_list")))
    time.sleep(2)
    '''
    for i in range(int(num)):
        driver.find_element_by_xpath('//*[@id="paging"]/li[4]/a').click()
        WebDriverWait(driver, 100).until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.li_num")))
    '''
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')

    try:
        posts1 = bs.find("ul", {"class": 'listContent'}).findAll("li")
        posts2 = bs.find("ul", {"class": 'listContent mb20'}).findAll("li")
        posts = posts1 + posts2
    except:
        data = (driver, List)
        return data
    try:
        for post in posts:
            url = post.find("span", {
                "class": "li_subject li_list2"
            }).find("a")['onclick']
            url = url.split("'")[1]
            url = domain + url
            List.append(url)
    except:
        List = []

    data = (driver, List)

    return data
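The onclick parsing above assumes the post path arrives as the first single-quoted argument of an inline handler; a small illustration with a made-up attribute value:

# Illustration of the onclick parsing in Example #2; the attribute value is made up.
onclick = "fnView('/board/view.do?seq=123'); return false;"
path = onclick.split("'")[1]            # "/board/view.do?seq=123"
print("https://example.com" + path)     # absolute post URL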
Example #3
def Parsing_list_url(URL, page_url):
    List = []
    domain = Domain_check(URL['url'])

    # If a driver is already running, quit it; otherwise just continue
    try:
        driver.quit()
    except:
        pass

    driver = chromedriver()

    List.append(page_url)

    data = (driver, List)

    return data
Example #4
def Parsing_list_url(URL, page_url):
    List = []

    # If a driver is already running, quit it; otherwise just continue
    try:
        driver.quit()
    except:
        pass

    driver = chromedriver()
    driver.get(page_url)
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "searchForm")))
    time.sleep(2)

    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')

    try:
        posts = bs.find("div", {
            "class": "table_wrap"
        }).find("tbody").find_all("tr")
    except:
        data = (driver, List)
        return data
    try:
        for post in posts:
            url = (post.find("a")["href"]).split("'")[1]
            url = "https://www.youthcenter.go.kr/board/boardDetail.do?bbsNo=3&ntceStno=" + url + "&pageUrl=board%2Fboard&orderBy=REG_DTM&orderMode=DESC"
            List.append(url)
    except:
        List = []

    data = (driver, List)

    return data
Example #5
def Parsing_list_url(URL, page_url):
    List = []
    domain = Domain_check(URL['url'])

    # If a driver is already running, quit it; otherwise just continue
    try:
        driver.quit()
    except:
        pass

    driver = chromedriver()
    driver = everytime.login(driver)

    # in case the Everytime board has been removed
    try:
        driver.get(page_url)
        driver.implicitly_wait(3)
    except:
        data = (driver, List)
        return data

    WebDriverWait(driver, 100).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "a.article")))
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')

    posts = bs.find("div", {"class": 'wrap articles'}).findAll("article")
    if len(posts) == 1:  # board has no posts at all (only a placeholder article)
        pass
    else:
        for post in posts:
            url = post.find("a")['href']
            url = domain + url
            List.append(url)

    data = (driver, List)

    return data
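everytime.login is an external helper that is not shown. A minimal sketch, assuming a module that submits stored credentials through the login form; the URL, field names, and credential source are all assumptions:

import os
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def login(driver):
    # Hypothetical sketch of everytime.login; every selector here is an assumption.
    driver.get("https://everytime.kr/login")
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.NAME, "userid")))
    driver.find_element_by_name("userid").send_keys(os.environ["EVERYTIME_ID"])
    driver.find_element_by_name("password").send_keys(os.environ["EVERYTIME_PW"])
    driver.find_element_by_css_selector("input[type=submit]").click()
    return driver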
Example #6
def Parsing_post_data(driver, post_url, URL, recent_post):
    post_data_prepare = []
    domain = Domain_check(URL['url'])
    end_date = date_cut(URL['info'])
    now_num = 0
    repeat_num = 0
    post_driver = chromedriver()  # separate driver for individual post pages
    driver.get(post_url)
    if (URL['info'].split("_")[2] == "campustown"):
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.card")))
        driver.find_element_by_xpath(
            '//*[@id="ct"]/div[5]/div/div[1]/div/button[2]').click()
        # //*[@id="ct"]/div[4]/div/div[1]/div/button[2]
    else:
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR,
                 "div.area_text")))  #div.header을 발견하면 에이작스 로딩이 완료됬다는 가정
    last_posts = [0]
    while 1:
        driver.find_element_by_tag_name("body").send_keys(Keys.END)
        time.sleep(1)

        html = driver.page_source
        bs = BeautifulSoup(html, 'html.parser')

        posts = bs.find("div", {
            "class": 'wrap_postlist'
        }).findAll("div", {"class": "item"})
        # break when scrolling loads no new posts
        if len(last_posts) == len(posts):
            break
        else:
            last_posts = posts

        for post in posts[now_num:]:
            try:
                post_data = {}
                url = post.find("a", {"class": "link"})['href']
                url = domain + url

                try:
                    post_driver.get(url)
                    #driver.get(url)
                except:
                    if len(post_data_prepare) == 0:
                        recent_post = None
                    else:
                        recent_post = post_data_prepare[0]['title']
                    data = (post_data_prepare, recent_post)
                    return data
                try:
                    WebDriverWait(post_driver, 30).until(
                        EC.presence_of_element_located(
                            (By.CSS_SELECTOR,
                             "div.txt_area")))  #a.item을 발견하면 에이작스 로딩이 완료됬다는 가정
                except:
                    if len(post_data_prepare) == 0:
                        recent_post = None
                    else:
                        recent_post = post_data_prepare[0]['title']
                    data = (post_data_prepare, recent_post)
                    return data
                html_post = post_driver.page_source
                bs_post = BeautifulSoup(html_post, 'html.parser')

                if (URL['info'].split("_")[2] == "campustown"):
                    title = bs_post.find("h3", {
                        "class": "tit_h3"
                    }).get_text(" ", strip=True)
                else:
                    if bs_post.find(
                            "div",
                        {"class": "se-module se-module-text se-title-text"
                         }) == None:
                        title = bs_post.find("h3", {
                            "class": "tit_h3"
                        }).get_text(" ", strip=True)
                    else:
                        title = bs_post.find("div", {
                            "class":
                            "se-module se-module-text se-title-text"
                        }).find("span").get_text(" ", strip=True)
                if bs_post.find("p", {"class": "blog_date"}) == None:
                    date = bs_post.find("p", {
                        "class": "se_date"
                    }).get_text(" ", strip=True)
                else:
                    date = bs_post.find("p", {
                        "class": "blog_date"
                    }).get_text(" ", strip=True)
                if date.find("시간") != -1 or date.find("분") != -1 or date.find(
                        "초") != -1:
                    now = datetime.datetime.now().strftime("%Y-%m-%d")
                    date = now + " 00:00:00"
                    date = str(
                        datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S"))
                else:
                    date = date + ":00"
                    date = str(
                        datetime.datetime.strptime(date,
                                                   "%Y. %m. %d. %H:%M:%S"))
                if (URL['info'].split("_")[2] == "campustown"):
                    phrase = bs_post.find("div", {
                        'class': "post_ct"
                    }).get_text(" ", strip=True)
                else:
                    phrase = bs_post.find("div", {
                        'class': "se-main-container"
                    }).get_text(" ", strip=True)
                phrase = post_wash(phrase)  # strip extra whitespace from the post body
                if (URL['info'].split("_")[2] == "campustown"):
                    if bs_post.find("div", {
                            'class': "post_ct"
                    }).find("img", {"id": "img_1"}) is None:
                        img = 3
                    else:
                        img = bs_post.find("div", {
                            "class": "post_ct"
                        }).find("img",
                                {"id": "img_1"})['src']  #게시글의 첫번째 이미지를 가져옴.
                        if 1000 <= len(img):
                            img = 3
                        else:
                            if img.startswith("http://") or img.startswith(
                                    "https://"):  # img가 내부링크인지 외부 링크인지 판단.
                                pass
                            elif img.startswith("//"):
                                img = "http:" + img
                            else:
                                img = domain + img
                else:
                    if bs_post.find("div", {
                            "class": "se-main-container"
                    }).find("img", {"id": "img_2"}) is None:
                        img = 3
                    else:
                        img = bs_post.find("div", {
                            "class": "se-main-container"
                        }).find("img",
                                {"id": "img_2"})['src']  #게시글의 첫번째 이미지를 가져옴.
                        if 1000 <= len(img):
                            img = 3
                        else:
                            if img.startswith("http://") or img.startswith(
                                    "https://"):  # img가 내부링크인지 외부 링크인지 판단.
                                pass
                            elif img.startswith("//"):
                                img = "http:" + img
                            else:
                                img = domain + img
                if img != 3:
                    if img_size(img):
                        pass
                    else:
                        img = 3

                post_data['title'] = title.upper()
                post_data['author'] = "0"
                post_data['date'] = date
                post_data['post'] = phrase.lower()
                post_data['img'] = img
                post_data['url'] = "https://" + url[10:]  # 'm'떼어버리는 작업

                print(date, "::::", title)

                if (date < end_date) or (title.upper() == recent_post):
                    break
                else:
                    post_data_prepare.append(post_data)
            except:
                continue

        now_num = len(posts)
        repeat_num += 1
        if (date <= end_date) or (title.upper() == recent_post):
            break
    if len(post_data_prepare) == 0:
        recent_post = None
    else:
        recent_post = post_data_prepare[0]['title']
    data = (post_data_prepare, recent_post)
    post_driver.close()
    return data
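post_wash and img_size are shared helpers that are not shown. Minimal sketches, assuming post_wash collapses whitespace and img_size checks that the image is large enough to serve as a thumbnail; the 300x300 threshold and the use of requests/Pillow are assumptions:

import re
from io import BytesIO
import requests
from PIL import Image

def post_wash(text):
    # Assumed behavior: collapse every run of whitespace into one space.
    return re.sub(r"\s+", " ", text).strip()

def img_size(img_url):
    # Assumed behavior: True when the image is usable as a thumbnail.
    # The 300x300 minimum is an assumption.
    try:
        res = requests.get(img_url, timeout=5)
        width, height = Image.open(BytesIO(res.content)).size
        return width >= 300 and height >= 300
    except Exception:
        return False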
Example #7
def Parsing_post_data(driver, post_url, URL, recent_post):
    post_data_prepare = []
    domain = Domain_check(URL['url'])
    end_date = date_cut(URL['info'])
    now_num = 0
    repeat_num = 0
    post_driver = chromedriver()  # separate driver for individual post pages
    driver.get(post_url)
    last_posts = [0]
    while 1:
        driver.find_element_by_tag_name("body").send_keys(Keys.END)
        time.sleep(1)

        html = driver.page_source
        bs = BeautifulSoup(html, 'html.parser')
        posts = bs.find("div", {
            "class": 'articlelist'
        }).find("ol", {
            "class": 'group'
        }).find_all("li")
        # break when scrolling loads no new posts
        if len(last_posts) == len(posts):
            break
        else:
            last_posts = posts

        for post in posts[now_num:]:
            try:
                post_data = {}
                url = post.find("a", {"class": "article"})['href']
                url = domain + url
                try:
                    post_driver.get(url)
                    #driver.get(url)
                except:
                    if len(post_data_prepare) == 0:
                        recent_post = None
                    else:
                        recent_post = post_data_prepare[0]['title']
                    data = (post_data_prepare, recent_post)
                    return data
                try:
                    WebDriverWait(post_driver, 30).until(
                        EC.presence_of_element_located(
                            (By.TAG_NAME,
                             "time")))  #a.item을 발견하면 에이작스 로딩이 완료됬다는 가정
                except:
                    if len(post_data_prepare) == 0:
                        recent_post = None
                    else:
                        recent_post = post_data_prepare[0]['title']
                    data = (post_data_prepare, recent_post)
                    return data
                html_post = post_driver.page_source
                bs_post = BeautifulSoup(html_post, 'html.parser')

                title = str(
                    post.find("p", {
                        "class": "text short"
                    }).get_text(" ", strip=True)).split("<br>")[0]
                date = bs_post.find("p", {
                    "class": "profile"
                }).find("time").get_text(" ", strip=True)
                date_len = len(date.split("/"))
                # when the date omits the year, it is from the current year
                if date_len == 2:
                    current_year = str(datetime.datetime.now().year)
                    date = current_year + '/' + date + ":00"
                    date = str(
                        datetime.datetime.strptime(date, "%Y/%m/%d %H:%M:%S"))
                else:
                    date = str(
                        datetime.datetime.strptime(date, "%Y/%m/%d %H:%M:%S"))
                post = bs_post.find("div", {
                    "class": "articleitem"
                }).find("p", {
                    "class": "text"
                }).get_text(" ", strip=True)
                post = post_wash(post)  #post 의 공백을 전부 제거하기 위함

                if bs_post.find("div", {"class": "attaches full"}) is None:
                    img = 3
                else:
                    img = bs_post.find("div", {
                        "class": "attaches full"
                    }).find("img")["src"]  #게시글의 첫번째 이미지를 가져옴.
                    if 1000 <= len(img):
                        img = 3
                    else:
                        if img.startswith("http://") or img.startswith(
                                "https://"):  # img가 내부링크인지 외부 링크인지 판단.
                            pass
                        elif img.startswith("//"):
                            img = "http:" + img
                        else:
                            img = domain + img
                if img != 3:
                    if img_size(img):
                        pass
                    else:
                        img = 3
                post_data['title'] = title.upper()
                post_data['author'] = ""
                post_data['date'] = date
                post_data['post'] = phrase.lower()
                post_data['img'] = img
                post_data['url'] = url
                print(date, "::::", title)

                if (date < end_date) or (title.upper() == recent_post):
                    break
                else:
                    post_data_prepare.append(post_data)
            except:
                continue

        now_num = len(posts)
        repeat_num += 1
        if (date <= end_date) or (title.upper() == recent_post):
            break
    if len(post_data_prepare) == 0:
        recent_post = None
    else:
        recent_post = post_data_prepare[0]['title']
    data = (post_data_prepare, recent_post)
    post_driver.close()
    return data
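Examples #6, #7, and #10 share the same infinite-scroll pattern: press END, wait, re-parse, and stop once the post count stops growing. The pattern in isolation, with a placeholder selector:

import time
from bs4 import BeautifulSoup
from selenium.webdriver.common.keys import Keys

def scroll_all_posts(driver, selector="div.item"):
    # Generic sketch of the scroll loop used above; "div.item" is a placeholder.
    last_count = -1
    while True:
        driver.find_element_by_tag_name("body").send_keys(Keys.END)
        time.sleep(1)  # give the lazy loader a moment to append posts
        bs = BeautifulSoup(driver.page_source, 'html.parser')
        posts = bs.select(selector)
        if len(posts) == last_count:  # nothing new loaded: bottom reached
            return posts
        last_count = len(posts)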
Example #8
def Parsing_list_url(URL, page_url, lastly_post, db, driver):
    List = []
    domain = Domain_check(URL['url'])
    end_date = date_cut(URL['info'])
    lastly_num = 0  # flag so the lastly_post update below runs only once
    #lastly_post = get_lastly_post(URL)  # fetch the stored lastly_post marker
    try:
        driver.get(page_url)
    except:
        driver = chromedriver()
        driver = daum.login(driver)
        driver.get(page_url)

    # skip boards whose page structure has changed
    if URL['info'] == "sj30_sejongstation_news":
        data = (driver, List)
        return data

    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "td.headcate")))
    except:
        data = (driver, List)
        return data
    num = 1
    while 1:
        cnt = 0
        if URL['info'].split("_")[2] == 'qna':
            query = '//*[@id="primaryContent"]/table/tbody/tr[2]/td[2]/div[3]/div/a[' + str(
                num) + ']'
        else:
            query = '//*[@id="primaryContent"]/table/tbody/tr[2]/td[2]/div[2]/div/a[' + str(
                num) + ']'
        try:
            driver.find_element_by_xpath(query).click()
        except:
            data = (driver, List)
            return data
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "td.headcate")))
        except:
            driver.refresh()
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "td.headcate")))
        html = driver.page_source
        bs = BeautifulSoup(html, 'html.parser')
        posts = bs.find("table", {
            "class": "bbsList"
        }).find("tbody").findAll("tr")

        for post in posts:
            if post.find("td", {"class": "num"}).find("img") != None:
                continue
            title = post.find("td", {
                "class": "subject"
            }).find("a").get_text(" ", strip=True)
            if post.find("td", {"class": "date"}) == None:
                date = str(datetime.now())  # no date cell: fall back to now (keeps the ":" check below working)
            else:
                date = post.find("td", {"class": "date"}).text.strip()
            if date.find(":") != -1:
                now = datetime.now().strftime("%Y-%m-%d")
                date = now + " 00:00:00"
            else:
                date = "20" + date + " 00:00:00"
                date = str(datetime.strptime(date, "%Y.%m.%d %H:%M:%S"))

            if date + title == lastly_post:  # hit the previously saved newest post: break with cnt = 0 so only newer URLs are returned
                cnt = 0
                lastly_num = 1
                break
            elif end_date <= date:
                url = post.find("td", {"class": "subject"}).find("a")['href']
                url = domain + url
                List.append(url)
                cnt += 1
        time.sleep(3)

        # always make lastly_post the first non-notice post on the first page
        if lastly_num == 1 or lastly_post == 0:
            for post in posts:
                if post.find("td", {"class": "num"}).find("img") != None:
                    continue
                title = post.find("td", {
                    "class": "subject"
                }).find("a").get_text(" ", strip=True)
                date = post.find("td", {"class": "date"}).text.strip()
                if date.find(":") != -1:
                    now = datetime.now().strftime("%Y-%m-%d")
                    date = now + " 00:00:00"
                else:
                    date = "20" + date + " 00:00:00"
                    date = str(datetime.strptime(date, "%Y.%m.%d %H:%M:%S"))
                lastly_post = date + title
                push_lastly_post(URL, lastly_post, db)
                break

        if cnt == 0:  # every post was older than end_date, so stop
            break
        else:  # advance the pager index, but keep it pinned at 7 once reached
            if num == 7:
                pass
            else:
                num += 1

    data = (driver, List)
    time.sleep(2)
    return data
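date_cut and push_lastly_post are shared helpers that are not shown. Minimal sketches, assuming date_cut returns the oldest timestamp still worth crawling and push_lastly_post upserts the marker into a MongoDB collection; the 30-day window, collection name, and schema are assumptions:

import datetime

def date_cut(info):
    # Assumed behavior: cutoff timestamp as a "%Y-%m-%d %H:%M:%S" string,
    # comparable to the date strings built above. 30 days is an assumption.
    cutoff = datetime.datetime.now() - datetime.timedelta(days=30)
    return cutoff.strftime("%Y-%m-%d %H:%M:%S")

def push_lastly_post(URL, lastly_post, db):
    # Assumed behavior: upsert the newest-post marker for this board.
    # Collection name and schema are assumptions (pymongo-style API).
    db.lastly_post.update_one({"info": URL['info']},
                              {"$set": {"lastly_post": lastly_post}},
                              upsert=True)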
Example #9
def everytime_all_board(URL, end_date, db):
    main_url = URL['url']
    board_search_url = "https://everytime.kr/community/search?keyword="
    board_search_word = ['게시판', '갤러리']
    board_list = []
    # connect the driver
    try:
        driver = chromedriver()
        driver = everytime.login(driver)
    except Exception as e:
        error_handler(e, URL, main_url, db)
        return
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "a.article")))
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')
    # scrape the dynamic boards in Everytime's top menu =====================================================
    board_group_list = bs.find("div", {
        "id": "submenu"
    }).findAll('div', {"class": "group"})
    for board_group in board_group_list:
        try:
            board_li_list = board_group.find("ul").findAll("li")
            for board_li in board_li_list:
                board_li_dic = {}
                board_li_dic['tag'] = board_li.find("a").text
                if board_li.find("a").text.strip() == "더 보기":
                    continue
                else:
                    board_li_dic['url'] = main_url + board_li.find("a")['href']
                if (board_li_dic['tag'].find("찾기") != -1):
                    continue
                board_list.append(board_li_dic)
        except:
            continue
    # scrape additional dynamic boards via keyword search
    for search_word in board_search_word:
        try:
            board_search_url_done = board_search_url + search_word
            driver.get(board_search_url_done)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "a.result")))
            html = driver.page_source
            bs = BeautifulSoup(html, 'html.parser')
            board_a_list = bs.find("div", {
                "class": "searchresults"
            }).findAll('a')
            for board_a in board_a_list:
                board_li_dic = {}
                board_li_dic['tag'] = board_a.find("h3").text
                board_li_dic['url'] = main_url + board_a.get('href')
                board_list.append(board_li_dic)
        except:
            continue
    #===========================================================================================================
    # loop over the collected dynamic boards
    for board in board_list:
        page = 1
        page_flag = 0
        board_url = board['url']
        page_url = Change_page(board_url, page)  # URL of the post list for the current page
        print("\nTarget : ", URL['info'], " :: ", board['tag'])
        continue_handler(URL['info'] + " :: " + board['tag'], URL, page_url)
        # page loop
        while True:
            if page_flag == 50:
                page_flag = 0
                driver.quit()
                time.sleep(3)
                driver = chromedriver()
                driver = everytime.login(driver)
            try:
                print("page_url :::: ", page_url)  #현재 url 출력
                print("Page : ", page)  #현재 페이지 출력
                post_urls = Parsing_list_url(main_url, page_url, driver, db)
                # retry once to work around Everytime's chronic flakiness
                if len(post_urls) == 0:
                    time.sleep(2)
                    post_urls = Parsing_list_url(main_url, page_url, driver,
                                                 db)
                post_data_prepare = []
                # post loop
                for post_url in post_urls:
                    get_post_data = Parsing_post_data(driver, post_url, URL,
                                                      board['tag'], db)
                    if get_post_data == "error":
                        break
                    title = get_post_data[1]
                    date = get_post_data[2]
                    print(date, "::::", title)  # date and title of the post just crawled
                    # skip posts older than end_date; append newer ones
                    if str(date) <= end_date:
                        continue
                    else:
                        post_data_prepare.append(get_post_data[0])
                add_cnt = db_manager(URL, post_data_prepare, db)
                print("add_OK : ", add_cnt)  #DB에 저장된 게시글 수 출력
                #DB에 추가된 게시글이 0 이면 break, 아니면 다음페이지
                if add_cnt == 0:
                    page_flag = 0
                    break
                else:
                    page_flag += 1
                    page += 1
                    page_url = Change_page(board_url, page)
            except Exception as e:
                error_handler(e, URL, page_url, db)
                driver.quit()
                time.sleep(3)
                driver = chromedriver()
                driver = everytime.login(driver)
                break
    # release the driver
    driver.quit()
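Change_page builds the post-list URL for a given page number. A minimal sketch, assuming an Everytime-style "/p/<n>" path; the path scheme is an assumption:

def Change_page(board_url, page):
    # Hypothetical sketch: append an Everytime-style page suffix.
    return board_url + "/p/" + str(page)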
Example #10
def Parsing_post_data(driver, post_url, URL, recent_post):
    post_data_prepare = []
    domain = Domain_check(URL['url'])
    end_date = date_cut(URL['info'])
    now_num = 0
    repeat_num = 0
    post_driver = chromedriver()  # separate driver for individual post pages
    driver.get(post_url)
    last_posts = [0]
    while 1:
        driver.find_element_by_tag_name("body").send_keys(Keys.END)
        time.sleep(1)

        html = driver.page_source
        bs = BeautifulSoup(html, 'html.parser')
        posts = bs.find("div", {
            "class": 'grid'
        }).find("div", {
            "class": 'grid-item'
        }).find_all("div", {"class": "grid-item"})
        # break when scrolling loads no new posts
        if len(last_posts) == len(posts):
            break
        else:
            last_posts = posts

        for post in posts[now_num:]:
            if post.find("div",
                         {"class": "item onclick"}) is None or post.find(
                             "div", {"class": "item_wrap"}) is None:
                pass
            else:
                try:
                    post_data = {}
                    url = post.find("div", {
                        "class": "item onclick"
                    }).get("onclick").split("'")[1]
                    try:
                        post_driver.get(url)
                        #driver.get(url)
                    except:
                        if len(post_data_prepare) == 0:
                            recent_post = None
                        else:
                            recent_post = post_data_prepare[0]['title']
                        data = (post_data_prepare, recent_post)
                        return data
                    try:
                        WebDriverWait(post_driver, 30).until(
                            EC.presence_of_element_located(
                                (By.TAG_NAME,
                                 "time")))  #a.item을 발견하면 에이작스 로딩이 완료됬다는 가정
                    except:
                        if len(post_data_prepare) == 0:
                            recent_post = None
                        else:
                            recent_post = post_data_prepare[0]['title']
                        data = (post_data_prepare, recent_post)
                        return data
                    html_post = post_driver.page_source
                    bs_post = BeautifulSoup(html_post, 'html.parser')

                    title = bs_post.find("div", {
                        "class":
                        "col-md-12 start_article_info mobile_pre_article"
                    }).find("h1").get_text(" ", strip=True)
                    date = bs_post.find("div", {
                        "class":
                        "col-md-12 start_article_info mobile_pre_article"
                    }).find("h3").get_text(" ", strip=True)
                    date = date + " 00:00:00"
                    date = str(
                        datetime.datetime.strptime(date,
                                                   "%Y . %m . %d %H:%M:%S"))
                    post = bs_post.find("div", {
                        "class": "col-md-12 content_start"
                    }).get_text(" ", strip=True)
                    post = post_wash(post)  #post 의 공백을 전부 제거하기 위함
                    if bs_post.find(
                            "div",
                        {"class": "col-md-12 single_header no_pc"}) is None:
                        img = 3
                    else:
                        img = bs_post.find(
                            "div", {
                                "class": "col-md-12 single_header no_pc"
                            }).find("img")["src"]  #게시글의 첫번째 이미지를 가져옴.
                        if 1000 <= len(img):
                            img = 3
                        else:
                            if img.startswith("http://") or img.startswith(
                                    "https://"):  # img가 내부링크인지 외부 링크인지 판단.
                                pass
                            elif img.startswith("//"):
                                img = "http:" + img
                            else:
                                img = domain + img
                    if img != 3:
                        if img_size(img):
                            pass
                        else:
                            img = 3
                    post_data['title'] = title.upper()
                    post_data['author'] = ""
                    post_data['date'] = date
                    post_data['post'] = phrase.lower()
                    post_data['img'] = img
                    post_data['url'] = url
                    print(date, "::::", title)

                    if (date < end_date) or (title.upper() == recent_post):
                        break
                    else:
                        post_data_prepare.append(post_data)
                except:
                    continue

        now_num = len(posts)
        repeat_num += 1
        if (date <= end_date) or (title.upper() == recent_post):
            break
    if len(post_data_prepare) == 0:
        recent_post = None
    else:
        recent_post = post_data_prepare[0]['title']
    data = (post_data_prepare, recent_post)
    post_driver.close()
    return data
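The early-return block that resets recent_post and packs the result tuple is repeated four times in each Parsing_post_data variant above. One possible refactor, as a sketch; the helper name is ours, not the project's:

def finish(post_data_prepare):
    # Pack the result tuple the way every early return above does.
    recent_post = post_data_prepare[0]['title'] if post_data_prepare else None
    return (post_data_prepare, recent_post)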