def Parsing_list_url(URL, bs):
    """Collect absolute post URLs from either the table layout or the
    zine-style <ol> fallback layout of the board.

    Args:
        URL: dict with at least a 'url' key, passed to Domain_check.
        bs: BeautifulSoup document of the list page.

    Returns:
        list[str]: absolute post URLs (possibly empty).

    Fixes: the same selectors were queried twice each; the bare ``except``
    (which also swallowed KeyboardInterrupt) is narrowed to the errors a
    missing/href-less anchor actually raises.
    """
    List = []
    domain = Domain_check(URL['url'])
    table = bs.find("table", {"class": "bd_lst bd_tb_lst bd_tb"})
    if table is None:
        # Fallback layout: zine-style ordered list of <li> posts.
        zine = bs.find("ol", {"class": "bd_lst bd_zine zine zine1 img_load"})
        if zine is not None:
            for post in zine.findAll("li"):
                try:
                    href = post.find("a")['href']
                except (TypeError, KeyError):
                    # An <li> without a usable link marks the end of posts.
                    break
                List.append(domain + href)
    else:
        for post in table.find("tbody").findAll("tr"):
            List.append(domain + post.find("a")['href'])
    return List
def Parsing_list_url(URL, bs):
    """Return absolute job-posting URLs from an Indeed-style results page.

    Fix: the original computed a card list from ``#resultsCol`` and then
    immediately overwrote it with ``bs.select("a.jobtitle")`` — the first
    lookup is kept only as an existence probe so failure behavior
    (AttributeError when the column is missing even after a retry) is
    unchanged; the discarded assignment is gone and the bare excepts are
    narrowed to AttributeError (raised by ``.findAll`` on None).
    """
    List = []
    domain = Domain_check(URL['url'])
    try:
        bs.find("td", {"id": 'resultsCol'}).findAll(
            "div", {"class": "jobsearch-SerpJobCard unifiedRow row result clickcard"})
    except AttributeError:
        # Result column not found; wait briefly and probe once more.
        time.sleep(3)
        bs.find("td", {"id": 'resultsCol'}).findAll(
            "div", {"class": "jobsearch-SerpJobCard unifiedRow row result clickcard"})
    # The actual job links come from the title anchors.
    for post in bs.select("a.jobtitle"):
        List.append(domain + post.attrs["href"])
    return List
def Parsing_list_url(URL, page_url):
    """Render `page_url` with a Selenium driver and scrape post URLs.

    Returns a tuple ``(driver, List)`` where ``List`` holds absolute post
    URLs.  NOTE(review): the driver is quit just before the successful
    return yet still handed back to the caller — confirm callers expect a
    closed driver here (the failure path returns it still open).
    """
    List = []
    domain = Domain_check(URL['url'])
    # If a driver is already running shut it down, otherwise just continue.
    # NOTE(review): `driver` is assigned below, which makes it function-local,
    # so this quit() raises UnboundLocalError and is silently swallowed — a
    # pre-existing driver is probably never actually closed here.
    try:
        driver.quit()
    except:
        pass
    driver = chromedriver()
    driver.get(page_url)
    try:
        # Once td.aL appears we assume the AJAX load has completed.
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "td.aL")))
    except:
        # One more 10-second wait before giving up with an empty list.
        try:
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "td.aL")))
        except:
            return (driver, List);
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')
    posts = bs.find("div", {"class": "tbl_container"}).find("tbody").findAll("tr")
    for post in posts:
        # Header rows (<th>) carry no post link.
        if post.find("th") != None:
            continue
        # Skip effectively empty rows.
        if len(post.find("td").text) <= 1:
            continue
        url_done = domain + "/" + post.find("td", {"class": "aL"}).find("a")['href']
        List.append(url_done)
    data = (driver, List)
    driver.quit()
    return data
def Parsing_list_url(URL, bs):
    """Extract notice URLs from the KOSAF notice table.

    `URL` is accepted for signature consistency with the other parsers but
    is not used; hrefs are resolved against the fixed notice.do base URL.
    """
    base = "https://www.kosaf.go.kr/ko/notice.do"
    rows = bs.find("table", {"class": 't-viewz'}).find("tbody").find_all("tr")
    return [base + row.find("td", {"class": "subject"}).find("a")["href"]
            for row in rows]
def Parsing_list_url(URL, bs):
    """Return absolute contest-post URLs from the contest table body rows."""
    base = Domain_check(URL['url'])
    body = bs.find("table", {"class": 'type-2 mg-t-5 contest-table'}).find("tbody")
    return [base + row.find("a")["href"] for row in body.findAll("tr")]
def Parsing_list_url(URL, bs):
    """Return absolute URLs for each item in the list-box <ul>."""
    base = Domain_check(URL['url'])
    items = bs.find("ul", {"class": 'list-box-ul clearfix'}).findAll("li")
    return [base + item.find("a")["href"] for item in items]
def Parsing_list_url(URL, bs):
    """Return absolute post URLs from the bd_tb board table's body rows."""
    base = Domain_check(URL['url'])
    body = bs.find("table", {"class": "bd_lst bd_tb_lst bd_tb"}).find("tbody")
    return [base + row.find("a")['href'] for row in body.findAll("tr")]
def Parsing_list_url(URL, bs):
    """Prefix the site domain onto the first anchor of every <tbody> row."""
    base = Domain_check(URL['url'])
    return [base + row.find("a")['href']
            for row in bs.find("tbody").findAll("tr")]
def Parsing_list_url(URL, bs, pageidx):
    """Build archive detail URLs from each item's data-id inside #Lists.

    The page index is threaded through as the `pageidx` query parameter so
    detail pages can navigate back to the right list page.
    """
    base = Domain_check(URL['url'])
    urls = []
    for item in bs.find("div", {"id": 'Lists'}).find_all("div", {"class": "item"}):
        data_id = item.find("a", {"class": "btnViewDetail"}).get("data-id")
        urls.append(
            base
            + "/archives/"
            + str(data_id)
            + "?listType=list&headerID=0&date=&query=&pageidx="
            + str(pageidx)
        )
    return urls
def Parsing_list_url(URL, bs):
    """Return the absolute URL of every job item in the joodJobList."""
    base = Domain_check(URL['url'])
    items = bs.find("ul", {"class": 'joodJobList'}).findAll("li")
    return [base + item.find('a')['href'] for item in items]
def Parsing_list_url(URL, bs):
    """Return post URLs with the post date concatenated directly after."""
    base = Domain_check(URL['url'])
    out = []
    for row in bs.find("table", {"class": "table list_box"}).findAll("tr"):
        link = base + row.find("a")['href']
        # The second <td> holds the date; it is glued onto the URL string
        # for downstream consumers.
        out.append(link + row.findAll("td")[1].text.strip())
    return out
def Parsing_list_url(URL, bs):
    """Return /partspace/-prefixed absolute URLs for each board list item."""
    base = Domain_check(URL['url'])
    wrap = bs.find("div", {"class": 'boardList boardListService'})
    items = wrap.find("ul", {"class": "list_wrap"}).find_all("li")
    return [base + "/partspace/" + item.find("a")["href"] for item in items]
def Parsing_list_url(URL, bs):
    """Collect URLs of posts that are still open (no CLOSED label)."""
    base = Domain_check(URL['url'])
    urls = []
    for item in bs.find("ul", {"data-role": "list"}).findAll("li"):
        # Items carrying a CLOSED label are no longer running; skip them.
        if item.find("label", {"class": "CLOSED"}) is not None:
            continue
        urls.append(base + item.find("a")['href'])
    return urls
def Parsing_list_url(URL, bs):
    """Return absolute post URLs from board_list rows (header row dropped)."""
    base = Domain_check(URL['url'])
    rows = bs.find("table", {"class": 'board_list'}).findAll("tr")[1:]
    return [base + row.find("td", {"class": "tit"}).find('a')['href']
            for row in rows]
def Parsing_list_url(URL, bs):
    """Return domain + "/" + href for every data row of the bbs_ltype table
    (first row is the header and is skipped)."""
    base = Domain_check(URL['url'])
    rows = bs.find("table", {"class": 'bbs_ltype tbl30'}).findAll("tr")[1:]
    return [base + "/" + row.find("a")['href'] for row in rows]
def Parsing_list_url(URL, bs):
    """Return URLs of open (non-closed) list items, in document order.

    Fix: the original used ``set(posts_all) - set(posts_closed)``, which
    randomized the order of the returned URLs and silently collapsed items
    with identical markup; an ordered membership filter keeps the same
    open-vs-closed semantics while preserving page order.
    """
    List = []
    domain = Domain_check(URL['url'])
    container = bs.find("div", {"class": "list"})
    if container is None:
        return List
    all_items = container.findAll("div", {"class": "list-item"})
    closed_items = container.findAll("div", {"class": "list-item closed"})
    for item in all_items:
        # Membership uses Tag equality (same markup), matching the set logic.
        if item in closed_items:
            continue
        List.append(domain + item.find("a")['href'])
    return List
def Parsing_list_url(URL, bs):
    """Return absolute URLs from filterList entries, stopping at the first
    entry without an info block (treated as the end of real results)."""
    base = Domain_check(URL['url'])
    pages = []
    for entry in bs.find("ul", {"class": 'filterList'}).findAll("li"):
        info = entry.find("div", {"class": "info"})
        if info is None:
            break
        pages.append(base + info.find('a')['href'])
    return pages
def Parsing_list_url(URL, bs):
    """Return absolute post URLs from the lst-board list inside #list_board."""
    base = Domain_check(URL['url'])
    board = bs.find("div", {"id": "list_board"})
    items = board.find("ul", {"class": "lst-board lst-body"}).findAll("li")
    return [base + item.find("a")['href'] for item in items]
def Parsing_list_url(URL, page_url):
    """Render `page_url` with Selenium and return ``(driver, url_list)``.

    Post links live in the onclick handlers of the subject spans; the path
    between the first pair of single quotes is prefixed with the domain.
    On any parsing failure an empty list is returned with the live driver.

    Fixes: the except branch used to retry the exact same parse on the same
    static soup (it could never succeed) — the duplicate attempt and a
    commented-out paging loop are removed, and the bare excepts around the
    parsing are narrowed to the errors the lookups actually raise.
    """
    List = []
    domain = Domain_check(URL['url'])
    # Shut down a leftover driver if one is running; ignore failures.
    # NOTE(review): `driver` is a local name in this function, so this
    # quit() always raises and is swallowed — kept for behavioral parity.
    try:
        driver.quit()
    except:
        pass
    driver = chromedriver()
    driver.get(page_url)
    WebDriverWait(driver, 100).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "span.li_list")))
    time.sleep(2)
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')
    try:
        posts = (bs.find("ul", {"class": 'listContent'}).findAll("li")
                 + bs.find("ul", {"class": 'listContent mb20'}).findAll("li"))
    except AttributeError:
        # One of the list containers is missing — nothing to parse.
        return (driver, List)
    try:
        for post in posts:
            onclick = post.find("span", {
                "class": "li_subject li_list2"
            }).find("a")['onclick']
            # onclick looks like func('/path', ...); take the quoted path.
            List.append(domain + onclick.split("'")[1])
    except (AttributeError, TypeError, KeyError, IndexError):
        # Unexpected row shape: return an empty list, as before.
        List = []
    return (driver, List)
def Parsing_list_url(URL, bs):
    """Return "url$$title" strings for anchors in the idx list, skipping any
    whose absolute URL contains an "idx=" query fragment."""
    base = Domain_check(URL['url'])
    results = []
    for anchor in bs.find("ul", {"class": "idx"}).findAll("a"):
        full = base + anchor['href']
        if "idx=" in full:
            continue
        results.append(full + "$$" + anchor.text.strip())
    return results
def Parsing_list_url(URL, bs):
    """Return domain + "/" + href for each linked row of the bbs_ltype table,
    skipping the header row and any row without an anchor (nested td/tr)."""
    base = Domain_check(URL['url'])
    pages = []
    for row in bs.find("table", {"class": 'bbs_ltype'}).findAll("tr")[1:]:
        anchor = row.find("a")
        if anchor is None:
            continue
        pages.append(base + "/" + anchor['href'])
    return pages
def Parsing_list_url(URL, page_url):
    """Single-page site: start a fresh driver and return (driver, [page_url]).

    Fix: the bare ``except`` around the leftover-driver shutdown is narrowed
    to Exception so signals like KeyboardInterrupt are no longer swallowed.
    """
    List = []
    # Domain_check is kept for parity with the other parsers even though
    # its result is unused here.
    domain = Domain_check(URL['url'])
    # Shut down a leftover driver if one is running; ignore failures.
    try:
        driver.quit()
    except Exception:
        pass
    driver = chromedriver()
    List.append(page_url)
    return (driver, List)
def Parsing_list_url(URL, bs):
    """Return post URLs rebuilt as domain + query string ("?...") for each
    item of the basic-list page-list; empty list when the list is absent.

    Fix: the bare ``except`` (which also swallowed KeyboardInterrupt) is
    narrowed to AttributeError, the error actually raised when ``bs.find``
    returns None and ``.findAll`` is called on it.
    """
    List = []
    domain = Domain_check(URL['url'])
    try:
        posts = bs.find("ul", {"class": 'basic-list page-list'}).findAll("li")
    except AttributeError:
        return List
    for post in posts:
        target = post.find('a')['href']
        # Keep only the query-string part of the href (from '?' onward).
        List.append(domain + target[target.find('?'):])
    return List
def Parsing_list_url(URL, bs):
    """Return post URLs from the table rows: each href has its first two
    characters stripped; external http links are kept as-is, everything
    else gets the site domain prepended."""
    base = Domain_check(URL['url'])
    urls = []
    for row in bs.find("tbody").findAll("tr"):
        anchor = row.find("a")
        if anchor is None:
            continue
        href = anchor["href"][2:]
        urls.append(href if href.startswith("http://") else base + href)
    return urls
def Parsing_list_url(URL, bs):
    """Return absolute URLs for the media entries under #result-list.

    Fixes: the bare ``except`` is narrowed to AttributeError (raised when
    the container div is missing and ``.findAll`` is called on None), and
    the no-op ``if len(posts) == 0: pass`` branch is folded away.
    """
    List = []
    domain = Domain_check(URL['url'])
    try:
        posts = bs.find("div", {
            "id": "result-list"
        }).findAll("div", {"class": "media"})
    except AttributeError:
        return List
    for post in posts:
        List.append(domain + post.find("a")['href'])
    return List
def Parsing_list_url(URL, page_url):
    """Log in to udream, fetch the list page, and build post URLs from the
    post number embedded in each row's onclick handler.

    Fix: the authenticated session is now closed in ``finally`` so it is
    not leaked when fetching or parsing raises.
    """
    List = []
    # Authenticated requests session from the udream login helper.
    s = udream.login()
    try:
        page = s.get(page_url).text
        # lxml parser: html.parser mishandles this page.
        bs = BeautifulSoup(page, "lxml")
        for post in bs.findAll("tr", {"onmouseover": "hctrOn(this)"}):
            # onclick looks like func('12345', ...); the post number is the
            # first single-quoted token.
            post_num = post.find("a")["onclick"].split("'")[1]
            List.append(URL['post_url'] + post_num)
    finally:
        s.close()
    return List
def Parsing_list_url(URL, bs):
    """Return post URLs from table rows, repairing a mojibake artifact where
    "¤" comes through as "¤"; external http links pass through unchanged,
    all other hrefs get domain + "/bbs/" prepended."""
    base = Domain_check(URL['url'])
    urls = []
    for row in bs.find("tbody").findAll('tr'):
        anchor = row.find("a")
        if anchor is None:
            continue
        href = anchor["href"].replace("¤", "¤")
        if href.startswith("http://"):
            urls.append(href)
        else:
            urls.append(base + "/bbs/" + href)
    return urls
def Parsing_list_url(URL, bs):
    """Extract post URLs from the third <table> inside the centered div.

    Each anchor is serialized to text and its href taken as the first
    double-quoted chunk; "amp;" residue from HTML-escaped "&" is stripped,
    and javascript: pseudo-links are skipped.

    Fix: removed the unused ``posts = []`` local that was re-created on
    every loop iteration.
    """
    List = []
    domain = Domain_check(URL['url'])
    tables = bs.find("div", {"align": "center"}).findAll("table")
    table = tables[2]
    for a_tag in table.findAll("a"):
        if a_tag["href"].startswith("javascript"):
            continue
        # str(a_tag) looks like '<a href="...">…'; the href is the first
        # double-quoted token.
        post = str(a_tag).split('"')[1]
        url = (domain + post).replace("amp;", "")
        List.append(url)
    return List
def Parsing_list_url(URL, page_url):
    """Render the youthcenter board with Selenium and return (driver, urls).

    Each row's href embeds the notice number between single quotes; it is
    spliced into the boardDetail.do deep-link template.

    Fixes: the except branch used to retry the identical parse on the same
    static soup (it could never succeed) — the duplicate attempt is removed
    and the bare excepts are narrowed to the errors the lookups raise.
    """
    List = []
    # Shut down a leftover driver if one is running; ignore failures.
    # NOTE(review): `driver` is a local name in this function, so this
    # quit() always raises and is swallowed — kept for behavioral parity.
    try:
        driver.quit()
    except:
        pass
    driver = chromedriver()
    driver.get(page_url)
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "searchForm")))
    time.sleep(2)
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')
    try:
        posts = bs.find("div", {
            "class": "table_wrap"
        }).find("tbody").find_all("tr")
    except AttributeError:
        # Table wrapper or body missing — nothing to parse.
        return (driver, List)
    try:
        for post in posts:
            num = (post.find("a")["href"]).split("'")[1]
            List.append(
                "https://www.youthcenter.go.kr/board/boardDetail.do?bbsNo=3&ntceStno="
                + num
                + "&pageUrl=board%2Fboard&orderBy=REG_DTM&orderMode=DESC")
    except (AttributeError, TypeError, KeyError, IndexError):
        # Unexpected row shape: return an empty list, as before.
        List = []
    return (driver, List)
def Parsing_list_url(URL, bs):
    """Return post URLs from the "list article mt10" item blocks.

    For the sj7_promotion_research board the post id is pulled from the
    anchor's onclick and appended to URL['post_url']; every other board
    uses the href directly, keeping external http links as-is and
    prefixing domain + "/" otherwise.
    """
    urls = []
    domain = Domain_check(URL['url'])
    container = bs.find("div", {"class": "list article mt10"})
    if container is None:
        return urls
    is_research = URL['info'] == "sj7_promotion_research"
    for item in container.findAll("div", {"class": "item article"}):
        anchor = item.find("a")
        if anchor is None:
            continue
        if is_research:
            # onclick carries the post id as its first single-quoted token.
            urls.append(URL['post_url'] + anchor["onclick"].split("'")[1])
        elif anchor["href"].startswith("http://"):
            # Already an absolute external link.
            urls.append(anchor["href"])
        else:
            urls.append(domain + "/" + anchor["href"])
    return urls