Example #1
 def getPageCount(self):
     # Assumes `import re`, `import math`, and the project's Soup helper.
     searchUrl = "http://www.hkn24.com/news/articleList.html?page=1&sc_section_code=&sc_sub_section_code=&sc_serial_code=&sc_area=A&sc_level=&sc_article_type=&sc_view_level=&sc_sdate=&sc_edate=&sc_serial_number=&sc_word=" + self.query + "&sc_view_code=&view_type="
     soup = Soup.phantomjs(searchUrl)
     # The <font color="#333333"> element holds the total article count;
     # strip the non-digits and divide by 20 articles per page.
     articleCount = int(
         re.sub(r"\D", "",
                soup.find('font', color='#333333').get_text()))
     return math.floor(articleCount / 20)
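These snippets all call a project-local Soup.phantomjs helper that is not shown here. A minimal sketch of what such a helper might look like, assuming it only needs to fetch the search page and return a BeautifulSoup tree (the name suggests the original drives PhantomJS for JavaScript-rendered pages; the requests-based fetch below is an assumption):

# Hypothetical stand-in for the project's Soup.phantomjs helper; the real
# implementation is not shown in these examples.
import requests
from bs4 import BeautifulSoup

class Soup:
    @staticmethod
    def phantomjs(url, parser="html.parser"):
        # Fetch the page and return a parsed BeautifulSoup tree so callers
        # can use find()/find_all()/select() as in the examples.
        html = requests.get(url).text
        return BeautifulSoup(html, parser)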
Example #2
 def getPageCount(self):
     searchUrl = "http://www.khanews.com/news/articleList.html?page=&sc_section_code=&sc_sub_section_code=&sc_serial_code=&sc_area=A&sc_level=&sc_article_type=&sc_view_level=&sc_sdate=2000.01.01&sc_edate=&sc_serial_number=&sc_word=" + self.query + "&view_type="
     soup = Soup.phantomjs(searchUrl)
     # The total article count sits in a <font> inside the white list cell.
     listCell = soup.find("td", bgcolor="#FFFFFF")
     articleCount = int(
         re.sub(r"\D", "",
                listCell.find("font", color="#333333").get_text()))
     # 20 articles are listed per page.
     return math.floor(articleCount / 20)
Example #3
 def getPageHrefs(self, count):
     searchUrl = "http://www.hkn24.com/news/articleList.html?page=" + str(
         count
     ) + "&sc_section_code=&sc_sub_section_code=&sc_serial_code=&sc_area=A&sc_level=&sc_article_type=&sc_view_level=&sc_sdate=&sc_edate=&sc_serial_number=&sc_word=" + self.query + "&sc_view_code=&view_type="
     soup = Soup.phantomjs(searchUrl)
     # Each result row's title cell wraps an <a> pointing at the article.
     searchBox = soup.find_all('td', class_='ArtList_Title')
     pageHrefs = [
         "http://www.hkn24.com/news/" + x.a.get('href') for x in searchBox
     ]
     return pageHrefs
Example #4
 def getPageHrefs(self, count):
     searchUrl = "http://www.doctorsnews.co.kr/news/articleList.html?page=" + str(
         count
     ) + "&sc_section_code=&sc_sub_section_code=&sc_serial_code=&sc_add_section_code=&sc_add_sub_section_code=&sc_add_serial_code=&sc_area=A&sc_level=&sc_m_level=&sc_article_type=&sc_view_level=&sc_sdate=" + self.startDate + "&sc_edate=" + self.endDate + "&sc_serial_number=&sc_word=" + self.query + "&sc_word2=&sc_andor=OR&sc_order_by=I&view_type="
     soupArticle = Soup.phantomjs(searchUrl)
     # On this site the title links themselves carry the news_list_title class.
     articleHrefList = soupArticle.find_all("a", class_="news_list_title")
     pageHrefs = [
         "http://www.doctorsnews.co.kr/news/" + x.get('href')
         for x in articleHrefList
     ]
     return pageHrefs
Example #5
 def getPageHrefs(self, count):
     searchUrl = "http://www.khanews.com/news/articleList.html?page=" + str(
         count
     ) + "&sc_section_code=&sc_sub_section_code=&sc_serial_code=&sc_area=A&sc_level=&sc_article_type=&sc_view_level=&sc_sdate=2000.01.01&sc_edate=&sc_serial_number=&sc_word=" + self.query + "&view_type="
     soup = Soup.phantomjs(searchUrl)
     # The title text is a <font color="#001DD0"> nested inside the <a>,
     # so each href is read from the font element's parent.
     listCell = soup.find("td", bgcolor="#FFFFFF")
     searchBox = listCell.find_all("font", color="#001DD0")
     pageHrefs = [
         "http://www.khanews.com/news/" + x.parent.get('href')
         for x in searchBox
     ]
     return pageHrefs
Example #6
 def getPageCount(self):
     searchUrl = "http://www.doctorsnews.co.kr/news/articleList.html?page=1&sc_section_code=&sc_sub_section_code=&sc_serial_code=&sc_add_section_code=&sc_add_sub_section_code=&sc_add_serial_code=&sc_area=A&sc_level=&sc_m_level=&sc_article_type=&sc_view_level=&sc_sdate=" + self.startDate + "&sc_edate=" + self.endDate + "&sc_serial_number=&sc_word=" + self.query + "&sc_word2=&sc_andor=OR&sc_order_by=I&view_type="
     soup = Soup.phantomjs(searchUrl)
     # The 35px-high header row shows the total number of matching articles.
     articleCnt = soup.find("tr", height="35").td.get_text()
     maxArticle = int(re.sub(r"\D", "", articleCnt))
     # 25 articles are listed per page.
     return math.floor(maxArticle / 25)
Example #7
 def getPageHrefs(self, count):
     searchUrl = "http://www.doctorstimes.com/news/articleList.html?page=" + str(count) + "&sc_section_code=&sc_sub_section_code=&sc_serial_code=&sc_area=A&sc_level=&sc_article_type=&sc_view_level=&sc_sdate=&sc_edate=&sc_serial_number=&sc_word=" + self.query + "&sc_word2=&sc_andor=&sc_order_by=E&view_type="
     soup = Soup.phantomjs(searchUrl, 'html.parser')
     # Each result row's title cell wraps an <a> pointing at the article.
     searchBox = soup.find_all('td', class_='list-titles list-pad-5')
     pageHrefs = [
         "http://www.doctorstimes.com/news/" + x.a.get('href') for x in searchBox
     ]
     return pageHrefs
Example #8
 def getPageCount(self):
     searchUrl = "http://www.doctorstimes.com/news/articleList.html?page=1&sc_section_code=&sc_sub_section_code=&sc_serial_code=&sc_area=A&sc_level=&sc_article_type=&sc_view_level=&sc_sdate=&sc_edate=&sc_serial_number=&sc_word=" + self.query + "&sc_word2=&sc_andor=&sc_order_by=E&view_type="
     soup = Soup.phantomjs(searchUrl)
     # The first header cell of the article-list table carries the total count.
     countCell = soup.select(
         "#article-list > tbody > tr > td > table > tbody > tr:nth-of-type(1) > td > table > tbody > tr > td:nth-of-type(1)"
     )[0]
     # Strip non-digits; 20 articles are listed per page.
     articleCount = int(re.sub(r"\D", "", countCell.get_text()))
     return math.floor(articleCount / 20)
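Taken together, a crawler would typically call getPageCount once and then walk the result pages with getPageHrefs. A rough usage sketch, where collectArticleHrefs and the crawler object are hypothetical:

def collectArticleHrefs(crawler):
    # `crawler` is assumed to expose getPageCount() and getPageHrefs(count)
    # as defined in the examples above (any of the site-specific variants).
    hrefs = []
    # Search-result pages are 1-indexed; gather the article links from each page.
    for page in range(1, crawler.getPageCount() + 1):
        hrefs.extend(crawler.getPageHrefs(page))
    return hrefs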