Example #1
0
    def search(word, page=1):
        """Search Baidu Baike for *word* and return the matching entries.

        :param word: query string to search for.
        :param page: 1-based result page; each page holds 10 results.
        :return: list of dicts with keys "title", "url", "desc".
        :raises urllib2.URLError: on network failure or timeout.
        """
        url = Baike.host + "/search"
        get_data = {
            "word": word,
            "pn": (page - 1) * 10,  # result offset: 10 hits per page
            "rn": 10,
            "enc": "utf8"
        }
        request = public.make_request(url, get_data=get_data)
        res = urllib2.urlopen(request, timeout=4)
        try:
            body = res.read()
        finally:
            res.close()  # don't leak the HTTP connection

        # Baike result links end with this suffix; strip it from titles.
        suffix = u"_百度百科"
        ret = []
        soup = BeautifulSoup(body, 'html.parser')
        for dd in soup.find_all("dd"):
            title = dd.a.get_text() if dd.a else None
            if title and title.endswith(suffix):
                ret.append({
                    "title": title[:-len(suffix)],
                    "url": dd.a["href"],
                    "desc": dd.p.get_text(),
                })
        return ret
Example #2
0
    def search(word, page=1):
        """Query Baidu Baike's search endpoint and collect result entries.

        Returns a list of {"title", "url", "desc"} dicts for each hit
        whose link text ends with the Baike title suffix.
        """
        params = {
            "word": word,
            "pn": (page - 1) * 10,
            "rn": 10,
            "enc": "utf8"
        }
        response = urllib2.urlopen(
            public.make_request(Baike.host + "/search", get_data=params),
            timeout=4)
        markup = response.read()

        results = []
        for entry in BeautifulSoup(markup, 'html.parser').find_all("dd"):
            # Skip entries without a usable link or without the Baike suffix.
            if not (entry and entry.a and entry.a.get_text()):
                continue
            if not entry.a.get_text().endswith(u"_百度百科"):
                continue
            results.append({
                "title": entry.a.get_text()[0:-5],
                "url": entry.a["href"],
                "desc": entry.p.get_text(),
            })
        return results
Example #3
0
 def checkCollege(url):
     """Return True if the Baike page at *url* is a college entry.

     College pages mark themselves with "collegeSmall" as the last CSS
     class on <body> — NOTE(review): inferred from this check alone;
     confirm against actual Baike markup.

     :param url: absolute URL of a Baike lemma page.
     :raises urllib2.URLError: on network failure.
     """
     res = urllib2.urlopen(public.make_request(url))
     try:
         body = res.read()
     finally:
         res.close()  # don't leak the HTTP connection
     soup = BeautifulSoup(body, 'html.parser')
     # Guard against pages with no <body> or no class attribute instead of
     # crashing with TypeError/KeyError.
     classes = soup.body.get("class") if soup.body else None
     return bool(classes) and classes[-1] == "collegeSmall"
Example #4
0
 def checkCollege(url):
     """Check whether the Baike page at *url* is a college entry."""
     request = public.make_request(url)
     markup = urllib2.urlopen(request).read()
     page = BeautifulSoup(markup, 'html.parser')
     # The comparison itself yields the boolean we need.
     return page.body["class"][-1] == "collegeSmall"
Example #5
0
 def college(page=1):
     """Fetch one page of college lemmas from Baike's wikitag API.

     :param page: 1-based page number; the API expects 0-based pages.
     :return: the "lemmaList" list from the JSON response.
     :raises urllib2.URLError: on network failure.
     :raises KeyError: if the response lacks "lemmaList".
     """
     url = Baike.host + "/wikitag/api/getlemmas"
     post_data = {
         "limit": 30,                        # entries per API page
         "timeout": 3000,                    # server-side timeout, in ms
         "filterTags": [0, 0, 0, 0, 0, 0, 0],
         "tagId": 60829,                     # presumably the college tag id — TODO confirm
         "fromLemma": False,
         "contentLength": 40,
         "page": page - 1                    # convert to the API's 0-based paging
     }
     request = public.make_request(url, post_data=post_data)
     res = urllib2.urlopen(request)
     try:
         payload = res.read()
     finally:
         res.close()  # don't leak the HTTP connection
     return json.loads(payload)["lemmaList"]
Example #6
0
 def college(page=1):
     """Retrieve one page of college entries via the wikitag API."""
     payload = {
         "limit": 30,
         "timeout": 3000,
         "filterTags": [0, 0, 0, 0, 0, 0, 0],
         "tagId": 60829,
         "fromLemma": False,
         "contentLength": 40,
         "page": page - 1
     }
     endpoint = Baike.host + "/wikitag/api/getlemmas"
     raw = urllib2.urlopen(public.make_request(endpoint, post_data=payload)).read()
     return json.loads(raw)["lemmaList"]
Example #7
0
 def detail(url):
     #TODO
     body = urllib2.urlopen(public.make_request(url)).read()
     soup = BeautifulSoup(body, 'html.parser')
     for div in soup.find_all("div", attrs={"class": "baseBox"}):
         print div
Example #8
0
 def detail(url):
     #TODO
     body = urllib2.urlopen(public.make_request(url)).read()
     soup = BeautifulSoup(body, 'html.parser')
     for div in soup.find_all("div", attrs={"class": "baseBox"}):
         print div