Beispiel #1
0
 def get_articles(self):
     """Lazily yield an ``Article`` for every value the scroll loader produces.

     The posts API URL is built from ``self.get_column_name()``.  Each value
     coming out of ``ScrollLoader.run()`` is appended to the
     ``http://zhuanlan.zhihu.com`` prefix and wrapped in an ``Article``.
     """
     column = self.get_column_name()
     posts_api_url = (
         "http://zhuanlan.zhihu.com/api/columns/" + column + "/posts?limit=10"
     )
     loader = ScrollLoader("get", posts_api_url, 10)
     # Imported inside the method, as in the original code.
     from Article import Article
     for article_path in loader.run():
         yield Article("http://zhuanlan.zhihu.com" + article_path)
Beispiel #2
0
 def get_articles(self):
     """Generate ``Article`` objects for the column returned by
     ``self.get_column_name()``.

     A ``ScrollLoader`` is pointed at the column's posts API URL.  Every
     value it yields from ``run()`` is concatenated onto the
     ``http://zhuanlan.zhihu.com`` prefix and handed to ``Article``.
     """
     name = self.get_column_name()
     api_endpoint = (
         "http://zhuanlan.zhihu.com/api/columns/" + name + "/posts?limit=10"
     )
     posts_loader = ScrollLoader("get", api_endpoint, 10)
     # Kept as a function-level import, matching the original code.
     from Article import Article
     for post_path in posts_loader.run():
         yield Article("http://zhuanlan.zhihu.com" + post_path)
Beispiel #3
0
 def get_followers(self):
     """Yield a ``User`` for each follower link found for this profile.

     The page at ``self.url + '/followers'`` is fetched first; its parsed
     markup supplies the hash id and XSRF token handed to ``ScrollLoader``.
     Every item of every response produced by ``scroll_loader.run()`` is
     appended to the page text, and the combined text is scanned for
     ``zg-link`` anchors.  Each captured ``href`` is passed to ``User``
     unchanged.

     This is a generator, so nothing is requested until iteration starts.
     """
     follower_page_url = self.url + '/followers'
     r = requests.get(follower_page_url)
     page_text = r.text
     soup = BeautifulSoup(page_text)
     hash_id = get_hash_id(soup)
     _xsrf = get_xsrf(soup)
     # NOTE(review): 20 is presumably the batch size per request -- confirm
     # against ScrollLoader.
     scroll_loader = ScrollLoader(
         "post", "http://www.zhihu.com/node/ProfileFollowersListV2", 20,
         _xsrf, hash_id)
     # Gather the pieces and join once; repeated ``text += each`` may re-copy
     # the accumulated string on every append.
     pieces = [page_text]
     for response in scroll_loader.run():
         pieces.extend(response)
     text = "".join(pieces)
     follower_url_list = re.findall(
         r'<a[^>]+href=\"([^>]*)\"\x20class=\"zg-link\"', text)
     for url in follower_url_list:
         yield User(url)
Beispiel #4
0
 def get_followeing_topics(self):
     """Yield a ``Topic`` for each topic link found on the topics page.

     The page at ``self.url + '/topics'`` is fetched and its XSRF token is
     extracted.  A ``ScrollLoader`` then posts to the same URL; every item of
     every response it yields is appended to the page text.  The combined
     text is scanned for ``zm-list-avatar-link`` anchors and each captured
     ``href`` is prefixed with ``http://www.zhihu.com`` before being wrapped
     in a ``Topic``.

     This is a generator, so nothing is requested until iteration starts.
     """
     url = self.url + '/topics'
     r = requests.get(url)
     soup = BeautifulSoup(r.content)
     _xsrf = get_xsrf(soup)
     # Gather the pieces and join once; repeated ``text += each`` may re-copy
     # the accumulated string on every append.
     pieces = [r.text]
     # NOTE(review): 20 is presumably the batch size per request -- confirm
     # against ScrollLoader.
     scroll_loader = ScrollLoader("post", url, 20, _xsrf=_xsrf, start=0)
     for response in scroll_loader.run():
         pieces.extend(response)
     text = "".join(pieces)
     topic_list = re.findall(
         r'<a\x20class=\"zm-list-avatar-link\"\x20href=\"([^>]*)\">', text)
     # Imported inside the method, as in the original code.
     from Topic import Topic
     # NOTE: a topicBloom-based duplicate check was commented out in the
     # original code; every matched link is yielded.
     # A separate loop name keeps the page URL in ``url`` from being shadowed.
     for topic_path in topic_list:
         yield Topic("http://www.zhihu.com" + topic_path)
Beispiel #5
0
 def get_followers(self):
     """Yield a ``User`` for each follower link found for this profile.

     The page at ``self.url + '/followers'`` is fetched first; its parsed
     markup supplies the hash id and XSRF token handed to ``ScrollLoader``.
     Every item of every response produced by ``scroll_loader.run()`` is
     appended to the page text, and the combined text is scanned for
     ``zg-link`` anchors.  Each captured ``href`` is passed to ``User``
     unchanged.

     This is a generator, so nothing is requested until iteration starts.
     """
     follower_page_url = self.url + '/followers'
     r = requests.get(follower_page_url)
     page_text = r.text
     soup = BeautifulSoup(page_text)
     hash_id = get_hash_id(soup)
     _xsrf = get_xsrf(soup)
     # NOTE(review): 20 is presumably the batch size per request -- confirm
     # against ScrollLoader.
     scroll_loader = ScrollLoader(
         "post", "http://www.zhihu.com/node/ProfileFollowersListV2", 20,
         _xsrf, hash_id)
     # Gather the pieces and join once; repeated ``text += each`` may re-copy
     # the accumulated string on every append.
     pieces = [page_text]
     for response in scroll_loader.run():
         pieces.extend(response)
     text = "".join(pieces)
     follower_url_list = re.findall(
         r'<a[^>]+href=\"([^>]*)\"\x20class=\"zg-link\"', text)
     for url in follower_url_list:
         yield User(url)
Beispiel #6
0
 def get_followeing_topics(self):
     """Yield a ``Topic`` for each topic link found on the topics page.

     The page at ``self.url + '/topics'`` is fetched and its XSRF token is
     extracted.  A ``ScrollLoader`` then posts to the same URL; every item of
     every response it yields is appended to the page text.  The combined
     text is scanned for ``zm-list-avatar-link`` anchors and each captured
     ``href`` is prefixed with ``http://www.zhihu.com`` before being wrapped
     in a ``Topic``.

     This is a generator, so nothing is requested until iteration starts.
     """
     url = self.url + '/topics'
     r = requests.get(url)
     soup = BeautifulSoup(r.content)
     _xsrf = get_xsrf(soup)
     # Gather the pieces and join once; repeated ``text += each`` may re-copy
     # the accumulated string on every append.
     pieces = [r.text]
     # NOTE(review): 20 is presumably the batch size per request -- confirm
     # against ScrollLoader.
     scroll_loader = ScrollLoader("post", url, 20, _xsrf=_xsrf, start=0)
     for response in scroll_loader.run():
         pieces.extend(response)
     text = "".join(pieces)
     topic_list = re.findall(
         r'<a\x20class=\"zm-list-avatar-link\"\x20href=\"([^>]*)\">', text)
     # Imported inside the method, as in the original code.
     from Topic import Topic
     # NOTE: a topicBloom-based duplicate check was commented out in the
     # original code; every matched link is yielded.
     # A separate loop name keeps the page URL in ``url`` from being shadowed.
     for topic_path in topic_list:
         yield Topic("http://www.zhihu.com" + topic_path)
Beispiel #7
0
 def get_followers(self):
     """Yield a ``User`` for each follower link found on the followers page.

     The page at ``self.url + '/followers'`` is fetched and its XSRF token is
     extracted.  A ``ScrollLoader`` then posts to the same URL; every item of
     every response it yields is appended to the page text.  The combined
     text is scanned for ``zm-item-link-avatar`` anchors and each captured
     ``href`` is prefixed with ``http://www.zhihu.com`` before being wrapped
     in a ``User``.

     This is a generator, so nothing is requested until iteration starts.
     """
     url = self.url + '/followers'
     r = requests.get(url)
     soup = BeautifulSoup(r.content)
     _xsrf = get_xsrf(soup)
     # Gather the pieces and join once; repeated ``text += each`` may re-copy
     # the accumulated string on every append.
     pieces = [r.text]
     # NOTE(review): 20 is presumably the batch size per request -- confirm
     # against ScrollLoader.
     scroll_loader = ScrollLoader("post", url, 20, _xsrf=_xsrf, start=0)
     for response in scroll_loader.run():
         pieces.extend(response)
     text = "".join(pieces)
     user_list = re.findall(
         r'<a[^>]*\nclass=\"zm-item-link-avatar\"\nhref=\"([^>]*)\">', text)
     # Imported inside the method, as in the original code.
     from User import User
     # NOTE: a userBloom-based duplicate check was commented out in the
     # original code; every matched link is yielded.
     # A separate loop name keeps the page URL in ``url`` from being shadowed.
     for profile_path in user_list:
         user_url = "http://www.zhihu.com" + profile_path
         yield User(user_url)