Example 1
 def get_question_num(self):
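     # Count a user's questions: (total pages - 1) * 20 plus however many sit on the last page.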
     r = requests.get(self.url + "/questions")
     soup1 = BeautifulSoup(r.content)
     try:
         pages = soup1.find("div", class_="zm-invite-pager").find_all("span")
         total_pages = pages[-2].find("a").string
         tmp = (int(total_pages) - 1) * 20  # 20 per page, except on the last page
         r = requests.get(self.url + "/questions?page=" + total_pages)
         soup2 = BeautifulSoup(r.content)
         question_on_last_page = soup2.find_all("div", class_="feed-item feed-item-hook question-item")
         question_num = tmp + len(question_on_last_page)
         return question_num
     except AttributeError:
         return 0
Example 2
 def get_questions(self):
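     # Read the page count from the pager, then yield a Question for every link on every page.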
     url = self.url + "/questions?page="
     url_head = "http://www.zhihu.com"
     r = requests.get(url + '1')
     soup = BeautifulSoup(r.content)
     pages = soup.find("div", class_="zm-invite-pager").find_all("span")
     total_pages = int(pages[len(pages) - 2].find("a").string)
     from Question import Question
     for i in range(1, total_pages + 1):
         r = requests.get(url + '%d' % i)
         soup = BeautifulSoup(r.content)
         question_on_this_page = soup.find_all("a", class_="question_link")
         for question_tag in question_on_this_page:
             question_url = url_head + question_tag["href"]
             yield Question(question_url)
Example 3
    def run(self):
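        # Generic scroll-loader: bump the offset each pass and stop when the endpoint reports no more items.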
        while True:
            self.offset += self.add
            # http://zhuanlan.zhihu.com/api/columns/
            if self.http_method == 'get':
                r = requests.get(self.url + "&offset={0}".format(self.offset))
                if not json.loads(r.text):
                    break
                else:
                    for each_article in json.loads(r.text):
                        yield each_article['url']

            # http://www.zhihu.com/node/ProfileFollowersListV2
            if self.http_method == 'post' and self.start is None:
                params = """\"offset":{0},"order_by":"created","hash_id":"{1}\"""".format(
                    self.offset, self.hash_id)
                payload = {'method': 'next',
                           'params': "{" + params + "}", '_xsrf': self._xsrf}
                r = requests.post(self.url, data=payload)
                self.result = json.loads(r.text)['msg']
                if not self.result:
                    break
                else:
                    yield self.result

            # user_url/topics
            if self.http_method == 'post' and self.start is not None:
                payload = {'start': self.start,
                           'offset': self.offset, '_xsrf': self._xsrf}
                r = requests.post(self.url, data=payload)
                self.result = json.loads(r.text)['msg']
                if self.result[0] == 0:
                    break
                else:
                    yield self.result[1:]
Example 4
 def get_answers(self):
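     # Yield an Answer for each zm-item; the pager branch walks every page, the else branch handles single-page lists.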
     soup = self.soup
     if soup.find("div", class_="zm-invite-pager") is not None:
         total_pages = soup.find("div",
                                 class_="zm-invite-pager").find_all("span")
         total_pages = int(total_pages[len(total_pages) - 2].string)
         for i in range(1, total_pages + 1):  # pages run 1..total_pages inclusive
             url = self.url + "?page=%d" % i
             r = requests.get(url)
             soup = BeautifulSoup(r.content)
             tags = soup.find("div", id="zh-list-answer-wrap").find_all(
                 "div", class_="zm-item")
             for tag in tags:
                 question_part = tag.find("h2").find("a")["href"]
                 answer_part = tag.find(
                     "div", class_="zm-item-answer ")["data-atoken"]
                 answer_url = "http://www.zhihu.com" + question_part + "/answer/" + answer_part
                 from Answer import Answer
                 yield Answer(answer_url)
     else:
         tags = soup.find("div", id="zh-list-answer-wrap").find_all(
             "div", class_="zm-item")
         for tag in tags:
             question_part = tag.find("h2").find("a")["href"]
             answer_part = tag.find("div",
                                    class_="zm-item-answer ")["data-atoken"]
             answer_url = "http://www.zhihu.com" + question_part + "/answer/" + answer_part
             from Answer import Answer
             yield Answer(answer_url)
Example 5
def pull_bitcoin_index():
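	# Cache the ticker JSON; on first run, also record which currencies it quotes.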
	global bitcoin_index, viable_currencies, viable_currencies_string
	bitcoin_index = requests.get(sources.bitcoin_index).json()
	if viable_currencies is None:
		viable_currencies = bitcoin_index.keys()
		# Join the currency codes into a single comma-separated string.
		viable_currencies_string = ','.join(viable_currencies)
Example 6
def pull_currency_index():
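	# Accept a new index only if it changed and the update arrived within the refresh tolerance.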
	global currency_index, currency_refresh_last_update, trade_available
	new_index = requests.get(sources.currency_index)
	if currency_index != new_index.json():
		time_diff = time.time() - currency_refresh_last_update
		currency_refresh_last_update = time.time()
		if time_diff <= currency_refresh_tolerance:
			currency_index = new_index.json()
			trade_available = True
Example 7
 def get_columns(self):
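     # Every avatar link on the /posts page points at a column the user writes for.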
     post_url = self.url + '/posts'
     r = requests.get(post_url)
     soup = BeautifulSoup(r.content)
     for each_column in soup.find_all("a", "avatar-link"):
         #if not collumnBloom.is_element_exist(each_column['href']):
         #    collumnBloom.insert_element(each_column['href'])
         from Column import Column
         yield Column(each_column['href'])
Example 8
 def get_father(self):
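     # Parent topics are listed in the organize editor; each link carries a topic data-token.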
     url = self.url + "/organize"
     r = requests.get(url)
     soup = BeautifulSoup(r.content)
     parent_div = soup.find(id="zh-topic-organize-parent-editor")
     parent_links = parent_div.find_all("a")
     url_head = "http://www.zhihu.com/topic/"
     for item in parent_links:
         yield Topic(url_head + item["data-token"])
Example 9
 def get_child(self):
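     # Mirror of get_father, reading the child-topic block of the organize editor.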
     url = self.url + "/organize"
     r = requests.get(url)
     soup = BeautifulSoup(r.content)
     child_div = soup.find(id="zh-topic-organize-child-editor")
     child_url = child_div.find_all("a")
     url_head = "http://www.zhihu.com/topic/"
     for item in child_url:
         yield Topic(url_head + item["data-token"])
Example 10
 def get_edit_time(self):
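     # Collect every edit timestamp from the topic's /log page, sorted ascending.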
     url = self.url + "/log"
     r = requests.get(url)
     soup = BeautifulSoup(r.content)
     logs = soup.find_all("div", class_="zm-item")
     timelist = []
     for log in logs:
         timelist.append(log.find("time").string)
     timelist.sort()
     return timelist
Example 11
 def search_xsrf(cls):
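     # Scrape the hidden _xsrf form token from the zhihu.com homepage.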
     url = "http://www.zhihu.com/"
     from Requests import requests
     r = requests.get(url)
     if int(r.status_code) != 200:
         raise NetworkError(u"验证码请求失败")
     results = re.compile(
         r"<input\stype=\"hidden\"\sname=\"_xsrf\"\svalue=\"(\S+)\"",
         re.DOTALL).findall(r.text)
     if len(results) < 1:
         Logging.info(u"提取XSRF 代码失败")
         return None
     return results[0]
Example 12
 def islogin(cls):
     # check session
     url = "https://www.zhihu.com/settings/profile"
     from Requests import requests
     r = requests.get(url, allow_redirects=False)
     status_code = int(r.status_code)
     if status_code in (301, 302):
         # not logged in
         return False
     elif status_code == 200:
         return True
     else:
         Logging.warn(u"网络故障")
         return None
Example 13
 def get_asks(self):
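     # 20 questions per page, so (asks_num - 1) / 20 + 1 is ceil(asks_num / 20) in integer arithmetic.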
     asks_num = self.get_ask_num()
     if asks_num == 0:
         return
     else:
         for i in xrange((asks_num - 1) / 20 + 1):
             ask_url = self.url + "/asks?page=" + str(i + 1)
             r = requests.get(ask_url)
             soup = BeautifulSoup(r.content)
             for question in soup.find_all("a", class_="question_link"):
                 url = "http://www.zhihu.com" + question["href"]
                 #if not questionBloom.is_element_exist(url):
                 #    questionBloom.insert_element(url)
                 from Question import Question
                 yield Question(url)
Example 14
 def get_answers(self):
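     # Same ceil-division paging as get_asks: 20 answers per page.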
     answers_num = self.get_answer_num()
     if answers_num == 0:
         return
     else:
         for i in xrange((answers_num - 1) / 20 + 1):
             answer_url = self.url + "/answers?page=" + str(i + 1)
             r = requests.get(answer_url)
             soup = BeautifulSoup(r.content)
             from Answer import Answer
             for answer_tag in soup.find_all("a", class_="question_link"):
                 answer_url = 'http://www.zhihu.com' + answer_tag["href"]
                 #if not answerBloom.is_element_exist(answer_url):
                 #    answerBloom.insert_element(answer_url)
                 yield Answer(answer_url)
Example 15
 def get_followers(self):
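     # Followers load via AJAX scrolling, so replay the POSTs and regex profile links out of the HTML.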
     follower_page_url = self.url + '/followers'
     r = requests.get(follower_page_url)
     text = r.text
     soup = BeautifulSoup(text)
     hash_id = get_hash_id(soup)
     _xsrf = get_xsrf(soup)
     scroll_loader = ScrollLoader(
         "post", "http://www.zhihu.com/node/ProfileFollowersListV2", 20, _xsrf, hash_id)
     for response in scroll_loader.run():
         for each in response:
             text += each
     follower_url_list = re.findall(
         r'<a[^>]+href=\"([^>]*)\"\x20class=\"zg-link\"', text)
     for url in follower_url_list:
         yield User(url)
Example 16
 def parser(self):
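     # Populate the article's fields from the zhuanlan column API's JSON response.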
     r = requests.get(self.api_url)
     response = json.loads(r.text)
     self.rating = response['rating']
     self.title = response['title']
     self.titleImage = response['titleImage']
     self.topics = response['topics']
     self.author_url = response['author']['profileUrl']
     self.content = response['content']
     self.snapshotUrl = response['snapshotUrl']
     self.publishedTime = response['publishedTime']
     self.column = response['column']['slug']
     self.summary = response['summary']
     self.commentsCount = response['commentsCount']
     self.likesCount = response['likesCount']
     self.comment_url = response['links']['comments']
Example 17
 def get_followeing_topics(self):
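     # Followed topics also scroll-load; accumulate the fragments, then regex out the topic links.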
     url = self.url + '/topics'
     r = requests.get(url)
     soup = BeautifulSoup(r.content)
     _xsrf = get_xsrf(soup)
     text = r.text
     scroll_loader = ScrollLoader("post", url, 20, _xsrf=_xsrf, start=0)
     for response in scroll_loader.run():
         for each in response:
             text += each
     topic_list = re.findall(
         r'<a\x20class=\"zm-list-avatar-link\"\x20href=\"([^>]*)\">', text)
     from Topic import Topic
     for url in topic_list:
         #if not topicBloom.is_element_exist(url):
         #    topicBloom.insert_element(url)
         yield Topic("http://www.zhihu.com" + url)
Example 18
 def get_comments(self):
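     # Question comments come from the QuestionCommentBoxV2 node endpoint, keyed by question id.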
     url = "http://www.zhihu.com/node/QuestionCommentBoxV2?params={" + "\"question_id\":{0}".format(
         self.get_data_resourceid()) + "}"
     print url
     r = requests.get(url)
     soup = BeautifulSoup(r.content)
     for comment_div in soup.find_all("div", class_="zm-item-comment"):
         author_url = comment_div.find("a", class_="zg-link")['href']
         content = comment_div.find(
             "div", class_="zm-comment-content").next_element
         date = comment_div.find("span", class_="date").next_element
         like_num = comment_div.find(
             "span", class_="like-num ").next_element
         # Comment(author_url,question_url,answer_url,content,date,like_num)
         from Comment import Comment
         # TODO: comment bloom
         yield Comment(author_url, self.url, None, content, date, like_num)
Example 19
 def get_followers(self):
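     # Variant follower scraper that scroll-loads the /followers page itself.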
     url = self.url + '/followers'
     r = requests.get(url)
     soup = BeautifulSoup(r.content)
     _xsrf = get_xsrf(soup)
     text = r.text
     scroll_loader = ScrollLoader("post", url, 20, _xsrf=_xsrf, start=0)
     for response in scroll_loader.run():
         for each in response:
             text += each
     user_list = re.findall(
         r'<a[^>]*\nclass=\"zm-item-link-avatar\"\nhref=\"([^>]*)\">', text)
     from User import User
     for url in user_list:
         user_url = "http://www.zhihu.com" + url
         #if not userBloom.is_element_exist(user_url):
         #    userBloom.insert_element(user_url)
         yield User(user_url)
Example 20
 def get_upvoters(self):  # anonymous upvoters are skipped for now
     soup = self.soup
     data_aid = soup.find("div", class_="zm-item-answer  zm-item-expanded")["data-aid"]
     request_url = 'http://www.zhihu.com/node/AnswerFullVoteInfoV2'
     r = requests.get(request_url, params={"params": "{\"answer_id\":\"%d\"}" % int(data_aid)})
     soup = BeautifulSoup(r.content)
     voters_info = soup.find_all("span")[1:-1]
     if len(voters_info) == 0:
         return
     else:
         from User import User
         for voter_info in voters_info:
             if voter_info.find('a'):
                 voter_url = "http://www.zhihu.com" + str(voter_info.a["href"])
                 # Bloom
                 #if not userBloom.is_element_exist(voter_url):
                 #    userBloom.insert_element(voter_url)
                 yield User(voter_url)
Example 21
    def download_captcha(cls):
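        # Save the captcha image locally and hand it to the platform's default viewer.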
        url = "http://www.zhihu.com/captcha.gif"
        from Requests import requests
        r = requests.get(url, params={"r": random.random()})
        if int(r.status_code) != 200:
            raise NetworkError(u"验证码请求失败")
        image_name = u"verify." + r.headers['content-type'].split("/")[1]
        with open(image_name, "wb") as f:
            f.write(r.content)
        """
            System platform: https://docs.python.org/2/library/platform.html
        """
        Logging.info(u"正在调用外部程序渲染验证码 ... ")
        if platform.system() == "Linux":
            Logging.info(u"Command: xdg-open %s &" % image_name)
            os.system("xdg-open %s &" % image_name)
        elif platform.system() == "Darwin":
            Logging.info(u"Command: open %s &" % image_name)
            os.system("open %s &" % image_name)
        elif platform.system() in ("SunOS", "FreeBSD", "Unix", "OpenBSD", "NetBSD"):
            # These platforms all share the same "open" invocation.
            os.system("open %s &" % image_name)
        elif platform.system() == "Windows":
            os.system("%s" % image_name)
        else:
            Logging.info(u"我们无法探测你的作业系统,请自行打开验证码 %s 文件,并输入验证码。" %
                         os.path.join(os.getcwd(), image_name))

        sys.stdout.write(termcolor.colored(u"请输入验证码: ", "cyan"))
        captcha_code = raw_input()
        return captcha_code
Example 22
 def parser(self):
     try:
         r = requests.get(self.url)
         self.soup = BeautifulSoup(r.content)
     except requests.RequestException:
         # Retry on network failure; note this recursion has no depth limit.
         self.parser()
Example 23
 def get_comments(self):
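     # Fetches the comment feed; the response is not parsed yet.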
     r = requests.get(self.comment_url)
Example 24
 def parser(self):
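     # Fetch the page once and cache the parsed soup on the instance.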
     r = requests.get(self.url)
     self.soup = BeautifulSoup(r.content)