def setUp(self):
    """Record the Python major-version flag, the accepted text types and the story under test."""
    self.PY2 = sys.version_info[0] == 2
    # on Python 2 both `unicode` and `str` are accepted, otherwise only `str`
    self.text_type = [unicode, str] if self.PY2 else [str]
    # https://news.ycombinator.com/item?id=6115341
    self.story = Story.fromid(6115341)
def setUp(self):
    """
    Enable httpretty, register the front page plus the item page of story
    7324236 and its four follow-up pages, then load the story's comments.
    """
    httpretty.HTTPretty.enable()
    httpretty.register_uri(httpretty.GET, 'https://news.ycombinator.com/',
                           body=get_content('index.html'))

    # (path below BASE_URL, name passed to get_content), in registration order
    story_pages = (
        ('item?id=7324236', '7324236.html'),
        ('x?fnid=0MonpGsCkcGbA7rcbd2BAP', '7324236-2.html'),
        ('x?fnid=jyhCSQtM6ymFazFplS4Gpf', '7324236-3.html'),
        ('x?fnid=s3NA4qB6zMT3KHVk1x2MTG', '7324236-4.html'),
        ('x?fnid=pFxm5XBkeLtmphVejNZWlo', '7324236-5.html'),
    )
    for path, content_name in story_pages:
        httpretty.register_uri(httpretty.GET,
                               '%s/%s' % (constants.BASE_URL, path),
                               body=get_content(content_name))

    story = Story.fromid(7324236)
    self.comments = story.get_comments()
def setUp(self):
    """
    Turn on httpretty and register canned responses for the front page and
    for every page of story 7324236, then fetch the story's comments.
    """
    httpretty.HTTPretty.enable()
    httpretty.register_uri(httpretty.GET, 'https://news.ycombinator.com/',
                           body=get_content('index.html'))
    httpretty.register_uri(httpretty.GET,
                           '%s/%s' % (constants.BASE_URL, 'item?id=7324236'),
                           body=get_content('7324236.html'))

    # follow-up pages of the same story: (fnid, page number used in the
    # content name), registered in this order
    follow_ups = [
        ('0MonpGsCkcGbA7rcbd2BAP', 2),
        ('jyhCSQtM6ymFazFplS4Gpf', 3),
        ('s3NA4qB6zMT3KHVk1x2MTG', 4),
        ('pFxm5XBkeLtmphVejNZWlo', 5),
    ]
    for fnid, page_no in follow_ups:
        httpretty.register_uri(
            httpretty.GET,
            '%s/%s' % (constants.BASE_URL, 'x?fnid=' + fnid),
            body=get_content('7324236-%d.html' % page_no))

    self.comments = Story.fromid(7324236).get_comments()
def setUp(self):
    """Set the PY2 flag, the list of accepted text types, and fetch story 6115341."""
    self.PY2 = sys.version_info[0] == 2
    if self.PY2:
        # `unicode` only exists on Python 2
        self.text_type = [unicode, str]
    else:
        self.text_type = [str]
    # https://news.ycombinator.com/item?id=6115341
    self.story = Story.fromid(6115341)
def fromid(self, item_id):
    """
    Creates a Story instance for the given item_id by scraping its item
    page.

    item_id is an int. The story it refers to is assumed to be valid, i.e.
    fetching it does not raise any HTTP errors.
    """
    if not item_id:
        raise Exception('Need an item_id for a story')

    # fetch the page of this particular story; the post has not been
    # scraped before, so every field is read explicitly below
    soup = get_item_soup(item_id)

    # the table at index 2 is used to extract meta information about the post
    # rows: [0] = title, domain, [1] = points, user, time, comments
    rows = soup.findChildren('table')[2].findChildren('tr')

    title_cell = rows[0].findChildren('td')[1]  # title, domain
    title = title_cell.find('a').text
    try:
        # domain found
        domain = title_cell.find('span').string[2:-2]
        is_self = False
        link = title_cell.find('a').get('href')
    except AttributeError:
        # self post
        domain = BASE_URL
        is_self = True
        link = '%s/item?id=%s' % (BASE_URL, item_id)

    # points, user, time, comments, e.g.
    # [<span id="score_7024626">789 points</span>, u' by ', <a href="user?id=endianswap">endianswap</a>,
    #  u' 8 hours ago | ', <a href="item?id=7024626">238 comments</a>]
    meta = rows[1].findChildren('td')[1].contents

    points = int(re.match(r'^(\d+)\spoint.*', meta[0].text).group(1))
    submitter_tag = meta[2]
    submitter = submitter_tag.text
    submitter_profile = '%s/%s' % (BASE_URL, submitter_tag.get('href'))
    published_time = ' '.join(meta[3].strip().split()[:3])
    comments_link = '%s/item?id=%s' % (BASE_URL, item_id)
    try:
        num_comments = int(re.match(r'(\d+)\s.*', meta[4].text).group(1))
    except AttributeError:
        # the comments text does not start with a number
        num_comments = 0

    # rank is passed as -1 and the story id is the item_id itself
    return Story(-1, item_id, title, link, domain, points, submitter,
                 published_time, submitter_profile, num_comments,
                 comments_link, is_self)
def comments(story_id):
    """ show comments for the story """
    # Fetch the story by id and load its comments. A distinct local name is
    # used so this function's own name is not shadowed inside its body.
    story_comments = Story.fromid(story_id).get_comments()

    if not story_comments:
        echo(style('no comments for story found!', fg='red'))
        # nothing to print; returning here also avoids iterating a
        # non-iterable value should get_comments() ever return None
        return

    for comment in story_comments:
        # header line: centred age in magenta, followed by the author in cyan
        echo(style(comment.time_ago.center(15), fg='magenta'), nl=False)
        echo('by ' + style(str(comment.user), fg='cyan'))
        echo(comment.body)
def setUp(self):
    """
    Enable httpretty, register responses for the front page and for item
    6115341, then create the HN client and the story under test.
    """
    httpretty.HTTPretty.enable()
    httpretty.register_uri(httpretty.GET, 'https://news.ycombinator.com/',
                           body=get_content('index.html'))
    item_url = '%s/%s' % (constants.BASE_URL, 'item?id=6115341')
    httpretty.register_uri(httpretty.GET, item_url,
                           body=get_content('6115341.html'))

    # check py version
    major_version = sys.version_info[0]
    self.PY2 = major_version == 2

    self.hn = HN()
    self.story = Story.fromid(6115341)
def setUp(self):
    """
    Enable httpretty with a registered response for item 6115341, record
    the PY2 flag and accepted text types, and fetch the story under test.
    """
    httpretty.HTTPretty.enable()
    item_url = '%s/%s' % (constants.BASE_URL, 'item?id=6115341')
    httpretty.register_uri(httpretty.GET, item_url,
                           body=get_content('6115341.html'))

    self.PY2 = sys.version_info[0] == 2
    # on Python 2 both `unicode` and `str` are accepted, otherwise only `str`
    self.text_type = [unicode, str] if self.PY2 else [str]

    # https://news.ycombinator.com/item?id=6115341
    self.story = Story.fromid(6115341)
def test_get_nested_comments_old_story(self):
    """The body of the first comment of story 7410260 has length 2131."""
    story = Story.fromid(7410260)
    self.story = story
    self.comments = story.get_comments()
    first_body = self.comments[0].body
    self.assertEqual(len(first_body), 2131)
# print 10 latest stories
for story in hn.get_stories(story_type='newest', limit=10):
    print(story.title.encode('utf-8'))
    print('*' * 50)
    print('')

# for each story on front page, print top comment
for story in hn.get_stories():
    print(story.title.encode('utf-8'))
    comments = story.get_comments()
    top_comment = comments[0] if comments else None
    print(top_comment)
    print('*' * 10)

# print top 5 comments with nesting for top 5 stories
for story in hn.get_stories(story_type='best', limit=5):
    print(story.title.encode('utf-8'))
    comments = story.get_comments()
    if comments:
        for comment in comments[:5]:
            # one tab per nesting level (plus one), then at most the
            # first 30 characters of the body
            indent = '\t' * (comment.level + 1)
            print(indent + comment.body[:30])
    print('*' * 10)

# get the comments from any custom story
story = Story.fromid(6374031)
comments = story.get_comments()
print(comments[0] if len(comments) > 0 else None) print('*' * 10) # print top 5 comments with nesting for top 5 stories for story in hn.get_stories(story_type='best', limit=5): print(story.title) comments = story.get_comments() if len(comments) > 0: for comment in comments[:5]: print('\t' * (comment.level + 1) + comment.body[:min(30, len(comment.body))]) print('*' * 10) # get the comments from any custom story story = Story.fromid(6374031) comments = story.get_comments() ########NEW FILE######## __FILENAME__ = test_leaders import unittest from hn import HN, Story from hn import utils, constants from test_utils import get_content, PRESETS_DIR import httpretty class TestGetLeaders(unittest.TestCase):
def test_get_nested_comments(self):
    """The body of the first comment of story 7404389 has length of at least 5508."""
    story = Story.fromid(7404389)
    self.story = story
    self.comments = story.get_comments()
    first_body = self.comments[0].body
    body_length = len(first_body)
    self.assertTrue(body_length >= 5508)
def comment(story_id):
    """ comment story on HackerNews """
    # look the story up by id and hand its comments link to click.launch
    target = Story.fromid(story_id).comments_link
    click.launch(target)
def setUp(self):
    """Record the PY2 flag, create the HN client and fetch story 6374031."""
    # check py version
    major_version = sys.version_info[0]
    self.PY2 = major_version == 2

    self.hn = HN()
    self.story = Story.fromid(6374031)
def go(story_id):
    """ go to the story on HackerNews """
    # look the story up by id and hand its link to click.launch
    target = Story.fromid(story_id).link
    click.launch(target)
def setUp(self):
    """Fetch story 7324236 and keep it on the test case as `self.story`."""
    story_id = 7324236
    self.story = Story.fromid(story_id)
def _build_story(self, all_rows):
    """
    Builds and returns a list of Story objects from the passed source.

    all_rows is an iterable of (info, detail) pairs of table rows:
    `info` carries rank, title and link; `detail` carries points,
    submitter, time and the comments link, or, for a job post, only the
    posting time.

    Fields that do not exist for a job post are filled with placeholders:
    0 points, empty submitter/profile/comments link, -1 comments, and a
    story id of -1 when the link holds no id.
    """
    all_stories = []  # list to hold all stories

    for (info, detail) in all_rows:

        #-- Get the info about a story --#
        info_cells = info.findAll('td')  # split in 3 cells

        rank = int(info_cells[0].string[:-1])
        title = '%s' % info_cells[2].find('a').string
        link = info_cells[2].find('a').get('href')

        # Compare with ==, not `is`: identity of int literals is an
        # implementation detail and a SyntaxWarning on newer Pythons.
        if link.find('item?id=') == -1:
            # no "item?id=" in the link: a linking post, whose domain
            # sits in the span next to the title
            is_self = False
            domain = info_cells[2].find('span').string[2:-2]  # slice " (abc.com) "
        else:
            # internal link: make it absolute
            link = '%s/%s' % (BASE_URL, link)
            domain = BASE_URL
            is_self = True
        #-- Get the info about a story --#

        #-- Get the detail about a story --#
        # split in 2 cells, we need only second
        detail_cell = detail.findAll('td')[1]
        # list of details we need, 5 count
        detail_concern = detail_cell.contents

        points_match = re.match(r'^(\d+)\spoint.*', detail_concern[0].string)
        if points_match is not None:
            # can be a link or self post
            points = int(points_match.group(1))
            submitter = '%s' % detail_concern[2].string
            submitter_profile = '%s/%s' % (BASE_URL,
                                           detail_concern[2].get('href'))
            published_time = ' '.join(detail_concern[3].strip().split()[:3])

            comment_tag = detail_concern[4]
            story_id = int(
                re.match(r'.*=(\d+)', comment_tag.get('href')).group(1))
            comments_link = '%s/item?id=%d' % (BASE_URL, story_id)

            # the comment text starts with a count only when the regex
            # matches; otherwise the story counts as having 0 comments
            comment_count = re.match(r'(\d+)\s.*', comment_tag.string)
            if comment_count is not None:
                num_comments = int(comment_count.group(1))
            else:
                num_comments = 0
        else:
            # this is a job post
            points = 0
            submitter = ''
            submitter_profile = ''
            published_time = '%s' % detail_concern[0]
            id_match = re.match(r'.*=(\d+)', link)
            if id_match is not None:
                story_id = int(id_match.group(1))
            else:
                story_id = -1  # job listing that points to external link
            comments_link = ''
            num_comments = -1
        #-- Get the detail about a story --#

        story = Story(rank, story_id, title, link, domain, points,
                      submitter, published_time, submitter_profile,
                      num_comments, comments_link, is_self)
        all_stories.append(story)

    return all_stories