Ejemplo n.º 1
0
 def setUp(self):
     """Record the interpreter's text types and load the story under test."""
     major_version = sys.version_info[0]
     self.PY2 = major_version == 2
     # `unicode` is only evaluated on the Python 2 branch of the expression.
     self.text_type = [unicode, str] if self.PY2 else [str]
     # https://news.ycombinator.com/item?id=6115341
     self.story = Story.fromid(6115341)
    def setUp(self):
        """Enable HTTPretty, register canned pages, and fetch the comments of story 7324236."""
        httpretty.HTTPretty.enable()
        httpretty.register_uri(
            httpretty.GET,
            'https://news.ycombinator.com/',
            body=get_content('index.html'),
        )

        # (path below constants.BASE_URL, name passed to get_content): the
        # item page followed by four `x?fnid=...` pages, registered in order.
        canned_pages = (
            ('item?id=7324236', '7324236.html'),
            ('x?fnid=0MonpGsCkcGbA7rcbd2BAP', '7324236-2.html'),
            ('x?fnid=jyhCSQtM6ymFazFplS4Gpf', '7324236-3.html'),
            ('x?fnid=s3NA4qB6zMT3KHVk1x2MTG', '7324236-4.html'),
            ('x?fnid=pFxm5XBkeLtmphVejNZWlo', '7324236-5.html'),
        )
        for path, fixture in canned_pages:
            httpretty.register_uri(
                httpretty.GET,
                '%s/%s' % (constants.BASE_URL, path),
                body=get_content(fixture),
            )

        self.comments = Story.fromid(7324236).get_comments()
Ejemplo n.º 3
0
    def setUp(self):
        """Register canned HN responses with HTTPretty and load story 7324236's comments."""
        httpretty.HTTPretty.enable()
        httpretty.register_uri(httpretty.GET, 'https://news.ycombinator.com/',
                               body=get_content('index.html'))

        # Each entry maps a path under constants.BASE_URL to the name handed
        # to get_content for its body; registration order is kept as listed.
        fixtures_by_path = [
            ('item?id=7324236', '7324236.html'),
            ('x?fnid=0MonpGsCkcGbA7rcbd2BAP', '7324236-2.html'),
            ('x?fnid=jyhCSQtM6ymFazFplS4Gpf', '7324236-3.html'),
            ('x?fnid=s3NA4qB6zMT3KHVk1x2MTG', '7324236-4.html'),
            ('x?fnid=pFxm5XBkeLtmphVejNZWlo', '7324236-5.html'),
        ]
        for page_path, fixture_name in fixtures_by_path:
            httpretty.register_uri(httpretty.GET,
                                   '%s/%s' % (constants.BASE_URL, page_path),
                                   body=get_content(fixture_name))

        loaded_story = Story.fromid(7324236)
        self.comments = loaded_story.get_comments()
Ejemplo n.º 4
0
 def setUp(self):
     """Set the PY2 flag, the accepted text types, and the story fixture."""
     self.PY2 = (sys.version_info[0] == 2)
     if self.PY2:
         # Python 2 has both unicode and byte strings.
         self.text_type = [unicode, str]
     else:
         self.text_type = [str]
     # https://news.ycombinator.com/item?id=6115341
     story_id = 6115341
     self.story = Story.fromid(story_id)
Ejemplo n.º 5
0
    def fromid(self, item_id):
        """
        Build a Story by scraping the item page for item_id.

        The page is obtained through get_item_soup; nothing here handles
        HTTP errors or unexpected markup, so the referenced story is assumed
        to be valid. item_id is documented as an int; a falsy value raises
        Exception.

        Returns a Story whose rank is -1 and whose other fields are read
        from the page.
        """
        if not item_id:
            raise Exception('Need an item_id for a story')

        page = get_item_soup(item_id)

        # The third table holds the post's meta information:
        # row 0 = title, domain; row 1 = points, user, time, comments.
        rows = page.findChildren('table')[2].findChildren('tr')

        heading = rows[0].findChildren('td')[1]
        title = heading.find('a').text
        try:
            # A missing <span> raises AttributeError, which marks a self post.
            domain = heading.find('span').string[2:-2]
            is_self = False
            link = heading.find('a').get('href')
        except AttributeError:
            is_self = True
            domain = BASE_URL
            link = '%s/item?id=%s' % (BASE_URL, item_id)

        # Example contents:
        # [<span id="score_7024626">789 points</span>, u' by ',
        #  <a href="user?id=endianswap">endianswap</a>,
        #  u' 8 hours ago  | ', <a href="item?id=7024626">238 comments</a>]
        details = rows[1].findChildren('td')[1].contents

        points = int(re.match(r'^(\d+)\spoint.*', details[0].text).group(1))
        submitter = details[2].text
        submitter_profile = '%s/%s' % (BASE_URL, details[2].get('href'))
        # Keep the first three whitespace-separated words, e.g. "8 hours ago".
        published_time = ' '.join(details[3].strip().split()[:3])
        comments_link = '%s/item?id=%s' % (BASE_URL, item_id)
        try:
            comment_match = re.match(r'(\d+)\s.*', details[4].text)
            num_comments = int(comment_match.group(1))
        except AttributeError:
            # No leading number in the comments cell.
            num_comments = 0

        # The story was looked up directly by id, so rank is fixed at -1.
        rank = -1
        story_id = item_id
        return Story(rank, story_id, title, link, domain, points, submitter,
                     published_time, submitter_profile, num_comments,
                     comments_link, is_self)
Ejemplo n.º 6
0
def comments(story_id):
    """ show comments for the story """
    # NOTE: the docstring is left untouched; it may be surfaced as CLI help.
    story_comments = Story.fromid(story_id).get_comments()

    if not story_comments:
        echo(style('no comments for story found!', fg='red'))
        # Nothing to list; returning also avoids iterating a None result.
        return

    for comment in story_comments:
        # Age padded to 15 columns, then the author on the same line.
        echo(style(comment.time_ago.center(15), fg='magenta'), nl=False)
        echo('by ' + style(str(comment.user), fg='cyan'))
        echo(comment.body)
Ejemplo n.º 7
0
def comments(story_id):
    """ show comments for the story """
    # NOTE: the docstring is left untouched; it may be surfaced as CLI help.
    story_comments = Story.fromid(story_id).get_comments()

    if not story_comments:
        echo(style('no comments for story found!', fg='red'))
        # Nothing to list; returning also avoids iterating a None result.
        return

    for comment in story_comments:
        # Age padded to 15 columns, then the author on the same line.
        echo(style(comment.time_ago.center(15), fg='magenta'), nl=False)
        echo('by ' + style(str(comment.user), fg='cyan'))
        echo(comment.body)
Ejemplo n.º 8
0
    def setUp(self):
        """Register canned responses for the front page and item 6115341, then build the fixtures."""
        httpretty.HTTPretty.enable()
        httpretty.register_uri(
            httpretty.GET,
            'https://news.ycombinator.com/',
            body=get_content('index.html'),
        )
        httpretty.register_uri(
            httpretty.GET,
            '%s/%s' % (constants.BASE_URL, 'item?id=6115341'),
            body=get_content('6115341.html'),
        )

        # True when running under Python 2.
        self.PY2 = (sys.version_info[0] == 2)
        self.hn = HN()
        self.story = Story.fromid(6115341)
Ejemplo n.º 9
0
    def setUp(self):
        """Serve item 6115341 from a canned page and prepare the text-type fixtures."""
        httpretty.HTTPretty.enable()
        httpretty.register_uri(
            httpretty.GET,
            '%s/%s' % (constants.BASE_URL, 'item?id=6115341'),
            body=get_content('6115341.html'),
        )

        self.PY2 = (sys.version_info[0] == 2)
        # `unicode` is only evaluated on the Python 2 branch.
        self.text_type = [unicode, str] if self.PY2 else [str]
        # https://news.ycombinator.com/item?id=6115341
        self.story = Story.fromid(6115341)
Ejemplo n.º 10
0
    def setUp(self):
        """Serve item 6115341 from a canned page and prepare the text-type fixtures."""
        httpretty.HTTPretty.enable()
        httpretty.register_uri(
            httpretty.GET,
            '%s/%s' % (constants.BASE_URL, 'item?id=6115341'),
            body=get_content('6115341.html'),
        )

        self.PY2 = (sys.version_info[0] == 2)
        # `unicode` is only evaluated on the Python 2 branch.
        self.text_type = [unicode, str] if self.PY2 else [str]
        # https://news.ycombinator.com/item?id=6115341
        self.story = Story.fromid(6115341)
Ejemplo n.º 11
0
    def setUp(self):
        """Enable HTTPretty with canned front-page and item pages, then create the HN and Story fixtures."""
        httpretty.HTTPretty.enable()
        httpretty.register_uri(
            httpretty.GET,
            'https://news.ycombinator.com/',
            body=get_content('index.html'),
        )
        httpretty.register_uri(
            httpretty.GET,
            '%s/%s' % (constants.BASE_URL, 'item?id=6115341'),
            body=get_content('6115341.html'),
        )

        # Interpreter check: True on Python 2.
        major_version = sys.version_info[0]
        self.PY2 = major_version == 2
        self.hn = HN()
        self.story = Story.fromid(6115341)
 def test_get_nested_comments_old_story(self):
     # The body of the first comment of story 7410260 must have length 2131.
     # Indentation uses spaces only; mixing tabs and spaces is a TabError
     # on Python 3.
     self.story = Story.fromid(7410260)
     self.comments = self.story.get_comments()
     comment = self.comments[0].body
     self.assertEqual(len(comment), 2131)
Ejemplo n.º 13
0

# Titles of the 10 newest stories, each followed by a separator and a blank line.
for story in hn.get_stories(story_type='newest', limit=10):
    print(story.title.encode('utf-8'))
    print('*' * 50)
    print('')


# For each front-page story: its title, then its first comment (None when
# the story has no comments).
for story in hn.get_stories():
    print(story.title.encode('utf-8'))
    comments = story.get_comments()
    print(comments[0] if len(comments) > 0 else None)
    print('*' * 10)


# For the 5 "best" stories: up to 5 comments each, tab-indented by nesting
# level and truncated to 30 characters.
for story in hn.get_stories(story_type='best', limit=5):
    print(story.title.encode('utf-8'))
    comments = story.get_comments()
    # Slicing an empty list yields nothing, so no emptiness guard is needed.
    for comment in comments[:5]:
        indent = '\t' * (comment.level + 1)
        print(indent + comment.body[:30])
    print('*' * 10)

# Comments can also be fetched for any story looked up by id.
story = Story.fromid(6374031)
comments = story.get_comments()
Ejemplo n.º 14
0
    print(comments[0] if len(comments) > 0 else None)
    print('*' * 10)



# For the 5 "best" stories: up to 5 comments each, tab-indented by nesting
# level and truncated to 30 characters.
for story in hn.get_stories(story_type='best', limit=5):
    print(story.title)
    comments = story.get_comments()
    # Slicing an empty list yields nothing, so no emptiness guard is needed.
    for comment in comments[:5]:
        indent = '\t' * (comment.level + 1)
        print(indent + comment.body[:30])
    print('*' * 10)

# Comments can also be fetched for any story looked up by id.
story = Story.fromid(6374031)
comments = story.get_comments()

########NEW FILE########
__FILENAME__ = test_leaders
import unittest

from hn import HN, Story
from hn import utils, constants

from test_utils import get_content, PRESETS_DIR

import httpretty

class TestGetLeaders(unittest.TestCase):
 def test_get_nested_comments(self):
     # The body of the first comment of story 7404389 must have length of
     # at least 5508. Indentation uses spaces only; mixing tabs and spaces
     # is a TabError on Python 3.
     self.story = Story.fromid(7404389)
     self.comments = self.story.get_comments()
     comment = self.comments[0].body
     self.assertTrue(len(comment) >= 5508)
 def test_get_nested_comments_old_story(self):
     # The body of the first comment of story 7410260 must have length 2131.
     self.story = Story.fromid(7410260)
     self.comments = self.story.get_comments()
     first_body = self.comments[0].body
     body_length = len(first_body)
     self.assertEqual(body_length, 2131)
 def test_get_nested_comments(self):
     # The body of the first comment of story 7404389 must have length of
     # at least 5508.
     self.story = Story.fromid(7404389)
     self.comments = self.story.get_comments()
     first_body = self.comments[0].body
     long_enough = len(first_body) >= 5508
     self.assertTrue(long_enough)
Ejemplo n.º 18
0
def comment(story_id):
    """ comment story on HackerNews """
    # Look the story up by id and hand its comments link to click.launch.
    comments_url = Story.fromid(story_id).comments_link
    click.launch(comments_url)
Ejemplo n.º 19
0
 def setUp(self):
     """Set the PY2 flag and create the HN client and the story fixture."""
     # True when the interpreter's major version is 2.
     major_version = sys.version_info[0]
     self.PY2 = major_version == 2
     self.hn = HN()
     story_id = 6374031
     self.story = Story.fromid(story_id)
Ejemplo n.º 20
0
def go(story_id):
    """ go to the story on HackerNews """
    # Look the story up by id and hand its link to click.launch.
    target_url = Story.fromid(story_id).link
    click.launch(target_url)
Ejemplo n.º 21
0
 def setUp(self):
     """Set the PY2 flag and create the HN client and the story fixture."""
     # True when the interpreter's major version is 2.
     major_version = sys.version_info[0]
     self.PY2 = major_version == 2
     self.hn = HN()
     story_id = 6374031
     self.story = Story.fromid(story_id)
Ejemplo n.º 22
0
def go(story_id):
    """ go to the story on HackerNews """
    # Look the story up by id and hand its link to click.launch.
    target_url = Story.fromid(story_id).link
    click.launch(target_url)
 def setUp(self):
     """Load story 7324236 as the fixture for each test."""
     story_id = 7324236
     self.story = Story.fromid(story_id)
Ejemplo n.º 24
0
def comment(story_id):
    """ comment story on HackerNews """
    # Look the story up by id and hand its comments link to click.launch.
    comments_url = Story.fromid(story_id).comments_link
    click.launch(comments_url)
 def setUp(self):
     """Load story 7324236 as the fixture for each test."""
     story_id = 7324236
     self.story = Story.fromid(story_id)
Ejemplo n.º 26
0
    def _build_story(self, all_rows):
        """
        Builds and returns a list of stories (dicts) from the passed source.
        """
        all_stories = []  # list to hold all stories

        for (info, detail) in all_rows:

            #-- Get the into about a story --#
            info_cells = info.findAll('td')  # split in 3 cells

            rank = int(info_cells[0].string[:-1])
            title = '%s' % info_cells[2].find('a').string
            link = info_cells[2].find('a').get('href')

            is_self = False  # by default all stories are linking posts

            if link.find(
                    'item?id='
            ) is -1:  # the link doesn't contains "http" meaning an internal link
                domain = info_cells[2].find('span').string[
                    2:-2]  # slice " (abc.com) "
            else:
                link = '%s/%s' % (BASE_URL, link)
                domain = BASE_URL
                is_self = True
            #-- Get the into about a story --#

            #-- Get the detail about a story --#
            detail_cell = detail.findAll('td')[
                1]  # split in 2 cells, we need only second
            detail_concern = detail_cell.contents  # list of details we need, 5 count

            num_comments = -1

            if re.match(r'^(\d+)\spoint.*',
                        detail_concern[0].string) is not None:
                # can be a link or self post
                points = int(
                    re.match(r'^(\d+)\spoint.*',
                             detail_concern[0].string).groups()[0])
                submitter = '%s' % detail_concern[2].string
                submitter_profile = '%s/%s' % (BASE_URL,
                                               detail_concern[2].get('href'))
                published_time = ' '.join(
                    detail_concern[3].strip().split()[:3])
                comment_tag = detail_concern[4]
                story_id = int(
                    re.match(r'.*=(\d+)', comment_tag.get('href')).groups()[0])
                comments_link = '%s/item?id=%d' % (BASE_URL, story_id)
                comment_count = re.match(r'(\d+)\s.*', comment_tag.string)
                try:
                    # regex matched, cast to int
                    num_comments = int(comment_count.groups()[0])
                except AttributeError:
                    # did not match, assign 0
                    num_comments = 0
            else:  # this is a job post
                points = 0
                submitter = ''
                submitter_profile = ''
                published_time = '%s' % detail_concern[0]
                comment_tag = ''
                try:
                    story_id = int(re.match(r'.*=(\d+)', link).groups()[0])
                except AttributeError:
                    story_id = -1  # job listing that points to external link
                comments_link = ''
                comment_count = -1
            #-- Get the detail about a story --#

            story = Story(rank, story_id, title, link, domain, points,
                          submitter, published_time, submitter_profile,
                          num_comments, comments_link, is_self)

            all_stories.append(story)

        return all_stories