class TestPagination(unittest.TestCase):
    """
    Pagination checks for HN.get_stories when it is driven by page_limit.

    Every test requests a number of pages and expects 30 stories per page.
    """

    def setUp(self):
        self.hn = HN()

    def tearDown(self):
        pass

    def _assert_story_count(self, pages, **query):
        """
        Fetches `pages` pages using the extra keyword arguments in `query`
        and asserts that 30 stories per requested page came back.
        """
        fetched = self.hn.get_stories(page_limit=pages, **query)
        self.assertEqual(len(fetched), pages * 30)

    def test_pagination_top_for_2_pages(self):
        """
        Checks if the pagination works for the front page.
        """
        self._assert_story_count(2)

    def test_pagination_newest_for_3_pages(self):
        """
        Checks if the pagination works for the newest page.
        """
        self._assert_story_count(3, story_type='newest')

    def test_pagination_best_for_2_pages(self):
        """
        Checks if the pagination works for the best page.
        """
        self._assert_story_count(2, story_type='best')
Example #2
0
def get_best_articles(n):
    """
    Retrieves n best articles.

    Asks HN.get_stories for the 'best' listing with ``limit=n`` and
    flattens every returned story object into a plain dictionary.

    :param n: forwarded unchanged as the ``limit`` argument.
    :return: a list of dictionaries, one per story, in the order the
             stories were returned.
    """
    # No local named `type` here: it would shadow the builtin.
    stories = HN().get_stories(story_type='best', limit=n)
    return [
        {
            'id': article.story_id,
            'title': article.title,
            'points': article.points,
            'comments': article.comments_link,
            'submitter': article.submitter,
            'url': article.link,
            'self': article.is_self,
            'domain': article.domain,
            'profile': article.submitter_profile,
            'time': article.published_time,
            'num_comments': article.num_comments,
            'rank': article.rank,
        }
        for article in stories
    ]
    def populate_menu(self, m):
        """
        Fill menu `m` with one item per story, followed by a separator and
        a "Quit" item, then hand the menu to `self.indie`.

        The number of story items is capped by `num_stories`, which is
        looked up outside this method.
        """
        stories = HN().get_stories()[:num_stories]
        for story in stories:
            # Titles are decoded from bytes before being used as the label.
            entry = Gtk.MenuItem(bytes.decode(story["title"]))
            m.append(entry)

            # Activating an item passes its story to the response handler.
            entry.connect("activate", self.menuitem_response, story)
            entry.show()

        separator = Gtk.SeparatorMenuItem()
        separator.show()
        m.append(separator)

        quit_entry = Gtk.MenuItem("Quit")
        quit_entry.connect("activate", Gtk.main_quit)
        quit_entry.show()
        m.append(quit_entry)

        self.indie.set_menu(m)
Example #4
0
def update_via_hn(sleep_time = 1):
	"""
	Fetch the front-page and newest story listings from Hacker News,
	retrying with a doubling delay when the fetch raises.

	sleep_time -- seconds to wait before the next retry. Each retry calls
	this function again with twice the value; once the value is above 8
	no further retry is made and the function just returns.

	NOTE(review): Python 2 syntax (print statements, "except Exception, err").
	"""
	current_time = datetime.now()
	date_code = current_time.strftime('%Y%m%d')
	# One log file per calendar day: story_log_YYYYMMDD.json
	data_file_path = './story_metadata/story_log_' + date_code + '.json'
	# NOTE(review): the file is opened for appending but is neither written
	# nor closed in the visible code, and every retry opens it again --
	# confirm whether the rest of this function is missing here.
	data_file = open(data_file_path, 'a')
	hackernews = HN()
	
	try:
		print "Getting front page stories..."
		front_page_stories = hackernews.get_stories()
		print "Getting new stories..."
		new_stories = hackernews.get_stories(story_type='newest')
	except Exception, err:
		# Back off and retry only while the delay is still at most 8 seconds.
		if sleep_time <= 8:
			print err
			time.sleep(sleep_time)
			update_via_hn(sleep_time = sleep_time*2)
		# Whether or not a retry happened, this invocation is finished.
		return
Example #5
0
def get_hn(qtd=1, email=None, subject=None):
    """
    Return links of the newest Hacker News stories.

    `qtd` is passed as the `limit` of the request. A list of links is
    returned when `qtd` is greater than 1, otherwise only the first link
    (indexing the empty list raises IndexError when nothing came back).
    `email` and `subject` are accepted but not used by this function.
    """
    # fetch the news from Hacker News
    from hn import HN

    links = [
        story.link
        for story in HN().get_stories(story_type='newest', limit=qtd)
    ]
    return links if qtd > 1 else links[0]
Example #6
0
def download(collection, story_type, page_limit):
    """
    Save every story of the requested listing through `collection.save`
    and return how many stories were saved.

    Each saved document uses the story id as `_id`, copies the story
    attributes listed below under their own names, and records the
    current local time under `time`.
    """
    copied_fields = (
        "rank",
        "story_id",
        "title",
        "is_self",
        "link",
        "domain",
        "points",
        "submitter",
        "submitter_profile",
        "published_time",
        "num_comments",
        "comments_link",
    )
    saved_ids = []
    listing = HN().get_stories(story_type=story_type, page_limit=page_limit)
    for entry in listing:
        document = {"_id": entry.story_id}
        for field in copied_fields:
            document[field] = getattr(entry, field)
        document["time"] = datetime.now()
        saved_ids.append(collection.save(document))
    return len(saved_ids)
Example #7
0
class TestPagination(unittest.TestCase):
    """
    Tests for the "More" link lookup, the row zipping and the
    limit-driven pagination of HN.
    """

    def setUp(self):
        # check py version
        self.PY2 = sys.version_info[0] == 2
        self.hn = HN()

    def tearDown(self):
        pass

    def _assert_more_link_length(self, expected, **soup_args):
        """
        Fetches a page, extracts its next-page reference and compares the
        length of that reference with the length of `expected`.
        """
        soup = utils.get_soup(**soup_args)
        fnid = self.hn._get_next_page(soup)
        self.assertEqual(len(fnid), len(expected))

    def _assert_story_count(self, expected, **query):
        """
        Consumes get_stories(**query) and asserts the number of stories.
        """
        stories = list(self.hn.get_stories(**query))
        self.assertEqual(len(stories), expected)

    def test_more_link_top(self):
        """
        Checks if the "More" link at the bottom of homepage works.
        """
        self._assert_more_link_length('news2')

    def test_more_link_best(self):
        """
        Checks if the "More" link at the bottom of best page works.
        """
        self._assert_more_link_length('x?fnid=te9bsVN2BAx0XOpRmUjcY4',
                                      page='best')

    def test_more_link_newest(self):
        """
        Checks if the "More" link at the bottom of newest page works.
        """
        self._assert_more_link_length('x?fnid=te9bsVN2BAx0XOpRmUjcY4',
                                      page='newest')

    def test_get_zipped_rows(self):
        """
        Tests HN._get_zipped_rows for best page.
        """
        soup = utils.get_soup(page='best')
        zipped = self.hn._get_zipped_rows(soup)
        if not self.PY2:
            # outside Python 2 the rows are consumed into a list
            # before they are counted
            zipped = list(zipped)
        self.assertEqual(len(zipped), 30)

    def test_pagination_top_for_0_limit(self):
        """
        Checks if the pagination works for 0 limit.
        """
        self._assert_story_count(30, limit=0)

    def test_pagination_top_for_2_pages(self):
        """
        Checks if the pagination works for the front page.
        """
        self._assert_story_count(2 * 30, limit=2 * 30)

    def test_pagination_newest_for_3_pages(self):
        """
        Checks if the pagination works for the newest page.
        """
        self._assert_story_count(3 * 30, story_type='newest', limit=3 * 30)

    def test_pagination_best_for_2_pages(self):
        """
        Checks if the pagination works for the best page.
        """
        self._assert_story_count(2 * 30, story_type='best', limit=2 * 30)
Example #8
0
#Before anything install the Hackernews API
from hn import HN

# Example script: prints stories from the homepage and from the /best page.
# print() is called with a single argument throughout, which produces the
# same output on Python 2 and Python 3.
hn = HN()

# print top 10 stories from homepage
for story in hn.get_stories()[:10]:
    story.print_story()
    print('*' * 50)
    print('')
"""
# print 10 latest stories
for story in hn.get_stories(story_type='newest')[:10]:
    story.print_story()
    print '*' * 50
    print ''

# print all self posts from the homepage
for story in hn.get_stories():
    if story.is_self_post:
        story.print_story()
        print '*' * 50
        print ''
"""

# print the top 10 stories from /best page
for story in hn.get_stories(story_type='best')[:10]:
    story.print_story()
    print('*' * 50)
    print('')
Example #9
0
class TestStoriesDict(unittest.TestCase):
    """
    Checks the field types and the number of stories scraped from the
    front, newest and best pages.
    """

    def setUp(self):
        # check py version: text may be unicode or str on Python 2
        if sys.version_info[0] == 2:
            self.text_type = [unicode, str]
        else:
            self.text_type = [str]

        self.hn = HN()
        self.top_stories = list(self.hn.get_stories())
        self.newest_stories = list(self.hn.get_stories(story_type='newest'))
        self.best_stories = list(self.hn.get_stories(story_type='best'))

    def tearDown(self):
        pass

    def _check_field_types(self, stories):
        """
        Asserts the data type of each field of each story in `stories`.
        """
        for item in stories:
            # testing for unicode or string
            # because the types are mixed sometimes
            assert type(item.rank) == int
            assert type(item.story_id) == int
            assert type(item.title) in self.text_type
            assert type(item.link) in self.text_type
            assert type(item.domain) in self.text_type
            assert type(item.points) == int
            assert type(item.submitter) in self.text_type
            assert type(item.published_time) in self.text_type
            assert type(item.submitter_profile) in self.text_type
            assert type(item.num_comments) == int
            assert type(item.comments_link) in self.text_type
            assert type(item.is_self) == bool

    def test_stories_dict_structure_top(self):
        """
        Checks data type of each field of each story from front page.
        """
        self._check_field_types(self.top_stories)

    def test_stories_dict_structure_newest(self):
        """
        Checks data type of each field of each story from newest page
        """
        self._check_field_types(self.newest_stories)

    def test_stories_dict_structure_best(self):
        """
        Checks data type of each field of each story from best page
        """
        self._check_field_types(self.best_stories)

    def test_stories_dict_length_top(self):
        """
        Checks if the dict returned by scraping the front page of HN is 30.
        """
        self.assertEqual(len(self.top_stories), 30)

    def test_stories_dict_length_best(self):
        """
        Checks if the dict returned by scraping the best page of HN is 30.
        """
        self.assertEqual(len(self.best_stories), 30)

    def test_stories_dict_length_top_newest(self):
        """
        Checks if the dict returned by scraping the newest page of HN is 30.
        """
        self.assertEqual(len(self.newest_stories), 30)
Example #10
0
from hn import HN

hn = HN()

# Request 60 entries of the newest listing (the first 2 pages) and
# print the rank and title of each one.
newest_stories = hn.get_stories(story_type='newest', limit=60)
for story in newest_stories:
    print(story.rank, story.title)

# Select messages

Example #11
0
#!/usr/bin/env python

from hn import HN, Story

hn = HN()

# a generator over 30 stories from top page
top_iter = hn.get_stories(limit=30)


# Front page: one UTF-8 encoded title per story.
for story in top_iter:
    print(story.title.encode('utf-8'))
    # print('[{0}] "{1}" by {2}'.format(story.points, story.title,
    #                                   story.submitter))


# The 10 latest stories, each followed by a separator and an empty line.
for story in hn.get_stories(story_type='newest', limit=10):
    print(story.title.encode('utf-8'))
    print('*' * 50)
    print('')


# Every front-page story together with its first comment
# (None when the story has no comments).
for story in hn.get_stories():
    print(story.title.encode('utf-8'))
    comments = story.get_comments()
    if len(comments) > 0:
        top_comment = comments[0]
    else:
        top_comment = None
    print(top_comment)
    print('*' * 10)
Example #12
0
class TestStoriesDict(unittest.TestCase):
    """
    Checks the field types and the number of stories parsed from the
    front, best and newest pages, with the HTTP GET requests answered
    from content registered through httpretty.
    """

    def setUp(self):
        httpretty.HTTPretty.enable()
        # URL -> name of the content file served for it
        canned_pages = (
            ('https://news.ycombinator.com/', 'index.html'),
            ('%s/%s' % (constants.BASE_URL, 'best'), 'best.html'),
            ('%s/%s' % (constants.BASE_URL, 'newest'), 'newest.html'),
        )
        for url, fixture in canned_pages:
            httpretty.register_uri(httpretty.GET, url,
                                   body=get_content(fixture))

        # check py version: text may be unicode or str on Python 2
        if sys.version_info[0] == 2:
            self.text_type = [unicode, str]
        else:
            self.text_type = [str]

        self.hn = HN()
        self.top_stories = list(self.hn.get_stories())
        self.newest_stories = list(self.hn.get_stories(story_type='newest'))
        self.best_stories = list(self.hn.get_stories(story_type='best'))

    def tearDown(self):
        httpretty.HTTPretty.disable()

    def _check_field_types(self, stories):
        """
        Asserts the data type of each field of each story in `stories`.
        """
        for item in stories:
            # testing for unicode or string
            # because the types are mixed sometimes
            assert type(item.rank) == int
            assert type(item.story_id) == int
            assert type(item.title) in self.text_type
            assert type(item.link) in self.text_type
            assert type(item.domain) in self.text_type
            assert type(item.points) == int
            assert type(item.submitter) in self.text_type
            assert type(item.published_time) in self.text_type
            assert type(item.submitter_profile) in self.text_type
            assert type(item.num_comments) == int
            assert type(item.comments_link) in self.text_type
            assert type(item.is_self) == bool

    def test_stories_dict_structure_top(self):
        """
        Checks data type of each field of each story from front page.
        """
        self._check_field_types(self.top_stories)

    def test_stories_dict_structure_newest(self):
        """
        Checks data type of each field of each story from newest page
        """
        self._check_field_types(self.newest_stories)

    def test_stories_dict_structure_best(self):
        """
        Checks data type of each field of each story from best page
        """
        self._check_field_types(self.best_stories)

    def test_stories_dict_length_top(self):
        """
        Checks if the dict returned by scraping the front page of HN is 30.
        """
        self.assertEqual(len(self.top_stories), 30)

    def test_stories_dict_length_best(self):
        """
        Checks if the dict returned by scraping the best page of HN is 30.
        """
        self.assertEqual(len(self.best_stories), 30)

    def test_stories_dict_length_top_newest(self):
        """
        Checks if the dict returned by scraping the newest page of HN is 30.
        """
        self.assertEqual(len(self.newest_stories), 30)
Example #13
0
#!/usr/bin/env python

from hn import HN

# Example script: prints stories from the homepage and from the /best page.
# print() is called with a single argument throughout, which produces the
# same output on Python 2 and Python 3.
hn = HN()


# print top 10 stories from homepage
for story in hn.get_stories()[:10]:
    story.print_story()
    print('*' * 50)
    print('')

"""
# print 10 latest stories
for story in hn.get_stories(story_type='newest')[:10]:
    story.print_story()
    print '*' * 50
    print ''

# print all self posts from the homepage
for story in hn.get_stories():
    if story.is_self_post:
        story.print_story()
        print '*' * 50
        print ''
"""

# print the top 10 stories from /best page
for story in hn.get_stories(story_type='best')[:10]:
    story.print_story()
Example #14
0
	# Usage text: one line per supported command and its arguments.
	print "[*] Usage: "+sys.argv[0]+" <commands>"
	print "[*] Commands: "
	print "[*] help/h/? = show help"
	print "[*] top/t <amount> = view top stories. Additional argument: sort (points/name/rank), default is by rank."
	print "[*] comment/cm <index> <amount> = view top comments from a story. Additional argument: firstlevel (f/all), default is all."
	print "[*] open/o <index> = open the story in a web browser."

# "top"/"t": fetch the requested number of stories and pick a sort order.
elif sys.argv[1].lower() in ("top", "t"):
	# The amount argument is mandatory and has to consist of digits only.
	if len(sys.argv) < 3:
		print "[!] Invalid amount of arguments! Exiting."
		sys.exit(1)
	if not sys.argv[2].isdigit():
		print "[!] Amount of stories shown needs to be a number! Exiting."
		sys.exit(1)

	top_iter = hn.get_stories(limit=int(sys.argv[2]))
	# Materialise the result and cap it at the requested amount.
	stories = list(top_iter)[:int(sys.argv[2])]
	# Remember each story's position in the fetched order.
	for index, story in enumerate(stories):
		story.index = index
	# No sort argument given: append the default so argv[3] always exists.
	if len(sys.argv) <= 3:
		sys.argv.append('rank')
	sort_type = sys.argv[3].lower()
	if not (sort_type in ("points", "name", "rank")):
		print "[!] Invalid sorting argument - defaulting to rank."
		sort_type = "rank"

	if sort_type == "rank":
		# Nothing to do: the stories are left in the order they were fetched.
		pass
	elif sort_type == "points":
		tuples, newstories = [], []
		for index, story in enumerate(stories):
Example #15
0
# Before anything install the Hackernews API
from hn import HN

# Example script: prints stories from the homepage and from the /best page.
# print() is called with a single argument throughout, which produces the
# same output on Python 2 and Python 3.
hn = HN()


# print top 10 stories from homepage
for story in hn.get_stories()[:10]:
    story.print_story()
    print("*" * 50)
    print("")

"""
# print 10 latest stories
for story in hn.get_stories(story_type='newest')[:10]:
    story.print_story()
    print '*' * 50
    print ''

# print all self posts from the homepage
for story in hn.get_stories():
    if story.is_self_post:
        story.print_story()
        print '*' * 50
        print ''
"""

# print the top 10 stories from /best page
for story in hn.get_stories(story_type="best")[:10]:
    story.print_story()
    print("*" * 50)
Example #16
0
#!/usr/bin/env python

from hn import HN, Story

hn = HN()

# Iterator over 30 stories of the top page.
top_iter = hn.get_stories(limit=30)


# Show the UTF-8 encoded title of every top story.
for story in top_iter:
    encoded_title = story.title.encode('utf-8')
    print(encoded_title)
    # print('[{0}] "{1}" by {2}'.format(story.points, story.title,
    #                                   story.submitter))


# Show the 10 latest stories, each followed by a separator line
# and an empty line.
latest = hn.get_stories(story_type='newest', limit=10)
for story in latest:
    print(story.title.encode('utf-8'))
    print('*' * 50)
    print('')


# Show every front-page story with its first comment, or None when
# the story has no comments.
for story in hn.get_stories():
    print(story.title.encode('utf-8'))
    comments = story.get_comments()
    first_comment = None
    if len(comments) > 0:
        first_comment = comments[0]
    print(first_comment)
    print('*' * 10)
Example #17
0
class TestPagination(unittest.TestCase):
    """
    Tests for the "More" link lookup, the row zipping and the
    limit-driven pagination of HN, with the HTTP GET requests answered
    from content registered through httpretty.
    """

    def setUp(self):
        httpretty.HTTPretty.enable()
        # URL -> name of the content file served for it
        canned_pages = (
            ('https://news.ycombinator.com/', 'index.html'),
            ('%s/%s' % (constants.BASE_URL, 'best'), 'best.html'),
            ('%s/%s' % (constants.BASE_URL, 'newest'), 'newest.html'),
            ('%s/%s' % (constants.BASE_URL, 'x?fnid=WK2fLO5cPAJ9DnZbm8XOFR'),
             'best2.html'),
            ('%s/%s' % (constants.BASE_URL, 'news2'), 'news2.html'),
        )
        for url, fixture in canned_pages:
            httpretty.register_uri(httpretty.GET, url,
                                   body=get_content(fixture))

        # check py version
        self.PY2 = sys.version_info[0] == 2
        self.hn = HN()

    def tearDown(self):
        httpretty.HTTPretty.disable()

    def _assert_more_link_length(self, expected, tail, **soup_args):
        """
        Fetches a page, keeps the last `tail` characters of its next-page
        reference and compares their count with the length of `expected`.
        """
        soup = utils.get_soup(**soup_args)
        fnid = self.hn._get_next_page(soup)[-tail:]
        self.assertEqual(len(fnid), len(expected))

    def _assert_story_count(self, expected, **query):
        """
        Consumes get_stories(**query) and asserts the number of stories.
        """
        stories = list(self.hn.get_stories(**query))
        self.assertEqual(len(stories), expected)

    def test_more_link_top(self):
        """
        Checks if the "More" link at the bottom of homepage works.
        """
        self._assert_more_link_length('news2', 5)

    def test_more_link_best(self):
        """
        Checks if the "More" link at the bottom of best page works.
        """
        self._assert_more_link_length('x?fnid=te9bsVN2BAx0XOpRmUjcY4', 29,
                                      page='best')

    def test_more_link_newest(self):
        """
        Checks if the "More" link at the bottom of newest page works.
        """
        self._assert_more_link_length('x?fnid=te9bsVN2BAx0XOpRmUjcY4', 29,
                                      page='newest')

    def test_get_zipped_rows(self):
        """
        Tests HN._get_zipped_rows for best page.
        """
        soup = utils.get_soup(page='best')
        zipped = self.hn._get_zipped_rows(soup)
        if not self.PY2:
            # outside Python 2 the rows are consumed into a list
            # before they are counted
            zipped = list(zipped)
        self.assertEqual(len(zipped), 30)

    def test_pagination_top_for_0_limit(self):
        """
        Checks if the pagination works for 0 limit.
        """
        self._assert_story_count(30, limit=0)

    def test_pagination_top_for_2_pages(self):
        """
        Checks if the pagination works for the front page.
        """
        self._assert_story_count(2 * 30, limit=2 * 30)

    def test_pagination_newest_for_3_pages(self):
        """
        Checks if the pagination works for the newest page.
        """
        self._assert_story_count(3 * 30, story_type='newest', limit=3 * 30)

    def test_pagination_best_for_2_pages(self):
        """
        Checks if the pagination works for the best page.
        """
        self._assert_story_count(2 * 30, story_type='best', limit=2 * 30)
Example #18
0
from time import sleep
from subprocess import call
hn = HN()

#myre = '[fF]irefox'

# Title patterns that trigger an announcement.
myre = '[jJ][sS]'
myre2 = '[jJ]avascript'

# Scan the homepage stories over and over, pausing 60 seconds between
# scans. A story whose title matches either pattern is printed, passed
# to the external `say` command, and then the script waits for input.
while True:
    for story in hn.get_stories():
        if not (re.search(myre, story.title) or re.search(myre2, story.title)):
            continue
        print(story.title)
        call(['say', story.title])
        var = raw_input("Continue?")
    sleep(60)

#print('[{0}] "{1}" by {2}'.format(story.points, story.title, story.submitter))

'''
# print 10 latest stories
for story in hn.get_stories(story_type='newest')[:10]:
    story.title
    print('*' * 50)
    print('')
Example #19
0
def get_item_soup(story_id):
    """
    Builds the 'item?id=<story_id>' page name for the given story and
    returns whatever get_soup produces for it (a bs4 object of the
    requested story).
    """
    item_page = 'item?id=' + str(story_id)
    return get_soup(page=item_page)

########NEW FILE########
__FILENAME__ = my_test_bot
#!/usr/bin/env python

from hn import HN, Story

# Example script: prints the titles of the top stories, then the titles
# of the 10 latest stories, each followed by a separator line.
hn = HN()

top_iter = hn.get_stories(limit=30) # a generator over 30 stories from top page


# print top stories from homepage
for story in top_iter:
    print(story.title)
    #print('[{0}] "{1}" by {2}'.format(story.points, story.title, story.submitter))


# print 10 latest stories
for story in hn.get_stories(story_type='newest', limit=10):
    # The title must be passed to print(); a bare `story.title` expression
    # statement is evaluated and discarded without showing anything.
    print(story.title)
    print('*' * 50)
    print('')

#!/usr/bin/env python

from hn import HN

# Example script: prints the first 10 homepage stories, each followed by
# a separator line and an empty line.
# print() is called with a single argument throughout, which produces the
# same output on Python 2 and Python 3.
hn = HN()


# print top 10 stories from homepage
for story in hn.get_stories()[:10]:
    print(story)
    print('*' * 50)
    print('')

"""
# print 10 latest stories
for story in hn.get_stories(story_type='newest')[:10]:
    story["title"]
    print '*' * 50
    print ''
"""

"""
# print the top 10 stories from /best page
for story in hn.get_stories(story_type='best')[:10]:
    story["title"]
    print '*' * 50
    print ''
"""
Example #21
0
#!/usr/bin/env python

from hn import HN, Story

# Example script: walks several story listings and prints their titles,
# plus the first comment of every front-page story.
hn = HN()

top_iter = hn.get_stories(
    limit=30)  # a generator over 30 stories from top page

# print top stories from homepage
for story in top_iter:
    print(story.title)
    #print('[{0}] "{1}" by {2}'.format(story.points, story.title, story.submitter))

# print 10 latest stories
for story in hn.get_stories(story_type='newest', limit=10):
    # The title must be passed to print(); a bare `story.title` expression
    # statement is evaluated and discarded without showing anything.
    print(story.title)
    print('*' * 50)
    print('')

# for each story on front page, print top comment
for story in hn.get_stories():
    print(story.title)
    comments = story.get_comments()
    print(comments[0] if len(comments) > 0 else None)
    print('*' * 10)

# print top 5 comments with nesting for top 5 stories
# NOTE(review): the comments are fetched but not printed in the visible
# code -- the remainder of this loop appears to be missing.
for story in hn.get_stories(story_type='best', limit=5):
    print(story.title)
    comments = story.get_comments()
class Test_Stories_Dict(object):
    """
    Checks the field types and the number of stories scraped from the
    front, newest and best pages.
    """

    @classmethod
    def setup(cls):
        # check py version: text may be unicode or str on Python 2
        if sys.version_info[0] == 2:
            cls.text_type = [unicode, str]
        else:
            cls.text_type = [str]

        cls.hn = HN()
        cls.top_stories = cls.hn.get_stories()
        cls.newest_stories = cls.hn.get_stories(story_type='newest')
        cls.best_stories = cls.hn.get_stories(story_type='best')

    @classmethod
    def teardown(cls):
        pass

    def _check_field_types(self, stories):
        """
        Asserts the data type of each field of each story in `stories`.
        """
        for item in stories:
            # testing for unicode or string
            # because the types are mixed sometimes
            assert type(item.rank) == int
            assert type(item.story_id) == int
            assert type(item.title) in self.text_type
            assert type(item.link) in self.text_type
            assert type(item.domain) in self.text_type
            assert type(item.points) == int
            assert type(item.submitter) in self.text_type
            assert type(item.published_time) in self.text_type
            assert type(item.submitter_profile) in self.text_type
            assert type(item.num_comments) == int
            assert type(item.comments_link) in self.text_type
            assert type(item.is_self) == bool

    @with_setup(setup, teardown)
    def test_stories_dict_structure_top(self):
        """
        Checks data type of each field of each story from front page.
        """
        self._check_field_types(self.top_stories)

    @with_setup(setup, teardown)
    def test_stories_dict_structure_newest(self):
        """
        Checks data type of each field of each story from newest page
        """
        self._check_field_types(self.newest_stories)

    @with_setup(setup, teardown)
    def test_stories_dict_structure_best(self):
        """
        Checks data type of each field of each story from best page
        """
        self._check_field_types(self.best_stories)

    @with_setup(setup, teardown)
    def test_stories_dict_length_top(self):
        """
        Checks if the dict returned by scraping the front page of HN is 30.
        """
        assert len(self.top_stories) == 30

    @with_setup(setup, teardown)
    def test_stories_dict_length_best(self):
        """
        Checks if the dict returned by scraping the best page of HN is 30.
        """
        assert len(self.best_stories) == 30

    @with_setup(setup, teardown)
    def test_stories_dict_length_top_newest(self):
        """
        Checks if the dict returned by scraping the newest page of HN is 30.
        """
        assert len(self.newest_stories) == 30
Example #23
0
#!/usr/bin/env python

from hn import HN

# Client object shared by the examples in this script.
hn = HN()

# NOTE(review): the only loop over top_iter sits inside a string literal
# further down, so nothing in the live code consumes it.
top_iter = hn.get_stories(limit=60) # a generator over 60 stories from top page

'''
# print top stories from homepage
for story in top_iter:
    print(story.title)
    #print('[{0}] "{1}" by {2}'.format(story.points, story.title, story.submitter))
'''
'''
# print 10 latest stories
for story in hn.get_stories(story_type='newest', limit=10):
    story.title
    print('*' * 50)
    print('')
'''

'''
# for each story on front page, print top comment
for story in hn.get_stories():
    print(story.title)
    comments = story.get_comments()
    print(comments[0] if len(comments) > 0 else None)
    print('*' * 10)
'''
Example #24
0
class TestStoriesDict(unittest.TestCase):

    def setUp(self):
        httpretty.HTTPretty.enable()
        httpretty.register_uri(httpretty.GET,
                               'https://news.ycombinator.com/',
                               body=get_content('index.html'))
        httpretty.register_uri(httpretty.GET, '%s/%s' % (constants.BASE_URL,
                                                         'best'),
                               body=get_content('best.html'))
        httpretty.register_uri(httpretty.GET, '%s/%s' % (constants.BASE_URL,
                                                         'newest'),
                               body=get_content('newest.html'))

        # check py version
        PY2 = sys.version_info[0] == 2
        if not PY2:
            self.text_type = [str]
        else:
            self.text_type = [unicode, str]

        self.hn = HN()
        self.top_stories = [story for story in self.hn.get_stories()]
        self.newest_stories = [story for story in self.hn.get_stories(
            story_type='newest')]
        self.best_stories = [story for story in self.hn.get_stories(
            story_type='best')]

    def tearDown(self):
        httpretty.HTTPretty.disable()

    def test_stories_dict_structure_top(self):
        """
        Checks data type of each field of each story from front page.
        """
        for story in self.top_stories:
            # testing for unicode or string
            # because the types are mixed sometimes
            assert type(story.rank) == int
            assert type(story.story_id) == int
            assert type(story.title) in self.text_type
            assert type(story.link) in self.text_type
            assert type(story.domain) in self.text_type
            assert type(story.points) == int
            assert type(story.submitter) in self.text_type
            assert type(story.published_time) in self.text_type
            assert type(story.submitter_profile) in self.text_type
            assert type(story.num_comments) == int
            assert type(story.comments_link) in self.text_type
            assert type(story.is_self) == bool

    def test_stories_dict_structure_newest(self):
        """
        Checks data type of each field of each story from newest page
        """
        for story in self.newest_stories:
            # testing for unicode or string
            # because the types are mixed sometimes
            assert type(story.rank) == int
            assert type(story.story_id) == int
            assert type(story.title) in self.text_type
            assert type(story.link) in self.text_type
            assert type(story.domain) in self.text_type
            assert type(story.points) == int
            assert type(story.submitter) in self.text_type
            assert type(story.published_time) in self.text_type
            assert type(story.submitter_profile) in self.text_type
            assert type(story.num_comments) == int
            assert type(story.comments_link) in self.text_type
            assert type(story.is_self) == bool

    def test_stories_dict_structure_best(self):
        """
        Checks data type of each field of each story from best page
        """
        for story in self.best_stories:
            # testing for unicode or string
            # because the types are mixed sometimes
            assert type(story.rank) == int
            assert type(story.story_id) == int
            assert type(story.title) in self.text_type
            assert type(story.link) in self.text_type
            assert type(story.domain) in self.text_type
            assert type(story.points) == int
            assert type(story.submitter) in self.text_type
            assert type(story.published_time) in self.text_type
            assert type(story.submitter_profile) in self.text_type
            assert type(story.num_comments) == int
            assert type(story.comments_link) in self.text_type
            assert type(story.is_self) == bool

    def test_stories_dict_length_top(self):
        """
        Checks if the dict returned by scraping the front page of HN is 30.
        """
        self.assertEqual(len(self.top_stories), 30)

    def test_stories_dict_length_best(self):
        """
        Checks if the dict returned by scraping the best page of HN is 30.
        """
        self.assertEqual(len(self.best_stories), 30)

    def test_stories_dict_length_top_newest(self):
        """
        Checks if the dict returned by scraping the newest page of HN is 30.
        """
        self.assertEqual(len(self.newest_stories), 30)
Example #25
0
class TestPagination(unittest.TestCase):
    """
    Exercises the "More" link lookup, the row zipping helper and the
    `limit` handling of HN.get_stories().
    """

    def setUp(self):
        # remember whether the interpreter is Python 2
        self.PY2 = sys.version_info[0] == 2
        self.hn = HN()

    def tearDown(self):
        """Nothing to release after a test."""

    def test_more_link_top(self):
        """
        The next-page value found on the homepage must be as long as
        'news2'.
        """
        next_page = self.hn._get_next_page(utils.get_soup())
        self.assertEqual(len(next_page), len('news2'))

    def test_more_link_best(self):
        """
        The next-page value found on the best page must be as long as a
        sample fnid link.
        """
        page_soup = utils.get_soup(page='best')
        next_page = self.hn._get_next_page(page_soup)
        sample = 'x?fnid=te9bsVN2BAx0XOpRmUjcY4'
        self.assertEqual(len(next_page), len(sample))

    def test_more_link_newest(self):
        """
        The next-page value found on the newest page must be as long as a
        sample fnid link.
        """
        page_soup = utils.get_soup(page='newest')
        next_page = self.hn._get_next_page(page_soup)
        sample = 'x?fnid=te9bsVN2BAx0XOpRmUjcY4'
        self.assertEqual(len(next_page), len(sample))

    def test_get_zipped_rows(self):
        """
        HN._get_zipped_rows must produce 30 rows for the best page.
        """
        zipped = self.hn._get_zipped_rows(utils.get_soup(page='best'))
        if not self.PY2:
            # materialise the rows before counting them (presumably the
            # result is a lazy iterator outside Python 2)
            zipped = list(zipped)
        self.assertEqual(len(zipped), 30)

    def test_pagination_top_for_0_limit(self):
        """
        Asking for limit=0 is expected to yield 30 stories.
        """
        fetched = list(self.hn.get_stories(limit=0))
        self.assertEqual(len(fetched), 30)

    def test_pagination_top_for_2_pages(self):
        """
        Two pages' worth of stories (2 * 30) from the front page.
        """
        wanted = 2 * 30
        fetched = list(self.hn.get_stories(limit=wanted))
        self.assertEqual(len(fetched), wanted)

    def test_pagination_newest_for_3_pages(self):
        """
        Three pages' worth of stories (3 * 30) from the newest page.
        """
        wanted = 3 * 30
        fetched = list(self.hn.get_stories(story_type='newest',
                                           limit=wanted))
        self.assertEqual(len(fetched), wanted)

    def test_pagination_best_for_2_pages(self):
        """
        Two pages' worth of stories (2 * 30) from the best page.
        """
        wanted = 2 * 30
        fetched = list(self.hn.get_stories(story_type='best', limit=wanted))
        self.assertEqual(len(fetched), wanted)
Example #26
0
from hn import HN

hn = HN()

'''
# print top 10 stories from homepage
for story in hn.get_stories():
    print story
    print '*' * 50
    print ''
'''
'''
# print 10 latest stories
for story in hn.get_stories(story_type='newest')[:10]:
    story["title"]
    print '*' * 50
    print ''
'''

'''
# print the top 10 stories from /best page
for story in hn.get_stories(story_type='best')[:10]:
    print story["title"]
    print '*' * 50
    print ''
'''

# print the first 2 pages of newest stories
for story in hn.get_stories(story_type='newest', page_limit=2):
    print story["rank"], story["title"]
    print