Example #1
0
def get_news(from_num, num_headlines):
    """Return the titles of the current top Hacker News stories.

    Args:
        from_num: Accepted for interface compatibility; this function
            does not read it.
        num_headlines: Passed as ``limit`` to ``top_stories``.

    Returns:
        A list of story titles, each with a trailing newline.
    """
    hn = HackerNews()
    # The parenthesised single-argument form prints the same text on
    # Python 2 and is also valid syntax on Python 3.
    print("Starting HN")
    return [hn.get_item(story_id).title + "\n"
            for story_id in hn.top_stories(limit=num_headlines)]
def _fit_title(title, max_len=76):
    """Drop whole trailing words from *title* until it fits in *max_len*."""
    while len(title) > max_len and ' ' in title:
        title = title.rsplit(' ', 1)[0]
    # A single over-long word cannot be shortened on a word boundary.
    return title[:max_len]


def refresh_posts():
    """Post the top 10 Hacker News stories through ``api.update_status``.

    Each status is the (possibly shortened) title, a ``Link:`` line when
    the story has an external URL, and a ``Cmts:`` link to the story's
    Hacker News discussion page.  A status rejected with
    ``tweepy.error.TweepError`` is skipped.
    """
    hn = HackerNews()
    for story_id in hn.top_stories(limit=10):  # Only the top 10 posts
        story = hn.get_item(story_id)

        # Keep the title within 76 characters to leave room for the links.
        parts = [_fit_title(story.title) + '\n']

        # Stories without an external link get only the comments link.
        if story.url is not None:
            parts.append('Link: ' + story.url + '\n')

        parts.append('Cmts: https://news.ycombinator.com/item?id=%s' %
                     str(story.item_id))

        try:
            api.update_status(''.join(parts))
        except tweepy.error.TweepError:
            # If the tweet is a duplicate, ignore the post and move on.
            continue
class Hacker(object):
    """Neovim commands for listing and opening Hacker News top stories."""

    def __init__(self, vim):
        self.vim = vim
        self.hn = HackerNews()
        # Maps a story title (one buffer line) to its URL; stays None
        # until fill_buffer() has run.
        self.urls = None

    @neovim.command("Test")
    def test(self):
        """Open a vertical split (the ``Test`` command)."""
        self.vim.command("vsplit")

    @neovim.command('HackerNews')
    def fill_buffer(self):
        """Show the first 30 top-story titles in a ``HackerNews`` buffer.

        Also records each title's URL in ``self.urls`` so that
        ``HackerOpen`` can look it up from the current line.
        """
        stories = []
        urls = {}
        for story in self.hn.top_stories()[0:30]:
            item = self.hn.get_item(story)
            stories.append(item.title)
            urls[item.title] = item.url

        self.vim.command("split HackerNews")
        self.vim.command("buffer HackerNews")
        self.vim.command("set buftype=nofile")
        self.vim.command("set bufhidden=hide")
        self.vim.command("setlocal noswapfile")
        self.vim.current.buffer[:] = stories
        self.urls = urls

    @neovim.command('HackerOpen')
    def autocmd_handler(self):
        """Open the URL of the story on the current line in a browser tab.

        Does nothing except echo a short notice when no URL is known for
        the line: the story list has not been loaded yet, the line is not
        a story title, or the story has no external URL.
        """
        url = (self.urls or {}).get(self.vim.current.line)
        if url is None:
            self.vim.command('echo "HackerOpen: no URL for this line"')
            return
        webbrowser.open_new_tab(url)
def getHNData(verbose=False, limit=100, sub="showstories"):
    """Fetch lower-cased Hacker News titles for one story listing.

    The requests run inside a ``hoverpy.HoverPy`` context created with
    ``recordMode="once"`` and a per-listing database file under ``data/``.

    Args:
        verbose: When true, print every title as it is fetched.
        limit: Passed as ``limit`` to the listing call.
        sub: One of "showstories", "askstories", "jobstories" or
            "topstories"; any other value raises ``KeyError``.

    Returns:
        A list of lower-cased story titles.
    """
    from hackernews import HackerNews
    from hackernews import settings
    import hoverpy
    import time
    import os

    db_file = "data/hn.%s.db" % sub
    with hoverpy.HoverPy(recordMode="once", dbpath=db_file) as hp:
        # Outside capture mode the v0 endpoint is switched to plain http.
        # NOTE(review): presumably so recorded responses can be replayed
        # through the proxy -- confirm against hoverpy's documentation.
        if hp.mode() != "capture":
            settings.supported_api_versions["v0"] = (
                "http://hacker-news.firebaseio.com/v0/")

        client = HackerNews()
        lowered_titles = []
        print("GETTING HACKERNEWS %s DATA" % sub)
        listing_by_name = {
            "showstories": client.show_stories,
            "askstories": client.ask_stories,
            "jobstories": client.job_stories,
            "topstories": client.top_stories,
        }

        began = time.time()
        for item_id in listing_by_name[sub](limit=limit):
            lowered = client.get_item(item_id).title.lower()
            if verbose:
                print(lowered)
            lowered_titles.append(lowered)

        elapsed = time.time() - began
        print("got %i hackernews titles in %f seconds" %
              (len(lowered_titles), elapsed))
        return lowered_titles
Example #5
0
def handler(event, context):
    """Return the titles of the top 10 stories as a JSON array string.

    Neither ``event`` nor ``context`` is read.
    """
    client = HackerNews()
    titles = [client.get_item(item_id).title
              for item_id in client.top_stories(limit=10)]
    return json.dumps(titles)
Example #6
0
class TestGetItem(unittest.TestCase):
    """Checks ``HackerNews.get_item`` against item 8863."""

    def setUp(self):
        # A fresh client for every test method.
        self.hn = HackerNews()

    def test_get_item(self):
        """Item 8863 comes back as an ``Item`` submitted by "dhouston"."""
        wanted_id = 8863
        fetched = self.hn.get_item(wanted_id)
        self.assertIsInstance(fetched, Item)
        self.assertEqual(fetched.item_id, wanted_id)
        self.assertEqual(fetched.by, "dhouston")
Example #7
0
class TestGetItem(unittest.TestCase):
    """Fetches a known item and verifies its type, ID and author."""

    def setUp(self):
        # Each test gets its own client instance.
        self.hn = HackerNews()

    def test_get_item(self):
        """``get_item(8863)`` yields an ``Item`` whose author is "dhouston"."""
        result = self.hn.get_item(8863)
        self.assertIsInstance(result, Item)
        observed = (result.item_id, result.by)
        self.assertEqual(observed[0], 8863)
        self.assertEqual(observed[1], "dhouston")
def get_hackernews_article():
    """Pick a random top story and return its title, plus URL if it has one.

    Returns:
        The title alone when the story has no URL, otherwise the title
        and the URL separated by a newline.
    """
    client = HackerNews()
    picked = client.get_item(random.choice(client.top_stories()))

    headline = picked.title
    if picked.url is None:
        return headline
    return headline + "\n" + picked.url
    def getHN_stories(self, article_limit):
        """Fetch top stories, requesting 1.5x ``article_limit`` of them.

        Args:
            article_limit: Base number of articles; the request limit is
                ``int(article_limit * 1.5)``.

        Returns:
            A list of the items returned by ``get_item`` for each top
            story ID.
        """
        client = HackerNews()

        fetch_count = int(article_limit * 1.5)
        return [client.get_item(item_id)
                for item_id in client.top_stories(limit=fetch_count)]
Example #10
0
class TestGetItem(unittest.TestCase):
    """Exercises ``HackerNews.get_item`` with a known ID and an invalid ID."""

    def setUp(self):
        self.hn = HackerNews()

    def tearDown(self):
        # Close the client's session after every test.
        self.hn.session.close()

    def test_get_item(self):
        """Item 8863 has the expected type, ID, author and repr."""
        fetched = self.hn.get_item(8863)
        self.assertIsInstance(fetched, Item)
        self.assertEqual(fetched.item_id, 8863)
        self.assertEqual(fetched.by, "dhouston")
        expected_repr = ('<hackernews.Item: 8863 - My YC app: '
                         'Dropbox - Throw away your USB drive>')
        self.assertEqual(repr(fetched), expected_repr)

    def test_invalid_item(self):
        """ID 0 is rejected with ``InvalidItemID``."""
        with self.assertRaises(InvalidItemID):
            self.hn.get_item(0)

    def test_get_item_expand(self):
        """With ``expand=True`` the author and first kid are objects."""
        expanded = self.hn.get_item(8863, expand=True)
        self.assertIsInstance(expanded, Item)
        self.assertEqual(expanded.item_id, 8863)
        self.assertIsInstance(expanded.by, User)
        first_kid = expanded.kids[0]
        self.assertIsInstance(first_kid, Item)
def getNews():
    """Fetch the top 100 Hacker News stories.

    Returns:
        A list with the ``get_item`` result for each top story ID, in
        listing order.
    """
    client = HackerNews()

    # Collect every story ID first, then resolve each one to its item.
    top_ids = list(client.top_stories(limit=100))
    return [client.get_item(item_id) for item_id in top_ids]
Example #12
0
def updateHackerNews():
    """Replace the ``discussion`` table with the current top 30 stories.

    The table is truncated first, then one row (story id, title,
    discussion URL, score) is inserted per story.

    Returns:
        The string "success".
    """
    database_execute('truncate discussion')
    client = HackerNews()
    for item_id in client.top_stories(limit=30):
        fetched = client.get_item(item_id)
        discussion_url = ("https://news.ycombinator.com/item?id="
                          + str(item_id))
        # Single quotes are removed because the title is spliced into a
        # quoted SQL literal below.
        bare_title = fetched.title.replace("'", "")
        row = (item_id, bare_title, discussion_url, fetched.score)
        # NOTE(review): the statement is built by string formatting from
        # remote data; switch to a parameterized query if
        # database_execute supports bound parameters.
        database_execute(
            "insert into discussion values('%s','%s','%s','%s')" % row)
    return "success"
Example #13
0
def update_hackernews(user, update):
    """Add posts for top-15 stories not yet present in the user's feed.

    Args:
        user: Object whose ``hackerNewsFeed`` holds the labels of stories
            already delivered; labels of new posts are appended to it.
        update: Collection that receives ``to_json()`` of each new post.

    Returns:
        The same ``update`` object.
    """
    client = HackerNews()

    for item_id in client.top_stories(limit=15):
        entry = postObject()
        fetched = client.get_item(item_id)
        body = fetched.text

        entry.mainlabel = fetched.title.encode('ascii', 'ignore')
        entry.time = str(fetched.submission_time)
        entry.sublabel = str(fetched.score) + " points by " + fetched.by
        # Stories without text get a placeholder message.
        if body is None:
            entry.message = "Read more"
        else:
            entry.message = body
        entry.type = 'hackernews'
        entry.link = "https://news.ycombinator.com/"

        # Stories whose label is already in the feed are not re-sent.
        if entry.mainlabel in user.hackerNewsFeed:
            continue
        update.append(entry.to_json())
        user.hackerNewsFeed.append(entry.mainlabel)
    return update
Example #14
0
def sync_with_hacker_news():
    """Mirror the top 90 Hacker News stories into ``NewsItem`` rows.

    Stories already stored get their upvote and comment counts refreshed.
    New stories are added, together with one ``UserNewsItem`` per
    existing user.  Everything is committed once at the end.
    """
    hn = HackerNews()
    for story_id in hn.top_stories(limit=90):
        story = hn.get_item(story_id)
        persisted_news_item = NewsItem.query.get(story_id)
        if persisted_news_item:
            # %-formatting keeps the Python 2 output ("label id") while
            # remaining valid syntax on Python 3.
            print("Updating story: %s" % story_id)
            persisted_news_item.upvotes = story.score
            persisted_news_item.comments = comment_count(story)
        else:
            print("Adding story: %s" % story_id)
            news_item = NewsItem(
                id=story_id, url=story.url, posted_on=story.submission_time,
                upvotes=story.score,
                comments=comment_count(story))
            db.session.add(news_item)
            # Link the new story to every user.
            for user in User.query.all():
                db.session.add(UserNewsItem(user=user, news_item=news_item))
    db.session.commit()
Example #15
0
    def process(self, msg):
        """
        `hn:` top\n
        `hn: last
        """
        # Replies to ``msg`` once per story with "title - score - url".
        # The ``hn`` parameter selects new stories ("last") or top stories
        # (anything else); ``limit`` is the number of stories requested.
        params = msg.extract_parameters(self.parameters)

        from hackernews import HackerNews
        client = HackerNews()

        if params['hn'] == "last":
            story_ids = client.new_stories(int(params['limit']))
        else:
            story_ids = client.top_stories(int(params['limit']))

        for story_id in story_ids:
            fields = client.get_item(story_id).__dict__
            msg.reply("{title} - {score} - {url}".format(**fields))

        return True
Example #16
0
def scrape_hacker_news():
    """Build unsaved ``StoryModel`` objects for the current top stories.

    Each story's linked page is fetched and parsed.  Any failure while
    fetching or processing a story is printed and that story is skipped.

    Returns:
        A list of ``StoryModel`` instances (not saved by this function).
    """
    hn = HackerNews()
    stories_list = []
    for item_id in hn.top_stories():
        print(item_id)
        try:
            hn_story = hn.get_item(item_id)
            page = urllib2.urlopen(hn_story.url)
            try:
                bs = BeautifulSoup(page.read())
            finally:
                # Release the HTTP handle even if reading/parsing fails.
                page.close()

            # Page text collapsed onto one line; not stored on the model
            # at the moment.
            content = bs.get_text()
            content = ' '.join(word for word in content.split('\n')
                               if word != '')
            story = StoryModel(
                story_id=hn_story.item_id,
                title=hn_story.title,
                link=hn_story.url,
                points=hn_story.score,
                published_time=hn_story.submission_time)

            stories_list.append(story)
        except Exception as e:
            # Best effort: report the problem and move on to the next story.
            print("error while retrieving : %s" % (e))
            continue
    print("the size of the story list is %s" % (len(stories_list)))
    return stories_list
Example #17
0
class HN:
    """Reads Hacker News headlines aloud through a speaker object."""

    def __init__(self, speaker):
        self.speaker = speaker
        self.hn = HackerNews()

    def get_top_stories(self):
        """Print and speak the titles of the top 10 stories."""
        for story_id in self.hn.top_stories(limit=10):
            item = self.hn.get_item(story_id)
            print(item.title)
            self.speaker.say(item.title)

    def check_command(self, data):
        """React to a command containing "news".

        Args:
            data: The command text (or any container supporting ``in``).

        Returns:
            False when a news command arrives while ``internet_on()``
            reports no connection; None in every other case.
        """
        if "news" not in data:
            return None

        if not internet_on():
            self.speaker.say("no internet connection try later")
            return False

        if "check" in data:
            self.get_top_stories()
        return None
Example #18
0
    def test_save_item(self):
        """Save the first five top stories that are not stored yet."""
        hn = HackerNews()
        for item_id in hn.top_stories()[:5]:

            try:
                already_saved = StoryModel.objects.filter(
                    story_id=item_id).count() > 0
            except Exception as e:
                print("Error occured : %s" % (e))
                continue

            # Only stories without an existing row are saved; skipping
            # the missing ones instead would never store anything new.
            if already_saved:
                continue

            hn_story = hn.get_item(item_id)
            story = StoryModel(
                # Record the ID so the existence check above can find
                # this row on a later run.
                story_id=hn_story.item_id,
                title=hn_story.title,
                link=hn_story.url,
                points=hn_story.score,
                published_time=hn_story.submission_time)
            story.save()
Example #19
0
class HackNews:
    """Builds text and JSON-style summaries of Hacker News top stories."""

    def __init__(self):
        self.hn = HackerNews()
        # Accumulates {"title": ...} entries followed by one
        # {"sucess": ...} marker per topStories()/jobAds() call.
        self.jsonObj = []

    def displayHackNews(self, jobsOrHeadlines):
        """Dispatch to ``topStories`` or ``jobAds``.

        Args:
            jobsOrHeadlines: "headlines" or "jobs".

        Returns:
            The text produced by the selected method, or None for any
            other category (after sending a hint through ``resp``).
        """
        if jobsOrHeadlines == "headlines":
            return self.topStories()

        elif jobsOrHeadlines == "jobs":
            return self.jobAds()

        else:
            resp.message("Oops, wrong catagory! Text us: 'HACKNEWS: jobs' or 'HACKNEWS: headlines'")

    @staticmethod
    def _headline(item):
        """Return the text after the first ' - ' in ``str(item)``, minus
        the final character."""
        return str(item).split(' - ', 1)[1][:-1]

    def topStories(self):
        """Return the top 10 headlines, each followed by a blank line.

        Also appends one title entry per headline and a success marker to
        ``self.jsonObj``.
        """
        cleanHeadline = ""
        textReturn = ""

        for story_id in self.hn.top_stories(limit=10):
            cleanHeadline = self._headline(self.hn.get_item(story_id))
            textReturn += cleanHeadline + '\n\n'
            self.jsonObj.append({"title": cleanHeadline})

        # Success reflects whether the last headline was non-empty.
        if cleanHeadline:
            self.jsonObj.append({"sucess": "true"})
        else:
            self.jsonObj.append({"sucess": "false"})

        return textReturn

    def jobAds(self):
        """Return the job postings found among the first top stories.

        Exactly ``maxLoops`` stories are requested and every fetched
        story is inspected, so no item is downloaded only to be
        discarded.

        Returns:
            One headline per line, or a fixed notice when no job was
            found.
        """
        textReturn = ""
        maxLoops = 10

        for story_id in self.hn.top_stories(limit=maxLoops):
            story = self.hn.get_item(story_id)
            if story.item_type != 'job':
                continue

            cleanHeadline = self._headline(story)
            textReturn += cleanHeadline + '\n'

            if cleanHeadline:
                self.jsonObj.append({"title": cleanHeadline})

        if textReturn == "":
            textReturn += "No jobs have been posted in Top Stories, try again tomorrow!"
            self.jsonObj.append({"sucess": "false"})
        else:
            self.jsonObj.append({"sucess": "true"})

        return textReturn

    def convertToJson(self):
        """Return the accumulated list of result entries."""
        return self.jsonObj
Example #20
0
# Parenthesised single-argument prints behave the same on Python 2 and
# are also valid on Python 3.
print("CONNECTED TO " + dbURL)

links = []
tagMap = {}
tagSet = set()

# Make tag set and tag map
for tag in db.tags.find():
    tagSet.add(tag["name"].lower())
    # Make tag map to get back to correct casing
    tagMap[tag["name"].lower()] = tag["name"]

# Get new links
for story_id in hn.top_stories(limit=1000):
    item = hn.get_item(story_id)
    url = item.url
    print(item)

    # Stories without an external link have nothing to download.
    if url is None:
        continue

    # Check if link is already in database
    if db.unrelatedlinks.find_one({'url': url}) is not None:
        continue

    try:
        response = requests.get(url)
    except Exception:
        # Best effort: skip links that cannot be fetched, without also
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        continue

    # Get description
    soup = BeautifulSoup(response.text)
    metas = soup.find_all('meta')
Example #21
0
"""Downloads items from HN

"""

from hackernews import HackerNews
import string

hn = HackerNews()

# Scan items 50..19999 and write every item text mentioning 'amazon' to
# amazon.txt, one per line.
with open('amazon.txt', 'w') as f:
    for i in range(50, 20000):
        item = hn.get_item(i)
        if item.text:
            # Keep printable characters only.  str.join yields a string
            # on both Python 2 and 3; on Python 3 filter() returns an
            # iterator, which made the substring test below always fail.
            s = ''.join(ch for ch in item.text if ch in string.printable)
            if 'amazon' in s:
                print(s)
                print(type(s))
                f.write(s + "\n")
                # Flush per match so partial results survive an
                # interrupted run.
                f.flush()
Example #22
0
class HackNews:
    """Builds text and JSON-style summaries of Hacker News top stories."""

    def __init__(self):
        self.hn = HackerNews()
        # Accumulates {"title": ...} entries followed by one
        # {"sucess": ...} marker per topStories()/jobAds() call.
        self.jsonObj = []

    def displayHackNews(self, jobsOrHeadlines):
        """Dispatch to ``topStories`` or ``jobAds``.

        Args:
            jobsOrHeadlines: "headlines" or "jobs".

        Returns:
            The text produced by the selected method, or None for any
            other category (after sending a hint through ``resp``).
        """
        if jobsOrHeadlines == "headlines":
            return self.topStories()

        elif jobsOrHeadlines == "jobs":
            return self.jobAds()

        else:
            resp.message(
                "Oops, wrong catagory! Text us: 'HACKNEWS: jobs' or 'HACKNEWS: headlines'"
            )

    @staticmethod
    def _headline(item):
        """Return the text after the first ' - ' in ``str(item)``, minus
        the final character."""
        return str(item).split(' - ', 1)[1][:-1]

    def topStories(self):
        """Return the top 10 headlines, each followed by a blank line.

        Also appends one title entry per headline and a success marker to
        ``self.jsonObj``.
        """
        cleanHeadline = ""
        textReturn = ""

        for story_id in self.hn.top_stories(limit=10):
            cleanHeadline = self._headline(self.hn.get_item(story_id))
            textReturn += cleanHeadline + '\n\n'
            self.jsonObj.append({"title": cleanHeadline})

        # Success reflects whether the last headline was non-empty.
        if cleanHeadline:
            self.jsonObj.append({"sucess": "true"})
        else:
            self.jsonObj.append({"sucess": "false"})

        return textReturn

    def jobAds(self):
        """Return the job postings found among the first top stories.

        Exactly ``maxLoops`` stories are requested and every fetched
        story is inspected, so no item is downloaded only to be
        discarded.

        Returns:
            One headline per line, or a fixed notice when no job was
            found.
        """
        textReturn = ""
        maxLoops = 10

        for story_id in self.hn.top_stories(limit=maxLoops):
            story = self.hn.get_item(story_id)
            if story.item_type != 'job':
                continue

            cleanHeadline = self._headline(story)
            textReturn += cleanHeadline + '\n'

            if cleanHeadline:
                self.jsonObj.append({"title": cleanHeadline})

        if textReturn == "":
            textReturn += "No jobs have been posted in Top Stories, try again tomorrow!"
            self.jsonObj.append({"sucess": "false"})
        else:
            self.jsonObj.append({"sucess": "true"})

        return textReturn

    def convertToJson(self):
        """Return the accumulated list of result entries."""
        return self.jsonObj
Example #23
0
from hackernews import HackerNews
from .models import Story

hn = HackerNews()

# Print and store the top 10 stories as Story rows.
for story_id in hn.top_stories(limit=10):
    # Keep the ID and the fetched item under separate names.
    story = hn.get_item(story_id)

    # Parenthesised single-argument prints behave the same on Python 2
    # and are also valid on Python 3.
    print(story.title)
    print(story.url)
    print(story.score)
    print(story.by)
    print(story.submission_time)
    print(story.item_id)

    Story.objects.create(
        title=story.title,
        url=story.url,
        score=story.score,
        submitter=story.by,
        timestamp=story.submission_time,
        hn_id=story.item_id,
    )
Example #24
0
							places.append("New York Bagels")
							places.append("Karl Strauss")

							number = random.randint(0,len(places) - 1)
							sc.rtm_send_message(chan, "You should go to %s to for food." % places[number])						
####JIRA STUFF
						elif "!helpdesk" in message:
							request = message[10:]
							new_issue = j.create_issue(project="IT", summary=request, description="Created by Slack", issuetype={'name':'Service Request'}, reporter={"name": email}) #edit project ID to match.
							sc.rtm_send_message(chan, "Your helpdesk ticket for '%s' has been created." % request)
####Hacker News Stuff
						elif "!hn" in message:
							n=0
							sc.rtm_send_message(chan,"Top 2 HackerNews Stories:")
							for story_id in hn.top_stories(limit=2):
								derp = hn.get_item(story_id)
								derp = str(derp)
								print "derp is:"
								print derp
								herp = derp
								print "herp is:"
								print herp
								derpy = derp.split(":")[1]
								print "derpy is:"
								print derpy
								derpy = derpy.split("-")[0]
								print "derpy is"
								print derpy
								derpy = derpy.strip()
								print "derpy is"
								print derpy
hn = HackerNews()

hn_items = []
index = 0

# Resume from the saved position when one can be loaded; otherwise start
# from the newest item ID.
try:
    hn_id = load_obj('last_hn_id')
    print('loaded id:', hn_id)
except Exception:
    hn_id = hn.get_max_item()
    print('max_id from hn:', hn_id)

# Walk backwards through item IDs until five stories with a URL are
# collected.  The lower bound stops the loop if every fetch keeps
# failing, instead of counting down through negative IDs forever.
while len(hn_items) < 5 and hn_id > 0:
    index += 1
    try:
        item = hn.get_item(hn_id)
    except Exception:
        print('hn_id invalid', hn_id)
        hn_id -= 1
        continue
    if item.item_type == "story" and item.url is not None:
        if len(hn_items) % 5 == 0:
            print(len(hn_items))
        hn_items.append({
            "url": item.url,
            "hn_id": item.item_id,
            "date": item.submission_time,
            "title": item.title
        })
    hn_id -= 1
Example #26
0
"""Downloads items from HN

"""

from hackernews import HackerNews
import string


hn = HackerNews()

# Scan items 50..19999 and write every item text mentioning 'amazon' to
# amazon.txt, one per line.
with open('amazon.txt','w') as f:
    for i in range(50, 20000):
        item = hn.get_item(i)
        if item.text:
            # Keep printable characters only.  str.join yields a string
            # on both Python 2 and 3; on Python 3 filter() returns an
            # iterator, which made the substring test below always fail.
            s = ''.join(ch for ch in item.text if ch in string.printable)
            if 'amazon' in s:
                print(s)
                print(type(s))
                f.write(s + "\n")
                # Flush per match so partial results survive an
                # interrupted run.
                f.flush()


    #have to make tApi return cast as a string so we can write it

    tweets = tApi.home_timeline()
    for tweet in tweets:
        f.write(tweet.text)

finally:
    f.close()
analyze('data.txt')
os.remove('data.txt')
#HN
print("HACKER NEWS: ")
# "with" closes the file on every path and, unlike try/finally around
# open(), never calls close() on a handle that failed to open.
with open("data.txt", 'w') as f:
    for story_id in hn.top_stories(limit=10):
        f.write(str(hn.get_item(story_id)))
analyze('data.txt')
os.remove('data.txt')
#Google Search
print("GOOGLE SEARCH FOR {}".format(searchTerm))
with open("data.txt", 'w') as f:
    results = search(searchTerm)
    f.write((str(results)))
analyze('data.txt')
os.remove('data.txt')
    a=time.strftime("%m/%d/%Y, %H:%M:%S")

    if(id>latest_id):
        latest_id=id
    data={'id':id,'title':title,'by':by,'time':a}
    print('publishing {}'.format(data))
    producer.send('story',value=data)
    print('\n')

# Poll for the newest story and publish every story item between the last
# published ID and the newest one to the 'story' topic.
while True:
    story = hn.new_stories(limit=1)
    if story and story[0].item_id > latest_id:
        # The range is fixed before latest_id is advanced inside the loop.
        for i in range(latest_id + 1, story[0].item_id + 1):
            curr_story = hn.get_item(i)
            # IDs are shared with comments, polls, etc.; publish stories only.
            if curr_story.item_type != 'story':
                continue
            # Use the current item's own timestamp.
            submitted = curr_story.submission_time
            data = {
                'id': curr_story.item_id,
                'title': curr_story.title,
                'by': curr_story.by,
                'time': submitted.strftime("%m/%d/%Y, %H:%M:%S"),
            }
            latest_id = curr_story.item_id
            print('publishing {}'.format(data))
            producer.send('story', value=data)
            print('\n')
    # Always pause between polls, including when nothing new was found,
    # so the API is not queried in a tight loop.
    sleep(5)
Example #29
0
from hackernews import HackerNews
hn = HackerNews()
from datetime import datetime
import matplotlib
from matplotlib import pyplot as plt

story_tup_list = []
comment_tup = ()
get_comments = False
now = datetime.now()

top_story_ids = hn.top_stories(limit=30)

# Collect (title, score, submission time) for each top story.
for story_id in top_story_ids:
    story = hn.get_item(story_id)
    story_tup = (story.title, story.score, story.submission_time)
    story_tup_list.append(story_tup)

    if get_comments:
        # NOTE(review): kids is presumably None for stories without
        # comments -- the fallback keeps the loop from failing then.
        for comment_id in story.kids or []:
            comment = hn.get_item(comment_id)
            comment_tup = (comment.submission_time, comment.text)

# Highest score first; report the five best stories.
story_tup_list.sort(key=lambda tup: tup[1], reverse=True)
for story in story_tup_list[:5]:
    # %-formatting reproduces the space-separated Python 2 print output
    # and is valid syntax on Python 3 as well.
    print("%s %s" % (story[0], story[1]))
    string_list = story[0].split(" ")
    print("%s %s" % (string_list, now - story[2]))

plt.plot()