def get_news(from_num, num_headlines):
    """Return the titles of the top Hacker News stories.

    Parameters
    ----------
    from_num : unused -- kept only for interface compatibility with callers.
    num_headlines : int
        Number of top stories to fetch.

    Returns
    -------
    list of str
        One story title per entry, each terminated with a newline.
    """
    hn = HackerNews()
    # Fix: `print "Starting HN"` is Python-2-only syntax; the single-argument
    # print() form behaves identically on both Python 2 and 3.
    print("Starting HN")
    # Build the result directly instead of append-in-loop.
    return [hn.get_item(story_id).title + "\n"
            for story_id in hn.top_stories(limit=num_headlines)]
def refresh_posts():
    """Tweet the title, link and comments URL of the top 10 HN posts.

    Duplicate tweets make the Twitter API raise TweepError; those posts
    are silently skipped.
    """
    hn = HackerNews()
    for story in hn.top_stories(limit=10):  # Only viewing top 10 posts on HN
        story_id = hn.get_item(story)
        # Tweets title, story URL, and comments.
        # Adjusting for max tweet length -- fix: the original dropped only
        # ONE trailing word, which could still leave an over-long title.
        # Keep dropping words until the title fits.
        story_title = story_id.title
        while len(story_title) > 76 and ' ' in story_title:
            story_title = story_title.rsplit(' ', 1)[0]
        story_title += '\n'
        story_comments = ('Cmts: https://news.ycombinator.com/item?id=%s'
                          % str(story_id.item_id))
        # Check to see if post has an external link
        if story_id.url is None:
            try:
                # If tweet is a duplicate, ignores the post and doesn't tweet
                api.update_status(story_title + story_comments)
            except tweepy.error.TweepError:
                continue
        else:
            story_url = ('Link: ' + story_id.url + '\n')
            # If tweet is a duplicate, ignores the post and doesn't tweet
            try:
                api.update_status(story_title + story_url + story_comments)
            except tweepy.error.TweepError:
                continue
class Hacker(object):
    """Neovim plugin exposing Hacker News front-page browsing commands."""

    def __init__(self, vim):
        self.vim = vim
        self.hn = HackerNews()
        # title -> url mapping, populated by :HackerNews.
        self.urls = None

    @neovim.command("Test")
    def test(self):
        self.vim.command("vsplit")

    @neovim.command('HackerNews')
    def fill_buffer(self):
        """Fill a scratch buffer with the titles of the top 30 stories."""
        stories = []
        urls = {}
        for story in self.hn.top_stories()[0:30]:
            item = self.hn.get_item(story)
            stories.append(item.title)
            urls[item.title] = item.url
        self.vim.command("split HackerNews")
        self.vim.command("buffer HackerNews")
        self.vim.command("set buftype=nofile")
        self.vim.command("set bufhidden=hide")
        self.vim.command("setlocal noswapfile")
        self.vim.current.buffer[:] = stories
        self.urls = urls

    @neovim.command('HackerOpen')
    def autocmd_handler(self):
        """Open the story under the cursor in a new browser tab.

        Fix: running :HackerOpen before :HackerNews used to raise a
        TypeError (``self.urls`` is still None), and self-posts with a
        None url broke ``webbrowser``.  Both cases are now guarded.
        """
        if not self.urls:
            return
        url = self.urls.get(self.vim.current.line)
        if url:
            webbrowser.open_new_tab(url)
def getHNData(verbose=False, limit=100, sub="showstories"):
    """Fetch lowercased titles from one Hacker News listing.

    Runs inside a HoverPy proxy so responses are recorded once to a local
    db and replayed from it on subsequent calls.
    """
    from hackernews import HackerNews
    from hackernews import settings
    import hoverpy, time, os
    dbpath = "data/hn.%s.db" % sub
    with hoverpy.HoverPy(recordMode="once", dbpath=dbpath) as hp:
        # When replaying (not capturing), point the client at plain http
        # so the proxy can intercept the traffic.
        if not hp.mode() == "capture":
            settings.supported_api_versions[
                "v0"] = "http://hacker-news.firebaseio.com/v0/"
        hn = HackerNews()
        print("GETTING HACKERNEWS %s DATA" % sub)
        # Dispatch table: listing name -> client method.
        listings = {
            "showstories": hn.show_stories,
            "askstories": hn.ask_stories,
            "jobstories": hn.job_stories,
            "topstories": hn.top_stories
        }
        began = time.time()
        titles = []
        for sid in listings[sub](limit=limit):
            lowered = hn.get_item(sid).title.lower()
            if verbose:
                print(lowered)
            titles.append(lowered)
        print("got %i hackernews titles in %f seconds" %
              (len(titles), time.time() - began))
        return titles
def handler(event, context):
    """AWS-Lambda-style entry point: JSON array of the top 10 HN titles."""
    client = HackerNews()
    titles = [client.get_item(sid).title
              for sid in client.top_stories(limit=10)]
    return json.dumps(titles)
class TestGetItem(unittest.TestCase):
    """Smoke test for HackerNews.get_item against a well-known item."""

    def setUp(self):
        # Fresh client per test (hits the live Firebase API).
        self.hn = HackerNews()

    def test_get_item(self):
        # Item 8863 is the classic Dropbox "Show HN" post by dhouston.
        item = self.hn.get_item(8863)
        self.assertIsInstance(item, Item)
        self.assertEqual(item.item_id, 8863)
        self.assertEqual(item.by, "dhouston")
class TestGetItem(unittest.TestCase):
    """Smoke test for HackerNews.get_item against a well-known item."""

    def setUp(self):
        # Fresh client per test (hits the live Firebase API).
        self.hn = HackerNews()

    def test_get_item(self):
        # Item 8863 is the classic Dropbox "Show HN" post by dhouston.
        item = self.hn.get_item(8863)
        self.assertIsInstance(item, Item)
        self.assertEqual(item.item_id, 8863)
        self.assertEqual(item.by, "dhouston")
def get_hackernews_article():
    """Pick a random front-page HN story; return its title (plus URL if any)."""
    client = HackerNews()
    story = client.get_item(random.choice(client.top_stories()))
    if story.url is None:
        return story.title
    return story.title + "\n" + story.url
def getHN_stories(self, article_limit):
    """Fetch 1.5x `article_limit` top stories (extras allow later filtering).

    Returns a list of haxor Item objects.
    """
    client = HackerNews()
    fetch_count = int(article_limit * 1.5)
    return [client.get_item(sid)
            for sid in client.top_stories(limit=fetch_count)]
class TestGetItem(unittest.TestCase):
    """Exercises get_item: plain fetch, repr, invalid id, and expand=True."""

    def setUp(self):
        # Fresh client per test (hits the live Firebase API).
        self.hn = HackerNews()

    def test_get_item(self):
        # Item 8863 is the well-known Dropbox "Show HN" post.
        item = self.hn.get_item(8863)
        self.assertIsInstance(item, Item)
        self.assertEqual(item.item_id, 8863)
        self.assertEqual(item.by, "dhouston")
        self.assertEqual(repr(item), ('<hackernews.Item: 8863 - My YC app: '
                                      'Dropbox - Throw away your USB drive>'))

    def test_invalid_item(self):
        # Id 0 does not exist; the client raises InvalidItemID.
        self.assertRaises(InvalidItemID, self.hn.get_item, 0)

    def test_get_item_expand(self):
        # expand=True inflates `by` into a User and `kids` into Items.
        item = self.hn.get_item(8863, expand=True)
        self.assertIsInstance(item, Item)
        self.assertEqual(item.item_id, 8863)
        self.assertIsInstance(item.by, User)
        self.assertIsInstance(item.kids[0], Item)

    def tearDown(self):
        # Close the requests session the client keeps open.
        self.hn.session.close()
def getNews():
    """Return haxor Item objects for the top 100 Hacker News stories."""
    hn = HackerNews()
    # Fix: the original copied the ids into a scratch list and then looped
    # over that list a second time; fetch the items directly in one pass.
    return [hn.get_item(story_id) for story_id in hn.top_stories(limit=100)]
def updateHackerNews():
    """Rebuild the `discussion` table from the current top 30 HN stories.

    Returns the string "success" when every row has been inserted.
    """
    database_execute('truncate discussion')
    hn = HackerNews()
    for story_id in hn.top_stories(limit=30):
        item = hn.get_item(story_id)
        url = "https://news.ycombinator.com/item?id=" + str(story_id)
        # Stripping single quotes is only a crude escape; the %-built SQL
        # below is still vulnerable to injection.  SECURITY NOTE(review):
        # switch database_execute to parameterized queries if its API
        # allows it.
        title = item.title.replace("'", "")
        score = item.score
        # Fix: dropped the dead `id = 1` initializer and stopped shadowing
        # the builtin `id` -- story_id is the value actually inserted.
        sql = "insert into discussion values('%s','%s','%s','%s')" % (
            story_id, title, url, score)
        #FL.debug(sql)
        database_execute(sql)
    return "success"
def update_hackernews(user, update):
    """Append unseen top-15 HN stories to `update`, recording them on `user`.

    Returns the (mutated) `update` list.
    """
    hn = HackerNews()
    for sid in hn.top_stories(limit=15):
        story = hn.get_item(sid)
        post = postObject()
        post.mainlabel = story.title.encode('ascii', 'ignore')
        post.time = str(story.submission_time)
        post.sublabel = str(story.score) + " points by " + story.by
        # Self posts carry text; link posts fall back to a placeholder.
        body = story.text
        post.message = "Read more" if body is None else body
        post.type = 'hackernews'
        post.link = "https://news.ycombinator.com/"
        # Skip anything the user has already been shown.
        if post.mainlabel not in user.hackerNewsFeed:
            update.append(post.to_json())
            user.hackerNewsFeed.append(post.mainlabel)
    return update
def sync_with_hacker_news():
    """Mirror the top 90 HN stories into the local NewsItem table.

    Known stories get their counters refreshed; new stories are inserted
    and fanned out to every existing user.  Python 2 (print statement).
    """
    hn = HackerNews()
    for story_id in hn.top_stories(limit=90):
        story = hn.get_item(story_id)
        # NewsItem's primary key is the HN item id, so .get() looks the
        # story up directly.
        persisted_news_item = NewsItem.query.get(story_id)
        if persisted_news_item:
            # Already known: refresh only the mutable counters.
            print "Updating story:", story_id
            persisted_news_item.upvotes = story.score
            persisted_news_item.comments = comment_count(story)
        else:
            print "Adding story:", story_id
            news_item = NewsItem(
                id=story_id,
                url=story.url,
                posted_on=story.submission_time,
                upvotes=story.score,
                comments=comment_count(story))
            db.session.add(news_item)
            # Link the fresh story to every existing user.
            for user in User.query.all():
                db.session.add(UserNewsItem(user=user, news_item=news_item))
    # A single commit covers all updates and inserts above.
    db.session.commit()
def process(self, msg):
    """Reply with HN stories: `hn: last` for newest, anything else for top.

    Replies one "{title} - {score} - {url}" line per story and returns True.
    """
    params = msg.extract_parameters(self.parameters)
    from hackernews import HackerNews
    hn = HackerNews()
    # Fix: the original abused a list comprehension purely for its
    # side effects (building and discarding a list of reply results);
    # a plain loop states the intent.
    if params['hn'] == "last":
        story_ids = hn.new_stories(int(params['limit']))
    else:
        story_ids = hn.top_stories(int(params['limit']))
    for sid in story_ids:
        msg.reply(
            "{title} - {score} - {url}".format(**hn.get_item(sid).__dict__))
    return True
def scrape_hacker_news():
    """Fetch every current top story and build (unsaved) StoryModel objects.

    Python 2 (print statement, urllib2).  Each story's linked page is
    downloaded and its visible text flattened; any failure is logged and
    the story skipped.  Persistence (`story.save()`) is commented out, so
    this currently only returns the in-memory list.
    """
    hn = HackerNews()
    item_id_list = hn.top_stories()
    stories_list = []
    for item_id in item_id_list:
        print item_id
        try:
            # De-duplication against the database, currently disabled:
            # is_pres_count = StoryModel.objects.filter(story_id=item_id).count()
            # if (is_pres_count > 0):
            #     continue
            try:
                hn_story = hn.get_item(item_id)
                # print hn_story
                # Download the linked page and flatten its text into one line.
                page = urllib2.urlopen(hn_story.url)
                bs = BeautifulSoup(page.read())
                content = bs.get_text()
                content = ' '.join(word for word in content.split('\n') if word != '')
                story = StoryModel(
                    story_id=hn_story.item_id,
                    title=hn_story.title,
                    link=hn_story.url,
                    points=hn_story.score,
                    # content = content,
                    # submitter=hn_story
                    published_time=hn_story.submission_time)
                # story.save()
                stories_list.append(story)
            except Exception as e:
                # Network / parse failure for this story: log and move on.
                print("error while retrieving : %s" % (e))
                continue
        except Exception as e:
            print("error while retrieving : %s" % (e))
            continue
    print("the size of the story list is %s" % (len(stories_list)))
    return stories_list
class HN:
    """Voice-assistant command handler that reads HN headlines aloud."""

    def __init__(self, speaker):
        # `speaker` is a text-to-speech object exposing .say().
        self.speaker = speaker
        self.hn = HackerNews()

    def get_top_stories(self):
        """Print and speak the titles of the top 10 stories."""
        ids = self.hn.top_stories(limit=10)
        for story_id in ids:
            item = self.hn.get_item(story_id)
            print(item.title)
            self.speaker.say(item.title)
            #time.sleep(5)

    def check_command(self, data):
        """Dispatch on the recognized phrase contained in `data`."""
        if "news" in data:
            if not internet_on():
                self.speaker.say("no internet connection try later")
                # Fix: the original `return false` referenced an undefined
                # lowercase name and raised NameError; return the real
                # False singleton.
                return False
            if "check" in data:
                self.get_top_stories()
def test_save_item(self):
    """Persist the first 5 top stories that are not already stored."""
    hn = HackerNews()
    item_id_list = hn.top_stories()
    for item_id in item_id_list[:5]:
        try:
            is_pres_count = StoryModel.objects.filter(
                story_id=item_id).count()
            # Fix: the original skipped when the story was ABSENT
            # (count == 0), so it only ever re-saved duplicates and never
            # stored new stories.  Skip stories that already exist instead.
            if is_pres_count > 0:
                continue
        except Exception as e:
            print("Error occured : %s" % (e))
            continue
        hn_story = hn.get_item(item_id)
        story = StoryModel(
            title=hn_story.title,
            link=hn_story.url,
            points=hn_story.score,
            # submitter=hn_story
            published_time=hn_story.submission_time)
        story.save()
class HackNews:
    """Builds SMS-sized digests of Hacker News headlines or job ads."""

    def __init__(self):
        self.hn = HackerNews()
        # Accumulates {"title": ...} entries plus a trailing success flag.
        self.jsonObj = []

    def displayHackNews(self, jobsOrHeadlines):
        """Route to the requested digest; complain on a bad category."""
        if jobsOrHeadlines == "headlines":
            return self.topStories()
        elif jobsOrHeadlines == "jobs":
            return self.jobAds()
        else:
            # NOTE(review): `resp` is not defined in this scope -- this
            # branch raises NameError as written; presumably a Twilio
            # response object owned by the caller.  Left pending confirmation.
            resp.message("Oops, wrong catagory! Text us: 'HACKNEWS: jobs' or 'HACKNEWS: headlines'")

    def topStories(self):
        """Return the top-10 titles, one per blank-line-separated paragraph."""
        cleanHeadline = ""
        textReturn = ""
        for story_id in self.hn.top_stories(limit=10):
            # str(Item) renders "<hackernews.Item: <id> - <title>>"; split
            # off the id prefix and drop the trailing ">".
            uncleanHeadline = str(self.hn.get_item(story_id)).split(' - ', 1)
            cleanHeadline = uncleanHeadline[1][:-1]
            textReturn += cleanHeadline + '\n\n'
            self.jsonObj.append({ "title" : cleanHeadline })
        if(cleanHeadline and cleanHeadline != ""):
            self.jsonObj.append({ "sucess" : "true" })
        else:
            self.jsonObj.append({ "sucess" : "false" })
        return textReturn

    def jobAds(self):
        """Scan the first `maxLoops` top stories and collect job posts."""
        textReturn = ""
        maxLoops = 10
        numLoops = 0
        for story_id in self.hn.top_stories():
            # Fix: the original incremented, fetched, and THEN broke on
            # `numLoops >= 10`, so it scanned only 9 stories and wasted one
            # API call; `maxLoops` was never used.  Check before fetching.
            if numLoops >= maxLoops:
                break
            numLoops += 1
            story = self.hn.get_item(story_id)
            if story.item_type == 'job':
                uncleanHeadline = str(story).split(' - ', 1)
                cleanHeadline = uncleanHeadline[1][:-1]
                textReturn += cleanHeadline + '\n'
                if cleanHeadline and cleanHeadline != "":
                    self.jsonObj.append({ "title" : cleanHeadline })
        if textReturn == "":
            textReturn += "No jobs have been posted in Top Stories, try again tomorrow!"
            self.jsonObj.append({ "sucess" : "false" })
        else:
            self.jsonObj.append({ "sucess" : "true" })
        return textReturn

    def convertToJson(self):
        return self.jsonObj
# --- Fragment: imports HN front-page links into a Mongo-style database.
# NOTE(review): `dbURL`, `db` and `hn` are defined earlier in the file
# (outside this chunk), and the fragment ends mid-way through page parsing.
# Python 2 (print statement). ---
print "CONNECTED TO " + dbURL
links = []
tagMap = {}
tagSet = set()
# Make tag set and tag map
for tag in db.tags.find():
    tagSet.add(tag["name"].lower())
    # Make tag map to get back to correct casing
    tagMap[tag["name"].lower()] = tag["name"]
# Get new links
for story_id in hn.top_stories(limit=1000):
    item = hn.get_item(story_id)
    url = item.url
    print item
    # Check if link is already in database
    if db.unrelatedlinks.find_one({'url': item.url}) is not None:
        continue
    # Best-effort download; unreachable links are simply skipped.
    try:
        response = requests.get(url)
    except:
        continue
    # Get description
    soup = BeautifulSoup(response.text)
    metas = soup.find_all('meta')
"""Downloads items from HN """
from hackernews import HackerNews
import string
hn = HackerNews()
# Python 2 script: walk a fixed range of HN item ids and archive any
# self-text mentioning "amazon" to amazon.txt.
with open('amazon.txt', 'w') as f:
    for i in range(50, 20000):
        item = hn.get_item(i)
        if item.text:
            s = item.text
            # Drop non-printable characters (Py2 filter returns a str here).
            s = filter(lambda x: x in string.printable, s)
            if 'amazon' in s:
                print s
                print type(s)
                f.write(s)
                # Flush so partial progress survives an interrupted run.
                f.flush()
                f.write("\n")
class HackNews:
    """Builds SMS-sized digests of Hacker News headlines or job ads."""

    def __init__(self):
        self.hn = HackerNews()
        # Accumulates {"title": ...} entries plus a trailing success flag.
        self.jsonObj = []

    def displayHackNews(self, jobsOrHeadlines):
        """Route to the requested digest; complain on a bad category."""
        if jobsOrHeadlines == "headlines":
            return self.topStories()
        elif jobsOrHeadlines == "jobs":
            return self.jobAds()
        else:
            # NOTE(review): `resp` is not defined in this scope -- this
            # branch raises NameError as written; presumably a Twilio
            # response object owned by the caller.  Left pending confirmation.
            resp.message(
                "Oops, wrong catagory! Text us: 'HACKNEWS: jobs' or 'HACKNEWS: headlines'"
            )

    def topStories(self):
        """Return the top-10 titles, one per blank-line-separated paragraph."""
        cleanHeadline = ""
        textReturn = ""
        for story_id in self.hn.top_stories(limit=10):
            # str(Item) renders "<hackernews.Item: <id> - <title>>"; split
            # off the id prefix and drop the trailing ">".
            uncleanHeadline = str(self.hn.get_item(story_id)).split(' - ', 1)
            cleanHeadline = uncleanHeadline[1][:-1]
            textReturn += cleanHeadline + '\n\n'
            self.jsonObj.append({"title": cleanHeadline})
        if (cleanHeadline and cleanHeadline != ""):
            self.jsonObj.append({"sucess": "true"})
        else:
            self.jsonObj.append({"sucess": "false"})
        return textReturn

    def jobAds(self):
        """Scan the first `maxLoops` top stories and collect job posts."""
        textReturn = ""
        maxLoops = 10
        numLoops = 0
        for story_id in self.hn.top_stories():
            # Fix: the original incremented, fetched, and THEN broke on
            # `numLoops >= 10`, so it scanned only 9 stories and wasted one
            # API call; `maxLoops` was never used.  Check before fetching.
            if numLoops >= maxLoops:
                break
            numLoops += 1
            story = self.hn.get_item(story_id)
            if story.item_type == 'job':
                uncleanHeadline = str(story).split(' - ', 1)
                cleanHeadline = uncleanHeadline[1][:-1]
                textReturn += cleanHeadline + '\n'
                if cleanHeadline and cleanHeadline != "":
                    self.jsonObj.append({"title": cleanHeadline})
        if textReturn == "":
            textReturn += "No jobs have been posted in Top Stories, try again tomorrow!"
            self.jsonObj.append({"sucess": "false"})
        else:
            self.jsonObj.append({"sucess": "true"})
        return textReturn

    def convertToJson(self):
        return self.jsonObj
from hackernews import HackerNews
from .models import Story
# Python 2 one-shot import script: copy the current top-10 HN stories
# into the Django `Story` table (no de-duplication is performed).
hn = HackerNews()
for stories in hn.top_stories(limit=10):
    # Rebind the loop variable from the id to the full Item.
    stories = hn.get_item(stories)
    print stories.title
    print stories.url
    print stories.score
    print stories.by
    print stories.submission_time
    print stories.item_id
    Story.objects.create(title=stories.title, url = stories.url, \
        score = stories.score, submitter = stories.by, \
        timestamp = stories.submission_time, hn_id = stories.item_id)
# --- Fragment of a Slack bot command dispatcher (Python 2). ---
# NOTE(review): the opening `if` arm of this chain, and the names `places`,
# `sc`, `chan`, `message`, `j`, `email` and `hn`, are defined earlier in
# the file (outside this chunk).
    places.append("New York Bagels")
    places.append("Karl Strauss")
    number = random.randint(0,len(places) - 1)
    sc.rtm_send_message(chan, "You should go to %s to for food." % places[number])
####JIRA STUFF
elif "!helpdesk" in message:
    request = message[10:]
    # Ticket is filed via the JIRA API; project key "IT" must match the
    # target instance.
    new_issue = j.create_issue(project="IT", summary=request, description="Created by Slack", issuetype={'name':'Service Request'}, reporter={"name": email})
    #edit project ID to match.
    sc.rtm_send_message(chan, "Your helpdesk ticket for '%s' has been created." % request)
####Hacker News Stuff
elif "!hn" in message:
    n=0
    sc.rtm_send_message(chan,"Top 2 HackerNews Stories:")
    for story_id in hn.top_stories(limit=2):
        derp = hn.get_item(story_id)
        derp = str(derp)
        # Debug tracing: strip "<hackernews.Item: id - title>" down to the
        # bare title, printing each intermediate step.
        print "derp is:"
        print derp
        herp = derp
        print "herp is:"
        print herp
        derpy = derp.split(":")[1]
        print "derpy is:"
        print derpy
        derpy = derpy.split("-")[0]
        print "derpy is"
        print derpy
        derpy = derpy.strip()
        print "derpy is"
        print derpy
# Collect the 5 most recent HN stories that have an external URL by
# walking item ids downward from the newest known id.
hn = HackerNews()
hn_items = []
index = 0
try:
    # Resume from the id persisted by a previous run, if any.
    hn_id = load_obj('last_hn_id')
    print('loaded id:', hn_id)
except:
    # First run (or unreadable state): start at the site-wide max item id.
    hn_id = hn.get_max_item()
    print('max_id from hn:', hn_id)
while len(hn_items) < 5:
    index += 1
    # Deleted / invalid ids raise inside the client; skip to the next id.
    try:
        item = hn.get_item(hn_id)
    except:
        print('hn_id invalid', hn_id)
        hn_id -= 1
        continue
    # print('item', type(item))
    # Only keep link-type stories (self posts have url == None).
    if item.item_type == "story" and item.url is not None:
        if len(hn_items) % 5 == 0:
            print(len(hn_items))
        hn_items.append({
            "url": item.url,
            "hn_id": item.item_id,
            "date": item.submission_time,
            "title": item.title
        })
    hn_id -= 1
"""Downloads items from HN """
from hackernews import HackerNews
import string
hn = HackerNews()
# Python 2 script: walk a fixed range of HN item ids and archive any
# self-text mentioning "amazon" to amazon.txt.
with open('amazon.txt','w') as f:
    for i in range(50, 20000):
        item = hn.get_item(i)
        if item.text:
            s = item.text
            # Drop non-printable characters (Py2 filter returns a str here).
            s = filter(lambda x: x in string.printable, s)
            if 'amazon' in s:
                print s
                print type(s)
                f.write(s)
                # Flush so partial progress survives an interrupted run.
                f.flush()
                f.write("\n")
#have to make tApi return cast as a string so we can write it tweets = tApi.home_timeline() for tweet in tweets: f.write(tweet.text) finally: f.close() analyze('data.txt') os.remove('data.txt') #HN print("HACKER NEWS: ") try: f = open("data.txt", 'w') for story_id in hn.top_stories(limit=10): f.write(str(hn.get_item(story_id))) finally: f.close() analyze('data.txt') os.remove('data.txt') #Google Search print("GOOGLE SEARCH FOR {}".format(searchTerm)) try: f = open("data.txt", 'w') results = search(searchTerm) f.write((str(results))) finally: f.close() analyze('data.txt') os.remove('data.txt')
# --- Fragment: Kafka producer streaming newly-posted HN stories. ---
# NOTE(review): `id`, `title`, `by`, `latest_id`, `hn` and `producer` are
# established earlier in the file (outside this chunk).
a = time.strftime("%m/%d/%Y, %H:%M:%S")
if id > latest_id:
    latest_id = id
data = {'id': id, 'title': title, 'by': by, 'time': a}
print('publishing {}'.format(data))
producer.send('story', value=data)
print('\n')
while True:
    # Poll for the newest story; loop until something newer than
    # latest_id appears.
    story = hn.new_stories(limit=1)
    if len(story) == 0 or story[0].item_id <= latest_id:
        continue
    else:
        # Publish every story-type item between the last seen id and the
        # newest one, inclusive.
        start = latest_id + 1
        for i in range(start, story[0].item_id + 1):
            curr_story = hn.get_item(i)
            if curr_story.item_type != 'story':
                continue
            title = curr_story.title
            id = curr_story.item_id
            by = curr_story.by
            # Fix: the original read `item.submission_time` (`item` is not
            # in scope here -- NameError) and bound the result to the name
            # `time`, shadowing the time module.  Format the current
            # story's timestamp directly instead.
            a = curr_story.submission_time.strftime("%m/%d/%Y, %H:%M:%S")
            data = {'id': id, 'title': title, 'by': by, 'time': a}
            latest_id = id
            print('publishing {}'.format(data))
            producer.send('story', value=data)
            print('\n')
    sleep(5)
from hackernews import HackerNews
hn = HackerNews()
from datetime import datetime
import matplotlib
from matplotlib import pyplot as plt
# Python 2 script: rank the top-30 HN stories by score and print the five
# highest along with their age relative to now.
story_tup_list = []
comment_tup = ()
# Flag: also walk each story's direct comments (off by default).
get_comments = False
now = datetime.now()
top_story_ids = hn.top_stories(limit=30)
for story_id in top_story_ids:
    story = hn.get_item(story_id)
    # (title, score, submission_time)
    story_tup = (story.title, story.score, story.submission_time)
    story_tup_list.append(story_tup)
    if (get_comments == True):
        # NOTE(review): each comment_tup overwrites the previous one and is
        # never stored anywhere -- only the last comment survives this loop.
        for comment_id in story.kids:
            comment = hn.get_item(comment_id)
            comment_tup = (comment.submission_time, comment.text)
# Highest score first.
story_tup_list.sort(key=lambda tup: tup[1], reverse=True)
for story in story_tup_list[:5]:
    print story[0], story[1]
    string_list = story[0].split(" ")
    print string_list, now - story[2]
plt.plot()