class TestGetMaxItem(unittest.TestCase):
    """Exercise ``HackerNews.get_max_item`` with and without ``expand``."""

    def setUp(self):
        # A fresh client is built for every test method.
        self.hn = HackerNews()

    def tearDown(self):
        # Close the session held by the client created in setUp.
        self.hn.session.close()

    def test_get_max_item(self):
        """Without arguments the call yields an ``int``."""
        self.assertIsInstance(self.hn.get_max_item(), int)

    def test_get_max_item_expand(self):
        """With ``expand=True`` the call yields an ``Item``."""
        expanded = self.hn.get_max_item(expand=True)
        self.assertIsInstance(expanded, Item)
class TestGetMaxItem(unittest.TestCase):
    """Check that ``HackerNews.get_max_item`` returns an integer id."""

    def setUp(self):
        # A fresh client is built for every test method.
        self.hn = HackerNews()

    def test_get_max_item(self):
        """Without arguments the call yields an ``int``."""
        max_item_id = self.hn.get_max_item()
        self.assertIsInstance(max_item_id, int)

    def tearDown(self):
        # Release the client's session so each test does not leave an
        # open connection behind.
        self.hn.session.close()
# Tail of the preceding section: fall back to the previous marker when no
# new one was produced, then persist it.
if new_before is None:
    new_before = old_before
config['before'] = new_before


def read_new_stories(count):
    """Yield ``(story_id, Item)`` pairs for the ids from ``hn.new_stories``.

    Each id is fetched as ``<hn.base_url>item/<story_id>.json`` over a single
    shared ``requests`` session, which is closed when the generator finishes.

    Args:
        count: Passed straight through to ``hn.new_stories``.

    Raises:
        HTTPError: If a response status is not ``requests.codes.ok``.
        InvalidItemID: If the decoded JSON body is empty/falsy.
    """
    with requests.Session() as s:
        for story_id in hn.new_stories(count):
            response = s.get(
                '{0}{1}/{2}.json'.format(hn.base_url, 'item', story_id))
            if response.status_code != requests.codes.ok:
                raise HTTPError
            payload = response.json()
            if not payload:
                raise InvalidItemID
            yield story_id, Item(payload)


new_before = hn.get_max_item()
# On the first run there is no stored marker, so default to the current max
# id (which makes the difference below zero and skips the fetch).
old_before = config.get('hn_before', new_before)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("Fetching hacker news posts {0}".format(new_before - old_before))
if new_before > old_before:
    # NOTE(review): the count passed here is a difference between item ids,
    # not a number of stories -- confirm that is what hn.new_stories expects.
    for story_id, story in read_new_stories(new_before - old_before):
        # Track the highest id seen so the stored marker never moves backwards.
        if story_id > new_before:
            new_before = story_id
        # Skip anything already covered by the previous run.
        if story_id <= old_before:
            continue
        if story.deleted:
            continue
        # if story.url is not None:
        #     domain = urlparse(story.url).netloc
        submission_time = story.submission_time
        all_submissions.append((submission_time, 'hn', story))
config['hn_before'] = new_before
def load_obj(name): with open('./pickle-files/' + name + '.pkl', 'rb') as f: return pickle.load(f) hn = HackerNews() hn_items = [] index = 0 try: hn_id = load_obj('last_hn_id') print('loaded id:', hn_id) except: hn_id = hn.get_max_item() print('max_id from hn:', hn_id) while len(hn_items) < 5: index += 1 try: item = hn.get_item(hn_id) except: print('hn_id invalid', hn_id) hn_id -= 1 continue # print('item', type(item)) if item.item_type == "story" and item.url is not None: if len(hn_items) % 5 == 0: print(len(hn_items)) hn_items.append({