Example #1
0
class TestGetMaxItem(unittest.TestCase):
    """Type checks on ``HackerNews.get_max_item``, with and without ``expand``."""

    def setUp(self):
        # Every test gets its own client instance.
        self.hn = HackerNews()

    def tearDown(self):
        # Close the client's session after each test.
        self.hn.session.close()

    def test_get_max_item(self):
        # The default call must produce an int.
        self.assertIsInstance(self.hn.get_max_item(), int)

    def test_get_max_item_expand(self):
        # With expand=True the call must produce an Item.
        self.assertIsInstance(self.hn.get_max_item(expand=True), Item)
Example #2
0
class TestGetMaxItem(unittest.TestCase):
    """Exercise ``get_max_item`` on a ``HackerNews`` client built per test."""

    def setUp(self):
        # Client under test; its session is closed again in tearDown.
        self.hn = HackerNews()

    def tearDown(self):
        self.hn.session.close()

    def test_get_max_item(self):
        # Called without arguments, the result is checked to be an int.
        result = self.hn.get_max_item()
        self.assertIsInstance(result, int)

    def test_get_max_item_expand(self):
        # Called with expand=True, the result is checked to be an Item.
        expanded = self.hn.get_max_item(expand=True)
        self.assertIsInstance(expanded, Item)
Example #3
0
class TestGetMaxItem(unittest.TestCase):
    """Type check on the value returned by ``HackerNews.get_max_item``."""

    def setUp(self):
        # Build a new client before each test.
        self.hn = HackerNews()

    def test_get_max_item(self):
        # The call's result must be an int.
        self.assertIsInstance(self.hn.get_max_item(), int)
Example #4
0
class TestGetMaxItem(unittest.TestCase):
    """Verify that ``get_max_item`` on a ``HackerNews`` client returns an int."""

    def setUp(self):
        # Client under test, recreated for every test method.
        self.hn = HackerNews()

    def test_get_max_item(self):
        returned = self.hn.get_max_item()
        self.assertIsInstance(returned, int)
Example #5
0
# When new_before is None, reuse old_before; the resulting value is then
# stored under config['before'].
# Identity comparison (`is None`) is the idiomatic test for the None
# singleton and cannot be affected by a custom __eq__.
if new_before is None:
    new_before = old_before
config['before'] = new_before

def read_new_stories(count):
    """Yield ``(story_id, Item)`` pairs for the IDs from ``hn.new_stories(count)``.

    Each story is requested as JSON from a URL built out of ``hn.base_url``,
    the literal ``'item'`` and the story ID, using one ``requests.Session``
    for all requests. This is a generator, so requests are only issued as
    the caller iterates.

    Raises:
        HTTPError: a response's status code is not ``requests.codes.ok``.
        InvalidItemID: a response's decoded JSON body is falsy.
    """
    with requests.Session() as session:
        for story_id in hn.new_stories(count):
            item_url = '{0}{1}/{2}.json'.format(hn.base_url, 'item', story_id)
            reply = session.get(item_url)
            if reply.status_code != requests.codes.ok:
                raise HTTPError
            payload = reply.json()
            if not payload:
                raise InvalidItemID
            yield story_id, Item(payload)

# Upper bound comes from hn.get_max_item(); the lower bound is the stored
# 'hn_before' value, defaulting to the upper bound when the key is absent.
new_before = hn.get_max_item()
old_before = config.get('hn_before', new_before)
# One pre-formatted, parenthesised argument: this prints the same text
# whether `print` is the Python 2 statement or the Python 3 function
# (the bare statement form is a SyntaxError on Python 3).
print("Fetching hacker news posts %s" % (new_before - old_before,))
if new_before > old_before:
    # The count argument is evaluated once here, before the loop body can
    # raise new_before.
    for story_id, story in read_new_stories(new_before - old_before):
        # Advance the marker when a story ID exceeds it.
        if story_id > new_before:
            new_before = story_id
        # Skip stories at or below the previous marker, and deleted ones.
        # `or` short-circuits, so story.deleted is only read for newer IDs.
        if story_id <= old_before or story.deleted:
            continue
        #if story.url != None:
        #    domain = urlparse(story.url).netloc
        submission_time = story.submission_time
        all_submissions.append((submission_time, 'hn', story))
# Store the (possibly advanced) marker under 'hn_before'.
config['hn_before'] = new_before
def load_obj(name):
    """Unpickle and return the object stored in ``./pickle-files/<name>.pkl``.

    The path is relative to the current working directory. Errors raised
    while opening the file or unpickling it propagate to the caller.
    """
    pickle_path = './pickle-files/' + name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded


hn = HackerNews()

hn_items = []
index = 0

# Start from the pickled 'last_hn_id' when load_obj succeeds; on any error
# fall back to the value returned by hn.get_max_item().
try:
    hn_id = load_obj('last_hn_id')
    print('loaded id:', hn_id)
except Exception:
    # `Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate rather than triggering the fallback.
    hn_id = hn.get_max_item()
    print('max_id from hn:', hn_id)

while len(hn_items) < 5:
    index += 1
    try:
        item = hn.get_item(hn_id)
    except:
        print('hn_id invalid', hn_id)
        hn_id -= 1
        continue
    # print('item', type(item))
    if item.item_type == "story" and item.url is not None:
        if len(hn_items) % 5 == 0:
            print(len(hn_items))
        hn_items.append({