def test_parse_ensure_temporary_error_on_unknown_status(self):
    response = feedparser.FeedParserDict()
    response.status = 500
    f = create_dynamic_parse_func(response)
    scraper = Scraper(f, self.feed)
    with self.assertRaises(TemporaryFeedError):
        scraper.parse(False)
def test_parse_ensure_broken_feed_on_404(self):
    response = feedparser.FeedParserDict()
    response.status = 404
    f = create_dynamic_parse_func(response)
    scraper = Scraper(f, self.feed)
    with self.assertRaises(BrokenFeed):
        scraper.parse(False)
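# A minimal sketch of the two exceptions the tests above exercise, assuming the
# real module defines them as plain Exception subclasses (the names come from
# the tests; the docstrings here are illustrative).
class TemporaryFeedError(Exception):
    """The fetch failed with a status that may succeed on retry (e.g. a 5xx)."""

class BrokenFeed(Exception):
    """The feed is gone or permanently unreachable (e.g. a 404)."""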
def test_parse_ensure_proper_return_on_success(self):
    response = feedparser.FeedParserDict()
    response.status = 200
    response['feed'] = feedparser.FeedParserDict()
    response['feed']['title'] = 'Bola'
    f = create_dynamic_parse_func(response)
    scraper = Scraper(f, self.feed)
    self.assertEqual(response, scraper.parse(False))
def test_find_last_updated(self):
    expected_time_raw = datetime.now() + timedelta(days=1)
    expected_time = make_aware(
        datetime.fromtimestamp(time.mktime(expected_time_raw.timetuple()))
    )
    entry_list = [
        {'updated_parsed': datetime.now().timetuple()},
        {'published_parsed': expected_time_raw.timetuple()},
        {'created_parsed': (datetime.now() - timedelta(days=1)).timetuple()},
    ]
    scraper = Scraper(create_dynamic_parse_func(None), self.feed)
    response = scraper._find_last_updated(entry_list)
    self.assertEqual(expected_time, response)
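# One plausible shape for Scraper._find_last_updated, consistent with the test
# above (an assumption, not necessarily the author's implementation). It reuses
# datetime, time, and make_aware, which this module already imports: for each
# entry it takes the first of feedparser's three timestamp keys that is set,
# converts the struct_time to an aware datetime, and returns the most recent.
def _find_last_updated(self, entries):
    times = []
    for entry in entries:
        for key in ('updated_parsed', 'published_parsed', 'created_parsed'):
            if entry.get(key):
                times.append(
                    make_aware(datetime.fromtimestamp(time.mktime(entry[key])))
                )
                break
    return max(times) if times else None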
def test_has_updated_past(self):
    self.feed.last_updated_at = make_aware(datetime.now())
    # The only entry was updated a day ago, i.e. before the feed's
    # last_updated_at, so the scraper should report no update.
    past = datetime.now() - timedelta(days=1)
    entry_list = [
        {'updated_parsed': past.timetuple()},
    ]
    feed_dict = feedparser.FeedParserDict()
    feed_dict['entries'] = entry_list
    f = create_dynamic_parse_func(feed_dict)
    scraper = Scraper(f, self.feed)
    self.assertFalse(scraper._has_updated(feed_dict, False))
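# A minimal sketch of the create_dynamic_parse_func helper the tests above rely
# on, assuming the Scraper invokes the injected parse function in place of
# feedparser.parse: it ignores whatever arguments it receives and returns the
# canned response it was built with.
def create_dynamic_parse_func(response):
    def parse(*args, **kwargs):
        return response
    return parse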
def get(self, request, *args, **kwargs):
    feed = self.get_object()
    scraper = Scraper(feedparser.parse, feed)
    updated, parsed_feed = scraper.check_feed()
    if updated:
        feed.update_feed_data(parsed_feed)
        feed.update_feed_entries(parsed_feed.entries)
        feed.save()
        messages.success(request, 'Found new updates. Enjoy!')
    else:
        messages.info(request, 'Nothing new yet...')
    return redirect(reverse('feed_detail', kwargs={'pk': feed.pk}))
def form_valid(self, form):
    feed = form.save(commit=False)
    scraper = Scraper(feedparser.parse, feed)
    updated, parsed_feed = scraper.check_feed()
    if updated:
        feed.update_feed_data(parsed_feed)
        # Save first so the feed has a primary key before entries
        # are created against it, then again after they are attached.
        feed.save()
        feed.update_feed_entries(parsed_feed.entries)
        feed.save()
    # Sets success URL to the feed's detail page
    self.success_url = reverse('feed_detail', kwargs={'pk': feed.pk})
    return super().form_valid(form)
def do_work(feed):
    print('Starting work on {}'.format(feed))
    time.sleep(.1)  # Pretend to do some lengthy work.
    # Hold the lock so the whole print completes; otherwise threads can
    # interleave their output on a single line.
    with lock:
        print(threading.current_thread().name, feed)
    scraper = Scraper(feedparser.parse, feed)
    updated, parsed_feed = scraper.check_feed()
    if updated:
        print('Found updates for feed {}'.format(feed))
        feed.update_feed_data(parsed_feed)
        feed.update_feed_entries(parsed_feed.entries)
        feed.save()
    else:
        print('No updates for feed {}'.format(feed))
    print('Feed {} done'.format(feed))
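# A hedged sketch of how do_work might be driven, assuming a module-level lock
# and a plain script or management-command context; update_all_feeds and
# max_workers are illustrative names, not part of the original code.
import threading
from concurrent.futures import ThreadPoolExecutor

lock = threading.Lock()  # Shared by do_work to keep each print atomic.

def update_all_feeds(feeds, max_workers=4):
    # Fan the feeds out across a small pool of worker threads; draining the
    # map iterator blocks until every feed is done and surfaces any exception
    # raised inside a worker.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        list(executor.map(do_work, feeds))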