def test_well_formed(self):
    """Every well-formed fixture must parse without raising."""
    for feed_type in FeedHandlerTestCase.FEED_TYPES:
        fixture_path = f'api/tests/test_files/{feed_type}/well_formed.xml'
        with open(fixture_path, 'r') as fixture_file:
            content = fixture_file.read()
        feed_handler.text_2_d(content)
def test_malformed(self):
    """Every malformed fixture must raise QueryException when parsed."""
    for feed_type in FeedHandlerTestCase.FEED_TYPES:
        fixture_path = f'api/tests/test_files/{feed_type}/malformed.xml'
        with open(fixture_path, 'r') as fixture_file:
            content = fixture_file.read()
        with self.assertRaises(QueryException):
            feed_handler.text_2_d(content)
def _save_feed(url):
    """Fetch the feed at ``url``, parse it, and persist it with its entries.

    The feed and all parseable entries are written in a single database
    transaction. Entries the handler cannot parse are skipped silently.

    Returns the saved ``Feed`` instance.
    Raises ``QueryException('feed not found', 404)`` when the HTTP request
    fails for any reason.
    """
    try:
        response = rss_requests.get(url)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Chain the network error so the original cause is visible in logs.
        raise QueryException('feed not found', 404) from e

    with transaction.atomic():
        d = feed_handler.text_2_d(response.text)

        feed = feed_handler.d_feed_2_feed(d.feed, url)
        feed.with_subscription_data()
        feed.save()

        feed_entries = []
        for d_entry in d.get('entries', []):
            try:
                feed_entry = feed_handler.d_entry_2_feed_entry(d_entry)
            except ValueError:  # pragma: no cover
                # Unparseable entry: skip it, keep the rest of the feed.
                continue
            feed_entry.feed = feed
            feed_entries.append(feed_entry)

        # Single bulk INSERT instead of one query per entry.
        models.FeedEntry.objects.bulk_create(feed_entries)

    return feed
def test_d_feed_2_feed_entry_no_content(self):
    """An Atom entry without content must be rejected with ValueError."""
    fixture_path = 'api/tests/test_files/atom_1.0/well_formed_no_content.xml'
    with open(fixture_path, 'r') as fixture_file:
        content = fixture_file.read()
    parsed = feed_handler.text_2_d(content)
    with self.assertRaises(ValueError):
        feed_handler.d_entry_2_feed_entry(parsed.entries[0])
def test_d_feed_2_feed_entry_plaintext(self):
    """A plaintext-content Atom entry converts to a FeedEntry model."""
    fixture_path = 'api/tests/test_files/atom_1.0/well_formed_text.xml'
    with open(fixture_path, 'r') as fixture_file:
        content = fixture_file.read()
    parsed = feed_handler.text_2_d(content)
    entry = feed_handler.d_entry_2_feed_entry(parsed.entries[0])
    self.assertIsInstance(entry, models.FeedEntry)
def test_d_feed_2_feed_entry(self):
    """The first entry of each well-formed fixture converts to a FeedEntry."""
    for feed_type in FeedHandlerTestCase.FEED_TYPES:
        fixture_path = f'api/tests/test_files/{feed_type}/well_formed.xml'
        with open(fixture_path, 'r') as fixture_file:
            content = fixture_file.read()
        parsed = feed_handler.text_2_d(content)
        entry = feed_handler.d_entry_2_feed_entry(parsed.entries[0])
        self.assertIsInstance(entry, models.FeedEntry)
def test_d_entry_2_entry_tags(self):
    """Tag extraction yields a non-empty frozenset of strings per fixture."""
    for feed_type in FeedHandlerTestCase.FEED_TYPES:
        fixture_path = f'api/tests/test_files/{feed_type}/well_formed.xml'
        with open(fixture_path, 'r') as fixture_file:
            content = fixture_file.read()
        parsed = feed_handler.text_2_d(content)
        tags = feed_handler.d_entry_2_entry_tags(parsed.entries[0])
        self.assertIs(type(tags), frozenset)
        self.assertGreater(len(tags), 0, f'{feed_type} is empty')
        for tag in tags:
            self.assertIs(type(tag), str)
def test_d_feed_2_feed(self):
    """Feed conversion copies URL, title and home link from the parsed dict."""
    for feed_type in FeedHandlerTestCase.FEED_TYPES:
        fixture_path = f'api/tests/test_files/{feed_type}/well_formed.xml'
        with open(fixture_path, 'r') as fixture_file:
            content = fixture_file.read()
        parsed = feed_handler.text_2_d(content)
        url = 'http://www.example.com'
        feed = feed_handler.d_feed_2_feed(parsed.feed, url)
        self.assertEqual(feed.feed_url, url)
        self.assertEqual(feed.title, parsed.feed.get('title'))
        self.assertEqual(feed.home_url, parsed.feed.get('link'))
def scrape_feed(feed, response_text):
    """Merge freshly-fetched entries into the database for `feed`.

    For each parsed entry: if a row with the same (feed, url, updated_at)
    already exists it is updated in place; otherwise the entry is queued
    for a single bulk insert at the end. Finally stamps the feed's
    `db_updated_at` (in memory only — no save() is visible in this block).
    """
    # Parse the raw response body into a feedparser-style structure.
    d = feed_handler.text_2_d(response_text)
    new_feed_entries = []
    for d_entry in d.get('entries', []):
        feed_entry = None
        try:
            feed_entry = feed_handler.d_entry_2_feed_entry(d_entry)
        except ValueError:  # pragma: no cover
            # Unparseable entry: skip and continue with the rest.
            continue
        old_feed_entry = None
        # Match an existing row by feed + url + exact updated_at;
        # NULL updated_at needs the __isnull lookup, not equality.
        old_feed_entry_get_kwargs = {
            'feed': feed,
            'url': feed_entry.url,
        }
        if feed_entry.updated_at is None:
            old_feed_entry_get_kwargs['updated_at__isnull'] = True
        else:
            old_feed_entry_get_kwargs['updated_at'] = feed_entry.updated_at
        try:
            old_feed_entry = models.FeedEntry.objects.get(
                **old_feed_entry_get_kwargs)
        except models.FeedEntry.DoesNotExist:
            pass
        if old_feed_entry is not None:
            # NOTE(review): reassigning the primary key of an existing row and
            # including 'id' in update_fields looks suspicious — Django builds
            # the UPDATE's WHERE clause from the (now new) pk, so this may not
            # hit the old row. Confirm this is intentional.
            old_feed_entry.id = feed_entry.id
            old_feed_entry.content = feed_entry.content
            old_feed_entry.author_name = feed_entry.author_name
            old_feed_entry.created_at = feed_entry.created_at
            old_feed_entry.updated_at = feed_entry.updated_at
            old_feed_entry.save(update_fields=[
                'id', 'content', 'author_name', 'created_at', 'updated_at'
            ])
        else:
            # Genuinely new entry: attach to the feed and bulk-insert later.
            feed_entry.feed = feed
            new_feed_entries.append(feed_entry)
    models.FeedEntry.objects.bulk_create(new_feed_entries)
    # NOTE(review): utcnow() is naive (no tzinfo) and deprecated in 3.12;
    # presumably the model field expects naive UTC — confirm against settings.
    feed.db_updated_at = datetime.datetime.utcnow()
def _generate_feed(url):  # pragma: testing-subscription-setup-daemon-do-subscription
    """Fetch, parse, and persist the feed at ``url`` along with its entries.

    Unlike the request-path variant, HTTP errors propagate as raw
    ``requests`` exceptions. Returns the saved feed instance.
    """
    response = rss_requests.get(url)
    response.raise_for_status()

    parsed = feed_handler.text_2_d(response.text)
    feed = feed_handler.d_feed_2_feed(parsed.feed, url)
    feed.save()

    entries = []
    for entry_dict in parsed.get('entries', []):
        try:
            entry = feed_handler.d_entry_2_feed_entry(entry_dict)
        except ValueError:
            # Skip entries the handler cannot convert.
            continue
        entry.feed = feed
        entries.append(entry)

    models.FeedEntry.objects.bulk_create(entries)
    return feed
# monkey-patch the feed_handler logging feed_handler.logger = logger parser = argparse.ArgumentParser() parser.add_argument('feed_url') parser.add_argument('-s', '--save', action='store_true') parser.add_argument('-f', '--print-feed', action='store_true') parser.add_argument('-e', '--print-entries', action='store_true') args = parser.parse_args() response = rss_requests.get(args.feed_url) response.raise_for_status() d = feed_handler.text_2_d(response.text) feed = feed_handler.d_feed_2_feed(d.feed, args.feed_url) feed_entries = [] for index, d_entry in enumerate(d.get('entries', [])): feed_entry = None try: feed_entry = feed_handler.d_entry_2_feed_entry(d_entry) except ValueError: # pragma: no cover logger().exception(f'unable to parse d_entry {index}') continue feed_entry.feed = feed