def test_simple_xml(self, mock):
    """Poll an RSS document once and check the stored post, etag and timestamps."""
    self._populate_mock(
        mock,
        status=200,
        test_file="rss_xhtml_body.xml",
        content_type="application/rss+xml",
    )

    baseline = timezone.now()
    source = Source(
        name="test1",
        feed_url=BASE_URL,
        interval=0,
        last_success=baseline,
        last_change=baseline,
    )
    source.save()

    # One poll is expected to store the single post and the etag.
    read_feed(source)
    source.refresh_from_db()

    self.assertEqual(source.status_code, 200)
    self.assertEqual(source.posts.count(), 1)
    self.assertEqual(source.interval, 60)
    self.assertEqual(source.etag, "an-etag")
    # Both timestamps must differ from the values set before the poll.
    self.assertNotEqual(source.last_success, baseline)
    self.assertNotEqual(source.last_change, baseline)
def test_etags(self, mock):
    """A 200 poll followed by a 304 poll: post count stays at 1, interval goes 60 -> 70."""
    feed_type = "application/xml+rss"
    self._populate_mock(
        mock, status=200, test_file="rss_xhtml_body.xml", content_type=feed_type
    )
    self._populate_mock(
        mock,
        status=304,
        test_file="empty_file.txt",
        content_type=feed_type,
        etag="an-etag",
    )

    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    # First poll: one post and the etag.
    read_feed(source)
    self.assertEqual(source.status_code, 200)
    self.assertEqual(source.post_set.count(), 1)
    self.assertEqual(source.interval, 60)
    self.assertEqual(source.etag, "an-etag")

    # Second poll: 304, nothing new, small interval increase.
    read_feed(source)
    self.assertEqual(source.post_set.count(), 1)
    self.assertEqual(source.status_code, 304)
    self.assertEqual(source.interval, 70)
    self.assertTrue(source.live)
def test_perm_redirect(self, mock):
    """A 301 rewrites feed_url; the following poll reads the feed from the new URL."""
    moved_to = "http://new.feed.com/"
    self._populate_mock(
        mock,
        status=301,
        test_file="empty_file.txt",
        content_type="text/plain",
        headers={"Location": moved_to},
    )
    self._populate_mock(
        mock,
        status=200,
        test_file="rss_xhtml_body.xml",
        content_type="application/xml+rss",
        url=moved_to,
    )

    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    # First poll hits the redirect and records the new address.
    read_feed(source)
    self.assertEqual(source.status_code, 301)
    self.assertEqual(source.interval, 60)
    self.assertEqual(source.feed_url, moved_to)

    # Second poll fetches from the new address.
    read_feed(source)
    self.assertEqual(source.status_code, 200)
    self.assertEqual(source.post_set.count(), 1)
    self.assertEqual(source.interval, 60)
    self.assertTrue(source.live)
def test_fucking_cloudflare(self, mock):
    """Status code seen by a source depends on its is_cloudflare flag (403 vs 200)."""
    shared = {
        "test_file": "json_simple_two_entry.json",
        "content_type": "application/json",
    }
    self._populate_mock(mock, status=200, **shared)
    self._populate_mock(mock, status=403, is_cloudflare=True, **shared)

    # A source not flagged as cloudflare ends up with a 403.
    plain = Source(name="test1", feed_url=BASE_URL, interval=0, is_cloudflare=False)
    plain.save()
    read_feed(plain)
    self.assertEqual(plain.status_code, 403)

    # A source flagged as cloudflare ends up with a 200.
    flagged = Source(name="test1", feed_url=BASE_URL, interval=0, is_cloudflare=True)
    flagged.save()
    read_feed(flagged)
    self.assertEqual(flagged.status_code, 200)
def test_temp_redirect(self, mock):
    """A 302 is followed and tracked; once the tracking start is a year old it becomes permanent."""
    target = "http://new.feed.com/"
    self._populate_mock(
        mock,
        status=302,
        test_file="empty_file.txt",
        content_type="text/plain",
        headers={"Location": target},
    )
    self._populate_mock(
        mock,
        status=200,
        test_file="rss_xhtml_body.xml",
        content_type="application/xml+rss",
        url=target,
    )

    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()
    self.assertIsNone(source.last_302_start)

    # First poll: the redirect is followed and remembered.
    read_feed(source)
    source.refresh_from_db()
    self.assertEqual(source.status_code, 200)
    self.assertEqual(source.last_302_url, target)  # where the redirect led
    self.assertIsNotNone(source.last_302_start)
    self.assertEqual(source.posts.count(), 1)  # one post after following the redirect
    self.assertEqual(source.interval, 60)
    self.assertTrue(source.live)

    # Second poll: same redirect state, no new posts, interval grows.
    read_feed(source)
    source.refresh_from_db()
    self.assertEqual(source.status_code, 200)
    self.assertEqual(source.last_302_url, target)
    self.assertIsNotNone(source.last_302_start)
    self.assertEqual(source.posts.count(), 1)
    self.assertEqual(source.interval, 80)
    self.assertTrue(source.live)

    # Backdate the redirect start by a year so the next poll makes it permanent.
    source.last_302_start = timezone.now() - timedelta(days=365)
    source.save()
    read_feed(source)
    source.refresh_from_db()
    self.assertEqual(source.status_code, 200)
    self.assertEqual(source.last_302_url, ' ')
    self.assertIsNone(source.last_302_start)
    self.assertEqual(source.posts.count(), 1)
    self.assertEqual(source.interval, 100)
    self.assertEqual(source.feed_url, target)
    self.assertTrue(source.live)
def importopml(request):
    """
    Import feed subscriptions from an uploaded OPML file.

    Reads the upload in ``request.FILES["opml"]``, walks every ``<outline>``
    element that has a non-blank ``xmlUrl`` and, for each one, either
    subscribes the requesting user to the matching existing Source or creates
    a new Source plus Subscription.

    Renders ``importopml.html`` with ``imported`` (the Source objects handled)
    and ``count`` (the number of subscriptions created).
    """
    # NOTE(review): minidom parses user-supplied XML here; the stdlib XML
    # parsers are not hardened against maliciously constructed documents.
    dom = minidom.parseString(request.FILES["opml"].read())

    count = 0
    imported = []

    for outline in dom.getElementsByTagName("outline"):
        url = outline.getAttribute("xmlUrl")
        if not url.strip():
            # grouping/folder outlines carry no feed URL
            continue

        # One query instead of count() followed by an index lookup.
        ns = Source.objects.filter(feed_url=url).first()
        if ns is not None:
            # feed already exists - so there may already be a user subscription for it
            already_subscribed = Subscription.objects.filter(
                source=ns, user=request.user
            ).exists()
            if not already_subscribed:
                us = Subscription(source=ns, user=request.user, name=ns.display_name)
                if ns.max_index > 10:
                    # don't flood people with all these old things
                    us.last_read = ns.max_index - 10
                us.save()
                count += 1

            ns.num_subs = ns.subscriptions.count()
            ns.save()
        else:
            # Feed does not already exist, so it must also be a new sub
            ns = Source()
            # Timezone-aware "now" without the deprecated utcnow() call.
            ns.due_poll = datetime.datetime.now(datetime.timezone.utc)
            ns.site_url = outline.getAttribute("htmlUrl")
            ns.feed_url = url
            ns.name = outline.getAttribute("title")
            ns.save()

            us = Subscription(source=ns, user=request.user, name=ns.display_name)
            us.save()
            count += 1

        # NOTE(review): recorded for both existing and newly created sources;
        # confirm this matches the intended nesting of the original code.
        imported.append(ns)

    return render(request, 'importopml.html', {"imported": imported, "count": count})
def create(self, validated_data):
    """
    Create and save a Source, defaulting ``user`` to the requesting user.

    The override exists so that ``request.user`` is attached to the Source
    when the validated data carries no user of its own.

    @param validated_data: inherited parameter
    @return: instance
    """
    user_missing = not validated_data.get("user")
    request = self.context.get("request")
    if user_missing and request:
        validated_data["user"] = request.user

    instance = Source(**validated_data)
    instance.save()
    return instance
def test_feed_gone(self, mock):
    """A 410 response stores no posts and marks the source as not live."""
    self._populate_mock(
        mock,
        status=410,
        test_file="empty_file.txt",
        content_type="text/plain",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    read_feed(source)

    self.assertEqual(source.status_code, 410)
    self.assertEqual(source.post_set.count(), 0)  # nothing could have been fetched
    self.assertFalse(source.live)
def test_server_error_2(self, mock):
    """A 503 response keeps the source live, stores nothing and sets the interval to 120."""
    self._populate_mock(
        mock,
        status=503,
        test_file="empty_file.txt",
        content_type="text/plain",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    read_feed(source)

    self.assertEqual(source.status_code, 503)  # server-side error
    self.assertEqual(source.post_set.count(), 0)  # nothing could have been fetched
    self.assertTrue(source.live)
    self.assertEqual(source.interval, 120)
class RSSFeedWidget(DashboardWidget):
    """Dashboard widget that displays the most recently created post of an RSS feed.

    Each widget owns a linked ``Source`` row; ``save()`` creates it on first
    save and copies the widget's settings onto it on every save.
    """

    template = "dashboardfeeds/rss.html"

    url = models.URLField(verbose_name=_("RSS feed source URL"))
    base_url = models.URLField(
        verbose_name=_("Base URL of related website"),
        help_text=_("The widget will have a link to visit a related website to read more news"),
    )
    text_only = models.BooleanField(
        verbose_name=_("Text only"),
        help_text=_("Do not show an image to depict the news item"),
        default=False,
    )

    # Linked feed record; populated by save(), not editable in forms.
    rss_source = models.ForeignKey(Source, on_delete=models.CASCADE, editable=False, null=True)

    def save(self, *args, **kwargs):
        """Sync the linked Source with this widget's fields, then save the widget."""
        # Local import so this block does not depend on a module-level name.
        from django.utils import timezone

        # Update the linked RSS source object to transfer data into django-feeds
        if not self.rss_source:
            self.rss_source = Source()

        # timezone.now() follows the project's USE_TZ setting, unlike the
        # naive value returned by datetime.utcnow().
        now = timezone.now()
        self.rss_source.name = self.title
        self.rss_source.feed_url = self.url
        self.rss_source.site_url = self.base_url
        self.rss_source.last_success = now
        self.rss_source.last_change = now
        self.rss_source.live = self.active
        self.rss_source.save()

        super().save(*args, **kwargs)

    def get_context(self, request):
        """Return the template context: feed metadata plus the newest post (or None)."""
        # first() fetches a single row instead of loading every post to take
        # index 0; it returns None when the feed has no posts.
        newest_post = self.rss_source.posts.all().order_by("-created").first()
        return {
            "title": self.title,
            "url": self.rss_source.feed_url,
            "base_url": self.rss_source.site_url,
            "base_image": self.rss_source.image_url,
            "result": newest_post,
            "hide_image": self.text_only,
        }

    class Meta:
        verbose_name = _("RSS Widget")
        verbose_name_plural = _("RSS Widgets")
def test_simple_json(self, mock):
    """Poll a JSON feed once and check that both entries and the etag are stored."""
    self._populate_mock(
        mock,
        status=200,
        test_file="json_simple_two_entry.json",
        content_type="application/json",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    # A single poll should bring in the posts and the etag.
    read_feed(source)

    self.assertEqual(source.status_code, 200)
    self.assertEqual(source.posts.count(), 2)  # both entries were stored
    self.assertEqual(source.interval, 60)
    self.assertEqual(source.etag, "an-etag")
def test_sanitize_1(self, mock):
    """The first post read from the JSON feed must not contain a <script> tag."""
    self._populate_mock(
        mock,
        status=200,
        test_file="json_simple_two_entry.json",
        content_type="application/json",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    # Poll once so the posts exist.
    read_feed(source)
    self.assertEqual(source.status_code, 200)

    first_post = source.post_set.all()[0]
    self.assertFalse("<script>" in first_post.body)
def test_not_a_feed(self, mock):
    """A 200 response that is plain text yields no posts and an interval of 120."""
    self._populate_mock(
        mock,
        status=200,
        test_file="spurious_text_file.txt",
        content_type="text/plain",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    read_feed(source)

    self.assertEqual(source.status_code, 200)  # a page came back, but it is not a feed
    self.assertEqual(source.post_set.count(), 0)  # so nothing could be stored
    self.assertEqual(source.interval, 120)
    self.assertTrue(source.live)
def test_simple_xml(self, mock):
    """Poll an RSS document once and check the stored post, interval and etag."""
    self._populate_mock(
        mock,
        status=200,
        test_file="rss_xhtml_body.xml",
        content_type="application/rss+xml",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    # A single poll should bring in the one post and the etag.
    read_feed(source)

    self.assertEqual(source.status_code, 200)
    self.assertEqual(source.post_set.count(), 1)
    self.assertEqual(source.interval, 60)
    self.assertEqual(source.etag, "an-etag")
def test_podcast(self, mock):
    """The first post read from the JSON podcast feed has exactly one enclosure."""
    self._populate_mock(
        mock,
        status=200,
        test_file="podcast.json",
        content_type="application/json",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    # Poll once so the posts exist.
    read_feed(source)
    self.assertEqual(source.status_code, 200)

    first_post = source.post_set.all()[0]
    self.assertEqual(first_post.enclosures.count(), 1)
def test_feed_not_found(self, mock):
    """A 404 response keeps the source live, stores nothing and sets the interval to 120."""
    self._populate_mock(
        mock,
        status=404,
        test_file="empty_file.txt",
        content_type="text/plain",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    read_feed(source)
    source.refresh_from_db()

    self.assertEqual(source.status_code, 404)
    self.assertEqual(source.posts.count(), 0)  # nothing could have been fetched
    self.assertTrue(source.live)
    self.assertEqual(source.interval, 120)
def test_sanitize_2(self, mock):
    """
    Second check that sanitization is applied, using a test case
    borrowed from the feedparser library.
    """
    self._populate_mock(
        mock,
        status=200,
        test_file="sanitizer_bad_comment.json",
        content_type="application/json",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    # Polling the feed updates the source name.
    read_feed(source)

    self.assertEqual(source.status_code, 200)
    self.assertEqual(source.name, "safe")
def test_podcast(self, mock):
    """The XML podcast feed sets the source description and gives the first post one enclosure."""
    self._populate_mock(
        mock,
        status=200,
        test_file="podcast.xml",
        content_type="application/rss+xml",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    # Poll once so the description and posts are populated.
    read_feed(source)

    expected_description = (
        'SU: Three nerds discussing tech, Apple, programming, and loosely related matters.'
    )
    self.assertEqual(source.description, expected_description)

    first_post = source.post_set.all()[0]
    self.assertEqual(first_post.enclosures.count(), 1)
def test_sanitize_1(self, mock):
    """
    Check that feedparser's sanitization is applied: no <script> tag
    in the first stored post.
    """
    self._populate_mock(
        mock,
        status=200,
        test_file="rss_xhtml_body.xml",
        content_type="application/rss+xml",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    # Poll once so the post exists.
    read_feed(source)
    self.assertEqual(source.status_code, 200)

    first_post = source.post_set.all()[0]
    self.assertFalse("<script>" in first_post.body)
def test_sanitize_attrs(self, mock):
    """The <img> tag survives in the post body, but align= and hspace= do not."""
    self._populate_mock(
        mock,
        status=200,
        test_file="sanitizer_img_attrs.xml",
        content_type="application/rss+xml",
    )
    source = Source(name="test1", feed_url=BASE_URL, interval=0)
    source.save()

    # Poll once so the post exists.
    read_feed(source)
    self.assertEqual(source.status_code, 200)

    post_body = source.post_set.all()[0].body
    self.assertTrue("<img" in post_body)
    self.assertFalse("align=" in post_body)
    self.assertFalse("hspace=" in post_body)
def test_permission_denied(self, mock):
    """A 403 response stores no posts and marks the source as not live."""
    self._populate_mock(
        mock,
        status=403,
        test_file="empty_file.txt",
        content_type="text/plain",
    )
    started = timezone.now()
    source = Source(name="test1", feed_url=BASE_URL, interval=0, last_success=started)
    source.save()

    read_feed(source)
    source.refresh_from_db()

    self.assertEqual(source.status_code, 403)
    self.assertEqual(source.posts.count(), 0)  # nothing could have been fetched
    self.assertFalse(source.live)
def addfeed(request):
    """
    Show the add-feed form (GET) or subscribe the user to a feed (POST).

    GET renders ``addfeed.html`` with the optional ``feed`` query value and
    the user's subscriptions that have no source (used as groups).

    POST fetches the URL in ``feed``:

    * if the response parses as an XML or JSON feed with at least one entry,
      the user is subscribed to it (creating the Source when needed) under
      the subscription chosen by ``group`` ("0" means no parent);
    * otherwise the response is scanned for ``<link rel="alternate">`` feed
      references and an HTML list of candidates is returned, or
      "No feeds found".

    Any exception is reported as an HTML fragment instead of propagating.
    Text that originates from the remote site or from exception messages is
    HTML-escaped before being placed in a response.
    """
    from html import escape  # stdlib; local import keeps the block self-contained

    try:
        if request.method == 'GET':
            groups = Subscription.objects.filter(
                Q(user=request.user) & Q(source=None))
            return render(request, "addfeed.html", {
                "feed": request.GET.get("feed", ""),
                "groups": groups,
            })

        feed = request.POST.get("feed", "")

        # identify ourselves and also stop our requests getting picked up by google's cache
        headers = {
            "User-Agent": "FeedThing/3.3 (+http://%s; Initial Feed Crawler)" % request.META["HTTP_HOST"],
            "Cache-Control": "no-cache,max-age=0",
            "Pragma": "no-cache",
        }

        # NOTE(review): verify=False turns off TLS certificate checking and the
        # URL comes straight from the user; kept as-is, but worth revisiting.
        # Return codes are deliberately not checked here.
        ret = requests.get(feed, headers=headers, verify=False, timeout=15)

        content_type = ret.headers.get("Content-Type", "Not Set")
        feed_title = feed
        body = ret.text.strip()

        is_feed = False
        if "xml" in content_type or body[0:1] == "<":
            parsed = feedparser.parse(body)  # are we a feed?
            is_feed = len(parsed.entries) > 0
            if is_feed:
                feed_title = parsed.feed.title
        if "json" in content_type or body[0:1] == "{":
            data = json.loads(body)
            is_feed = "items" in data and len(data["items"]) > 0
            if is_feed:
                feed_title = data["title"]

        if not is_feed:
            # Not a feed itself: look for feeds advertised by the page.
            feed_types = (
                'application/atom+xml',
                'application/rss+xml',
                'application/json',
            )
            soup = BeautifulSoup(body)
            feedcount = 0
            candidates = []
            for link in soup.findAll(name='link'):
                if not (link.has_attr("rel") and link.has_attr("type")):
                    continue
                if link['rel'][0] != "alternate" or link['type'] not in feed_types:
                    continue

                feedcount += 1
                if link.has_attr('title'):
                    name = link['title']
                else:
                    name = "Feed %d" % feedcount
                # urljoin resolves hrefs that are relative to the fetched page
                href = urljoin(feed, link['href'])

                # href and name come from the remote page, so escape both
                candidates.append(
                    '<li><form method="post" onsubmit="return false;"> <input type="hidden" name="feed" id="feed-%d" value="%s"><a href="#" onclick="addFeed(%d)" class="btn btn-xs btn-default">Subscribe</a> - %s</form></li>' % (
                        feedcount, escape(href), feedcount, escape(name)))

            if feedcount == 0:
                return HttpResponse("No feeds found")
            return HttpResponse("".join(candidates))

        parent = None
        if request.POST["group"] != "0":
            parent = get_object_or_404(Subscription, id=int(request.POST["group"]))
            if parent.user != request.user:
                return HttpResponse(
                    "<div>Internal error.<!--bad group --></div>")

        # One query instead of count() followed by an index lookup.
        existing = Source.objects.filter(feed_url=feed).first()
        if existing is not None:
            # feed already exists
            already_subscribed = Subscription.objects.filter(
                Q(user=request.user) & Q(source=existing)).exists()
            if already_subscribed:
                return HttpResponse(
                    "<div>Already subscribed to this feed </div>")

            us = Subscription(source=existing, user=request.user,
                              name=existing.display_name, parent=parent)
            if existing.max_index > 10:
                # don't flood people with all these old things
                us.last_read = existing.max_index - 10
            us.save()

            existing.num_subs = existing.subscriptions.count()
            existing.save()
            return HttpResponse(
                "<div>Imported feed %s</div>" % escape(str(us.name)))

        # TODO: start checking feed parser errors here
        ns = Source()
        ns.name = feed_title
        ns.feed_url = feed
        ns.save()

        us = Subscription(source=ns, user=request.user,
                          name=ns.display_name, parent=parent)
        us.save()

        # Items are not inserted here; they arrive with the next refresh cycle.
        return HttpResponse(
            "<div>Imported feed %s</div>" % escape(str(ns.name)))

    except Exception as xx:
        # Exception text may echo remote or user-supplied content; escape it.
        return HttpResponse("<div>Error %s: %s</div>" %
                            (xx.__class__.__name__, escape(str(xx))))