Ejemplo n.º 1
0
    def test_simple_xml(self, mock):
        """Reading a 200 RSS response stores its post and updates the source.

        Expects one post, an interval of 60, the etag ``"an-etag"`` and both
        ``last_success`` and ``last_change`` to move away from their seeds.
        """
        response = {
            "status": 200,
            "test_file": "rss_xhtml_body.xml",
            "content_type": "application/rss+xml",
        }
        self._populate_mock(mock, **response)

        # Seed both timestamps with a known value so the assertions below can
        # tell whether read_feed() replaced them.
        seeded_at = timezone.now()

        feed_source = Source(
            name="test1",
            feed_url=BASE_URL,
            interval=0,
            last_success=seeded_at,
            last_change=seeded_at,
        )
        feed_source.save()

        # A single read should pick up the post and the etag.
        read_feed(feed_source)
        feed_source.refresh_from_db()

        self.assertEqual(feed_source.status_code, 200)
        self.assertEqual(feed_source.posts.count(), 1)
        self.assertEqual(feed_source.interval, 60)
        self.assertEqual(feed_source.etag, "an-etag")
        self.assertNotEqual(feed_source.last_success, seeded_at)
        self.assertNotEqual(feed_source.last_change, seeded_at)
Ejemplo n.º 2
0
    def test_etags(self, mock):
        """A 200 read followed by a 304 read keeps the post count unchanged.

        The first read is expected to yield one post, interval 60 and the etag
        ``"an-etag"``; the second is expected to report 304, add no posts,
        raise the interval to 70 and leave the source live.
        """
        fresh_response = dict(
            status=200,
            test_file="rss_xhtml_body.xml",
            content_type="application/xml+rss",
        )
        not_modified_response = dict(
            status=304,
            test_file="empty_file.txt",
            content_type="application/xml+rss",
            etag="an-etag",
        )
        self._populate_mock(mock, **fresh_response)
        self._populate_mock(mock, **not_modified_response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        # First read: fetch the post and remember the etag.
        read_feed(feed_source)
        self.assertEqual(feed_source.status_code, 200)
        self.assertEqual(feed_source.post_set.count(), 1)
        self.assertEqual(feed_source.interval, 60)
        self.assertEqual(feed_source.etag, "an-etag")

        # Second read: a 304 with nothing new and a slightly longer interval.
        read_feed(feed_source)
        self.assertEqual(feed_source.post_set.count(), 1)
        self.assertEqual(feed_source.status_code, 304)
        self.assertEqual(feed_source.interval, 70)
        self.assertTrue(feed_source.live)
Ejemplo n.º 3
0
    def test_perm_redirect(self, mock):
        """A 301 response moves the source's feed_url to the Location target.

        The first read is expected to record the 301 and adopt the new URL;
        the second read, against the new URL, is expected to return 200 with
        one post.
        """
        moved_to = "http://new.feed.com/"

        redirect_response = dict(
            status=301,
            test_file="empty_file.txt",
            content_type="text/plain",
            headers={"Location": moved_to},
        )
        target_response = dict(
            status=200,
            test_file="rss_xhtml_body.xml",
            content_type="application/xml+rss",
            url=moved_to,
        )
        self._populate_mock(mock, **redirect_response)
        self._populate_mock(mock, **target_response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        # First read hits the redirect.
        read_feed(feed_source)
        self.assertEqual(feed_source.status_code, 301)
        self.assertEqual(feed_source.interval, 60)
        self.assertEqual(feed_source.feed_url, moved_to)

        # Second read goes to the new location.
        read_feed(feed_source)
        self.assertEqual(feed_source.status_code, 200)
        self.assertEqual(feed_source.post_set.count(), 1)
        self.assertEqual(feed_source.interval, 60)
        self.assertTrue(feed_source.live)
Ejemplo n.º 4
0
    def test_fucking_cloudflare(self, mock):
        """A source's ``is_cloudflare`` flag decides which response it sees.

        Two responses are registered for the same JSON file: a plain 200 and
        a 403 registered with ``is_cloudflare=True``. A source created with
        ``is_cloudflare=False`` is expected to end up with status 403, one
        created with ``is_cloudflare=True`` with status 200.
        """
        # NOTE(review): how _populate_mock uses is_cloudflare to pick a
        # response is not visible here - confirm against the helper.
        json_feed = dict(
            test_file="json_simple_two_entry.json",
            content_type="application/json",
        )
        self._populate_mock(mock, status=200, **json_feed)
        self._populate_mock(mock, status=403, is_cloudflare=True, **json_feed)

        plain_source = Source(
            name="test1",
            feed_url=BASE_URL,
            interval=0,
            is_cloudflare=False,
        )
        plain_source.save()

        read_feed(plain_source)
        self.assertEqual(plain_source.status_code, 403)

        cloudflare_source = Source(
            name="test1",
            feed_url=BASE_URL,
            interval=0,
            is_cloudflare=True,
        )
        cloudflare_source.save()

        read_feed(cloudflare_source)
        self.assertEqual(cloudflare_source.status_code, 200)
Ejemplo n.º 5
0
    def test_temp_redirect(self, mock):
        """A 302 is followed without changing feed_url until it has aged.

        Two consecutive reads are expected to follow the redirect, record it
        in ``last_302_url`` / ``last_302_start`` and keep the single post,
        with the interval going 60 then 80. After ``last_302_start`` is
        back-dated by a year, the next read is expected to adopt the target
        as ``feed_url`` and reset the 302 bookkeeping.
        """
        redirect_target = "http://new.feed.com/"

        self._populate_mock(
            mock,
            status=302,
            test_file="empty_file.txt",
            content_type="text/plain",
            headers={"Location": redirect_target},
        )
        self._populate_mock(
            mock,
            status=200,
            test_file="rss_xhtml_body.xml",
            content_type="application/xml+rss",
            url=redirect_target,
        )

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        self.assertIsNone(feed_source.last_302_start)

        # Reading twice must give the same redirect state both times; only
        # the polling interval differs between the first and second pass.
        for expected_interval in (60, 80):
            read_feed(feed_source)
            feed_source.refresh_from_db()

            self.assertEqual(feed_source.status_code, 200)
            # Where the redirect sent us.
            self.assertEqual(feed_source.last_302_url, redirect_target)
            self.assertIsNotNone(feed_source.last_302_start)
            # Following the redirect yields exactly one post.
            self.assertEqual(feed_source.posts.count(), 1)
            self.assertEqual(feed_source.interval, expected_interval)
            self.assertTrue(feed_source.live)

        # Pretend the redirect has been in place for a year so that the next
        # read treats it as permanent.
        feed_source.last_302_start = timezone.now() - timedelta(days=365)
        feed_source.save()
        read_feed(feed_source)
        feed_source.refresh_from_db()

        self.assertEqual(feed_source.status_code, 200)
        self.assertEqual(feed_source.last_302_url, ' ')
        self.assertIsNone(feed_source.last_302_start)
        self.assertEqual(feed_source.posts.count(), 1)
        self.assertEqual(feed_source.interval, 100)
        self.assertEqual(feed_source.feed_url, redirect_target)
        self.assertTrue(feed_source.live)
Ejemplo n.º 6
0
def importopml(request):
    """Import the feeds listed in an uploaded OPML file for the current user.

    Reads the upload from ``request.FILES["opml"]`` and walks every
    ``<outline>`` element that has a non-blank ``xmlUrl`` attribute. For each
    one the user is subscribed to the matching ``Source``, which is created
    first if no source with that feed URL exists yet.

    Renders ``importopml.html`` with ``imported`` (the sources touched, in
    document order) and ``count`` (the number of subscriptions created).
    """
    # NOTE(review): xml.dom.minidom is not hardened against maliciously
    # constructed XML (e.g. entity expansion) and this is a user upload -
    # consider a hardened parser.
    dom = minidom.parseString(request.FILES["opml"].read())

    count = 0
    imported = []

    for outline in dom.getElementsByTagName("outline"):
        # Strip once and use the stripped value everywhere, so that a padded
        # URL neither misses an existing source nor gets stored with padding.
        url = outline.getAttribute("xmlUrl").strip()
        if not url:
            continue  # folder/grouping outline, not a feed

        ns = Source.objects.filter(feed_url=url).first()
        if ns is not None:
            # Feed already exists - the user may already be subscribed to it.
            already_subscribed = Subscription.objects.filter(
                source=ns, user=request.user).exists()
            if not already_subscribed:
                us = Subscription(source=ns,
                                  user=request.user,
                                  name=ns.display_name)

                if ns.max_index > 10:  # don't flood people with all these old things
                    us.last_read = ns.max_index - 10

                us.save()
                count += 1

            ns.num_subs = ns.subscriptions.count()
            ns.save()
        else:
            # Feed does not exist yet, so this must also be a new subscription.
            ns = Source()
            ns.due_poll = datetime.datetime.utcnow().replace(tzinfo=utc)
            ns.site_url = outline.getAttribute("htmlUrl")
            ns.feed_url = url
            ns.name = outline.getAttribute("title")
            ns.save()

            us = Subscription(source=ns,
                              user=request.user,
                              name=ns.display_name)
            us.save()

            count += 1

        imported.append(ns)

    return render(request, 'importopml.html', {
        "imported": imported,
        "count": count,
    })
 def create(self, validated_data):
     """
     Build and save a Source from the validated data.

     When the data carries no (truthy) "user" and the serializer context has
     a request, that request's user is filled in before the Source is built.
     @param validated_data: inherited parameter
     @return: the saved Source instance
     """
     request = self.context.get("request")
     if not validated_data.get("user") and request:
         validated_data["user"] = request.user

     instance = Source(**validated_data)
     instance.save()
     return instance
Ejemplo n.º 8
0
    def test_feed_gone(self, mock):
        """A 410 response is expected to leave no posts and a non-live source."""
        gone_response = {
            "status": 410,
            "test_file": "empty_file.txt",
            "content_type": "text/plain",
        }
        self._populate_mock(mock, **gone_response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        read_feed(feed_source)

        # The server said the feed is gone for good.
        self.assertEqual(feed_source.status_code, 410)
        # Nothing could have been fetched.
        self.assertEqual(feed_source.post_set.count(), 0)
        self.assertFalse(feed_source.live)
Ejemplo n.º 9
0
    def test_server_error_2(self, mock):
        """A 503 response is expected to keep the source live at interval 120."""
        error_response = {
            "status": 503,
            "test_file": "empty_file.txt",
            "content_type": "text/plain",
        }
        self._populate_mock(mock, **error_response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        read_feed(feed_source)

        # The server error is recorded as-is.
        self.assertEqual(feed_source.status_code, 503)
        # Nothing could have been fetched.
        self.assertEqual(feed_source.post_set.count(), 0)
        self.assertTrue(feed_source.live)
        self.assertEqual(feed_source.interval, 120)
class RSSFeedWidget(DashboardWidget):
    """Dashboard widget that shows the newest post of an RSS feed.

    The widget's own fields (``url``, ``base_url``) are mirrored into a linked
    ``Source`` object on every save; the posts shown are read from that
    source.
    """

    template = "dashboardfeeds/rss.html"

    url = models.URLField(verbose_name=_("RSS feed source URL"))
    base_url = models.URLField(
        verbose_name=_("Base URL of related website"),
        help_text=_("The widget will have a link to visit a related website to read more news"),
    )
    text_only = models.BooleanField(
        verbose_name=_("Text only"),
        help_text=_("Do not show an image to depict the news item"),
        default=False,
    )

    # Managed by save(); not editable by users.
    rss_source = models.ForeignKey(Source, on_delete=models.CASCADE, editable=False, null=True)

    def save(self, *args, **kwargs):
        """Create or update the linked ``Source``, then save the widget."""
        # Local import keeps this block self-contained. timezone.now() gives
        # a datetime that matches the project's USE_TZ setting, unlike the
        # naive (and deprecated) datetime.utcnow().
        from django.utils import timezone

        # Update the linked RSS source object to transfer data into django-feeds
        if not self.rss_source:
            self.rss_source = Source()
        self.rss_source.name = self.title
        self.rss_source.feed_url = self.url
        self.rss_source.site_url = self.base_url

        now = timezone.now()
        self.rss_source.last_success = now
        self.rss_source.last_change = now

        self.rss_source.live = self.active
        self.rss_source.save()

        super().save(*args, **kwargs)

    def get_context(self, request):
        """Return the template context for this widget.

        ``result`` is the most recently created post of the linked source, or
        ``None`` when the source has no posts.
        """
        # first() fetches a single row instead of loading every post just to
        # pick the newest one.
        post = self.rss_source.posts.all().order_by("-created").first()
        feed = {
            "title": self.title,
            "url": self.rss_source.feed_url,
            "base_url": self.rss_source.site_url,
            "base_image": self.rss_source.image_url,
            "result": post,
            "hide_image": self.text_only,
        }
        return feed

    class Meta:
        verbose_name = _("RSS Widget")
        verbose_name_plural = _("RSS Widgets")
Ejemplo n.º 11
0
    def test_simple_json(self, mock):
        """Reading a 200 JSON feed response is expected to store two posts."""
        response = {
            "status": 200,
            "test_file": "json_simple_two_entry.json",
            "content_type": "application/json",
        }
        self._populate_mock(mock, **response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        # A single read should pick up both entries and the etag.
        read_feed(feed_source)

        self.assertEqual(feed_source.status_code, 200)
        self.assertEqual(feed_source.posts.count(), 2)
        self.assertEqual(feed_source.interval, 60)
        self.assertEqual(feed_source.etag, "an-etag")
Ejemplo n.º 12
0
    def test_sanitize_1(self, mock):
        """The first stored post of the JSON feed must not contain ``<script>``."""
        response = {
            "status": 200,
            "test_file": "json_simple_two_entry.json",
            "content_type": "application/json",
        }
        self._populate_mock(mock, **response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        read_feed(feed_source)
        self.assertEqual(feed_source.status_code, 200)

        first_post = feed_source.post_set.all()[0]
        self.assertNotIn("<script>", first_post.body)
Ejemplo n.º 13
0
    def test_not_a_feed(self, mock):
        """A 200 plain-text response is expected to yield no posts.

        The source is expected to stay live with its interval set to 120.
        """
        response = {
            "status": 200,
            "test_file": "spurious_text_file.txt",
            "content_type": "text/plain",
        }
        self._populate_mock(mock, **response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        read_feed(feed_source)

        # The server answered with a page, just not with a feed.
        self.assertEqual(feed_source.status_code, 200)
        # Nothing could have been parsed out of it.
        self.assertEqual(feed_source.post_set.count(), 0)
        self.assertEqual(feed_source.interval, 120)
        self.assertTrue(feed_source.live)
Ejemplo n.º 14
0
    def test_simple_xml(self, mock):
        """Reading a 200 RSS response is expected to store one post."""
        response = {
            "status": 200,
            "test_file": "rss_xhtml_body.xml",
            "content_type": "application/rss+xml",
        }
        self._populate_mock(mock, **response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        # A single read should pick up the post and the etag.
        read_feed(feed_source)

        self.assertEqual(feed_source.status_code, 200)
        self.assertEqual(feed_source.post_set.count(), 1)
        self.assertEqual(feed_source.interval, 60)
        self.assertEqual(feed_source.etag, "an-etag")
Ejemplo n.º 15
0
    def test_podcast(self, mock):
        """The first post of the JSON podcast feed should have one enclosure."""
        response = {
            "status": 200,
            "test_file": "podcast.json",
            "content_type": "application/json",
        }
        self._populate_mock(mock, **response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        read_feed(feed_source)
        self.assertEqual(feed_source.status_code, 200)

        first_post = feed_source.post_set.all()[0]
        self.assertEqual(first_post.enclosures.count(), 1)
Ejemplo n.º 16
0
    def test_feed_not_found(self, mock):
        """A 404 response is expected to keep the source live at interval 120."""
        missing_response = {
            "status": 404,
            "test_file": "empty_file.txt",
            "content_type": "text/plain",
        }
        self._populate_mock(mock, **missing_response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        read_feed(feed_source)
        feed_source.refresh_from_db()

        # The feed URL did not resolve.
        self.assertEqual(feed_source.status_code, 404)
        # Nothing could have been fetched.
        self.assertEqual(feed_source.posts.count(), 0)
        self.assertTrue(feed_source.live)
        self.assertEqual(feed_source.interval, 120)
Ejemplo n.º 17
0
    def test_sanitize_2(self, mock):
        """
            Second check that sanitization is applied, using a test case
            borrowed from the feedparser library: after reading the feed the
            source's name is expected to be "safe".
        """
        response = {
            "status": 200,
            "test_file": "sanitizer_bad_comment.json",
            "content_type": "application/json",
        }
        self._populate_mock(mock, **response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        # Reading the feed replaces the placeholder name with the feed's own.
        read_feed(feed_source)

        self.assertEqual(feed_source.status_code, 200)
        self.assertEqual(feed_source.name, "safe")
Ejemplo n.º 18
0
    def test_podcast(self, mock):
        """Reading the XML podcast feed sets the description and an enclosure.

        The source's description is expected to match the feed's, and the
        first stored post is expected to have exactly one enclosure.
        """
        response = {
            "status": 200,
            "test_file": "podcast.xml",
            "content_type": "application/rss+xml",
        }
        self._populate_mock(mock, **response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        read_feed(feed_source)

        expected_description = 'SU: Three nerds discussing tech, Apple, programming, and loosely related matters.'
        self.assertEqual(feed_source.description, expected_description)

        first_post = feed_source.post_set.all()[0]
        self.assertEqual(first_post.enclosures.count(), 1)
Ejemplo n.º 19
0
    def test_sanitize_1(self, mock):
        """
            Check that sanitization is applied: the first stored post of the
            RSS feed must not contain a ``<script>`` tag.
        """
        response = {
            "status": 200,
            "test_file": "rss_xhtml_body.xml",
            "content_type": "application/rss+xml",
        }
        self._populate_mock(mock, **response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        read_feed(feed_source)
        self.assertEqual(feed_source.status_code, 200)

        first_post = feed_source.post_set.all()[0]
        self.assertNotIn("<script>", first_post.body)
Ejemplo n.º 20
0
    def test_sanitize_attrs(self, mock):
        """The ``<img>`` tag is kept but its ``align``/``hspace`` attributes are not."""
        response = {
            "status": 200,
            "test_file": "sanitizer_img_attrs.xml",
            "content_type": "application/rss+xml",
        }
        self._populate_mock(mock, **response)

        feed_source = Source(name="test1", feed_url=BASE_URL, interval=0)
        feed_source.save()

        read_feed(feed_source)
        self.assertEqual(feed_source.status_code, 200)

        post_body = feed_source.post_set.all()[0].body

        # The image itself must be kept ...
        self.assertIn("<img", post_body)
        # ... while the presentational attributes must be gone.
        self.assertNotIn("align=", post_body)
        self.assertNotIn("hspace=", post_body)
Ejemplo n.º 21
0
    def test_permission_denied(self, mock):
        """A 403 response is expected to leave no posts and a non-live source."""
        denied_response = {
            "status": 403,
            "test_file": "empty_file.txt",
            "content_type": "text/plain",
        }
        self._populate_mock(mock, **denied_response)

        seeded_at = timezone.now()

        feed_source = Source(
            name="test1",
            feed_url=BASE_URL,
            interval=0,
            last_success=seeded_at,
        )
        feed_source.save()

        read_feed(feed_source)
        feed_source.refresh_from_db()

        # The server refused the request.
        self.assertEqual(feed_source.status_code, 403)
        # Nothing could have been fetched.
        self.assertEqual(feed_source.posts.count(), 0)
        self.assertFalse(feed_source.live)
Ejemplo n.º 22
0
def addfeed(request):
    """Subscribe the current user to a feed, discovering it if necessary.

    GET renders ``addfeed.html`` with the optional ``feed`` query parameter
    and the user's groups (subscriptions that have no source).

    POST fetches the URL given in ``feed`` and then either:

    * subscribes the user when the response parses as an XML or JSON feed
      with at least one entry, creating the ``Source`` if it is new; or
    * returns an HTML list of the feeds the page advertises through
      ``<link rel="alternate">`` tags, or "No feeds found".

    Any exception raised along the way is reported to the caller as a small
    HTML fragment instead of propagating.
    """
    # Local import keeps this block self-contained. Every value interpolated
    # into the HTML fragments below comes from the remote site or from the
    # request, so it has to be escaped.
    from html import escape

    try:
        feed = ""
        if request.method == 'GET':
            if "feed" in request.GET:
                feed = request.GET["feed"]
            groups = Subscription.objects.filter(
                Q(user=request.user) & Q(source=None))

            return render(request, "addfeed.html", {
                "feed": feed,
                "groups": groups
            })

        if "feed" in request.POST:
            feed = request.POST["feed"]

        # Identify ourselves and also stop our requests getting picked up by
        # google's cache.
        headers = {
            "User-Agent":
            "FeedThing/3.3 (+http://%s; Initial Feed Crawler)" %
            request.META["HTTP_HOST"],
            "Cache-Control":
            "no-cache,max-age=0",
            "Pragma":
            "no-cache"
        }

        # NOTE(review): verify=False disables TLS certificate checks and the
        # URL is user-supplied (possible SSRF) - left unchanged here, but
        # worth revisiting. The HTTP status code is deliberately not checked.
        ret = requests.get(feed, headers=headers, verify=False, timeout=15)

        isFeed = False
        content_type = ret.headers.get("Content-Type", "Not Set")
        feed_title = feed

        body = ret.text.strip()
        if "xml" in content_type or body[0:1] == "<":
            ff = feedparser.parse(body)  # imported by django-feed-reader
            isFeed = (len(ff.entries) > 0)
            if isFeed:
                # Fall back to the URL rather than failing on an untitled feed.
                feed_title = ff.feed.get("title", feed)
        if "json" in content_type or body[0:1] == "{":
            data = json.loads(body)
            isFeed = "items" in data and len(data["items"]) > 0
            if isFeed:
                feed_title = data.get("title", feed)

        if not isFeed:
            # Not a feed itself: look for feeds advertised by the page.
            soup = BeautifulSoup(body)
            feed_types = ('application/atom+xml', 'application/rss+xml',
                          'application/json')
            feedcount = 0
            rethtml = ""
            for link in soup.findAll(name='link'):
                if not (link.has_attr("rel") and link.has_attr("type")
                        and link.has_attr("href")):
                    continue
                if link['rel'][0] != "alternate" or link['type'] not in feed_types:
                    continue

                feedcount += 1
                try:
                    name = link['title']
                except KeyError:
                    name = "Feed %d" % feedcount

                # Always resolve against the page URL; `feed` must not be
                # overwritten here or later relative hrefs would resolve
                # against the previously found feed.
                link_url = urljoin(feed, link['href'])
                rethtml += '<li><form method="post" onsubmit="return false;"> <input type="hidden" name="feed" id="feed-%d" value="%s"><a href="#" onclick="addFeed(%d)" class="btn btn-xs btn-default">Subscribe</a> - %s</form></li>' % (
                    feedcount, escape(link_url, quote=True), feedcount,
                    escape(name))

            # TODO: when exactly one feed is found it could be imported
            # straight away instead of asking the user to pick it.
            if feedcount == 0:
                return HttpResponse("No feeds found")
            return HttpResponse(rethtml)

        parent = None
        if request.POST["group"] != "0":
            parent = get_object_or_404(Subscription,
                                       id=int(request.POST["group"]))
            if parent.user != request.user:
                return HttpResponse(
                    "<div>Internal error.<!--bad group --></div>")

        existing = Source.objects.filter(feed_url=feed).first()
        if existing is not None:
            # Feed already exists - only the subscription may be missing.
            already_subscribed = Subscription.objects.filter(
                Q(user=request.user) & Q(source=existing)).exists()
            if already_subscribed:
                return HttpResponse(
                    "<div>Already subscribed to this feed </div>")

            us = Subscription(source=existing,
                              user=request.user,
                              name=existing.display_name,
                              parent=parent)

            if existing.max_index > 10:  # don't flood people with all these old things
                us.last_read = existing.max_index - 10

            us.save()

            existing.num_subs = existing.subscriptions.count()
            existing.save()

            return HttpResponse("<div>Imported feed %s</div>" %
                                escape(us.name))

        # need to start checking feed parser errors here
        ns = Source()
        ns.name = feed_title
        ns.feed_url = feed
        ns.save()

        us = Subscription(source=ns,
                          user=request.user,
                          name=ns.display_name,
                          parent=parent)
        us.save()

        # The items could be parsed and inserted right here instead of
        # waiting for them to come round in the refresh cycle.
        return HttpResponse("<div>Imported feed %s</div>" % escape(ns.name))
    except Exception as xx:
        return HttpResponse("<div>Error %s: %s</div>" %
                            (xx.__class__.__name__, escape(str(xx))))