Ejemplo n.º 1
0
    def import_feed(self, feed):
        """Import the deals of an XML deal feed into ``Item`` rows.

        Downloads ``feed.rss_url``, stamps ``feed.last_modified`` from the
        root element's ``date`` attribute and saves the feed, then creates or
        refreshes one ``Item`` per ``<deal>`` element (the deal title is used
        as the item's guid). Finally every ``Item`` of this feed that was not
        seen in the document is deleted.

        Args:
            feed: Feed model instance. ``rss_url`` is read, ``last_modified``
                is written, and the instance is saved.

        Returns:
            set: The ``Item`` instances created or updated by this run.

        Raises:
            IndexError: If the root element has no child elements.
            KeyError: If the root element has no ``date`` attribute.

        Errors from the download, the XML parser and the ORM propagate.
        """
        from apps.feeds.models import Item
        import xml.etree.ElementTree as ET

        # Fetch the Meituan XML document; always release the connection,
        # even when read() fails part-way through.
        response = urllib2.urlopen(feed.rss_url, timeout=30)
        try:
            feed_tmp = response.read()
        finally:
            response.close()

        # Parse the document. The deals live under the root's first child.
        # Index the element directly instead of calling getchildren(), which
        # no longer exists on current Python versions.
        feed_xml = ET.fromstring(feed_tmp)
        feed_data = feed_xml[0].findall("deal")

        # Publication date of the XML document.
        feed_date = feed_xml.attrib["date"]
        try:
            feed.last_modified = struct_to_datetime(feed_date)
        except Exception:
            # struct_to_datetime could not handle the value; fall back to
            # parse_date. Exception (not a bare except) keeps Ctrl-C and
            # interpreter shutdown working.
            feed.last_modified = parse_date(feed_date)

        feed.save()

        # Every deal is stamped with the document-level date, so parse it
        # once -- and not at all when the document contains no deals.
        deal_last_modified = parse_date(feed_date) if feed_data else None

        items = set()
        # guid -> Item already handled in this run, so duplicate deals reuse
        # the same instance without rescanning the whole set.
        seen_by_guid = {}

        # Extract the content of each deal.
        for x_item in feed_data:
            title = x_item.find("title").text
            guid = title  # the deal title doubles as the item's guid

            item = seen_by_guid.get(guid)
            if item is None:
                try:
                    item = Item.objects.get(guid=guid, feed=feed)
                except Item.DoesNotExist:
                    item = Item(guid=guid, last_modified=datetime(1900, 1, 1), feed=feed)

            # NOTE(review): no staleness check against item.last_modified is
            # made -- every deal is rewritten and saved on every import.
            item.title = title
            item.description = sanitise_html(self.result_content(x_item))
            item.link = x_item.find("deal_url").text
            item.small_image = x_item.find("medium_image_url").text
            item.big_image = x_item.find("large_image_url").text
            item.last_modified = deal_last_modified
            item.save()

            items.add(item)
            seen_by_guid[item.guid] = item

        # Drop items of this feed that are no longer present in the document.
        for item in Item.objects.filter(feed=feed):
            if item not in items:
                item.delete()

        return items
Ejemplo n.º 2
0
    def import_feed(self, feed):
        """Import the entries of an RSS/Atom feed into ``Item`` rows.

        Parses ``feed.rss_url`` with ``feedparser``, stamps
        ``feed.last_modified`` and saves the feed, then creates or refreshes
        one ``Item`` per entry (the entry title is used as the item's guid).
        Finally every ``Item`` of this feed that was not seen in the parsed
        feed is deleted.

        Args:
            feed: Feed model instance. ``rss_url`` is read, ``last_modified``
                is written, and the instance is saved.

        Returns:
            set: The ``Item`` instances created or updated by this run.

        Errors from feedparser attribute access (e.g. an entry without a
        title, link or parsed date) and from the ORM propagate.
        """
        from apps.feeds.models import Item

        feed_data = feedparser.parse(feed.rss_url)

        try:
            feed.last_modified = struct_to_datetime(feed_data.feed.updated_parsed)
        except Exception:
            # No usable feed-level timestamp: use the "last-modified" header,
            # or the current time when that header is absent. Exception (not
            # a bare except) keeps Ctrl-C and interpreter shutdown working.
            # NOTE(review): the default stamps local time with a +0000
            # offset -- confirm whether UTC was intended.
            feed.last_modified = parse_date(
                feed_data.headers.get("last-modified", datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000"))
            )

        feed.save()

        items = set()
        # guid -> Item already handled in this run, so duplicate entries
        # reuse the same instance without rescanning the whole set.
        seen_by_guid = {}

        for x_item in feed_data.entries:
            title = x_item.title
            guid = title  # the entry title doubles as the item's guid

            # A struct_time's seventh field is the weekday, which datetime()
            # would take as microseconds; only year..second are passed on.
            last_modified = datetime(*x_item.date_parsed[:6])

            item = seen_by_guid.get(guid)
            if item is None:
                try:
                    item = Item.objects.get(guid=guid, feed=feed)
                except Item.DoesNotExist:
                    item = Item(guid=guid, last_modified=datetime(1900, 1, 1), feed=feed)

            # NOTE(review): no staleness check against item.last_modified is
            # made -- every entry is rewritten and saved on every import.
            item.title = title
            item.description = sanitise_html(x_item.get("description", ""))
            item.link = x_item.link
            item.last_modified = last_modified
            item.save()

            items.add(item)
            seen_by_guid[item.guid] = item

        # Drop items of this feed that are no longer present in the feed.
        for item in Item.objects.filter(feed=feed):
            if item not in items:
                item.delete()

        return items