Beispiel #1
0
def update_feed(parsed_items, feed_id):
    """
    Create item instances from parsed items and
    update/replace given feed's items & last_updated_at

    Items of the feed whose ``bookmark`` flag is False are deleted and
    replaced by new ``Item`` rows built from ``parsed_items``; items with
    ``bookmark`` set are left untouched. Nothing is changed when
    ``parsed_items`` is empty or None.

    :param parsed_items: List - Items within feed; each entry is read with
        ``.get`` for the keys "title", "link", "description", "summary"
        and "published"
    :param feed_id: str - Feed's PK
    :return: None
    """
    if not parsed_items:
        return None

    feed = Feed.objects.get(pk=feed_id)

    # Build every replacement *before* deleting anything: if converting an
    # entry fails (e.g. str_to_datetime raises on a malformed date), the
    # feed keeps its current items instead of being left empty.
    items_list = [
        Item(
            title=item.get("title"),
            link=item.get("link"),
            description=item.get("description"),
            summary=item.get("summary"),
            feed=feed,
            published_at=str_to_datetime(item.get("published")),
        )
        for item in parsed_items
    ]

    feed.items.filter(bookmark=False).delete()
    Item.objects.bulk_create(items_list)
    feed.last_updated_at = dt.datetime.utcnow()
    feed.save()
Beispiel #2
0
    def import_feed(self, feed):
        """
        Parse ``feed.rss_url`` with feedparser and sync the feed's stored items.

        The feed's ``last_modified`` is refreshed and saved, one ``Item`` per
        distinct entry title (used as the guid) is created or updated, and
        every stored item of the feed that is not present in the parsed data
        is deleted.

        :param feed: feed object exposing ``rss_url``, ``last_modified`` and ``save()``
        :return: set of the ``Item`` instances that were created or updated
        """
        from apps.feeds.models import Item

        feed_data = feedparser.parse(feed.rss_url)

        try:
            feed.last_modified = struct_to_datetime(feed_data.feed.updated_parsed)
        except Exception:
            # Best-effort fallback: use the HTTP Last-Modified header, or the
            # current time when that header is missing as well.
            feed.last_modified = parse_date(
                feed_data.headers.get("last-modified", datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000"))
            )

        feed.save()

        items = set()
        # guid -> already processed Item, so duplicate entries in the same
        # document reuse one instance without rescanning the whole set.
        items_by_guid = {}
        for x_item in feed_data.entries:
            guid = x_item.title
            # Only the first six struct_time fields (year..second) map onto
            # datetime(); the seventh is the weekday and must not be passed
            # on as microseconds.
            last_modified = datetime(*x_item.date_parsed[:6])

            item = items_by_guid.get(guid)
            if item is None:
                try:
                    item = Item.objects.get(guid=guid, feed=feed)
                except Item.DoesNotExist:
                    item = Item(guid=guid, last_modified=datetime(1900, 1, 1), feed=feed)

            # Every entry is rewritten on each import; the stored timestamp is
            # deliberately not consulted (the former freshness check was
            # short-circuited with ``True or``).
            item.title = x_item.title
            item.description = sanitise_html(x_item.get("description", ""))
            item.link = x_item.link
            item.last_modified = last_modified
            item.save()

            items.add(item)
            items_by_guid[item.guid] = item

        # Drop stored items that no longer appear in the feed.
        for item in Item.objects.filter(feed=feed):
            if item not in items:
                item.delete()

        return items
Beispiel #3
0
    def import_feed(self, feed):
        """
        Download the XML deal feed at ``feed.rss_url`` and sync stored items.

        The feed's ``last_modified`` is set from the root element's ``date``
        attribute and saved, one ``Item`` per distinct deal title (used as the
        guid) is created or updated, and every stored item of the feed that is
        not present in the document is deleted.

        :param feed: feed object exposing ``rss_url``, ``last_modified`` and ``save()``
        :return: set of the ``Item`` instances that were created or updated
        """
        from apps.feeds.models import Item
        from contextlib import closing
        import xml.etree.ElementTree as ET

        # Fetch the Meituan XML; closing() releases the connection even if
        # the read fails.
        with closing(urllib2.urlopen(feed.rss_url, timeout=30)) as response:
            feed_tmp = response.read()

        # Parse the document root.
        # NOTE(review): ElementTree is not hardened against maliciously
        # constructed XML; confirm the source is trusted.
        feed_xml = ET.fromstring(feed_tmp)
        # list(element) replaces the deprecated Element.getchildren().
        feed_data = list(feed_xml)[0].findall("deal")

        # Publication date of the XML document.
        published_raw = feed_xml.attrib["date"]
        try:
            feed.last_modified = struct_to_datetime(published_raw)
        except Exception:
            feed.last_modified = parse_date(published_raw)

        feed.save()

        # Every deal carries the document's date, so parse it once instead of
        # once per deal.
        last_modified = parse_date(published_raw)

        items = set()
        # guid -> already processed Item, so duplicate deals in the same
        # document reuse one instance without rescanning the whole set.
        items_by_guid = {}

        # Extract the content of each deal.
        for x_item in feed_data:
            guid = x_item.find("title").text

            item = items_by_guid.get(guid)
            if item is None:
                try:
                    item = Item.objects.get(guid=guid, feed=feed)
                except Item.DoesNotExist:
                    item = Item(guid=guid, last_modified=datetime(1900, 1, 1), feed=feed)

            # Every deal is rewritten on each import; the stored timestamp is
            # deliberately not consulted (the former freshness check was
            # short-circuited with ``True or``).
            item.title = x_item.find("title").text
            item.description = sanitise_html(self.result_content(x_item))
            item.link = x_item.find("deal_url").text
            item.small_image = x_item.find("medium_image_url").text
            item.big_image = x_item.find("large_image_url").text
            item.last_modified = last_modified
            item.save()

            items.add(item)
            items_by_guid[item.guid] = item

        # Drop stored items that no longer appear in the document.
        for item in Item.objects.filter(feed=feed):
            if item not in items:
                item.delete()

        return items