def update_feed(parsed_items, feed_id):
    """
    Replace a feed's non-bookmarked items with freshly parsed ones.

    Nothing happens when ``parsed_items`` is empty or ``None``. Otherwise the
    feed is loaded by primary key, every item of the feed with
    ``bookmark=False`` is deleted, one ``Item`` per parsed entry is inserted
    with a single ``bulk_create`` call, and the feed's ``last_updated_at`` is
    set to ``dt.datetime.utcnow()`` and saved.

    :param parsed_items: List - Parsed entries (mappings read via ``.get``)
    :param feed_id: str - Feed's PK
    :return: None
    """
    if not parsed_items:
        return None

    feed = Feed.objects.get(pk=feed_id)

    # Bookmarked items survive the refresh; everything else is rebuilt.
    feed.items.filter(bookmark=False).delete()

    fresh_items = [
        Item(
            title=entry.get("title"),
            link=entry.get("link"),
            description=entry.get("description"),
            summary=entry.get("summary"),
            feed=feed,
            published_at=str_to_datetime(entry.get("published")),
        )
        for entry in parsed_items
    ]
    Item.objects.bulk_create(fresh_items)

    feed.last_updated_at = dt.datetime.utcnow()
    feed.save()
def import_feed(self, feed):
    """
    Synchronise the stored items of ``feed`` with its RSS document.

    The document at ``feed.rss_url`` is parsed with feedparser,
    ``feed.last_modified`` is refreshed and saved, every entry is created or
    overwritten as an ``Item`` (the entry title is used as the guid), and
    stored items of this feed that did not appear in the document are deleted.

    :param feed: feed model instance; ``rss_url`` is read, ``last_modified``
        is assigned and ``save()`` is called on it.
    :return: set of the ``Item`` instances saved during this import.
    """
    from apps.feeds.models import Item

    feed_data = feedparser.parse(feed.rss_url)

    try:
        feed.last_modified = struct_to_datetime(feed_data.feed.updated_parsed)
    except Exception:
        # No usable "updated" timestamp on the feed: fall back to the HTTP
        # Last-Modified header, or to the current time when that is missing.
        # ``except Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        # NOTE(review): the fallback formats local time but labels it +0000;
        # confirm whether UTC was intended.
        feed.last_modified = parse_date(
            feed_data.headers.get(
                "last-modified",
                datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000"),
            )
        )
    feed.save()

    items = set()
    # guid -> Item already handled in this run, so an entry whose guid repeats
    # reuses the same instance without rescanning ``items``.
    seen_by_guid = {}

    for x_item in feed_data.entries:
        guid = x_item.title
        # A struct_time starts with (year, month, day, hour, minute, second);
        # the 7th field is the weekday and must not be passed to datetime(),
        # where it would be taken as microseconds.
        last_modified = datetime(*x_item.date_parsed[:6])

        item = seen_by_guid.get(guid)
        if item is None:
            try:
                item = Item.objects.get(guid=guid, feed=feed)
            except Item.DoesNotExist:
                item = Item(guid=guid, last_modified=datetime(1900, 1, 1), feed=feed)
            seen_by_guid[guid] = item

        # Items are always overwritten; no staleness check on last_modified
        # is applied.
        item.title = x_item.title
        item.description = sanitise_html(x_item.get("description", ""))
        item.link = x_item.link
        item.last_modified = last_modified
        item.save()
        items.add(item)

    # Drop stored items that are no longer present in the feed document.
    for item in Item.objects.filter(feed=feed):
        if item not in items:
            item.delete()

    return items
def import_feed(self, feed):
    """
    Synchronise the stored items of ``feed`` with a Meituan deals XML document.

    The XML at ``feed.rss_url`` is downloaded (30 second timeout) and parsed,
    ``feed.last_modified`` is set from the root element's ``date`` attribute
    and saved, every ``deal`` element under the root's first child is created
    or overwritten as an ``Item`` (the deal title is used as the guid), and
    stored items of this feed that did not appear in the document are deleted.

    :param feed: feed model instance; ``rss_url`` is read, ``last_modified``
        is assigned and ``save()`` is called on it.
    :return: set of the ``Item`` instances saved during this import.
    """
    from apps.feeds.models import Item
    import contextlib
    import xml.etree.ElementTree as ET

    # Fetch the Meituan XML. closing() guarantees the HTTP response is
    # released even if read() raises.
    with contextlib.closing(urllib2.urlopen(feed.rss_url, timeout=30)) as response:
        feed_tmp = response.read()

    # Parse the document and collect the deals under the root's first child.
    # NOTE(review): ElementTree is not hardened against malicious XML; confirm
    # the source is trusted.
    feed_xml = ET.fromstring(feed_tmp)
    feed_data = feed_xml[0].findall("deal")

    # Publication date of the whole document.
    published = feed_xml.attrib["date"]
    try:
        feed.last_modified = struct_to_datetime(published)
    except Exception:
        # ``except Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        feed.last_modified = parse_date(published)
    feed.save()

    items = set()
    # guid -> Item already handled in this run, so a deal whose guid repeats
    # reuses the same instance without rescanning ``items``.
    seen_by_guid = {}

    # Every deal carries the document-level date, so parse it once; skipped
    # when there are no deals, in which case it is never needed.
    last_modified = parse_date(published) if feed_data else None

    # Extract the content of each deal.
    for x_item in feed_data:
        title = x_item.find("title").text
        guid = title

        item = seen_by_guid.get(guid)
        if item is None:
            try:
                item = Item.objects.get(guid=guid, feed=feed)
            except Item.DoesNotExist:
                item = Item(guid=guid, last_modified=datetime(1900, 1, 1), feed=feed)
            seen_by_guid[guid] = item

        # Items are always overwritten; no staleness check on last_modified
        # is applied.
        item.title = title
        item.description = sanitise_html(self.result_content(x_item))
        item.link = x_item.find("deal_url").text
        item.small_image = x_item.find("medium_image_url").text
        item.big_image = x_item.find("large_image_url").text
        item.last_modified = last_modified
        item.save()
        items.add(item)

    # Drop stored items that are no longer present in the document.
    for item in Item.objects.filter(feed=feed):
        if item not in items:
            item.delete()

    return items