def update_posts_for_feed_task(partner):
    """
    Load and parse the RSS or Atom feed at the given partner's feed URL,
    and save each entry as a partner_feeds.Post.
    """
    from feedparser import parse
    from partner_feeds.models import Post
    import timelib
    import re
    import time

    feed = parse(partner.feed_url)

    for entry in feed.entries:
        p = Post()
        try:
            p.partner_id = partner.id
            p.title = entry.title
            p.subheader = entry.summary

            try:
                p.author = entry.author
            except AttributeError:
                pass

            try:
                p.guid = entry.id
            except AttributeError:
                p.guid = entry.link

            p.url = entry.link

            # Try to get the date of the entry; otherwise fall back to the
            # date of the feed itself.
            try:
                entry_date = re.sub(r'\|', '', entry.date)
                entry_date = timelib.strtotime(entry_date)  # free-form string -> POSIX timestamp
                entry_date = time.localtime(entry_date)  # timestamp -> time.struct_time (local timezone)
                entry_date = time.strftime("%Y-%m-%d %H:%M:%S", entry_date)  # -> MySQL DATETIME format
                p.date = entry_date
            except AttributeError:
                # Fall back to the feed-level date; updated_parsed is a
                # time.struct_time, which is what strftime() expects.
                p.date = time.strftime("%Y-%m-%d %H:%M:%S", feed.feed.updated_parsed)

            p.save()
        except AttributeError:
            # needs logging
            pass
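
# --- Illustrative sketch, not from the original source -----------------------
# Shows the date-normalization chain from the version above on a standalone
# sample string. `timelib.strtotime` is the free-form date parser from the
# third-party timelib package (under Python 3 the binding may expect bytes);
# the sample entry.date value below is hypothetical.
import re
import time
import timelib

raw_date = "Mon, 06 Sep 2010 | 14:30:00 GMT"  # hypothetical entry.date value
cleaned = re.sub(r'\|', '', raw_date)          # strip the literal pipe characters
timestamp = timelib.strtotime(cleaned)         # free-form string -> POSIX timestamp
local_struct = time.localtime(timestamp)       # timestamp -> local time.struct_time
mysql_date = time.strftime("%Y-%m-%d %H:%M:%S", local_struct)  # MySQL DATETIME format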
# Module-level setup assumed by the two revisions below: the imports are
# reconstructed from the names used, and the raven/Django wiring is a best
# guess; `utc_time_struct_to_local_time_struct` is sketched after the final
# revision.
import logging
import sys
from datetime import datetime
from time import strftime

from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from feedparser import parse
from raven import Client

from partner_feeds.models import Post

logger = logging.getLogger(__name__)
raven_client = Client(dsn=settings.RAVEN_CONFIG['dsn'])  # used by the final revision


def update_posts_for_feed_task(partner):
    """
    Load and parse the RSS or Atom feed at the given partner's feed URL,
    and save each new entry as a partner_feeds.Post.
    """
    logger.debug("Updating posts for partner feed: {} - {}.".format(partner, partner.pk))
    current_datetime = datetime.now()
    number_of_new_posts = 0

    feed = parse(partner.feed_url)

    for entry in feed.entries:
        p = Post()
        exception_data = {'entry': entry}
        try:
            p.partner_id = partner.id

            p.title = entry.title
            if not p.title:  # skip entries without a title
                continue

            if hasattr(entry, 'summary'):
                p.subheader = entry.summary
            else:
                p.subheader = ''

            try:
                p.author = entry.author
            except AttributeError:
                pass

            try:
                p.guid = entry.id
            except AttributeError:
                p.guid = entry.link

            # Look the post up first to see if this entry already exists.
            try:
                Post.objects.get(guid=p.guid, partner_id=partner.id)
                logger.debug("Preexisting partner_feed.Post with partner id: {}, guid: {}.".format(partner.id, p.guid))
                # TODO: check whether the story has been updated
            except ObjectDoesNotExist:
                logger.debug("partner_feed.Post does not exist with partner id: {}, guid: {}".format(partner.id, p.guid))

                # Skip the entry if its URL is too long for the database field.
                max_length = 500
                if len(entry.link) > max_length:
                    logger.debug("Entry link is longer than {}. Skipping entry link {}.".format(max_length, entry.link))
                    continue
                p.url = entry.link

                # Try to get the date of the entry; otherwise use the current date.
                if getattr(entry, 'published_parsed', None):
                    p.date = strftime("%Y-%m-%d %H:%M:%S",
                                      utc_time_struct_to_local_time_struct(entry.published_parsed))
                elif getattr(entry, 'updated_parsed', None):
                    p.date = strftime("%Y-%m-%d %H:%M:%S",
                                      utc_time_struct_to_local_time_struct(entry.updated_parsed))
                else:
                    p.date = current_datetime

                logger.debug("Saving partner_feed.Post with partner id: {}, guid: {}".format(partner.id, p.guid))
                p.save()
                logger.debug("Finished saving partner_feed.Post with partner id: {}, guid: {}".format(partner.id, p.guid))
                number_of_new_posts += 1
        except Exception:
            client = Client(dsn=settings.RAVEN_CONFIG['dsn'])
            client.captureException(exc_info=sys.exc_info(), data=exception_data)

    # Return the number of added posts.
    return number_of_new_posts
def update_posts_for_feed_task(partner):
    """
    Load and parse the RSS or Atom feed at the given partner's feed URL,
    and save each new entry as a partner_feeds.Post.
    """
    logger.debug(u"Updating posts for partner feed: {} - {}.".format(
        partner, partner.pk))
    current_datetime = datetime.now()
    number_of_new_posts = 0

    feed = parse(partner.feed_url)

    for entry in feed.entries:
        p = Post()
        exception_data = {'entry': entry}
        try:
            p.partner_id = partner.id

            p.title = entry.title
            if not p.title:  # skip entries without a title
                continue

            if hasattr(entry, 'summary'):
                p.subheader = entry.summary
            else:
                p.subheader = ''

            try:
                p.author = entry.author
            except AttributeError:
                pass

            try:
                p.guid = entry.id
            except AttributeError:
                p.guid = entry.link

            # Look the post up first to see if this entry already exists.
            try:
                Post.objects.get(guid=p.guid, partner_id=partner.id)
                logger.debug(
                    u"Preexisting partner_feed.Post with partner id: {}, guid: {}."
                    .format(partner.id, p.guid))
                # TODO: check whether the story has been updated
            except ObjectDoesNotExist:
                logger.debug(
                    u"partner_feed.Post does not exist with partner id: {}, guid: {}"
                    .format(partner.id, p.guid))

                # Skip the entry if its URL is too long for the database field.
                max_length = 500
                if len(entry.link) > max_length:
                    logger.debug(
                        u"Entry link is longer than {}. Skipping entry link {}."
                        .format(max_length, entry.link))
                    continue
                p.url = entry.link

                # Try to get the date of the entry; otherwise use the current date.
                if getattr(entry, 'published_parsed', None):
                    p.date = strftime(
                        "%Y-%m-%d %H:%M:%S",
                        utc_time_struct_to_local_time_struct(entry.published_parsed))
                elif getattr(entry, 'updated_parsed', None):
                    p.date = strftime(
                        "%Y-%m-%d %H:%M:%S",
                        utc_time_struct_to_local_time_struct(entry.updated_parsed))
                else:
                    p.date = current_datetime

                logger.debug(
                    u"Saving partner_feed.Post with partner id: {}, guid: {}"
                    .format(partner.id, p.guid))
                p.save()
                logger.debug(
                    u"Finished saving partner_feed.Post with partner id: {}, guid: {}"
                    .format(partner.id, p.guid))
                number_of_new_posts += 1
        except Exception:
            raven_client.captureException(exc_info=sys.exc_info(),
                                          data=exception_data)

    # Return the number of added posts.
    return number_of_new_posts
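
# --- Illustrative sketch, not from the original source -----------------------
# One plausible implementation of the `utc_time_struct_to_local_time_struct`
# helper the revisions above rely on: feedparser's *_parsed fields are UTC
# time.struct_time values, so the helper presumably converts them to local time.
import calendar
import time

def utc_time_struct_to_local_time_struct(utc_struct):
    """Convert a UTC time.struct_time into the equivalent local time.struct_time."""
    # calendar.timegm() reads the struct as UTC and returns a POSIX timestamp;
    # time.localtime() then renders that timestamp in the local timezone.
    return time.localtime(calendar.timegm(utc_struct))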