Code example #1
0
def update_posts_for_feed_task(partner):
    """
    Load and parse the RSS or ATOM feed associated with the given feed url,
    and for each entry, parse out the individual entries and save each one
    as a partner_feeds Post.

    :param partner: object exposing ``feed_url`` and ``id`` attributes.
    """
    from feedparser import parse
    from partner_feeds.models import Post
    import timelib
    import re
    import time

    feed = parse(partner.feed_url)

    for entry in feed.entries:
        p = Post()
        try:
            p.partner_id = partner.id
            p.title = entry.title

            p.subheader = entry.summary

            # Author is optional on many feeds.
            try:
                p.author = entry.author
            except AttributeError:
                pass

            # Prefer the stable entry id as the GUID; fall back to the link.
            try:
                p.guid = entry.id
            except AttributeError:
                p.guid = entry.link

            p.url = entry.link

            # Try to get the date of the entry, otherwise fall back to the
            # date of the feed itself.
            try:
                # FIX: raw string -- '\|' is an invalid escape sequence and
                # raises a DeprecationWarning on modern Pythons.
                entry_date = re.sub(r'\|', '', entry.date)
                entry_date = timelib.strtotime(
                    entry_date)  # convert to a timestamp
                entry_date = time.localtime(
                    entry_date)  # to a time.struct_time (local timezone)
                entry_date = time.strftime(
                    "%Y-%m-%d %H:%M:%S",
                    entry_date)  # to MySQL datetime format
                p.date = entry_date
            except AttributeError:
                # FIX: feedparser's ``feed.date`` is a *string*, which
                # time.strftime() cannot accept; use the parsed struct_time
                # variant instead.
                p.date = time.strftime("%Y-%m-%d %H:%M:%S", feed.date_parsed)

            p.save()
        except AttributeError:
            # A required attribute was missing from the entry; skip it.
            # TODO: this should be logged rather than silently swallowed.
            pass
Code example #2
0
File: tasks.py  Project: Govexec/django-partner-feeds
def update_posts_for_feed_task(partner):
    """
    Load and parse the RSS or ATOM feed associated with the given feed url,
    and for each entry, parse out the individual entries and save each one
    as a partner_feeds Post.

    :param partner: object exposing ``feed_url`` and ``id`` attributes.
    """
    from feedparser import parse
    from partner_feeds.models import Post
    import timelib
    import re
    import time

    feed = parse(partner.feed_url)

    for entry in feed.entries:
        p = Post()
        try:
            p.partner_id = partner.id
            p.title = entry.title

            p.subheader = entry.summary

            # Author is optional on many feeds.
            try:
                p.author = entry.author
            except AttributeError:
                pass

            # Prefer the stable entry id as the GUID; fall back to the link.
            try:
                p.guid = entry.id
            except AttributeError:
                p.guid = entry.link

            p.url = entry.link

            # Try to get the date of the entry, otherwise fall back to the
            # date of the feed itself.
            try:
                # FIX: raw string -- '\|' is an invalid escape sequence and
                # raises a DeprecationWarning on modern Pythons.
                entry_date = re.sub(r'\|', '', entry.date)
                # convert to a timestamp
                entry_date = timelib.strtotime(entry_date)
                # to a time.struct_time (with regard to local timezone)
                entry_date = time.localtime(entry_date)
                # to MySQL datetime format
                entry_date = time.strftime("%Y-%m-%d %H:%M:%S", entry_date)
                p.date = entry_date
            except AttributeError:
                # FIX: feedparser's ``feed.date`` is a *string*, which
                # time.strftime() cannot accept; use the parsed struct_time
                # variant instead.
                p.date = time.strftime("%Y-%m-%d %H:%M:%S", feed.date_parsed)

            p.save()
        except AttributeError:
            # A required attribute was missing from the entry; skip it.
            # TODO: this should be logged rather than silently swallowed.
            pass
Code example #3
0
def update_posts_for_feed_task(partner):
    """
    Load and parse the RSS or ATOM feed associated with the given feed url,
    and for each entry, parse out the individual entries and save each one
    as a partner_feeds Post.

    :param partner: object exposing ``feed_url``, ``id`` and ``pk``.
    :returns: the number of newly created posts.
    """
    logger.debug("Updating posts for partner feed: {} - {}.".format(partner, partner.pk))

    current_datetime = datetime.now()
    number_of_new_posts = 0
    feed = parse(partner.feed_url)

    for entry in feed.entries:
        p = Post()
        exception_data = {'entry': entry}
        try:
            p.partner_id = partner.id
            p.title = entry.title

            # Skip entries without a title.  (FIX: dropped the redundant
            # ``len(p.title) == 0`` -- ``not p.title`` already covers it.)
            if not p.title:
                continue

            p.subheader = entry.summary if hasattr(entry, 'summary') else ''

            # Author is optional on many feeds.
            try:
                p.author = entry.author
            except AttributeError:
                pass

            # Prefer the stable entry id as the GUID; fall back to the link.
            try:
                p.guid = entry.id
            except AttributeError:
                p.guid = entry.link

            # Select the post first to see if this entry already exists.
            try:
                Post.objects.get(guid=p.guid, partner_id=partner.id)
                logger.debug("Prexisting partner_feed.Post with partner id: {}, guid: {}.".format(partner.id, p.guid))
                # TODO check to see if the story has been updated
            except ObjectDoesNotExist:
                logger.debug("partner_feed.Post does not exist with partner id: {}, guid: {}".format(partner.id, p.guid))
                # Skip if the URL is too long for the database field.
                max_length = 500
                if len(entry.link) > max_length:
                    logger.debug("Entry link is longer than {}. Skipping entry link {}.".format(max_length, entry.link))
                    continue

                p.url = entry.link

                # Try to get the date of the entry (published, then updated),
                # otherwise use the current date.
                if getattr(entry, 'published_parsed', None):
                    p.date = strftime("%Y-%m-%d %H:%M:%S", utc_time_struct_to_local_time_struct(entry.published_parsed))
                elif getattr(entry, 'updated_parsed', None):
                    p.date = strftime("%Y-%m-%d %H:%M:%S", utc_time_struct_to_local_time_struct(entry.updated_parsed))
                else:
                    p.date = current_datetime

                logger.debug("Saving partner_feed.Post with partner id: {}, guid: {}".format(partner.id, p.guid))
                p.save()
                logger.debug("Finished saving partner_feed.Post with partner id: {}, guid: {}".format(partner.id, p.guid))

                number_of_new_posts += 1

        except Exception:
            # NOTE(review): a new Raven client is built per exception --
            # consider constructing it once at module level.
            client = Client(dsn=settings.RAVEN_CONFIG['dsn'])
            client.captureException(exc_info=sys.exc_info(), data=exception_data)

    # Return the number of added posts.
    return number_of_new_posts
Code example #4
0
def update_posts_for_feed_task(partner):
    """
    Load and parse the RSS or ATOM feed associated with the given feed url,
    and for each entry, parse out the individual entries and save each one
    as a partner_feeds Post.

    :param partner: object exposing ``feed_url``, ``id`` and ``pk``.
    :returns: the number of newly created posts.
    """
    logger.debug(u"Updating posts for partner feed: {} - {}.".format(
        partner, partner.pk))

    current_datetime = datetime.now()
    number_of_new_posts = 0
    feed = parse(partner.feed_url)

    for entry in feed.entries:
        p = Post()
        exception_data = {'entry': entry}
        try:
            p.partner_id = partner.id
            p.title = entry.title

            # Skip entries without a title.  (FIX: dropped the redundant
            # ``len(p.title) == 0`` -- ``not p.title`` already covers it.)
            if not p.title:
                continue

            p.subheader = entry.summary if hasattr(entry, 'summary') else ''

            # Author is optional on many feeds.
            try:
                p.author = entry.author
            except AttributeError:
                pass

            # Prefer the stable entry id as the GUID; fall back to the link.
            try:
                p.guid = entry.id
            except AttributeError:
                p.guid = entry.link

            # Select the post first to see if this entry already exists.
            try:
                Post.objects.get(guid=p.guid, partner_id=partner.id)
                logger.debug(
                    u"Prexisting partner_feed.Post with partner id: {}, guid: {}."
                    .format(partner.id, p.guid))
                # TODO check to see if the story has been updated
            except ObjectDoesNotExist:
                logger.debug(
                    u"partner_feed.Post does not exist with partner id: {}, guid: {}"
                    .format(partner.id, p.guid))
                # Skip if the URL is too long for the database field.
                max_length = 500
                if len(entry.link) > max_length:
                    logger.debug(
                        u"Entry link is longer than {}. Skipping entry link {}."
                        .format(max_length, entry.link))
                    continue

                p.url = entry.link

                # Try to get the date of the entry (published, then updated),
                # otherwise use the current date.  The two strftime branches
                # were identical, so the struct_time selection is factored out.
                parsed_date = (getattr(entry, 'published_parsed', None)
                               or getattr(entry, 'updated_parsed', None))
                if parsed_date:
                    p.date = strftime(
                        "%Y-%m-%d %H:%M:%S",
                        utc_time_struct_to_local_time_struct(parsed_date))
                else:
                    p.date = current_datetime

                logger.debug(
                    u"Saving partner_feed.Post with partner id: {}, guid: {}".
                    format(partner.id, p.guid))
                p.save()
                logger.debug(
                    u"Finished saving partner_feed.Post with partner id: {}, guid: {}"
                    .format(partner.id, p.guid))

                number_of_new_posts += 1

        except Exception:
            raven_client.captureException(exc_info=sys.exc_info(),
                                          data=exception_data)

    # Return the number of added posts.
    return number_of_new_posts