Example #1
0
    def process_data(self, token, trigger_id, date_triggered):
        """
            get the feeds of the Rss attached to the trigger, going through
            the 'rss' cache when the cache settings provide one

            :param token: not used by this method
            :param trigger_id: trigger ID used to look up the Rss
            :param date_triggered: not used by this method
            :return: the feeds, read from the cache when it is enabled,
                freshly retrieved otherwise
        """
        # call the model
        from th_rss.models import Rss
        # call the cache
        from django.core.cache import get_cache

        # get the URL from the trigger id
        rss = Rss.objects.get(trigger_id=trigger_id)
        self.name = rss.name

        logger.debug("RSS Feeds from %s : url %s", self.name, rss.url)

        # forget what a previous call may have left on the instance:
        # without cache support nothing else would overwrite it and the
        # feeds of another trigger could be returned
        self.data = None

        # get the cache settings if any
        parms = self._cache_settings()
        # check if the cache is set otherwise no cache support at all
        use_cache = 'rss' in parms
        if use_cache:
            # cache rss backend + parms
            cache = get_cache('rss', **parms)
            # data from the cache
            self.data = cache.get(self.name)

        # data not in cache or expired
        if not self.data:
            # retrieve the data
            feeds = Feeds(**{'url_to_parse': rss.url}).datas()
            if use_cache:
                # put in cache, then read it back from the cache
                cache.set(self.name, feeds, parms['rss']['TIMEOUT'])
                self.data = cache.get(self.name)
                if self.data is None:
                    # the backend did not keep the entry: still hand back
                    # what has just been retrieved
                    self.data = feeds
            else:
                self.data = feeds

        # return the data
        return self.data
Example #2
0
    def read_data(self, token, trigger_id, date_triggered):
        """
            get the data from the service

            Retrieve the RSS feed attached to the trigger and keep the
            entries dated between ``date_triggered`` and now. The kept
            entries are also stored in the cache under the keys
            ``th_rss_<trigger_id>`` and ``th_rss_uuid_<uuid>``.

            :param token: not used by this method
            :param trigger_id: trigger ID to process
            :param date_triggered: the date of the last trigger
            :type trigger_id: int
            :type date_triggered: datetime
            :return: list of data found from the date_triggered filter
            :rtype: list
        """
        # timegm() is the UTC counterpart of mktime()
        import calendar

        # get the URL from the trigger id
        rss = super(ServiceRss, self).read_data('Rss', trigger_id)

        logger.debug("RSS Feeds from %s : url %s", rss.name, rss.url)

        now = arrow.utcnow().to(settings.TIME_ZONE)
        my_feeds = []
        # date attributes of an entry, by order of preference
        date_attrs = ('published_parsed', 'created_parsed', 'updated_parsed')

        # retrieve the data
        feeds = Feeds(**{'url_to_parse': rss.url}).datas()

        # the lower bound is the same for all the entries: convert it once.
        # None is left as is (str(None) is not a date) and then selects
        # nothing
        if date_triggered is not None:
            date_triggered = arrow.get(str(date_triggered)).to(
                settings.TIME_ZONE)

        for entry in feeds.entries:
            # start from scratch for each entry, so an entry without date
            # never inherits the date of the previous one
            published = None
            for attr in date_attrs:
                # the attribute may be missing, or present but None
                parsed = getattr(entry, attr, None)
                if parsed is not None:
                    # NOTE(review): assumes *_parsed is a UTC time tuple
                    # (as feedparser provides) - confirm for Feeds.
                    # mktime() would read it as local time.
                    published = datetime.datetime.utcfromtimestamp(
                        calendar.timegm(parsed))
                    break

            if published is None:
                # no usable date: "now" is the default
                published = now
            else:
                published = arrow.get(str(published)).to(settings.TIME_ZONE)

            if date_triggered is not None and\
               now >= published >= date_triggered:
                my_feeds.append(entry)

        cache.set('th_rss_' + str(trigger_id), my_feeds)
        cache.set('th_rss_uuid_{}'.format(rss.uuid), my_feeds)
        # return the data
        return my_feeds
Example #3
0
    def read_data(self, **kwargs):
        """
            get the data from the service

            Retrieve the RSS feed attached to the trigger and keep the
            entries dated between ``date_triggered`` and now. The kept
            entries are also stored in the cache under the keys
            ``th_rss_<trigger_id>`` and ``th_rss_uuid_<uuid>``.

            :param kwargs: contain keyword args : trigger_id and
                date_triggered; model_name is set here before the kwargs
                are handed to the parent read_data
            :type kwargs: dict
            :return: list of the entries selected by the date filter
            :rtype: list
        """
        # timegm() is the UTC counterpart of mktime()
        import calendar

        date_triggered = kwargs.get('date_triggered')
        trigger_id = kwargs.get('trigger_id')
        kwargs['model_name'] = 'Rss'

        # get the URL from the trigger id
        rss = super(ServiceRss, self).read_data(**kwargs)

        logger.debug("RSS Feeds from %s : url %s", rss.name, rss.url)

        now = arrow.utcnow().to(settings.TIME_ZONE)
        my_feeds = []
        # date attributes of an entry, by order of preference
        date_attrs = ('published_parsed', 'created_parsed', 'updated_parsed')

        # retrieve the data
        feeds = Feeds(**{'url_to_parse': rss.url}).datas()

        # the lower bound is the same for all the entries: convert it once.
        # None is left as is (str(None) is not a date) and then selects
        # nothing
        if date_triggered is not None:
            date_triggered = arrow.get(
                str(date_triggered)).to(settings.TIME_ZONE)

        for entry in feeds.entries:
            # start from scratch for each entry, so an entry without date
            # never inherits the date of the previous one
            published = None
            for attr in date_attrs:
                # the attribute may be missing, or present but None
                parsed = getattr(entry, attr, None)
                if parsed is not None:
                    # NOTE(review): assumes *_parsed is a UTC time tuple
                    # (as feedparser provides) - confirm for Feeds.
                    # mktime() would read it as local time.
                    published = datetime.datetime.utcfromtimestamp(
                        calendar.timegm(parsed))
                    break

            if published is None:
                # no usable date: "now" is the default
                published = now
            else:
                published = arrow.get(str(published)).to(settings.TIME_ZONE)

            if date_triggered is not None and\
               now >= published >= date_triggered:
                my_feeds.append(entry)

        cache.set('th_rss_' + str(trigger_id), my_feeds)
        cache.set('th_rss_uuid_{}'.format(rss.uuid), my_feeds)
        # return the data
        return my_feeds
Example #4
0
    def read_data(self, **kwargs):
        """
            get the data from the service

            Retrieve the RSS feed attached to the trigger and keep the
            entries dated between ``date_triggered`` and now. Each kept
            entry is passed to ``send_digest_event`` and the kept entries
            are stored in the cache under the keys ``th_rss_<trigger_id>``
            and ``th_rss_uuid_<uuid>``.

            :param kwargs: contain keyword args : trigger_id and
                date_triggered; model_name and app_label are set here
                before the kwargs are handed to the parent read_data
            :type kwargs: dict
            :return: list of the entries selected by the date filter
            :rtype: list
        """
        date_triggered = kwargs.get('date_triggered')
        trigger_id = kwargs.get('trigger_id')
        kwargs['model_name'] = 'Rss'
        kwargs['app_label'] = 'django_th'
        # get the URL from the trigger id
        rss = super(ServiceRss, self).read_data(**kwargs)

        logger.debug("RSS Feeds from %s : url %s", rss.name, rss.url)

        now = arrow.utcnow().to(settings.TIME_ZONE)
        my_feeds = []

        # retrieve the data
        feeds = Feeds(**{'url_to_parse': rss.url}).datas()

        # the lower bound is the same for all the entries: convert it once.
        # None is left as is (str(None) is not a date) and then selects
        # nothing
        if date_triggered is not None:
            date_triggered = arrow.get(str(date_triggered)).to(
                settings.TIME_ZONE)

        for entry in feeds.entries:
            # entry.*_parsed may be None when the date in a RSS Feed is invalid
            # so will have the "now" date as default
            published = self._get_published(entry)

            # _get_published is defined outside this method: both '' and
            # None are taken as "no usable date"
            if published is None or published == '':
                published = now
            else:
                published = arrow.get(str(published)).to(settings.TIME_ZONE)

            if date_triggered is not None and\
               now >= published >= date_triggered:
                my_feeds.append(entry)

                # digester
                self.send_digest_event(trigger_id, entry.title, entry.link)

        cache.set('th_rss_' + str(trigger_id), my_feeds)
        cache.set('th_rss_uuid_{}'.format(rss.uuid), my_feeds)
        # return the data
        return my_feeds