コード例 #1
0
ファイル: rss.py プロジェクト: Flowdeeps/superdesk-1
    def _create_item(self, data, field_aliases=None):
        """Create a new content item from RSS feed data.

        :param dict data: parsed data of a single feed entry
        :param field_aliases: (optional) field name aliases. Used for content
             fields that are named differently in retrieved data.
        :type field_aliases: dict or None

        :return: created content item
        :rtype: dict
        """
        if field_aliases is None:
            field_aliases = {}
        else:
            field_aliases = merge_dicts(field_aliases)

        item = dict(type="text")

        for field in self.item_fields:
            data_field_name = field_aliases.get(field.name_in_data, field.name_in_data)
            field_value = data.get(data_field_name)

            if (field.type is datetime) and field_value:
                field_value = utcfromtimestamp(timegm(field_value))

            item[field.name] = field_value

        return item
コード例 #2
0
    def _create_item(self, data, field_aliases=None):
        """Create a new content item from RSS feed data.

        :param dict data: parsed data of a single feed entry
        :param field_aliases: (optional) field name aliases. Used for content
             fields that are named differently in retrieved data.
        :type field_aliases: dict or None

        :return: created content item
        :rtype: dict
        """
        if field_aliases is None:
            field_aliases = {}
        else:
            field_aliases = merge_dicts(field_aliases)

        item = dict(type='text')

        for field in self.item_fields:
            data_field_name = field_aliases.get(
                field.name_in_data, field.name_in_data
            )
            field_value = data.get(data_field_name)

            if (field.type is datetime) and field_value:
                field_value = utcfromtimestamp(timegm(field_value))

            item[field.name] = field_value

        return item
コード例 #3
0
ファイル: rss.py プロジェクト: ahilles107/superdesk-core
    def _create_item(self, data, field_aliases=None):
        """Create a new content item from RSS feed data.

        Each field in ``self.item_fields`` is read from ``data`` (under its
        alias, when one is configured) and stored in the item under the
        field's own name. Fields whose data name is consumed as an alias
        target by another field are skipped unless they are aliased
        themselves. If ``body_html`` ends up empty and is not aliased, the
        entry's ``content[0].value`` is tried as a fallback.

        :param dict data: parsed data of a single feed entry
        :param field_aliases: (optional) field name aliases. Used for content
             fields that are named differently in retrieved data.
        :type field_aliases: list of {field_name: alias} dictionaries or None

        :return: created content item
        :rtype: dict
        """
        if field_aliases is None:
            field_aliases = {}
        else:
            field_aliases = merge_dicts(field_aliases)
        aliased_fields = set(field_aliases.values())

        item = dict(type=CONTENT_TYPE.TEXT)

        # Only consider fields that are not used as an alias (i.e. used to
        # populate another field) - unless those fields have their own
        # aliases, too.
        # The idea is that if e.g. the main text field is aliased to use the
        # parsed data's summary field, that summary should not be used to
        # populate the field it was originally meant for.
        fields_to_consider = (
            f for f in self.item_fields
            if f.name_in_data not in aliased_fields or
            f.name_in_data in field_aliases
        )

        for field in fields_to_consider:
            data_field_name = field_aliases.get(
                field.name_in_data, field.name_in_data
            )
            field_value = data.get(data_field_name)

            # Non-empty values of datetime-typed fields are converted with
            # timegm() followed by utcfromtimestamp().
            if (field.type is datetime) and field_value:
                field_value = utcfromtimestamp(timegm(field_value))

            item[field.name] = field_value

            # Some feeds use <content:encoded> tag for storing the main content,
            # and that tag is parsed differently. If the body_html has not been
            # found in its default data field and is not aliased, try to
            # populate it using the aforementioned content field as a fallback.
            if (
                field.name == 'body_html' and
                not field_value and
                field.name_in_data not in field_aliases
            ):
                try:
                    item['body_html'] = data.content[0].value
                except Exception:
                    # Deliberate best effort: content is either non-existent
                    # or parsed differently. Catching Exception (rather than
                    # using a bare except) lets KeyboardInterrupt and
                    # SystemExit propagate.
                    pass

        return item
コード例 #4
0
ファイル: rss.py プロジェクト: plamut/superdesk-core
    def _create_item(self, data, field_aliases=None):
        """Create a new content item from RSS feed data.

        The item starts out as ``{'type': CONTENT_TYPE.TEXT}`` and gets one
        key per considered field of ``self.item_fields``, holding the value
        found in ``data`` under the field's data name or its alias (``None``
        when nothing is found). An empty, non-aliased ``body_html`` is
        additionally looked up in ``data.content[0].value``.

        :param dict data: parsed data of a single feed entry
        :param field_aliases: (optional) field name aliases. Used for content
             fields that are named differently in retrieved data.
        :type field_aliases: list of {field_name: alias} dictionaries or None

        :return: created content item
        :rtype: dict
        """
        if field_aliases is None:
            field_aliases = {}
        else:
            field_aliases = merge_dicts(field_aliases)
        aliased_fields = set(field_aliases.values())

        item = dict(type=CONTENT_TYPE.TEXT)

        # Only consider fields that are not used as an alias (i.e. used to
        # populate another field) - unless those fields have their own
        # aliases, too.
        # The idea is that if e.g. the main text field is aliased to use the
        # parsed data's summary field, that summary should not be used to
        # populate the field it was originally meant for.
        fields_to_consider = (
            f for f in self.item_fields
            if (f.name_in_data not in aliased_fields) or
               (f.name_in_data in field_aliases)
        )

        for field in fields_to_consider:
            data_field_name = field_aliases.get(
                field.name_in_data, field.name_in_data
            )
            field_value = data.get(data_field_name)

            # Only non-empty values of datetime-typed fields are converted.
            if (field.type is datetime) and field_value:
                field_value = utcfromtimestamp(timegm(field_value))

            item[field.name] = field_value

            # Some feeds use <content:encoded> tag for storing the main content,
            # and that tag is parsed differently. If the body_html has not been
            # found in its default data field and is not aliased, try to
            # populate it using the aforementioned content field as a fallback.
            if (
                field.name == 'body_html' and
                not field_value and
                field.name_in_data not in field_aliases
            ):
                try:
                    item['body_html'] = data.content[0].value
                except Exception:
                    # content either non-existent or parsed differently;
                    # the fallback stays best-effort, but a bare except
                    # would also have swallowed KeyboardInterrupt/SystemExit
                    pass

        return item
コード例 #5
0
    def _create_item(self, data, field_aliases=None, source='source'):
        """Create a new content item from RSS feed data.

        Besides copying the configured ``self.item_fields`` from ``data``,
        this also:

        * clamps datetime values lying in the future to the current UTC time,
        * sets ``uri`` and ``guid`` from the entry's link when the entry has
          a link and its guid is not flagged as being the link,
        * prepends a link to the original article to ``body_html``,
        * adds a ``dateline`` dict with the source and the item's date.

        :param dict data: parsed data of a single feed entry
        :param field_aliases: (optional) field name aliases. Used for content
             fields that are named differently in retrieved data.
        :type field_aliases: list of {field_name: alias} dictionaries or None
        :param str source: the source of provider

        :return: created content item
        :rtype: dict
        """
        # Local import: the module's import block is not part of this method.
        from html import escape

        if field_aliases is None:
            field_aliases = {}
        else:
            field_aliases = merge_dicts(field_aliases)
        aliased_fields = set(field_aliases.values())

        item = dict(type=CONTENT_TYPE.TEXT)

        # Only consider fields that are not used as an alias (i.e. used to
        # populate another field) - unless those fields have their own
        # aliases, too.
        # The idea is that if e.g. the main text field is aliased to use the
        # parsed data's summary field, that summary should not be used to
        # populate the field it was originally meant for.
        fields_to_consider = (f for f in self.item_fields
                              if f.name_in_data not in aliased_fields
                              or f.name_in_data in field_aliases)

        utc_now = datetime.utcnow()
        for field in fields_to_consider:
            data_field_name = field_aliases.get(field.name_in_data,
                                                field.name_in_data)
            field_value = data.get(data_field_name)

            if (field.type is datetime) and field_value:
                field_value = utcfromtimestamp(timegm(field_value))
                # Dates in the future are replaced by the current time.
                field_value = utc_now if field_value > utc_now else field_value

            item[field.name] = field_value

            # Some feeds use <content:encoded> tag for storing the main content,
            # and that tag is parsed differently. If the body_html has not been
            # found in its default data field and is not aliased, try to
            # populate it using the aforementioned content field as a fallback.
            if (field.name == 'body_html' and not field_value
                    and field.name_in_data not in field_aliases):
                try:
                    item['body_html'] = data.content[0].value
                except Exception:
                    pass  # content either non-existent or parsed differently

        if not data.get('guidislink') and data.get('link'):
            item['uri'] = data['link']
            # Only the network location of the link is needed for the tag.
            netloc = urlsplit(item['uri']).netloc
            if data.get('guid'):
                item['guid'] = generate_tag(domain=netloc, id=data.get('guid'))
            else:
                item['guid'] = generate_tag_from_url(data['link'])

        if item.get('uri'):
            # The link and the source name are interpolated into markup, and
            # the link comes straight from the feed, so escape both (quotes
            # included) to keep them from breaking out of the attribute/tag.
            link_html = '<p><a href="%s" target="_blank">%s</a></p>' % (
                escape(str(item['uri']), quote=True), escape(str(source)))
            item['body_html'] = link_html + (item.get('body_html') or '')

        item['dateline'] = {
            'source': source,
            # The field loop stores None for missing values, so a plain
            # ``get(key, default)`` would never reach the fallback; use
            # ``or`` to fall back when firstcreated is present but empty.
            'date': item.get('firstcreated') or item.get('versioncreated')
        }

        return item
コード例 #6
0
ファイル: rss.py プロジェクト: jerome-poisson/superdesk-core
    def _create_item(self, data, field_aliases=None, source='source'):
        """Create a new content item from RSS feed data.

        The configured ``self.item_fields`` are copied from ``data`` (using
        aliases where given), with datetime values capped at the current UTC
        time. When the entry carries a link and its guid is not marked as
        being that link, ``uri`` and ``guid`` are derived from the link and
        an anchor pointing at it is prepended to ``body_html``. A
        ``dateline`` dict (source and date) is always added.

        :param dict data: parsed data of a single feed entry
        :param field_aliases: (optional) field name aliases. Used for content
             fields that are named differently in retrieved data.
        :type field_aliases: list of {field_name: alias} dictionaries or None
        :param str source: the source of provider

        :return: created content item
        :rtype: dict
        """
        # Imported here because the module-level imports are outside this
        # method.
        from html import escape

        if field_aliases is None:
            field_aliases = {}
        else:
            field_aliases = merge_dicts(field_aliases)
        aliased_fields = set(field_aliases.values())

        item = dict(type=CONTENT_TYPE.TEXT)

        # Only consider fields that are not used as an alias (i.e. used to
        # populate another field) - unless those fields have their own
        # aliases, too.
        # The idea is that if e.g. the main text field is aliased to use the
        # parsed data's summary field, that summary should not be used to
        # populate the field it was originally meant for.
        fields_to_consider = (
            f for f in self.item_fields
            if (f.name_in_data not in aliased_fields) or
               (f.name_in_data in field_aliases)
        )

        utc_now = datetime.utcnow()
        for field in fields_to_consider:
            data_field_name = field_aliases.get(
                field.name_in_data, field.name_in_data
            )
            field_value = data.get(data_field_name)

            if (field.type is datetime) and field_value:
                field_value = utcfromtimestamp(timegm(field_value))
                # Never keep a date that lies in the future.
                field_value = utc_now if field_value > utc_now else field_value

            item[field.name] = field_value

            # Some feeds use <content:encoded> tag for storing the main content,
            # and that tag is parsed differently. If the body_html has not been
            # found in its default data field and is not aliased, try to
            # populate it using the aforementioned content field as a fallback.
            if (
                field.name == 'body_html' and
                not field_value and
                field.name_in_data not in field_aliases
            ):
                try:
                    item['body_html'] = data.content[0].value
                except Exception:
                    pass  # content either non-existent or parsed differently

        if not data.get('guidislink') and data.get('link'):
            item['uri'] = data['link']
            # Of the split URL, only the network location is used.
            domain = urlsplit(item['uri']).netloc
            if data.get('guid'):
                item['guid'] = generate_tag(domain=domain, id=data.get('guid'))
            else:
                item['guid'] = generate_tag_from_url(data['link'])

        if item.get('uri'):
            if not item.get('body_html'):
                item['body_html'] = ''
            # Escape the feed-supplied link and the source name before
            # embedding them in markup, so neither can inject HTML.
            safe_uri = escape(str(item['uri']), quote=True)
            safe_source = escape(str(source))
            item['body_html'] = (
                '<p><a href="%s" target="_blank">%s</a></p>' % (safe_uri, safe_source) +
                item['body_html']
            )

        item['dateline'] = {
            'source': source,
            # ``firstcreated`` may be present with a None value (the loop
            # above stores None for missing data), in which case
            # ``get(key, default)`` would not fall back; ``or`` does.
            'date': item.get('firstcreated') or item.get('versioncreated')
        }

        return item