예제 #1
0
    def _transform_to_ninjs(self, article, subscriber, recursive=True):
        """Build the ninjs dictionary for ``article``.

        Statement order matters: values copied through
        ``direct_copy_properties`` may be overwritten by the more specific
        handling that follows.

        :param article: mapping holding the item to convert.
        :param subscriber: forwarded to ``_get_associations`` and
            ``_format_related``.
        :param recursive: when true, associations (and any extra items
            returned by ``_format_related``) are added to the output; when
            false they are left out.
        :return: dict with the ninjs fields derived from ``article``.
        """
        ninjs = {
            "guid": article.get(GUID_FIELD, article.get("uri")),
            "version": str(article.get(config.VERSION, 1)),
            "type": self._get_type(article),
        }

        if article.get("byline"):
            ninjs["byline"] = article["byline"]

        # ``or {}`` also covers a key that is present but set to None, the
        # same guard that is used for ``schedule_settings`` further down.
        located = (article.get("dateline") or {}).get("located")
        if located:
            ninjs["located"] = located.get("city", "")

        for copy_property in self.direct_copy_properties:
            if article.get(copy_property) is not None:
                ninjs[copy_property] = article[copy_property]

        # alt_text stands in for body_text when the article has none.
        if "body_text" not in article and "alt_text" in article:
            ninjs["body_text"] = article["alt_text"]

        if "title" in article:
            ninjs["headline"] = article["title"]

        if article.get("body_html"):
            ninjs["body_html"] = self.append_body_footer(article)

        # NOTE(review): description_html receives the same
        # append_body_footer(article) result as body_html - confirm this is
        # intended. It is replaced below whenever the article has "abstract".
        if article.get("description"):
            ninjs["description_html"] = self.append_body_footer(article)

        if article.get("place"):
            ninjs["place"] = self._format_place(article)

        if article.get("profile"):
            ninjs["profile"] = self._format_profile(article["profile"])

        # Associations are only produced when recursing; with
        # ``recursive`` false nothing is added here.
        extra_items = None
        if recursive:
            if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                ninjs[ASSOCIATIONS] = self._get_associations(
                    article, subscriber)
                if article.get(ASSOCIATIONS):
                    associations, extra_items = self._format_related(
                        article, subscriber)
                    ninjs[ASSOCIATIONS].update(associations)
            elif article.get(ASSOCIATIONS):
                ninjs[ASSOCIATIONS], extra_items = self._format_related(
                    article, subscriber)
        if extra_items:
            ninjs.setdefault(EXTRA_ITEMS, {}).update(extra_items)

        if article.get("embargoed"):
            ninjs["embargoed"] = article["embargoed"].isoformat()

        # embargo set in superdesk overrides ingested one
        if article.get(EMBARGO):
            ninjs["embargoed"] = get_utc_schedule(article, EMBARGO).isoformat()

        # A missing or falsy priority falls back to 5.
        ninjs["priority"] = article.get("priority") or 5

        if article.get("subject"):
            ninjs["subject"] = self._get_subject(article)

        if article.get("anpa_category"):
            ninjs["service"] = self._get_service(article)

        if article.get("renditions"):
            ninjs["renditions"] = self._get_renditions(article)
        elif "url" in article:
            ninjs["renditions"] = self._generate_renditions(article)

        if "order" in article:
            ninjs["order"] = article["order"]

        # SDPA-317
        if "abstract" in article:
            abstract = article.get("abstract", "")
            ninjs["description_html"] = abstract
            ninjs["description_text"] = text_utils.get_text(abstract)
        elif article.get("description_text"):
            ninjs["description_text"] = article.get("description_text")

        if article.get("company_codes"):
            ninjs["organisation"] = [
                {
                    "name": c.get("name", ""),
                    "rel": "Securities Identifier",
                    "symbols": [{
                        "ticker": c.get("qcode", ""),
                        "exchange": c.get("security_exchange", ""),
                    }],
                }
                for c in article["company_codes"]
            ]
        elif "company" in article:
            ninjs["organisation"] = [{"name": article["company"]}]

        if article.get("rewrite_of"):
            ninjs["evolvedfrom"] = article["rewrite_of"]

        # Rights info is looked up from the vocabularies service only when
        # none of the three rights fields has a value yet.
        if not ninjs.get("copyrightholder") and not ninjs.get(
                "copyrightnotice") and not ninjs.get("usageterms"):
            ninjs.update(
                superdesk.get_resource_service("vocabularies").get_rightsinfo(
                    article))

        if article.get("genre"):
            ninjs["genre"] = self._get_genre(article)

        # The legal flag replaces whatever "signal" holds at this point;
        # the article's own signals are appended after it.
        if (article.get("flags") or {}).get("marked_for_legal"):
            ninjs["signal"] = self._format_signal_cwarn()

        if article.get("signal"):
            ninjs.setdefault("signal", []).extend(
                [self._format_signal(signal) for signal in article["signal"]])

        if article.get("attachments"):
            ninjs["attachments"] = self._format_attachments(article)

        # Text statistics prefer body_html and fall back to body_text.
        if ninjs["type"] == CONTENT_TYPE.TEXT and ("body_html" in ninjs
                                                   or "body_text" in ninjs):
            if "body_html" in ninjs:
                body_html = ninjs["body_html"]
                word_count = text_utils.get_word_count(body_html)
                char_count = text_utils.get_char_count(body_html)
                readtime = text_utils.get_reading_time(body_html, word_count,
                                                       article.get("language"))
            else:
                body_text = ninjs["body_text"]
                word_count = text_utils.get_text_word_count(body_text)
                char_count = len(body_text)
                readtime = text_utils.get_reading_time(body_text, word_count,
                                                       article.get("language"))
            ninjs["charcount"] = char_count
            ninjs["wordcount"] = word_count
            ninjs["readtime"] = readtime

        if article.get("authors"):
            ninjs["authors"] = self._format_authors(article)

        publish_schedule = (article.get("schedule_settings")
                            or {}).get("utc_publish_schedule")
        if publish_schedule:
            ninjs["publish_schedule"] = publish_schedule

        # set description for custom embed field
        if article.get("extra"):
            # NOTE: ninjs["extra"] is the article's own object, so the
            # setdefault below also updates article["extra"] in place.
            ninjs["extra"] = article["extra"]
            for value in ninjs["extra"].values():
                # isinstance also accepts dict subclasses.
                if isinstance(value, dict) and "embed" in value:
                    value.setdefault("description", "")

        return ninjs
예제 #2
0
    def _transform_to_ninjs(self, article, subscriber, recursive=True):
        """Convert ``article`` into a ninjs dictionary.

        The fields are filled in a fixed order; later steps may overwrite
        values that an earlier step (for instance the direct property copy)
        already stored.

        :param article: mapping with the source item.
        :param subscriber: handed on to ``_get_associations`` and
            ``_format_related``.
        :param recursive: when true and the item is a composite, the result
            of ``_get_associations`` is the base of the associations;
            otherwise only ``_format_related`` is used.
        :return: the populated ninjs dict.
        """
        output = {
            'guid': article.get(GUID_FIELD, article.get('uri')),
            'version': str(article.get(config.VERSION, 1)),
            'type': self._get_type(article)
        }

        byline = article.get('byline')
        if byline:
            output['byline'] = byline

        dateline = article.get('dateline', {})
        origin = dateline.get('located', {})
        if origin:
            output['located'] = origin.get('city', '')

        # Straight copies of every configured property that has a value.
        output.update({
            name: article[name]
            for name in self.direct_copy_properties
            if article.get(name) is not None
        })

        # alt_text stands in for a missing body_text.
        if 'alt_text' in article and 'body_text' not in article:
            output['body_text'] = article['alt_text']

        if 'title' in article:
            output['headline'] = article['title']

        if article.get('body_html'):
            output['body_html'] = self.append_body_footer(article)

        if article.get('description'):
            output['description_html'] = self.append_body_footer(article)

        if article.get('place'):
            output['place'] = self._format_place(article)

        profile = article.get('profile')
        if profile:
            output['profile'] = self._format_profile(profile)

        # Composite items (only when recursing) start from their package
        # associations; everything else uses the related items directly.
        if recursive and article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            package_refs = self._get_associations(article, subscriber)
            output[ASSOCIATIONS] = package_refs
            if article.get(ASSOCIATIONS):
                package_refs.update(self._format_related(article, subscriber))
        elif article.get(ASSOCIATIONS):
            output[ASSOCIATIONS] = self._format_related(article, subscriber)

        if article.get(EMBARGO):
            embargo_time = get_utc_schedule(article, EMBARGO)
            output['embargoed'] = embargo_time.isoformat()

        # A missing or falsy priority becomes 5.
        output['priority'] = article.get('priority') or 5

        if article.get('subject'):
            output['subject'] = self._get_subject(article)

        if article.get('anpa_category'):
            output['service'] = self._get_service(article)

        if article.get('renditions'):
            output['renditions'] = self._get_renditions(article)
        elif 'url' in article:
            output['renditions'] = self._generate_renditions(article)

        # SDPA-317
        if 'abstract' in article:
            abstract = article['abstract']
            output['description_html'] = abstract
            output['description_text'] = text_utils.get_text(abstract)
        else:
            description_text = article.get('description_text')
            if description_text:
                output['description_text'] = description_text

        if article.get('company_codes'):
            organisations = []
            for code in article['company_codes']:
                symbol = {
                    'ticker': code.get('qcode', ''),
                    'exchange': code.get('security_exchange', '')
                }
                organisations.append({
                    'name': code.get('name', ''),
                    'rel': 'Securities Identifier',
                    'symbols': [symbol]
                })
            output['organisation'] = organisations
        elif 'company' in article:
            output['organisation'] = [{'name': article['company']}]

        evolved_from = article.get('rewrite_of')
        if evolved_from:
            output['evolvedfrom'] = evolved_from

        # Rights info is fetched only when none of the rights fields is set.
        rights_keys = ('copyrightholder', 'copyrightnotice', 'usageterms')
        if not any(output.get(key) for key in rights_keys):
            vocabularies = superdesk.get_resource_service('vocabularies')
            output.update(vocabularies.get_rightsinfo(article))

        if 'genre' in article:
            output['genre'] = self._get_genre(article)

        flags = article.get('flags', {})
        if flags.get('marked_for_legal'):
            output['signal'] = self._format_signal_cwarn()

        if article.get('attachments'):
            output['attachments'] = self._format_attachments(article)

        # Text statistics: body_html wins over body_text when both exist.
        if output['type'] == CONTENT_TYPE.TEXT and not {
                'body_html', 'body_text'}.isdisjoint(output):
            if 'body_html' in output:
                content = output['body_html']
                words = text_utils.get_word_count(content)
                chars = text_utils.get_char_count(content)
            else:
                content = output['body_text']
                words = text_utils.get_text_word_count(content)
                chars = len(content)
            minutes = text_utils.get_reading_time(content, words,
                                                  article.get('language'))
            output['charcount'] = chars
            output['wordcount'] = words
            output['readtime'] = minutes

        if article.get('authors'):
            output['authors'] = self._format_authors(article)

        return output
    def _transform_to_ninjs(self, article, subscriber, recursive=True):
        """Produce the ninjs representation of ``article``.

        Steps run in a fixed sequence because a later step can replace a
        value written by an earlier one (e.g. ``description_html``).

        :param article: mapping describing the item being formatted.
        :param subscriber: passed to ``_get_associations`` and
            ``_format_related``.
        :param recursive: when true and the item is a composite,
            ``_get_associations`` provides the base associations; in every
            other case only ``_format_related`` is consulted.
        :return: dict of ninjs fields; extra items returned by
            ``_format_related`` are merged under ``EXTRA_ITEMS``.
        """
        result = {
            'guid': article.get(GUID_FIELD, article.get('uri')),
            'version': str(article.get(config.VERSION, 1)),
            'type': self._get_type(article)
        }

        author_line = article.get('byline')
        if author_line:
            result['byline'] = author_line

        located = article.get('dateline', {}).get('located', {})
        if located:
            result['located'] = located.get('city', '')

        # Copy over each configured property that is present and not None.
        for prop in self.direct_copy_properties:
            value = article.get(prop)
            if value is not None:
                result[prop] = value

        # Fall back to alt_text when the article carries no body_text.
        if 'alt_text' in article and 'body_text' not in article:
            result['body_text'] = article['alt_text']

        if 'title' in article:
            result['headline'] = article['title']

        if article.get('body_html'):
            result['body_html'] = self.append_body_footer(article)

        if article.get('description'):
            result['description_html'] = self.append_body_footer(article)

        if article.get('place'):
            result['place'] = self._format_place(article)

        profile = article.get('profile')
        if profile:
            result['profile'] = self._format_profile(profile)

        # Recursing composites merge related items into their package
        # associations; all other items take the related items as they are.
        extra_items = None
        if recursive and article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            result[ASSOCIATIONS] = self._get_associations(article, subscriber)
            if article.get(ASSOCIATIONS):
                related, extra_items = self._format_related(
                    article, subscriber)
                result[ASSOCIATIONS].update(related)
        elif article.get(ASSOCIATIONS):
            result[ASSOCIATIONS], extra_items = self._format_related(
                article, subscriber)
        if extra_items:
            result.setdefault(EXTRA_ITEMS, {}).update(extra_items)

        if article.get(EMBARGO):
            result['embargoed'] = get_utc_schedule(
                article, EMBARGO).isoformat()

        # Priority defaults to 5 when absent or falsy.
        priority = article.get('priority')
        result['priority'] = priority if priority else 5

        if article.get('subject'):
            result['subject'] = self._get_subject(article)

        if article.get('anpa_category'):
            result['service'] = self._get_service(article)

        if article.get('renditions'):
            result['renditions'] = self._get_renditions(article)
        elif 'url' in article:
            result['renditions'] = self._generate_renditions(article)

        # SDPA-317
        if 'abstract' in article:
            abstract = article['abstract']
            result['description_html'] = abstract
            result['description_text'] = text_utils.get_text(abstract)
        elif article.get('description_text'):
            result['description_text'] = article['description_text']

        if article.get('company_codes'):
            result['organisation'] = [
                {
                    'name': company.get('name', ''),
                    'rel': 'Securities Identifier',
                    'symbols': [{
                        'ticker': company.get('qcode', ''),
                        'exchange': company.get('security_exchange', ''),
                    }],
                }
                for company in article['company_codes']
            ]
        elif 'company' in article:
            result['organisation'] = [{'name': article['company']}]

        previous_version = article.get('rewrite_of')
        if previous_version:
            result['evolvedfrom'] = previous_version

        # Look up rights info only when no rights field has a value yet.
        if not (result.get('copyrightholder')
                or result.get('copyrightnotice')
                or result.get('usageterms')):
            rights_service = superdesk.get_resource_service('vocabularies')
            result.update(rights_service.get_rightsinfo(article))

        if 'genre' in article:
            result['genre'] = self._get_genre(article)

        if article.get('flags', {}).get('marked_for_legal'):
            result['signal'] = self._format_signal_cwarn()

        if article.get('attachments'):
            result['attachments'] = self._format_attachments(article)

        # Counts and reading time are computed from body_html when present,
        # from body_text otherwise.
        if result['type'] == CONTENT_TYPE.TEXT and (
                'body_html' in result or 'body_text' in result):
            uses_html = 'body_html' in result
            body = result['body_html' if uses_html else 'body_text']
            if uses_html:
                words = text_utils.get_word_count(body)
                chars = text_utils.get_char_count(body)
            else:
                words = text_utils.get_text_word_count(body)
                chars = len(body)
            readtime = text_utils.get_reading_time(
                body, words, article.get('language'))
            result['charcount'] = chars
            result['wordcount'] = words
            result['readtime'] = readtime

        if article.get('authors'):
            result['authors'] = self._format_authors(article)

        return result