Example #1
0
    def __set_published_item_expiry(self, doc):
        """Compute the expiry of a published item and store it on the doc.

        The desk and stage ids come from ``doc['task']`` (``None`` when absent).
        The offset handed to ``get_expiry`` is the publish schedule, or the
        embargo when the publish schedule is falsy. The result is written to
        ``doc['expiry']``.

        :param dict doc: doc on which publishing action is performed
        """
        task = doc.get('task', {})
        desk_id = task.get('desk')
        stage_id = task.get('stage')

        # The publish schedule takes precedence; the embargo is only a fallback.
        schedule = get_utc_schedule(doc, PUBLISH_SCHEDULE)
        if not schedule:
            schedule = get_utc_schedule(doc, EMBARGO)

        doc['expiry'] = get_expiry(desk_id, stage_id, offset=schedule)
    def __set_published_item_expiry(self, doc):
        """Store the expiry of the published item in ``doc['expiry']``.

        ``get_expiry`` is called with the desk and stage found under
        ``doc['task']`` and with an offset that is the publish schedule when
        it is truthy, otherwise the embargo.

        :param dict doc: doc on which publishing action is performed
        """
        task_info = doc.get('task', {})
        desk = task_info.get('desk')
        stage = task_info.get('stage')

        publish_offset = get_utc_schedule(doc, PUBLISH_SCHEDULE)
        # Only consult the embargo when no publish schedule was returned.
        expiry_offset = publish_offset if publish_offset else get_utc_schedule(doc, EMBARGO)

        doc['expiry'] = get_expiry(desk, stage, offset=expiry_offset)
Example #3
0
    def _set_item_expiry(self, updates, original):
        """
        Set the expiry for the item

        The desk and stage are read from ``original['task']``. The offset is
        taken from the publish schedule (or, when that is falsy, the embargo)
        of ``updates`` if either key is present there, otherwise of
        ``original``. When neither document carries such a key the offset is
        ``None``.

        :param dict updates: doc on which publishing action is performed;
            receives the computed ``expiry``
        :param dict original: the item the updates apply to
        """
        task = original.get('task', {})
        desk_id = task.get('desk')
        stage_id = task.get('stage')

        # Bind a default first: without it, an item that has neither an embargo
        # nor a publish schedule raised UnboundLocalError at the call below.
        offset = None
        if EMBARGO in updates or PUBLISH_SCHEDULE in updates:
            offset = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(updates, EMBARGO)
        elif EMBARGO in original or PUBLISH_SCHEDULE in original:
            offset = get_utc_schedule(original, PUBLISH_SCHEDULE) or get_utc_schedule(original, EMBARGO)

        updates['expiry'] = get_expiry(desk_id, stage_id, offset=offset)
    def __format_head(self, article, head):
        """Fill the ``head`` element with title, tobject, docdata and keywords.

        An embargoed article gets the management status ``embargoed`` and its
        embargo schedule as ``date.expire``; any other article gets its
        ``pubstatus`` and ``expiry`` values.

        :param dict article: article being formatted
        :param Element head: element that receives the children
        """
        title_element = SubElement(head, 'title')
        title_element.text = article.get('headline', '')

        tobject = SubElement(head, 'tobject', {'tobject.type': 'news'})
        if 'genre' in article and len(article['genre']) > 0:
            # Only the first genre is exported.
            first_genre = article['genre'][0]
            SubElement(tobject, 'tobject.property', {'tobject.property.type': first_genre['name']})
        self.__format_subjects(article, tobject)

        embargoed = article.get(EMBARGO)
        management_status = 'embargoed' if embargoed else article.get('pubstatus', '')
        docdata = SubElement(head, 'docdata', {'management-status': management_status})
        if embargoed:
            expire_norm = str(get_utc_schedule(article, EMBARGO).isoformat())
        else:
            expire_norm = str(article.get('expiry', ''))
        SubElement(docdata, 'date.expire', {'norm': expire_norm})

        SubElement(docdata, 'urgency', {'ed-urg': str(article.get('urgency', ''))})
        SubElement(docdata, 'date.issue', {'norm': str(article.get('firstcreated', ''))})
        SubElement(docdata, 'doc-id', attrib={'id-string': article.get('guid', '')})

        if article.get('ednote'):
            SubElement(docdata, 'ed-msg', {'info': article.get('ednote', '')})

        self.__format_keywords(article, head)
    def _format_news_management(self, formatted_article, news_item):
        """
        Append a NewsManagement element to ``news_item``.

        The element carries the item type, the creation and revision
        timestamps, the status (with a StatusWillChange child when the article
        is embargoed), the optional urgency, the instruction and two Property
        children.

        :param dict formatted_article:
        :param Element news_item:
        """
        timestamp_format = '%Y%m%dT%H%M%S+0000'

        management = SubElement(news_item, "NewsManagement")
        SubElement(management, 'NewsItemType', {'FormalName': 'News'})

        first_created = formatted_article['firstcreated'].strftime(timestamp_format)
        SubElement(management, 'FirstCreated').text = first_created
        revision_created = formatted_article['versioncreated'].strftime(timestamp_format)
        SubElement(management, 'ThisRevisionCreated').text = revision_created

        if not formatted_article.get(EMBARGO):
            SubElement(management, 'Status', {'FormalName': formatted_article['pubstatus']})
        else:
            # Embargoed now; the real pubstatus becomes effective at the embargo time.
            SubElement(management, 'Status', {'FormalName': 'Embargoed'})
            will_change = SubElement(management, 'StatusWillChange')
            SubElement(will_change, 'FutureStatus', {'FormalName': formatted_article['pubstatus']})
            embargo_time = get_utc_schedule(formatted_article, EMBARGO).isoformat()
            SubElement(will_change, 'DateAndTime').text = embargo_time

        if formatted_article.get('urgency'):
            SubElement(management, 'Urgency', {'FormalName': str(formatted_article['urgency'])})

        instruction = 'Correction' if formatted_article['state'] == 'corrected' else 'Update'
        SubElement(management, 'Instruction', {'FormalName': instruction})

        SubElement(management, 'Property', {'FormalName': 'reuters.3rdPartyStyleGuideVersion', 'Value': '2.1'})
        # USN is built from the last five digits of the unique id (1 when missing).
        usn_number = int(formatted_article.get('unique_id', 1)) % 100000
        SubElement(management, 'Property', {'FormalName': 'USN', 'Value': 'AAP' + str(usn_number) + 'a'})
 def test_get_utc_schedule(self):
     """The value stored under 'embargo' is what get_utc_schedule returns."""
     expected = utcnow() + timedelta(minutes=10)
     item = {'embargo': expected}
     self.assertEqual(get_utc_schedule(item, 'embargo'), expected)
Example #7
0
    def _format_newsmanagement(self, newsitem):
        """
        Create the NewsManagement element and add it to `newsitem`.

        The article is read from ``self._article`` and the two timestamps are
        rendered with ``self.DATETIME_FORMAT``. An embargoed article is marked
        ``Embargoed`` with a StatusWillChange child holding the upper-cased
        pubstatus and the embargo schedule; otherwise the upper-cased pubstatus
        is the status.

        :param Element newsitem:
        """
        management = SubElement(newsitem, 'NewsManagement')
        SubElement(management, 'NewsItemType', {'FormalName': 'News'})

        first_created = self._article.get('firstcreated').strftime(self.DATETIME_FORMAT)
        SubElement(management, 'FirstCreated').text = first_created

        revision_created = self._article['versioncreated'].strftime(self.DATETIME_FORMAT)
        SubElement(management, 'ThisRevisionCreated').text = revision_created

        if not self._article.get(EMBARGO):
            current_status = self._article.get('pubstatus', '').upper()
            SubElement(management, 'Status', {'FormalName': current_status})
            return

        SubElement(management, 'Status', {'FormalName': 'Embargoed'})
        will_change = SubElement(management, 'StatusWillChange')
        future_status = self._article.get('pubstatus', '').upper()
        SubElement(will_change, 'FutureStatus', {'FormalName': future_status})
        embargo_time = get_utc_schedule(self._article, EMBARGO).isoformat()
        SubElement(will_change, 'DateAndTime').text = embargo_time
Example #8
0
 def test_get_utc_schedule(self):
     """get_utc_schedule hands back the embargo value stored on the content."""
     ten_minutes_ahead = utcnow() + timedelta(minutes=10)
     result = get_utc_schedule({'embargo': ten_minutes_ahead}, 'embargo')
     self.assertEqual(result, ten_minutes_ahead)
    def format(self, article, subscriber, codes=None):
        """
        Constructs the parameters passed to the SMS InsertAlerts stored procedure

        :param article: item to format
        :param subscriber: subscriber a sequence number is generated for
        :param codes: not used by this formatter
        :return: a one-element list holding a tuple of the subscriber's sequence
            number and the JSON-encoded parameter dictionary
        :raises: the error built by ``FormatterError.AAPSMSFormatterError`` for
            any exception raised while formatting
        """
        try:
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            # Fall back to the abstract when no SMS message is set; single quotes are doubled.
            abstract = article.get('abstract', '')
            sms_message = article.get('sms_message', abstract).replace('\'', '\'\'')

            # category = 1 is used to indicate a test message
            if superdesk.app.config.get('TEST_SMS_OUTPUT', True) is True:
                category = '1'
            else:
                category = article.get('anpa_category', [{}])[0].get('qcode').upper()

            odbc_item = {
                'Sequence': pub_seq_num,
                'Category': category,
                'Headline': BeautifulSoup(sms_message, 'html.parser').text,
                'Priority': map_priority(article.get('priority')),
            }

            body = self.append_body_footer(article)
            if article.get(EMBARGO):
                embargo_time = get_utc_schedule(article, EMBARGO).isoformat()
                body = '{}{}'.format('Embargo Content. Timestamp: ', embargo_time) + body

            if article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                # Text items are reduced to their plain text content.
                body = BeautifulSoup(body, "html.parser").text

            odbc_item['StoryText'] = body.replace('\'', '\'\'')  # @article_text
            odbc_item['ident'] = '0'

            payload = json.dumps(odbc_item)
            return [(pub_seq_num, payload)]
        except Exception as ex:
            raise FormatterError.AAPSMSFormatterError(ex, subscriber)
    def _format_news_management(self, article, news_item):
        """
        Append a NewsManagement element to ``news_item``.

        It holds the item type, the creation and revision timestamps, the
        status (plus a StatusWillChange child for an embargoed article), the
        optional urgency and the instruction.

        :param dict article:
        :param Element news_item:
        """
        timestamp_format = '%Y%m%dT%H%M%S+0000'

        management = SubElement(news_item, "NewsManagement")
        SubElement(management, 'NewsItemType', {'FormalName': 'News'})

        first_created = article['firstcreated'].strftime(timestamp_format)
        SubElement(management, 'FirstCreated').text = first_created
        revision_created = article['versioncreated'].strftime(timestamp_format)
        SubElement(management, 'ThisRevisionCreated').text = revision_created

        if not article.get(EMBARGO):
            SubElement(management, 'Status', {'FormalName': article['pubstatus']})
        else:
            # Embargoed now; the article's pubstatus applies from the embargo time.
            SubElement(management, 'Status', {'FormalName': 'Embargoed'})
            will_change = SubElement(management, 'StatusWillChange')
            SubElement(will_change, 'FutureStatus', {'FormalName': article['pubstatus']})
            embargo_time = get_utc_schedule(article, EMBARGO).isoformat()
            SubElement(will_change, 'DateAndTime').text = embargo_time

        if article.get('urgency'):
            SubElement(management, 'Urgency', {'FormalName': str(article['urgency'])})

        instruction = 'Correction' if article['state'] == 'corrected' else 'Update'
        SubElement(management, 'Instruction', {'FormalName': instruction})
 def _format_date_expire(self, article, docdata):
     """Set the management status on ``docdata`` and append ``date.expire``.

     An embargoed article is marked ``embargoed`` and expires at its embargo
     schedule; otherwise the article's ``pubstatus`` and ``expiry`` are used.

     :param dict article: article being formatted
     :param Element docdata: element updated in place
     """
     if not article.get(EMBARGO):
         docdata.attrib["management-status"] = article.get("pubstatus", "")
         SubElement(docdata, "date.expire", {"norm": str(article.get("expiry", ""))})
         return

     docdata.attrib["management-status"] = "embargoed"
     embargo_time = get_utc_schedule(article, EMBARGO)
     SubElement(docdata, "date.expire", {"norm": str(embargo_time.isoformat())})
    def _format_news_management(self, article, news_item):
        """
        Build the NewsManagement element under ``news_item``.

        Children are added in this order: NewsItemType, FirstCreated,
        ThisRevisionCreated, Status (followed by StatusWillChange when the
        article is embargoed), Urgency (only when set) and Instruction.

        :param dict article:
        :param Element news_item:
        """
        def as_timestamp(moment):
            # Timestamp layout used for both creation dates.
            return moment.strftime('%Y%m%dT%H%M%S+0000')

        root = SubElement(news_item, "NewsManagement")
        SubElement(root, 'NewsItemType', {'FormalName': 'News'})

        created_text = as_timestamp(article['firstcreated'])
        SubElement(root, 'FirstCreated').text = created_text
        revised_text = as_timestamp(article['versioncreated'])
        SubElement(root, 'ThisRevisionCreated').text = revised_text

        if article.get(EMBARGO):
            SubElement(root, 'Status', {'FormalName': 'Embargoed'})
            pending = SubElement(root, 'StatusWillChange')
            SubElement(pending, 'FutureStatus', {'FormalName': article['pubstatus']})
            release_time = get_utc_schedule(article, EMBARGO).isoformat()
            SubElement(pending, 'DateAndTime').text = release_time
        else:
            SubElement(root, 'Status', {'FormalName': article['pubstatus']})

        if article.get('urgency'):
            SubElement(root, 'Urgency', {'FormalName': str(article['urgency'])})

        if article['state'] == 'corrected':
            instruction = 'Correction'
        else:
            instruction = 'Update'
        SubElement(root, 'Instruction', {'FormalName': instruction})
Example #13
0
    def __format_head(self, article, head):
        """Populate ``head`` with the title, tobject, docdata and keywords.

        The docdata management status is ``embargoed`` with the embargo
        schedule as ``date.expire`` when the article is embargoed, otherwise
        the article's ``pubstatus`` with its ``expiry``.

        :param dict article: article being formatted
        :param Element head: element that receives the children
        """
        title_element = SubElement(head, 'title')
        title_element.text = article.get('headline', '')

        tobject = SubElement(head, 'tobject', {'tobject.type': 'news'})
        if 'genre' in article and len(article['genre']) > 0:
            # Only the first genre is exported.
            genre_name = article['genre'][0]['name']
            SubElement(tobject, 'tobject.property', {'tobject.property.type': genre_name})
        self.__format_subjects(article, tobject)

        is_embargoed = article.get(EMBARGO)
        status = 'embargoed' if is_embargoed else article.get('pubstatus', '')
        docdata = SubElement(head, 'docdata', {'management-status': status})
        if is_embargoed:
            expires = str(get_utc_schedule(article, EMBARGO).isoformat())
        else:
            expires = str(article.get('expiry', ''))
        SubElement(docdata, 'date.expire', {'norm': expires})

        SubElement(docdata, 'urgency', {'ed-urg': str(article.get('urgency', ''))})
        SubElement(docdata, 'date.issue', {'norm': str(article.get('firstcreated', ''))})
        SubElement(docdata, 'doc-id', attrib={'id-string': article.get('guid', '')})

        if article.get('ednote'):
            SubElement(docdata, 'ed-msg', {'info': article.get('ednote', '')})

        self.__format_keywords(article, head)
Example #14
0
 def _format_date_expire(self, article, docdata):
     """Write the management status and the ``date.expire`` child to ``docdata``.

     For an embargoed article the status is ``embargoed`` and the expiry is
     the embargo schedule; otherwise the article's ``pubstatus`` and
     ``expiry`` values are used.

     :param dict article: article being formatted
     :param Element docdata: element updated in place
     """
     embargoed = article.get(EMBARGO)
     docdata.attrib['management-status'] = 'embargoed' if embargoed else article.get('pubstatus', '')
     if embargoed:
         norm = str(get_utc_schedule(article, EMBARGO).isoformat())
     else:
         norm = str(article.get('expiry', ''))
     SubElement(docdata, 'date.expire', {'norm': norm})
Example #15
0
    def _set_item_expiry(self, updates, original):
        """Set the expiry for the item.

        The offset is the publish schedule (or, when that is falsy, the
        embargo) of ``updates`` if either key is present there, otherwise of
        ``original``; it is ``None`` when neither document has such a key.
        When the ``PUBLISHED_CONTENT_EXPIRY_MINUTES`` setting is truthy the
        expiry comes from ``get_expiry_date`` with that setting, otherwise from
        ``get_expiry`` with the desk and stage of ``original['task']``.

        :param dict updates: doc on which publishing action is performed;
            receives the computed ``expiry``
        :param dict original: the item the updates apply to
        """
        task = original.get("task", {})
        desk_id = task.get("desk")
        stage_id = task.get("stage")

        # Bind a default first: without it, an item that has neither an embargo
        # nor a publish schedule raised UnboundLocalError further down.
        offset = None
        if EMBARGO in updates or PUBLISH_SCHEDULE in updates:
            offset = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(updates, EMBARGO)
        elif EMBARGO in original or PUBLISH_SCHEDULE in original:
            offset = get_utc_schedule(original, PUBLISH_SCHEDULE) or get_utc_schedule(original, EMBARGO)

        if app.settings.get("PUBLISHED_CONTENT_EXPIRY_MINUTES"):
            updates["expiry"] = get_expiry_date(app.settings["PUBLISHED_CONTENT_EXPIRY_MINUTES"], offset=offset)
        else:
            updates["expiry"] = get_expiry(desk_id, stage_id, offset=offset)
Example #16
0
    def _set_item_expiry(self, updates, original):
        """Set the expiry for the item.

        The offset is the publish schedule (or, when that is falsy, the
        embargo) of ``updates`` if either key is present there, otherwise of
        ``original``; it is ``None`` when neither document has such a key.
        When the ``PUBLISHED_CONTENT_EXPIRY_MINUTES`` setting is truthy the
        expiry comes from ``get_expiry_date`` with that setting, otherwise from
        ``get_expiry`` with the desk and stage of ``original['task']``.

        :param dict updates: doc on which publishing action is performed;
            receives the computed ``expiry``
        :param dict original: the item the updates apply to
        """
        task = original.get('task', {})
        desk_id = task.get('desk')
        stage_id = task.get('stage')

        # Bind a default first: without it, an item that has neither an embargo
        # nor a publish schedule raised UnboundLocalError further down.
        offset = None
        if EMBARGO in updates or PUBLISH_SCHEDULE in updates:
            offset = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(updates, EMBARGO)
        elif EMBARGO in original or PUBLISH_SCHEDULE in original:
            offset = get_utc_schedule(original, PUBLISH_SCHEDULE) or get_utc_schedule(original, EMBARGO)

        if app.settings.get('PUBLISHED_CONTENT_EXPIRY_MINUTES'):
            updates['expiry'] = get_expiry_date(app.settings['PUBLISHED_CONTENT_EXPIRY_MINUTES'], offset=offset)
        else:
            updates['expiry'] = get_expiry(desk_id, stage_id, offset=offset)
Example #17
0
 def _format_date_expire(self, article, docdata):
     """Set the management status on ``docdata`` and add its ``date.expire``.

     Embargoed articles are flagged ``embargoed`` and expire at the embargo
     schedule; all others carry their own ``pubstatus`` and ``expiry``.

     :param dict article: article being formatted
     :param Element docdata: element updated in place
     """
     if not article.get(EMBARGO):
         docdata.attrib['management-status'] = article.get('pubstatus', '')
         expiry_text = str(article.get('expiry', ''))
         SubElement(docdata, 'date.expire', {'norm': expiry_text})
         return

     docdata.attrib['management-status'] = 'embargoed'
     embargo_text = str(get_utc_schedule(article, EMBARGO).isoformat())
     SubElement(docdata, 'date.expire', {'norm': embargo_text})
 def add_embargo(self, odbc_item, article):
     """
     Prefix the article text with an embargo notice when the article is embargoed.

     Nothing is changed for an article without an embargo.

     :param odbc_item: mapping whose ``article_text`` entry is updated in place
     :param article: article that may carry an embargo
     :return: None
     """
     if not article.get(EMBARGO):
         return

     embargo_time = get_utc_schedule(article, EMBARGO).isoformat()
     notice = '{}{}\r\n'.format('Embargo Content. Timestamp: ', embargo_time)
     odbc_item['article_text'] = notice + odbc_item['article_text']
 def add_embargo(self, odbc_item, article):
     """
     Put an embargo line in front of the article text if the article is embargoed.

     An article without an embargo leaves ``odbc_item`` untouched.

     :param odbc_item: mapping whose ``article_text`` entry is updated in place
     :param article: article that may carry an embargo
     :return: None
     """
     if article.get(EMBARGO):
         timestamp = get_utc_schedule(article, EMBARGO).isoformat()
         header = '{}{}\r\n'.format('Embargo Content. Timestamp: ', timestamp)
         odbc_item['article_text'] = header + odbc_item['article_text']
    def get_subscribers(self, doc, target_media_type):
        """Get the subscribers for this document based on the target_media_type for article Correction.

        1. The article is sent to Subscribers (digital and wire) who has received the article previously.
        2. For subsequent takes, only published to previously published wire clients. Digital clients don't get
           individual takes but digital client takes package.
        3. If the item has embargo and is a future date then fetch active Wire Subscribers.
           Otherwise fetch Active Subscribers. After fetching exclude those who received the article previously from
           active subscribers list.
        4. If the article has the 'target_regions' property then only the subscribers returned by
           ``self.non_digital`` are kept.
        5. Filter the subscriber that have not received the article previously against publish filters
           and global filters for this document.

        :param doc: Document to correct
        :param target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queues is an Individual Article.
        :return: tuple of (subscribers that received the item previously, filtered subscribers that have not
                 received the item previously, subscriber codes)
        """
        # step 1
        sent_states = [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]
        previously_sent_query = {'$and': [{'item_id': doc['item_id']},
                                          {'publishing_action': {'$in': sent_states}}]}
        subscribers, subscriber_codes = self._get_subscribers_for_previously_sent_items(previously_sent_query)
        subscribers_yet_to_receive = []

        # Nobody received the item before: there is nothing further to work out.
        if not subscribers:
            return subscribers, subscribers_yet_to_receive, subscriber_codes

        # step 2
        if not self.takes_package_service.get_take_package_id(doc):
            # Step 3
            lookup = {'is_active': True}
            if doc.get(EMBARGO) and get_utc_schedule(doc, EMBARGO) > utcnow():
                # No media_type restriction is applied here (dropped for ticket SD-4465).
                lookup['subscriber_type'] = SUBSCRIBER_TYPES.WIRE

            active_subscribers = list(get_resource_service('subscribers').get(req=None, lookup=lookup))
            for candidate in active_subscribers:
                already_received = any(candidate[config.ID_FIELD] == sent[config.ID_FIELD]
                                       for sent in subscribers)
                if not already_received:
                    subscribers_yet_to_receive.append(candidate)

        if subscribers_yet_to_receive:
            # Step 4
            if doc.get('target_regions'):
                subscribers_yet_to_receive = list(self.non_digital(subscribers_yet_to_receive))
            # Step 5
            subscribers_yet_to_receive, codes = \
                self.filter_subscribers(doc, subscribers_yet_to_receive, target_media_type)
            if codes:
                subscriber_codes.update(codes)

        return subscribers, subscribers_yet_to_receive, subscriber_codes
    def _format_item_meta(self, article, item_meta, item):
        """Fill ``item_meta`` with the item class, provider, timestamps and pubstatus.

        An ``embargoed`` child holding the embargo schedule is added for an
        embargoed article, followed by the optional ednote and signal.

        :param dict article: article being formatted
        :param Element item_meta: element that receives the children
        :param item: not used by this method
        """
        self._format_itemClass(article, item_meta)
        self._format_provider(item_meta)
        self._format_versioncreated(article, item_meta)
        self._format_firstcreated(article, item_meta)
        self._format_pubstatus(article, item_meta)

        if article.get(EMBARGO):
            embargo_time = get_utc_schedule(article, EMBARGO)
            SubElement(item_meta, 'embargoed').text = embargo_time.isoformat()

        # optional properties
        self._format_ednote(article, item_meta)
        self._format_signal(article, item_meta)
    def _format_item_meta(self, article, item_meta, item):
        """Write the itemMeta children for ``article`` into ``item_meta``.

        Order: item class, provider, version created, first created,
        pubstatus, ``embargoed`` (embargoed articles only), ednote, signal.

        :param dict article: article being formatted
        :param Element item_meta: element that receives the children
        :param item: not used by this method
        """
        self._format_itemClass(article, item_meta)
        self._format_provider(item_meta)
        self._format_versioncreated(article, item_meta)
        self._format_firstcreated(article, item_meta)
        self._format_pubstatus(article, item_meta)

        embargoed = article.get(EMBARGO)
        if embargoed:
            embargo_text = get_utc_schedule(article, EMBARGO).isoformat()
            SubElement(item_meta, 'embargoed').text = embargo_text

        # optional properties
        self._format_ednote(article, item_meta)
        self._format_signal(article, item_meta)
Example #23
0
    def get_subscribers(self, doc, target_media_type):
        """
        Get the subscribers for this document based on the target_media_type for article Correction.
        1. The article is sent to Subscribers (digital and wire) who has received the article previously.
        2. For subsequent takes, only published to previously published wire clients. Digital clients don't get
           individual takes but digital client takes package.
        3. If the item has embargo and is a future date then fetch active Wire Subscribers.
           Otherwise fetch Active Subscribers. After fetching exclude those who received the article previously from
           active subscribers list.
        4. If article has 'targeted_for' property then only the subscribers returned by ``self.non_digital``
           are kept and the Wire subscriber type is used for filtering instead of target_media_type.
        5. Filter the subscriber that have not received the article previously against publish filters
           and global filters for this document.
        :param doc: Document to correct
        :param target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queues is an Individual Article.
        :return: tuple of (subscribers that received the item previously, filtered subscribers that have not
                 received the item previously, subscriber codes)
        """
        # step 1
        sent_states = [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]
        previously_sent_query = {'$and': [{'item_id': doc['item_id']},
                                          {'publishing_action': {'$in': sent_states}}]}
        subscribers, subscriber_codes = self._get_subscribers_for_previously_sent_items(previously_sent_query)
        subscribers_yet_to_receive = []

        # Nobody received the item before: there is nothing further to work out.
        if not subscribers:
            return subscribers, subscribers_yet_to_receive, subscriber_codes

        # step 2
        if not self.takes_package_service.get_take_package_id(doc):
            # Step 3
            lookup = {'is_active': True}
            if doc.get(EMBARGO) and get_utc_schedule(doc, EMBARGO) > utcnow():
                lookup['subscriber_type'] = SUBSCRIBER_TYPES.WIRE

            active_subscribers = list(get_resource_service('subscribers').get(req=None, lookup=lookup))
            for candidate in active_subscribers:
                already_received = any(candidate[config.ID_FIELD] == sent[config.ID_FIELD]
                                       for sent in subscribers)
                if not already_received:
                    subscribers_yet_to_receive.append(candidate)

        if subscribers_yet_to_receive:
            # Step 4
            if doc.get('targeted_for'):
                subscribers_yet_to_receive = list(self.non_digital(subscribers_yet_to_receive))
            # Step 5
            if doc.get('targeted_for'):
                media_type = SUBSCRIBER_TYPES.WIRE
            else:
                media_type = target_media_type
            subscribers_yet_to_receive, codes = \
                self.filter_subscribers(doc, subscribers_yet_to_receive, media_type)
            if codes:
                subscriber_codes.update(codes)

        return subscribers, subscribers_yet_to_receive, subscriber_codes
    def _format_news_management(self, formatted_article, news_item):
        """
        Append a NewsManagement element to ``news_item``.

        The element carries the item type, the creation and revision
        timestamps, the status (with a StatusWillChange child when the article
        is embargoed), the optional urgency, the instruction and two Property
        children.

        :param dict formatted_article:
        :param Element news_item:
        """
        timestamp_format = '%Y%m%dT%H%M%S+0000'

        management = SubElement(news_item, "NewsManagement")
        SubElement(management, 'NewsItemType', {'FormalName': 'News'})

        first_created = formatted_article['firstcreated'].strftime(timestamp_format)
        SubElement(management, 'FirstCreated').text = first_created
        revision_created = formatted_article['versioncreated'].strftime(timestamp_format)
        SubElement(management, 'ThisRevisionCreated').text = revision_created

        if not formatted_article.get(EMBARGO):
            SubElement(management, 'Status', {'FormalName': formatted_article['pubstatus']})
        else:
            # Embargoed now; the real pubstatus becomes effective at the embargo time.
            SubElement(management, 'Status', {'FormalName': 'Embargoed'})
            will_change = SubElement(management, 'StatusWillChange')
            SubElement(will_change, 'FutureStatus', {'FormalName': formatted_article['pubstatus']})
            embargo_time = get_utc_schedule(formatted_article, EMBARGO).isoformat()
            SubElement(will_change, 'DateAndTime').text = embargo_time

        if formatted_article.get('urgency'):
            SubElement(management, 'Urgency', {'FormalName': str(formatted_article['urgency'])})

        instruction = 'Correction' if formatted_article['state'] == 'corrected' else 'Update'
        SubElement(management, 'Instruction', {'FormalName': instruction})

        style_guide = {'FormalName': 'reuters.3rdPartyStyleGuideVersion', 'Value': '2.1'}
        SubElement(management, 'Property', style_guide)

        # USN is built from the last five digits of the unique id (1 when missing).
        usn_number = int(formatted_article.get('unique_id', 1)) % 100000
        usn = {'FormalName': 'USN', 'Value': 'AAP' + str(usn_number) + 'a'}
        SubElement(management, 'Property', usn)
Example #25
0
    def _format_item_set(self, article, item_set, item_type):
        """Construct the item element (newsItem or packageItem) with its itemMeta and contentMeta children.

        Description and creditline are added to contentMeta only for picture,
        audio and video items.

        :param dict article:
        :param element item_set:
        :param str item_type:
        :return: the created item element
        """
        item_attributes = {
            'standard': 'NewsML-G2',
            'standardversion': '2.18',
            'guid': article['guid'],
            'version': str(article[superdesk.config.VERSION]),
            XML_LANG: article.get('language', 'en'),
            'conformance': 'power',
        }
        item = SubElement(item_set, item_type, attrib=item_attributes)

        catalog = {'href': 'http://www.iptc.org/std/catalog/catalog.IPTC-G2-Standards_25.xml'}
        SubElement(item, 'catalogRef', attrib=catalog)
        self._format_rights(item, article)

        # itemMeta
        item_meta = SubElement(item, 'itemMeta')
        self._format_itemClass(article, item_meta)
        self._format_provider(item_meta)
        self._format_versioncreated(article, item_meta)
        self._format_firstcreated(article, item_meta)
        self._format_pubstatus(article, item_meta)

        if article.get(EMBARGO):
            embargo_time = get_utc_schedule(article, EMBARGO)
            SubElement(item_meta, 'embargoed').text = embargo_time.isoformat()

        # optional properties
        self._format_ednote(article, item_meta)
        self._format_signal(article, item_meta)

        # contentMeta
        content_meta = SubElement(item, 'contentMeta')
        urgency_text = str(article.get('urgency', 5))
        SubElement(content_meta, 'urgency').text = urgency_text
        self._format_timestamps(article, content_meta)
        self._format_creator(article, content_meta)
        self._format_located(article, content_meta)
        self._format_subject(article, content_meta)
        self._format_genre(article, content_meta)
        self._format_slugline(article, content_meta)
        self._format_headline(article, content_meta)
        self._format_place(article, content_meta)
        self._format_category(article, content_meta)
        self._format_company_codes(article, content_meta, item)

        content_type = article[ITEM_TYPE]
        if content_type in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
            self._format_description(article, content_meta)
            self._format_creditline(article, content_meta)
        return item
    def _format_item_set(self, article, item_set, item_type):
        """
        Construct the item element (newsItem or packageItem) with its itemMeta and contentMeta children.

        Description and creditline are added to contentMeta only for picture,
        audio and video items.

        :param dict article:
        :param element item_set:
        :param str item_type:
        :return: the created item element
        """
        item_attributes = {
            'standard': 'NewsML-G2',
            'standardversion': '2.18',
            'guid': article['guid'],
            'version': str(article[superdesk.config.VERSION]),
            'xml:lang': article.get('language', 'en'),
            'conformance': 'power',
        }
        item = SubElement(item_set, item_type, attrib=item_attributes)

        catalog = {'href': 'http://www.iptc.org/std/catalog/catalog.IPTC-G2-Standards_25.xml'}
        SubElement(item, 'catalogRef', attrib=catalog)
        self._format_rights(item, article)

        # itemMeta
        item_meta = SubElement(item, 'itemMeta')
        self._format_itemClass(article, item_meta)
        self._format_provider(item_meta)
        self._format_versioncreated(article, item_meta)
        self._format_firstcreated(article, item_meta)
        self._format_pubstatus(article, item_meta)

        if article.get(EMBARGO):
            embargo_time = get_utc_schedule(article, EMBARGO)
            SubElement(item_meta, 'embargoed').text = embargo_time.isoformat()

        # optional properties
        self._format_ednote(article, item_meta)
        self._format_signal(article, item_meta)

        # contentMeta
        content_meta = SubElement(item, 'contentMeta')
        urgency_text = str(article.get('urgency', 5))
        SubElement(content_meta, 'urgency').text = urgency_text
        self._format_timestamps(article, content_meta)
        self._format_creator(article, content_meta)
        self._format_located(article, content_meta)
        self._format_subject(article, content_meta)
        self._format_genre(article, content_meta)
        self._format_slugline(article, content_meta)
        self._format_headline(article, content_meta)
        self._format_place(article, content_meta)
        self._format_category(article, content_meta)
        self._format_company_codes(article, content_meta, item)

        content_type = article[ITEM_TYPE]
        if content_type in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
            self._format_description(article, content_meta)
            self._format_creditline(article, content_meta)
        return item
Example #27
0
def enqueue_items(published_items):
    """
    Creates the corresponding entries in the publish queue for each item.

    An item is enqueued when it has no publish schedule, or when its schedule is
    earlier than the time captured at the start of this call. A failure on one
    item is logged and recorded; the remaining items are still processed.

    :param list published_items: the list of items marked for publishing
    """
    failed_items = {}
    current_utc = utcnow()

    for queue_item in published_items:
        try:
            schedule_utc_datetime = get_utc_schedule(queue_item, PUBLISH_SCHEDULE)
            if not schedule_utc_datetime or schedule_utc_datetime < current_utc:
                enqueue_item(queue_item)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt and
            # SystemExit are not swallowed by the per-item error handling.
            logger.exception('Failed to queue item {}'.format(queue_item.get('_id')))
            failed_items[str(queue_item.get('_id'))] = queue_item

    # NOTE(review): failed items are only reported here; nothing in this function
    # marks them as pending for a retry.
    if failed_items:
        logger.error('Failed to publish the following items: {}'.format(failed_items.keys()))
Example #28
0
    def _format_news_management(self, article, news_item):
        """
        Append a NewsManagement element built from the article to news_item.

        The element carries the item type, the first-created and revision timestamps,
        the publication status (with the future status and embargo time when the
        article is embargoed), the urgency when set, and an instruction that is
        "Correction" for corrected articles and "Update" otherwise.

        :param dict article: article being formatted
        :param Element news_item: parent element the NewsManagement element is added to
        """
        timestamp_format = "%Y%m%dT%H%M%S+0000"

        management = SubElement(news_item, "NewsManagement")
        SubElement(management, "NewsItemType", {"FormalName": "News"})

        first_created = article["firstcreated"].strftime(timestamp_format)
        SubElement(management, "FirstCreated").text = first_created

        revision_created = article["versioncreated"].strftime(timestamp_format)
        SubElement(management, "ThisRevisionCreated").text = revision_created

        if not article.get(EMBARGO):
            SubElement(management, "Status", {"FormalName": article["pubstatus"]})
        else:
            # embargoed: announce the status the item will take and when
            SubElement(management, "Status", {"FormalName": "Embargoed"})
            will_change = SubElement(management, "StatusWillChange")
            SubElement(will_change, "FutureStatus", {"FormalName": article["pubstatus"]})
            embargo_time = get_utc_schedule(article, EMBARGO).isoformat()
            SubElement(will_change, "DateAndTime").text = embargo_time

        urgency = article.get("urgency")
        if urgency:
            SubElement(management, "Urgency", {"FormalName": str(urgency)})

        instruction = "Correction" if article["state"] == "corrected" else "Update"
        SubElement(management, "Instruction", {"FormalName": instruction})
Example #29
0
    def format(self, article, subscriber, codes=None):
        """
        Build the parameter dictionary passed to the SMS InsertAlerts stored procedure.

        :param dict article: article to format
        :param dict subscriber: subscriber the sequence number is generated for
        :param codes: accepted for signature compatibility; not read by this method
        :return: one-element list holding a (sequence number, JSON encoded parameters) tuple
        :raises FormatterError.AAPSMSFormatterError: wraps any exception raised while formatting
        """
        try:
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            # fall back to the abstract when there is no sms_message; single quotes are doubled
            fallback_text = article.get('abstract', '')
            sms_message = article.get('sms_message', fallback_text).replace('\'', '\'\'')

            # category = 1 is used to indicate a test message
            if superdesk.app.config.get('TEST_SMS_OUTPUT', True) is True:
                category = '1'
            else:
                category = article.get('anpa_category', [{}])[0].get('qcode').upper()

            headline = BeautifulSoup(sms_message, 'html.parser').text
            priority = map_priority(article.get('priority'))

            body = self.append_body_footer(article)
            if article.get(EMBARGO):
                embargo_time = get_utc_schedule(article, EMBARGO).isoformat()
                body = '{}{}'.format('Embargo Content. Timestamp: ', embargo_time) + body

            # text items have their markup stripped
            if article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                body = BeautifulSoup(body, "html.parser").text

            odbc_item = {
                'Sequence': pub_seq_num,
                'Category': category,
                'Headline': headline,
                'Priority': priority,
                'StoryText': body.replace('\'', '\'\''),  # @article_text
                'ident': '0',
            }

            return [(pub_seq_num, json.dumps(odbc_item))]
        except Exception as ex:
            raise FormatterError.AAPSMSFormatterError(ex, subscriber)
Example #30
0
def enqueue_items(published_items):
    """
    Creates the corresponding entries in the publish queue for each item.

    An item is enqueued when it has no publish schedule, or when its schedule is
    earlier than the time captured at the start of this call. A failure on one
    item is logged and recorded; the remaining items are still processed.

    :param list published_items: the list of items marked for publishing
    """
    failed_items = {}
    current_utc = utcnow()

    for queue_item in published_items:
        try:
            schedule_utc_datetime = get_utc_schedule(queue_item,
                                                     PUBLISH_SCHEDULE)
            if not schedule_utc_datetime or schedule_utc_datetime < current_utc:
                enqueue_item(queue_item)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt and
            # SystemExit are not swallowed by the per-item error handling.
            logger.exception('Failed to queue item {}'.format(
                queue_item.get('_id')))
            failed_items[str(queue_item.get('_id'))] = queue_item

    # NOTE(review): failed items are only reported here; nothing in this function
    # marks them as pending for a retry.
    if failed_items:
        logger.error('Failed to publish the following items: {}'.format(
            failed_items.keys()))
Example #31
0
    def format(self, article, subscriber):
        """
        Format the article as ANPA messages, one per entry in ``article['anpa_category']``.

        :param dict article: article to format
        :param dict subscriber: subscriber the sequence numbers are generated for; also handed
            to the selector code mapper
        :return: list of (published sequence number, message bytes) tuples
        :raises FormatterError.AnpaFormatterError: wraps any exception raised while formatting
        """
        try:
            docs = []
            for category in article.get('anpa_category'):
                # a new sequence number is generated for every category
                pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
                # list of byte fragments, joined into the final message at the end of the iteration
                anpa = []

                # selector codes are only injected for those subscribers that are defined
                # in the mapper
                selectors = dict()
                SelectorcodeMapper().map(article, category.get('qcode').upper(),
                                         subscriber=subscriber,
                                         formatted_item=selectors)
                if 'selector_codes' in selectors and selectors['selector_codes']:
                    anpa.append(b'\x05')
                    anpa.append(selectors['selector_codes'].encode('ascii'))
                    anpa.append(b'\x0D\x0A')

                # start of message header (syn syn soh)
                anpa.append(b'\x16\x16\x01')
                # service level, lower-cased; 'a' when the article has none
                anpa.append(article.get('service_level', 'a').lower().encode('ascii'))

                # story number: the sequence number zero padded to at least 4 digits
                anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

                # field separator
                anpa.append(b'\x0A')  # -LF
                anpa.append(map_priority(article.get('priority')).encode('ascii'))
                anpa.append(b'\x20')

                anpa.append(category['qcode'].encode('ascii'))

                anpa.append(b'\x13')
                # format identifier
                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                    anpa.append(b'\x12')
                else:
                    anpa.append(b'\x11')
                anpa.append(b'\x20')

                # keyword: 'bc-' prefix, spaces replaced by dashes, capped at 24 characters
                keyword = 'bc-{}'.format(self.append_legal(article=article, truncate=True)).replace(' ', '-')
                keyword = keyword[:24] if len(keyword) > 24 else keyword
                anpa.append(keyword.encode('ascii'))
                anpa.append(b'\x20')

                # version field
                anpa.append(b'\x20')

                # reference field
                anpa.append(b'\x20')

                # filing date as MM-DD taken from the article's _updated value
                anpa.append('{}-{}'.format(article['_updated'].strftime('%m'),
                                           article['_updated'].strftime('%d')).encode('ascii'))
                anpa.append(b'\x20')

                # add the word count, zero padded to at least 4 digits
                anpa.append(str(article.get('word_count', '0000')).zfill(4).encode('ascii'))
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x02')  # STX

                self._process_headline(anpa, article, category['qcode'].encode('ascii'))

                # value returned by append_legal (non-ASCII characters dropped), followed by the
                # take key when there is one
                keyword = self.append_legal(article=article, truncate=True).encode('ascii', 'ignore')
                anpa.append(keyword)
                take_key = article.get('anpa_take_key', '').encode('ascii', 'ignore')
                anpa.append((b'\x20' + take_key) if len(take_key) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                if BYLINE in article:
                    anpa.append(article.get(BYLINE).encode('ascii', 'ignore'))
                    anpa.append(b'\x0D\x0A')

                if article.get('dateline', {}).get('text'):
                    anpa.append(article.get('dateline').get('text').encode('ascii', 'ignore'))

                body = self.append_body_footer(article)
                # embargoed articles get the embargo timestamp prefixed to the body
                if article.get(EMBARGO):
                    embargo = '{}{}'.format('Embargo Content. Timestamp: ',
                                            get_utc_schedule(article, EMBARGO).isoformat())
                    body = embargo + body

                # preformatted bodies are encoded as they are; anything else goes through
                # BeautifulSoup's .text first. Characters that cannot be encoded are replaced.
                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                    anpa.append(body.encode('ascii', 'replace'))
                else:
                    anpa.append(BeautifulSoup(body, "html.parser").text.encode('ascii', 'replace'))

                anpa.append(b'\x0D\x0A')
                # trailer: 'MORE' when more_coming is set, otherwise the source; then the sign off if any
                if article.get('more_coming', False):
                    anpa.append('MORE'.encode('ascii'))
                else:
                    anpa.append(article.get('source', '').encode('ascii'))
                sign_off = article.get('sign_off', '').encode('ascii')
                anpa.append((b'\x20' + sign_off) if len(sign_off) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x03')  # ETX

                # time and date (naive local time from datetime.now(), formatted DD-MM-YY HH-MM-SS)
                anpa.append(datetime.datetime.now().strftime('%d-%m-%y %H-%M-%S').encode('ascii'))

                anpa.append(b'\x04')  # EOT
                # eight CR LF pairs close the message
                anpa.append(b'\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A')

                docs.append((pub_seq_num, b''.join(anpa)))

            return docs
        except Exception as ex:
            raise FormatterError.AnpaFormatterError(ex, subscriber)
    def get_subscribers(self, doc, target_media_type):
        """
        Get the subscribers for this document based on the target_media_type for publishing.
        1. If the item has embargo and is a future date then fetch active Wire Subscribers.
           Otherwise get all active subscribers.
            a. Get the list of takes subscribers if Takes Package
        2. If takes package then subsequent takes are sent to same wire subscriber as first take.
        3. Filter the subscriber list based on the publish filter and global filters (if configured).
            a. Publish to takes package subscribers if the takes package is received by the subscriber.
            b. Rewrites are sent to subscribers that received the original item or the previous rewrite.
        :param dict doc: Document to publish/correct/kill
        :param str target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queues is an Individual Article.
        :return: (list, list, dict) List of filtered subscriber,
                List of subscribers that have not received item previously (empty list in this case).
                Dict of product codes per subscriber
        """
        subscribers, subscribers_yet_to_receive, takes_subscribers, rewrite_subscribers = [], [], [], []
        subscriber_codes, take_codes, codes, rewrite_codes = {}, {}, {}, {}
        first_take = None

        # Step 3b
        # Resolve the takes package of the item being rewritten: the item itself when it is a
        # takes package, otherwise whatever get_take_package returns for it.
        rewrite_of = doc.get('rewrite_of')
        rewrite_take_package = None
        if rewrite_of:
            rewrite_of_item = get_resource_service('archive').find_one(req=None, _id=rewrite_of)
            if rewrite_of_item:
                if is_takes_package(rewrite_of_item):
                    rewrite_take_package = rewrite_of_item
                else:
                    rewrite_take_package = self.takes_package_service.get_take_package(rewrite_of_item)

        # Step 1
        # Only active subscribers; restricted to wire subscribers while the embargo is still in the future.
        query = {'is_active': True}
        if doc.get(EMBARGO) and get_utc_schedule(doc, EMBARGO) > utcnow():
            query['subscriber_type'] = SUBSCRIBER_TYPES.WIRE
            # Ta 04/05/16: Commenting out this section for ticket SD-4465
            # query['media_type'] = SUBSCRIBER_MEDIA_TYPES.MEDIA

        subscribers = list(get_resource_service('subscribers').get(req=None, lookup=query))

        if doc.get(ITEM_TYPE) in [CONTENT_TYPE.COMPOSITE] and doc.get(PACKAGE_TYPE) == TAKES_PACKAGE:
            # Step 1a
            # subscribers (and their codes) that already received this takes package
            query = {'$and': [{'item_id': doc['item_id']},
                              {'publishing_action': {'$in': [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]}}]}
            takes_subscribers, take_codes = self._get_subscribers_for_previously_sent_items(query)

            if rewrite_of and rewrite_take_package:
                # Step 3b
                # subscribers that already received the takes package of the rewritten item
                query = {'$and': [{'item_id': rewrite_take_package.get(config.ID_FIELD)},
                                  {'publishing_action': {'$in': [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]}}]}
                rewrite_subscribers, rewrite_codes = self._get_subscribers_for_previously_sent_items(query)

        # Step 2
        if doc.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            # get first take
            first_take = self.takes_package_service.get_take_by_take_no(doc, 1)
            if str(doc['item_id']) == str(first_take):
                # if the current document is the first take then continue
                first_take = None

            if first_take:
                # if first take is published then subsequent takes should to same subscribers.
                # This replaces the subscriber list fetched in Step 1.
                query = {'$and': [{'item_id': first_take},
                                  {'publishing_action': {'$in': [CONTENT_STATE.PUBLISHED]}}]}
                subscribers, subscriber_codes = self._get_subscribers_for_previously_sent_items(query)

            if rewrite_of:
                # Step 3b
                # When the rewritten item is itself the takes package, look at every item it
                # references; otherwise only at the rewritten item.
                if rewrite_take_package and rewrite_take_package.get(config.ID_FIELD) == rewrite_of:
                    item_ids = self.package_service.get_residrefs(rewrite_take_package)
                else:
                    item_ids = [rewrite_of]

                query = {'$and': [{'item_id': {'$in': item_ids}},
                                  {'publishing_action': {'$in': [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]}}]}
                rewrite_subscribers, rewrite_codes = self._get_subscribers_for_previously_sent_items(query)

        # Step 3
        # Filtering is skipped when the subscribers were taken from the first take.
        if not first_take:
            subscribers, codes = self.filter_subscribers(doc, subscribers, target_media_type)

        if takes_subscribers:
            # Step 3a
            # takes subscribers first, then the remaining subscribers not already among them
            subscribers_ids = set(s[config.ID_FIELD] for s in takes_subscribers)
            subscribers = takes_subscribers + [s for s in subscribers if s[config.ID_FIELD] not in subscribers_ids]

        if rewrite_subscribers:
            # Step 3b
            # rewrite subscribers first, then the remaining subscribers not already among them
            subscribers_ids = set(s[config.ID_FIELD] for s in rewrite_subscribers)
            subscribers = rewrite_subscribers + [s for s in subscribers if s[config.ID_FIELD] not in subscribers_ids]

        # The code dicts are merged with dict.update, so for a key present in several of them the
        # last update wins: take_codes, then rewrite_codes, then codes.
        if take_codes:
            # join the codes
            subscriber_codes.update(take_codes)

        if rewrite_codes:
            # join the codes
            subscriber_codes.update(rewrite_codes)

        if codes:
            # join the codes
            subscriber_codes.update(codes)

        return subscribers, subscribers_yet_to_receive, subscriber_codes
    def format(self, article, subscriber):
        """
        Format the article as ANPA messages, one per entry in ``article['anpa_category']``.

        :param dict article: article to format
        :param dict subscriber: subscriber the sequence numbers are generated for; also handed
            to the selector code mapper
        :return: list of (published sequence number, message bytes) tuples
        :raises FormatterError.AnpaFormatterError: wraps any exception raised while formatting
        """
        try:
            docs = []
            for category in article.get('anpa_category'):
                # a new sequence number is generated for every category
                pub_seq_num = superdesk.get_resource_service(
                    'subscribers').generate_sequence_number(subscriber)
                # list of byte fragments, joined into the final message at the end of the iteration
                anpa = []

                # selector codes are only injected for those subscribers that are defined
                # in the mapper
                selectors = dict()
                SelectorcodeMapper().map(article,
                                         category.get('qcode').upper(),
                                         subscriber=subscriber,
                                         formatted_item=selectors)
                if 'selector_codes' in selectors and selectors[
                        'selector_codes']:
                    anpa.append(b'\x05')
                    anpa.append(selectors['selector_codes'].encode('ascii'))
                    anpa.append(b'\x0D\x0A')

                # start of message header (syn syn soh)
                anpa.append(b'\x16\x16\x01')
                # service level, lower-cased; 'a' when the article has none
                anpa.append(
                    article.get('service_level', 'a').lower().encode('ascii'))

                # story number: the sequence number zero padded to at least 4 digits
                anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

                # field separator
                anpa.append(b'\x0A')  # -LF
                anpa.append(
                    map_priority(article.get('priority')).encode('ascii'))
                anpa.append(b'\x20')

                anpa.append(category['qcode'].encode('ascii'))

                anpa.append(b'\x13')
                # format identifier
                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                    anpa.append(b'\x12')
                else:
                    anpa.append(b'\x11')
                anpa.append(b'\x20')

                # keyword: 'bc-' prefix, spaces replaced by dashes, capped at 24 characters
                keyword = 'bc-{}'.format(
                    self.append_legal(article=article,
                                      truncate=True)).replace(' ', '-')
                keyword = keyword[:24] if len(keyword) > 24 else keyword
                anpa.append(keyword.encode('ascii'))
                anpa.append(b'\x20')

                # version field
                anpa.append(b'\x20')

                # reference field
                anpa.append(b'\x20')

                # filing date as MM-DD taken from the article's _updated value
                anpa.append('{}-{}'.format(
                    article['_updated'].strftime('%m'),
                    article['_updated'].strftime('%d')).encode('ascii'))
                anpa.append(b'\x20')

                # add the word count, zero padded to at least 4 digits
                anpa.append(
                    str(article.get('word_count',
                                    '0000')).zfill(4).encode('ascii'))
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x02')  # STX

                self._process_headline(anpa, article,
                                       category['qcode'].encode('ascii'))

                # value returned by append_legal (non-ASCII characters dropped), followed by the
                # take key when there is one
                keyword = self.append_legal(article=article,
                                            truncate=True).encode(
                                                'ascii', 'ignore')
                anpa.append(keyword)
                take_key = article.get('anpa_take_key',
                                       '').encode('ascii', 'ignore')
                anpa.append((b'\x20' + take_key) if len(take_key) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                if BYLINE in article:
                    anpa.append(article.get(BYLINE).encode('ascii', 'ignore'))
                    anpa.append(b'\x0D\x0A')

                if article.get('dateline', {}).get('text'):
                    anpa.append(
                        article.get('dateline').get('text').encode(
                            'ascii', 'ignore'))

                body = self.append_body_footer(article)
                # embargoed articles get the embargo timestamp prefixed to the body
                if article.get(EMBARGO):
                    embargo = '{}{}'.format(
                        'Embargo Content. Timestamp: ',
                        get_utc_schedule(article, EMBARGO).isoformat())
                    body = embargo + body

                # preformatted bodies are encoded as they are; anything else goes through
                # BeautifulSoup's .text first. Characters that cannot be encoded are replaced.
                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                    anpa.append(body.encode('ascii', 'replace'))
                else:
                    anpa.append(
                        BeautifulSoup(body, "html.parser").text.encode(
                            'ascii', 'replace'))

                anpa.append(b'\x0D\x0A')
                # trailer: 'MORE' when more_coming is set, otherwise the source; then the sign off if any
                if article.get('more_coming', False):
                    anpa.append('MORE'.encode('ascii'))
                else:
                    anpa.append(article.get('source', '').encode('ascii'))
                sign_off = article.get('sign_off', '').encode('ascii')
                anpa.append((b'\x20' + sign_off) if len(sign_off) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x03')  # ETX

                # time and date (naive local time from datetime.now(), formatted DD-MM-YY HH-MM-SS)
                anpa.append(datetime.datetime.now().strftime(
                    '%d-%m-%y %H-%M-%S').encode('ascii'))

                anpa.append(b'\x04')  # EOT
                # eight CR LF pairs close the message
                anpa.append(
                    b'\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A'
                )

                docs.append((pub_seq_num, b''.join(anpa)))

            return docs
        except Exception as ex:
            raise FormatterError.AnpaFormatterError(ex, subscriber)
Example #34
0
    def queue_transmission(self, doc, subscribers, subscriber_codes=None):
        """
        Method formats and then queues the article for transmission to the passed subscribers.

        ::Important Note:: Format Type across Subscribers can repeat. But we can't have formatted item generated once
        based on the format_types configured across for all the subscribers as the formatted item must have a published
        sequence number generated by Subscriber.

        A failure while processing one subscriber is logged and the remaining subscribers are still processed.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :param dict subscriber_codes: codes looked up by subscriber id; treated as empty when omitted or None
        :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        if subscriber_codes is None:
            # None sentinel instead of a mutable ``{}`` default shared between calls
            subscriber_codes = {}

        queued = False
        no_formatters = []
        for subscriber in subscribers:
            try:
                if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                        subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can get only text and preformatted stories
                    continue

                for destination in subscriber['destinations']:
                    embed_package_items = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                        PACKAGE_TYPE not in doc and destination['config'].get('packaged', False)
                    if embed_package_items:
                        doc = self._embed_package_items(doc)
                    # Step 2(a)
                    formatter = get_formatter(destination['format'], doc)

                    if not formatter:  # if formatter not found then record it
                        no_formatters.append(destination['format'])
                        continue

                    formatted_docs = formatter.format(
                        doc, subscriber, subscriber_codes.get(subscriber[config.ID_FIELD]))

                    # A formatter may return (sequence number, formatted item) pairs or ready-made
                    # dicts; pairs are converted to dicts in place, dicts are validated.
                    for idx, publish_data in enumerate(formatted_docs):
                        if not isinstance(publish_data, dict):
                            pub_seq_num, formatted_doc = publish_data
                            formatted_docs[idx] = {
                                'published_seq_num': pub_seq_num,
                                'formatted_item': formatted_doc
                            }
                        elif 'published_seq_num' not in publish_data or 'formatted_item' not in publish_data:
                            # explicit raise: an ``assert`` would be stripped when running with -O
                            raise ValueError("missing keys in publish_data")

                    for publish_queue_item in formatted_docs:
                        publish_queue_item['item_id'] = doc['item_id']
                        publish_queue_item['item_version'] = doc[config.VERSION]
                        publish_queue_item['subscriber_id'] = subscriber[config.ID_FIELD]
                        publish_queue_item['codes'] = subscriber_codes.get(subscriber[config.ID_FIELD])
                        publish_queue_item['destination'] = destination
                        # publish_schedule is just to indicate in the queue item is create via scheduled item
                        publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                        publish_queue_item['unique_name'] = doc.get('unique_name', None)
                        publish_queue_item['content_type'] = doc.get('type', None)
                        publish_queue_item['headline'] = doc.get('headline', None)
                        publish_queue_item['publishing_action'] = self.published_state
                        publish_queue_item['ingest_provider'] = \
                            ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                        if doc.get(PUBLISHED_IN_PACKAGE):
                            publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]
                        # an encoded item, when present, is moved into storage and replaced by its id
                        try:
                            encoded_item = publish_queue_item.pop('encoded_item')
                        except KeyError:
                            pass
                        else:
                            binary = io.BytesIO(encoded_item)
                            publish_queue_item['encoded_item_id'] = app.storage.put(binary)
                        publish_queue_item.pop(ITEM_STATE, None)
                        get_resource_service('publish_queue').post([publish_queue_item])
                        queued = True
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt and
                # SystemExit are not swallowed by the per-subscriber error handling.
                logger.exception(
                    "Failed to queue item for id {} with headline {} for subscriber {}."
                    .format(doc.get(config.ID_FIELD), doc.get('headline'),
                            subscriber.get('name')))

        return no_formatters, queued
    def queue_transmission(self, doc, subscribers, subscriber_codes=None):
        """
        Method formats and then queues the article for transmission to the passed subscribers.

        ::Important Note:: Format Type across Subscribers can repeat. But we can't have formatted item generated once
        based on the format_types configured across for all the subscribers as the formatted item must have a published
        sequence number generated by Subscriber.

        A failure while processing one subscriber is logged and the remaining subscribers are still processed.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :param dict subscriber_codes: codes looked up by subscriber id; treated as empty when omitted or None
        :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        if subscriber_codes is None:
            # None sentinel instead of a mutable ``{}`` default shared between calls
            subscriber_codes = {}

        queued = False
        no_formatters = []
        for subscriber in subscribers:
            try:
                if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                        subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can get only text and preformatted stories
                    continue

                for destination in subscriber['destinations']:
                    embed_package_items = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                        PACKAGE_TYPE not in doc and destination['config'].get('packaged', False)
                    if embed_package_items:
                        doc = self._embed_package_items(doc)
                    # Step 2(a)
                    formatter = get_formatter(destination['format'], doc)

                    if not formatter:  # if formatter not found then record it
                        no_formatters.append(destination['format'])
                        continue

                    formatted_docs = formatter.format(doc, subscriber)

                    # one publish queue entry per (sequence number, formatted item) pair
                    for pub_seq_num, formatted_doc in formatted_docs:
                        publish_queue_item = dict()
                        publish_queue_item['item_id'] = doc['item_id']
                        publish_queue_item['item_version'] = doc[config.VERSION]
                        publish_queue_item['formatted_item'] = formatted_doc
                        publish_queue_item['subscriber_id'] = subscriber[config.ID_FIELD]
                        publish_queue_item['codes'] = subscriber_codes.get(subscriber[config.ID_FIELD])
                        publish_queue_item['destination'] = destination
                        publish_queue_item['published_seq_num'] = pub_seq_num
                        # publish_schedule is just to indicate in the queue item is create via scheduled item
                        publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                        publish_queue_item['unique_name'] = doc.get('unique_name', None)
                        publish_queue_item['content_type'] = doc.get('type', None)
                        publish_queue_item['headline'] = doc.get('headline', None)
                        publish_queue_item['publishing_action'] = self.published_state
                        if doc.get(PUBLISHED_IN_PACKAGE):
                            publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]
                        publish_queue_item.pop(ITEM_STATE, None)
                        get_resource_service('publish_queue').post([publish_queue_item])
                        queued = True
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt and
                # SystemExit are not swallowed by the per-subscriber error handling.
                logger.exception("Failed to queue item for id {} with headline {} for subscriber {}."
                                 .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

        return no_formatters, queued
Example #36
0
    def format(self, article, subscriber, codes=None):
        """Build one ANPA-style byte message per category of the article.

        The article is deep-copied first; each category returned by
        ``self._get_category_list`` yields one message with its own
        sequence number.

        :param dict article: item to format; read through ``.get`` lookups and ``_updated``
        :param subscriber: subscriber passed to the sequence number generator and to the raised error
        :param codes: optional iterable of strings written space-separated at the very start of the message
        :return: list of dicts with ``published_seq_num``, ``encoded_item`` (bytes) and
            ``formatted_item`` (the same bytes decoded as ascii)
        :raises FormatterError.AnpaFormatterError: wraps any exception raised while formatting
        """
        try:
            docs = []
            # work on a copy so the caller's article is not modified below
            formatted_article = deepcopy(article)
            for category in self._get_category_list(formatted_article.get('anpa_category')):
                mapped_source = self._get_mapped_source(formatted_article)
                # prefer 'item_id' as the id, falling back to the current id field value
                formatted_article[config.ID_FIELD] = formatted_article.get('item_id',
                                                                           formatted_article.get(config.ID_FIELD))
                is_last_take = TakesPackageService().is_last_takes_package_item(formatted_article)
                is_first_part = formatted_article.get('sequence', 1) == 1
                # a new sequence number is requested for every category
                pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
                # list of byte chunks; joined into the final message at the end of the iteration
                anpa = []

                # optional codes line: 0x05, the space-separated codes, CRLF
                if codes:
                    anpa.append(b'\x05')
                    anpa.append(' '.join(codes).encode('ascii'))
                    anpa.append(b'\x0D\x0A')

                # start of message header (syn syn soh)
                anpa.append(b'\x16\x16\x01')
                anpa.append(get_service_level(category, formatted_article).encode('ascii'))

                # story number
                anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

                # field separator
                anpa.append(b'\x0A')  # -LF
                anpa.append(map_priority(formatted_article.get('priority')).encode('ascii'))
                anpa.append(b'\x20')

                anpa.append(category['qcode'].lower().encode('ascii'))

                anpa.append(b'\x13')
                # format identifier
                if formatted_article.get(FORMAT, FORMATS.HTML) == FORMATS.PRESERVED:
                    anpa.append(b'\x12')
                else:
                    anpa.append(b'\x11')
                anpa.append(b'\x20')

                # keyword
                # 'bc-' prefix, spaces replaced by hyphens, capped at 24 characters
                keyword = 'bc-{}'.format(self.append_legal(article=formatted_article, truncate=True)).replace(' ', '-')
                keyword = keyword[:24] if len(keyword) > 24 else keyword
                anpa.append(keyword.encode('ascii'))
                anpa.append(b'\x20')

                # version field
                anpa.append(b'\x20')

                # reference field
                anpa.append(b'\x20')

                # filing date
                # written as MM-DD taken from the item's '_updated' timestamp
                anpa.append('{}-{}'.format(formatted_article['_updated'].strftime('%m'),
                                           formatted_article['_updated'].strftime('%d')).encode('ascii'))
                anpa.append(b'\x20')

                # add the word count
                # zero-padded to four characters; '0000' when the item has no word_count
                anpa.append(str(formatted_article.get('word_count', '0000')).zfill(4).encode('ascii'))
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x02')  # STX

                self._process_headline(anpa, formatted_article, category['qcode'].encode('ascii'))

                # slugline line: mapped slugline, then the take key (if any) after a space
                keyword = SluglineMapper().map(article=formatted_article, category=category['qcode'].upper(),
                                               truncate=True).encode('ascii', 'ignore')
                anpa.append(keyword)
                take_key = (formatted_article.get('anpa_take_key', '') or '').encode('ascii', 'ignore')
                anpa.append((b'\x20' + take_key) if len(take_key) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                # embargoed items get a notice line carrying the embargo timestamp in ISO format
                if formatted_article.get(EMBARGO):
                    embargo = '{}{}\r\n'.format('Embargo Content. Timestamp: ',
                                                get_utc_schedule(formatted_article, EMBARGO).isoformat())
                    anpa.append(embargo.encode('ascii', 'replace'))

                if formatted_article.get('ednote', '') != '':
                    ednote = '{}\r\n'.format(to_ascii(formatted_article.get('ednote')))
                    anpa.append(ednote.encode('ascii', 'replace'))

                # byline is parsed as HTML and only its text content is written
                if formatted_article.get(BYLINE):
                    anpa.append(BeautifulSoup(formatted_article.get(BYLINE), 'html.parser').text.encode
                                ('ascii', 'ignore'))
                    anpa.append(b'\x0D\x0A')

                if formatted_article.get(FORMAT) == FORMATS.PRESERVED:
                    # preserved format: body plus footer, markup stripped via BeautifulSoup
                    soup = BeautifulSoup(self.append_body_footer(formatted_article), "html.parser")
                    anpa.append(soup.get_text().encode('ascii', 'replace'))
                else:
                    body = to_ascii(formatted_article.get('body_html', ''))
                    # we need to inject the dateline
                    # only for the first part (sequence == 1) of items that are not auto published
                    if is_first_part and formatted_article.get('dateline', {}).get('text') \
                            and not article.get('auto_publish', False):
                        soup = BeautifulSoup(body, "html.parser")
                        ptag = soup.find('p')
                        if ptag is not None:
                            # dateline text goes at the start of the first paragraph
                            ptag.insert(0, NavigableString(
                                '{} '.format(formatted_article.get('dateline').get('text')).encode('ascii', 'ignore')))
                            body = str(soup)
                    anpa.append(self.get_text_content(body))
                    if formatted_article.get('body_footer'):
                        anpa.append(self.get_text_content(to_ascii(formatted_article.get('body_footer', ''))))

                anpa.append(b'\x0D\x0A')
                # closing line: 'MORE' unless this is the last take, otherwise the mapped source
                if not is_last_take:
                    anpa.append('MORE'.encode('ascii'))
                else:
                    anpa.append(mapped_source.encode('ascii'))
                sign_off = (formatted_article.get('sign_off', '') or '').encode('ascii')
                anpa.append((b'\x20' + sign_off) if len(sign_off) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x03')  # ETX

                # time and date
                # NOTE(review): datetime.now() is naive local time - confirm this is intended
                anpa.append(datetime.datetime.now().strftime('%d-%m-%y %H-%M-%S').encode('ascii'))

                anpa.append(b'\x04')  # EOT
                # eight CRLF pairs follow the EOT byte
                anpa.append(b'\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A')

                # the message is stored both as raw bytes and as its ascii-decoded text
                docs.append({'published_seq_num': pub_seq_num, 'encoded_item': b''.join(anpa),
                             'formatted_item': b''.join(anpa).decode('ascii')})

            return docs
        except Exception as ex:
            raise FormatterError.AnpaFormatterError(ex, subscriber)
Example #37
0
    def queue_transmission(self,
                           doc,
                           subscribers,
                           subscriber_codes=None,
                           associations=None,
                           sent=False):
        """Format ``doc`` for each destination of every subscriber and post the results to the publish queue.

        A formatted item cannot be shared between subscribers that use the same
        format type, because each formatted item carries a published sequence
        number generated for its subscriber.

        A failure while handling one subscriber is logged and does not stop the
        remaining subscribers from being processed.

        :param dict doc: document to queue for transmission
        :param list subscribers: list of subscriber dicts
        :param dict subscriber_codes: codes looked up by subscriber id; treated as empty when ``None``
        :param dict associations: associated items looked up by subscriber id; treated as empty when ``None``
        :param bool sent: when truthy, the association resend step is skipped
        :return: (list, bool) formats for which no formatter was found, and whether anything was queued
        """
        associations = {} if associations is None else associations
        subscriber_codes = {} if subscriber_codes is None else subscriber_codes

        def wants_package(destination):
            # value of the destination's ``packaged`` config flag (False when not configured)
            return (destination.get("config") or {}).get("packaged", False)

        resend_mode = config.PUBLISH_ASSOCIATIONS_RESEND
        if resend_mode and not sent:
            corrected = doc.get("state") in ("corrected", "being_corrected")
            rewritten = doc.get("rewrite_of")
            if resend_mode == "corrections":
                # this mode resends regardless of the item being new, an update or a correction
                resend = True
            elif resend_mode == "new":
                resend = not corrected and not rewritten
            elif resend_mode == "updates":
                resend = not corrected
            else:
                resend = False
            if resend:
                self.resend_association_items(doc)

        missing_formatters = []
        was_queued = False

        for subscriber in subscribers:
            try:
                is_text = doc[ITEM_TYPE] in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED)
                if not is_text and subscriber.get("subscriber_type", "") == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers only ever receive text and preformatted stories
                    continue

                for destination in self.get_destinations(subscriber):
                    if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and wants_package(destination):
                        # doc is rebound, so later destinations and subscribers see the embedded version
                        doc = self._embed_package_items(doc)

                    if doc.get(PUBLISHED_IN_PACKAGE) and wants_package(destination):
                        continue

                    # Step 2(a)
                    formatter = get_formatter(destination["format"], doc)
                    if not formatter:
                        # remember the format so the caller can report the missing formatter
                        missing_formatters.append(destination["format"])
                        continue

                    formatter.set_destination(destination, subscriber)
                    formatted_docs = formatter.format(
                        self.filter_document(doc),
                        subscriber,
                        subscriber_codes.get(subscriber[config.ID_FIELD]),
                    )

                    # normalise (seq_num, formatted_item) pairs into dicts, in place
                    for position, entry in enumerate(formatted_docs):
                        if isinstance(entry, dict):
                            assert ("published_seq_num" in entry
                                    and "formatted_item" in entry), "missing keys in publish_data"
                            continue
                        seq_num, payload = entry
                        formatted_docs[position] = {
                            "published_seq_num": seq_num,
                            "formatted_item": payload,
                        }

                    subscriber_id = subscriber[config.ID_FIELD]
                    for queue_item in formatted_docs:
                        provider = doc.get("ingest_provider")
                        queue_item.update({
                            "item_id": doc["item_id"],
                            "item_version": doc[config.VERSION],
                            "subscriber_id": subscriber_id,
                            "codes": subscriber_codes.get(subscriber_id),
                            "destination": destination,
                            # publish_schedule only marks that the queue item came from a scheduled item
                            PUBLISH_SCHEDULE: get_utc_schedule(doc, PUBLISH_SCHEDULE) or None,
                            "unique_name": doc.get("unique_name", None),
                            "content_type": doc.get("type", None),
                            "headline": doc.get("headline", None),
                            "publishing_action": self.published_state,
                            "ingest_provider": ObjectId(provider) if provider else None,
                            "associated_items": associations.get(subscriber_id, []),
                            "priority": subscriber.get("priority"),
                        })

                        if doc.get(PUBLISHED_IN_PACKAGE):
                            queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]

                        if "encoded_item" in queue_item:
                            # binary payload goes to storage; only its id stays on the queue item
                            stream = io.BytesIO(queue_item.pop("encoded_item"))
                            queue_item["encoded_item_id"] = app.storage.put(stream)

                        queue_item.pop(ITEM_STATE, None)

                        # content api delivery will be marked as SUCCESS in queue
                        get_resource_service("publish_queue").post([queue_item])
                        was_queued = True

            except Exception:
                logger.exception(
                    "Failed to queue item for id {} with headline {} for subscriber {}."
                    .format(doc.get(config.ID_FIELD), doc.get("headline"),
                            subscriber.get("name")))

        return missing_formatters, was_queued
Example #38
0
    def get_subscribers(self, doc, target_media_type):
        """Work out which subscribers should receive ``doc`` and with which product codes.

        1. Start from the active subscribers; if the item carries an embargo that is
           still in the future only active Wire subscribers are fetched.
            a. For a Takes Package, also collect subscribers that already received it.
        2. For text/preformatted items that are not the first take, the subscribers
           of the published first take are used instead.
        3. Unless a first take decided the list, filter the subscribers with the
           publish and global filters.
            a. Subscribers that already received the takes package are put first.
            b. For rewrites, subscribers that received the original item (or its
               takes package) are put first.

        :param dict doc: document to publish/correct/kill
        :param str target_media_type: forwarded to ``filter_subscribers``; states whether the
                queued doc is a Takes Package (Digital) or an Individual Article (Wire)
        :return: (list, list, dict) filtered subscribers,
                subscribers that have not received the item previously (always an empty list here),
                product codes per subscriber
        """
        def sent_query(item_filter, *states):
            # publish-queue lookup: matching item(s) that were queued with one of the given actions
            return {
                '$and': [
                    {'item_id': item_filter},
                    {'publishing_action': {'$in': list(states)}},
                ]
            }

        def put_first(preferred, others):
            # preferred subscribers first, then the others that are not already among them
            known_ids = {s[config.ID_FIELD] for s in preferred}
            return preferred + [s for s in others if s[config.ID_FIELD] not in known_ids]

        subscribers_yet_to_receive = []
        takes_subscribers, rewrite_subscribers = [], []
        subscriber_codes, take_codes, codes, rewrite_codes = {}, {}, {}, {}
        first_take = None

        # Step 3b (preparation): resolve the takes package of the item being rewritten
        rewrite_of = doc.get('rewrite_of')
        rewrite_take_package = None
        if rewrite_of:
            rewritten_item = get_resource_service('archive').find_one(req=None, _id=rewrite_of)
            if rewritten_item:
                if is_takes_package(rewritten_item):
                    rewrite_take_package = rewritten_item
                else:
                    rewrite_take_package = self.takes_package_service.get_take_package(rewritten_item)

        # Step 1
        lookup = {'is_active': True}
        if doc.get(EMBARGO) and get_utc_schedule(doc, EMBARGO) > utcnow():
            # the media_type restriction was dropped for ticket SD-4465
            lookup['subscriber_type'] = SUBSCRIBER_TYPES.WIRE
        subscribers = list(get_resource_service('subscribers').get(req=None, lookup=lookup))

        item_type = doc.get(ITEM_TYPE)

        if item_type == CONTENT_TYPE.COMPOSITE and doc.get(PACKAGE_TYPE) == TAKES_PACKAGE:
            # Step 1a
            takes_subscribers, take_codes = self._get_subscribers_for_previously_sent_items(
                sent_query(doc['item_id'], CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED))

            if rewrite_of and rewrite_take_package:
                # Step 3b
                rewrite_subscribers, rewrite_codes = self._get_subscribers_for_previously_sent_items(
                    sent_query(rewrite_take_package.get(config.ID_FIELD),
                               CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED))

        # Step 2
        if item_type in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED):
            first_take = self.takes_package_service.get_take_by_take_no(doc, 1)
            if str(doc['item_id']) == str(first_take):
                # the document itself is the first take, nothing to inherit
                first_take = None

            if first_take:
                # later takes go to whoever received the published first take
                subscribers, subscriber_codes = self._get_subscribers_for_previously_sent_items(
                    sent_query(first_take, CONTENT_STATE.PUBLISHED))

            if rewrite_of:
                # Step 3b
                if rewrite_take_package and rewrite_take_package.get(config.ID_FIELD) == rewrite_of:
                    item_ids = self.package_service.get_residrefs(rewrite_take_package)
                else:
                    item_ids = [rewrite_of]

                rewrite_subscribers, rewrite_codes = self._get_subscribers_for_previously_sent_items(
                    sent_query({'$in': item_ids}, CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED))

        # Step 3
        if not first_take:
            subscribers, codes = self.filter_subscribers(doc, subscribers, target_media_type)

        if takes_subscribers:
            # Step 3a
            subscribers = put_first(takes_subscribers, subscribers)

        if rewrite_subscribers:
            # Step 3b
            subscribers = put_first(rewrite_subscribers, subscribers)

        # join the codes; later sources win on conflicting subscriber ids
        for extra_codes in (take_codes, rewrite_codes, codes):
            if extra_codes:
                subscriber_codes.update(extra_codes)

        return subscribers, subscribers_yet_to_receive, subscriber_codes
Example #39
0
    def _transform_to_ninjs(self, article, subscriber, recursive=True):
        """Build the ninjs dictionary for ``article``.

        :param dict article: item to transform; must contain the item type key when ``recursive`` is true
        :param subscriber: subscriber forwarded to the association formatting helpers
        :param bool recursive: when true and the article is a composite, the associations come from
            ``_get_associations`` (merged with ``_format_related`` if the article has associations);
            in every other case existing associations are formatted with ``_format_related``
        :return: dict holding the ninjs representation of the article
        """
        ninjs = {
            'guid': article.get(GUID_FIELD, article.get('uri')),
            'version': str(article.get(config.VERSION, 1)),
            'type': self._get_type(article)
        }

        try:
            ninjs['byline'] = self._get_byline(article)
        except Exception:
            # byline is best effort: a failure simply leaves it out of the output.
            # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
            pass

        located = article.get('dateline', {}).get('located', {})
        if located:
            ninjs['located'] = located.get('city', '')

        for copy_property in self.direct_copy_properties:
            if article.get(copy_property) is not None:
                ninjs[copy_property] = article[copy_property]

        if 'body_text' not in article and 'alt_text' in article:
            ninjs['body_text'] = article['alt_text']

        if 'title' in article:
            ninjs['headline'] = article['title']

        if article.get('body_html'):
            ninjs['body_html'] = self.append_body_footer(article)

        if article.get('description'):
            # may be overwritten further down when the article has an abstract
            ninjs['description_html'] = self.append_body_footer(article)

        if recursive:
            if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                ninjs['associations'] = self._get_associations(article, subscriber)
                if 'associations' in article:
                    ninjs['associations'].update(self._format_related(article, subscriber))
            elif article.get('associations', {}):
                ninjs['associations'] = self._format_related(article, subscriber)
        elif article.get('associations'):
            ninjs['associations'] = self._format_related(article, subscriber)

        if article.get(EMBARGO):
            ninjs['embargoed'] = get_utc_schedule(article, EMBARGO).isoformat()

        # a missing or falsy priority falls back to 5
        if article.get('priority'):
            ninjs['priority'] = article['priority']
        else:
            ninjs['priority'] = 5

        if article.get('subject'):
            ninjs['subject'] = self._get_subject(article)

        if article.get('anpa_category'):
            ninjs['service'] = self._get_service(article)

        # existing renditions win; otherwise they are generated when the article has a url
        if article.get('renditions'):
            ninjs['renditions'] = self._get_renditions(article)
        elif 'url' in article:
            ninjs['renditions'] = self._generate_renditions(article)

        # SDPA-317
        # the abstract provides both the html description and its plain text version
        if article.get('abstract'):
            abstract = article.get('abstract')
            ninjs['description_html'] = abstract
            soup = BeautifulSoup(abstract, 'html.parser')
            ninjs['description_text'] = soup.get_text()
        elif article.get('description_text'):
            ninjs['description_text'] = article.get('description_text')

        if article.get('company_codes'):
            ninjs['organisation'] = [{'name': c.get('name', ''), 'rel': 'Securities Identifier',
                                      'symbols': [{'ticker': c.get('qcode', ''),
                                                   'exchange': c.get('security_exchange', '')}]}
                                     for c in article['company_codes']]
        elif 'company' in article:
            ninjs['organisation'] = [{'name': article['company']}]

        return ninjs
Example #40
0
    def queue_transmission(self, doc, subscribers, subscriber_codes=None, associations=None):
        """Method formats and then queues the article for transmission to the passed subscribers.

        ::Important Note:: Format Type across Subscribers can repeat. But we can't have formatted item generated once
        based on the format_types configured across for all the subscribers as the formatted item must have a published
        sequence number generated by Subscriber.

        A failure while handling one subscriber is logged and the remaining subscribers are still processed.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :param dict subscriber_codes: codes looked up by subscriber id; ``None`` is treated as no codes
        :param dict associations: associated items looked up by subscriber id; ``None`` is treated as none
        :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        # None defaults replace the former shared mutable ``{}`` defaults
        if subscriber_codes is None:
            subscriber_codes = {}
        if associations is None:
            associations = {}

        queued = False
        no_formatters = []
        for subscriber in subscribers:
            try:
                if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                        subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can get only text and preformatted stories
                    continue

                for destination in self.get_destinations(subscriber):
                    packaged = (destination.get('config') or {}).get('packaged', False)

                    if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and packaged:
                        # NOTE(review): doc is rebound here, so later destinations and subscribers
                        # see the embedded version - confirm this is intended
                        doc = self._embed_package_items(doc)

                    if doc.get(PUBLISHED_IN_PACKAGE) and packaged:
                        continue

                    # Step 2(a)
                    formatter = get_formatter(destination['format'], doc)

                    if not formatter:  # if formatter not found then record it
                        no_formatters.append(destination['format'])
                        continue

                    subscriber_id = subscriber[config.ID_FIELD]
                    formatted_docs = formatter.format(apply_schema(doc),
                                                      subscriber,
                                                      subscriber_codes.get(subscriber_id))

                    # normalise (seq_num, formatted_item) pairs into dicts, in place
                    for idx, publish_data in enumerate(formatted_docs):
                        if not isinstance(publish_data, dict):
                            pub_seq_num, formatted_doc = publish_data
                            formatted_docs[idx] = {'published_seq_num': pub_seq_num,
                                                   'formatted_item': formatted_doc}
                        else:
                            assert 'published_seq_num' in publish_data and 'formatted_item' in publish_data,\
                                "missing keys in publish_data"

                    for publish_queue_item in formatted_docs:
                        publish_queue_item['item_id'] = doc['item_id']
                        publish_queue_item['item_version'] = doc[config.VERSION]
                        publish_queue_item['subscriber_id'] = subscriber_id
                        publish_queue_item['codes'] = subscriber_codes.get(subscriber_id)
                        publish_queue_item['destination'] = destination
                        # publish_schedule is just to indicate in the queue item is create via scheduled item
                        publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                        publish_queue_item['unique_name'] = doc.get('unique_name', None)
                        publish_queue_item['content_type'] = doc.get('type', None)
                        publish_queue_item['headline'] = doc.get('headline', None)
                        publish_queue_item['publishing_action'] = self.published_state
                        publish_queue_item['ingest_provider'] = \
                            ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                        publish_queue_item['associated_items'] = associations.get(subscriber_id, [])

                        if doc.get(PUBLISHED_IN_PACKAGE):
                            publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]

                        if 'encoded_item' in publish_queue_item:
                            # binary payload goes to storage; only its id stays on the queue item
                            binary = io.BytesIO(publish_queue_item.pop('encoded_item'))
                            publish_queue_item['encoded_item_id'] = app.storage.put(binary)
                        publish_queue_item.pop(ITEM_STATE, None)

                        # content api delivery will be marked as SUCCESS in queue
                        get_resource_service('publish_queue').post([publish_queue_item])

                        queued = True
            except Exception:
                logger.exception("Failed to queue item for id {} with headline {} for subscriber {}."
                                 .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

        return no_formatters, queued
    def _transform_to_ninjs(self, article, subscriber, recursive=True):
        ninjs = {
            "guid": article.get(GUID_FIELD, article.get("uri")),
            "version": str(article.get(config.VERSION, 1)),
            "type": self._get_type(article),
        }

        if article.get("byline"):
            ninjs["byline"] = article["byline"]

        located = article.get("dateline", {}).get("located", {})
        if located:
            ninjs["located"] = located.get("city", "")

        for copy_property in self.direct_copy_properties:
            if article.get(copy_property) is not None:
                ninjs[copy_property] = article[copy_property]

        if "body_text" not in article and "alt_text" in article:
            ninjs["body_text"] = article["alt_text"]

        if "title" in article:
            ninjs["headline"] = article["title"]

        if article.get("body_html"):
            ninjs["body_html"] = self.append_body_footer(article)

        if article.get("description"):
            ninjs["description_html"] = self.append_body_footer(article)

        if article.get("place"):
            ninjs["place"] = self._format_place(article)

        if article.get("profile"):
            ninjs["profile"] = self._format_profile(article["profile"])

        extra_items = None
        if recursive:
            if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                ninjs[ASSOCIATIONS] = self._get_associations(
                    article, subscriber)
                if article.get(ASSOCIATIONS):
                    associations, extra_items = self._format_related(
                        article, subscriber)
                    ninjs[ASSOCIATIONS].update(associations)
            elif article.get(ASSOCIATIONS):
                ninjs[ASSOCIATIONS], extra_items = self._format_related(
                    article, subscriber)
        elif article.get(ASSOCIATIONS) and recursive:
            ninjs[ASSOCIATIONS], extra_items = self._format_related(
                article, subscriber)
        if extra_items:
            ninjs.setdefault(EXTRA_ITEMS, {}).update(extra_items)

        if article.get("embargoed"):
            ninjs["embargoed"] = article["embargoed"].isoformat()

        if article.get(
                EMBARGO):  # embargo set in superdesk overrides ingested one
            ninjs["embargoed"] = get_utc_schedule(article, EMBARGO).isoformat()

        if article.get("priority"):
            ninjs["priority"] = article["priority"]
        else:
            ninjs["priority"] = 5

        if article.get("subject"):
            ninjs["subject"] = self._get_subject(article)

        if article.get("anpa_category"):
            ninjs["service"] = self._get_service(article)
        if article.get("renditions"):
            ninjs["renditions"] = self._get_renditions(article)
        elif "url" in article:
            ninjs["renditions"] = self._generate_renditions(article)

        if "order" in article:
            ninjs["order"] = article["order"]

        # SDPA-317
        if "abstract" in article:
            abstract = article.get("abstract", "")
            ninjs["description_html"] = abstract
            ninjs["description_text"] = text_utils.get_text(abstract)
        elif article.get("description_text"):
            ninjs["description_text"] = article.get("description_text")

        if article.get("company_codes"):
            ninjs["organisation"] = [{
                "name":
                c.get("name", ""),
                "rel":
                "Securities Identifier",
                "symbols": [{
                    "ticker": c.get("qcode", ""),
                    "exchange": c.get("security_exchange", "")
                }],
            } for c in article["company_codes"]]
        elif "company" in article:
            ninjs["organisation"] = [{"name": article["company"]}]

        if article.get("rewrite_of"):
            ninjs["evolvedfrom"] = article["rewrite_of"]

        if not ninjs.get("copyrightholder") and not ninjs.get(
                "copyrightnotice") and not ninjs.get("usageterms"):
            ninjs.update(
                superdesk.get_resource_service("vocabularies").get_rightsinfo(
                    article))

        if article.get("genre"):
            ninjs["genre"] = self._get_genre(article)

        if article.get("flags", {}).get("marked_for_legal"):
            ninjs["signal"] = self._format_signal_cwarn()

        if article.get("signal"):
            ninjs.setdefault("signal", []).extend(
                [self._format_signal(signal) for signal in article["signal"]])

        if article.get("attachments"):
            ninjs["attachments"] = self._format_attachments(article)

        if ninjs["type"] == CONTENT_TYPE.TEXT and ("body_html" in ninjs
                                                   or "body_text" in ninjs):
            if "body_html" in ninjs:
                body_html = ninjs["body_html"]
                word_count = text_utils.get_word_count(body_html)
                char_count = text_utils.get_char_count(body_html)
                readtime = text_utils.get_reading_time(body_html, word_count,
                                                       article.get("language"))
            else:
                body_text = ninjs["body_text"]
                word_count = text_utils.get_text_word_count(body_text)
                char_count = len(body_text)
                readtime = text_utils.get_reading_time(body_text, word_count,
                                                       article.get("language"))
            ninjs["charcount"] = char_count
            ninjs["wordcount"] = word_count
            ninjs["readtime"] = readtime

        if article.get("authors"):
            ninjs["authors"] = self._format_authors(article)

        if (article.get("schedule_settings")
                or {}).get("utc_publish_schedule"):
            ninjs["publish_schedule"] = article["schedule_settings"][
                "utc_publish_schedule"]

        # set description for custom embed field
        if article.get("extra"):
            ninjs["extra"] = article["extra"]
            for key, value in ninjs["extra"].items():
                if type(value) == dict and "embed" in value:
                    value.setdefault("description", "")

        return ninjs
Example #42
0
    def _transform_to_ninjs(self, article, subscriber, recursive=True):
        """Build the ninjs representation of ``article``.

        :param dict article: item to format
        :param subscriber: forwarded to ``_get_associations`` / ``_format_related``
        :param bool recursive: when true, a composite item gets its associations
            from ``_get_associations`` (merged with the ``_format_related`` result)
        :return: dict holding the ninjs properties of the item
        """
        ninjs = {
            'guid': article.get(GUID_FIELD, article.get('uri')),
            'version': str(article.get(config.VERSION, 1)),
            'type': self._get_type(article)
        }

        if article.get('byline'):
            ninjs['byline'] = article['byline']

        # ``dict.get(key, {})`` only falls back when the key is missing, so a
        # ``dateline`` (or ``located``) stored as None would break the chained
        # lookup; fall back explicitly instead.
        located = (article.get('dateline') or {}).get('located') or {}
        if located:
            ninjs['located'] = located.get('city', '')

        for copy_property in self.direct_copy_properties:
            if article.get(copy_property) is not None:
                ninjs[copy_property] = article[copy_property]

        if 'body_text' not in article and 'alt_text' in article:
            ninjs['body_text'] = article['alt_text']

        if 'title' in article:
            ninjs['headline'] = article['title']

        if article.get('body_html'):
            ninjs['body_html'] = self.append_body_footer(article)

        # NOTE(review): description_html is filled from the same
        # append_body_footer(article) call as body_html - confirm this is intended.
        if article.get('description'):
            ninjs['description_html'] = self.append_body_footer(article)

        if article.get('place'):
            ninjs['place'] = self._format_place(article)

        if article.get('profile'):
            ninjs['profile'] = self._format_profile(article['profile'])

        if recursive:
            if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                ninjs[ASSOCIATIONS] = self._get_associations(
                    article, subscriber)
                if article.get(ASSOCIATIONS):
                    ninjs[ASSOCIATIONS].update(
                        self._format_related(article, subscriber))
            elif article.get(ASSOCIATIONS):
                ninjs[ASSOCIATIONS] = self._format_related(article, subscriber)
        elif article.get(ASSOCIATIONS):
            ninjs[ASSOCIATIONS] = self._format_related(article, subscriber)

        if article.get(EMBARGO):
            ninjs['embargoed'] = get_utc_schedule(article, EMBARGO).isoformat()

        # priority falls back to 5 when the article has none (or a falsy one)
        if article.get('priority'):
            ninjs['priority'] = article['priority']
        else:
            ninjs['priority'] = 5

        if article.get('subject'):
            ninjs['subject'] = self._get_subject(article)

        if article.get('anpa_category'):
            ninjs['service'] = self._get_service(article)
        if article.get('renditions'):
            ninjs['renditions'] = self._get_renditions(article)
        elif 'url' in article:
            ninjs['renditions'] = self._generate_renditions(article)

        # SDPA-317
        # An abstract, when present, overrides any description_html set above.
        if 'abstract' in article:
            abstract = article.get('abstract', '')
            ninjs['description_html'] = abstract
            ninjs['description_text'] = text_utils.get_text(abstract)
        elif article.get('description_text'):
            ninjs['description_text'] = article.get('description_text')

        if article.get('company_codes'):
            ninjs['organisation'] = [{
                'name':
                c.get('name', ''),
                'rel':
                'Securities Identifier',
                'symbols': [{
                    'ticker': c.get('qcode', ''),
                    'exchange': c.get('security_exchange', '')
                }]
            } for c in article['company_codes']]
        elif 'company' in article:
            ninjs['organisation'] = [{'name': article['company']}]

        if article.get('rewrite_of'):
            ninjs['evolvedfrom'] = article['rewrite_of']

        # Rights info from the vocabularies service is merged in only when ninjs
        # has none of the three rights fields set so far.
        if not ninjs.get('copyrightholder') and not ninjs.get(
                'copyrightnotice') and not ninjs.get('usageterms'):
            ninjs.update(
                superdesk.get_resource_service('vocabularies').get_rightsinfo(
                    article))

        if 'genre' in article:
            ninjs['genre'] = self._get_genre(article)

        # Same None-safety as for ``dateline``: ``flags`` may be present but None.
        if (article.get('flags') or {}).get('marked_for_legal'):
            ninjs['signal'] = self._format_signal_cwarn()

        if article.get('attachments'):
            ninjs['attachments'] = self._format_attachments(article)

        # Counts are computed only for text items that ended up with a body;
        # body_html takes precedence over body_text.
        if ninjs['type'] == CONTENT_TYPE.TEXT and ('body_html' in ninjs
                                                   or 'body_text' in ninjs):
            if 'body_html' in ninjs:
                body_html = ninjs['body_html']
                word_count = text_utils.get_word_count(body_html)
                char_count = text_utils.get_char_count(body_html)
                readtime = text_utils.get_reading_time(body_html, word_count,
                                                       article.get('language'))
            else:
                body_text = ninjs['body_text']
                word_count = text_utils.get_text_word_count(body_text)
                char_count = len(body_text)
                readtime = text_utils.get_reading_time(body_text, word_count,
                                                       article.get('language'))
            ninjs['charcount'] = char_count
            ninjs['wordcount'] = word_count
            ninjs['readtime'] = readtime

        if article.get('authors'):
            ninjs['authors'] = self._format_authors(article)

        return ninjs
    def _transform_to_ninjs(self, article, subscriber, recursive=True):
        """Build the ninjs representation of ``article``.

        :param dict article: item to format
        :param subscriber: forwarded to ``_get_associations`` / ``_format_related``
        :param bool recursive: when true, a composite item gets its associations
            from ``_get_associations`` (merged with the ``_format_related`` result)
        :return: dict holding the ninjs properties of the item
        """
        ninjs = {
            "guid": article.get(GUID_FIELD, article.get("uri")),
            "version": str(article.get(config.VERSION, 1)),
            "type": self._get_type(article),
        }

        # The byline is best effort: a failure in ``_get_byline`` simply leaves it
        # out. Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        try:
            ninjs["byline"] = self._get_byline(article)
        except Exception:
            pass

        # ``dict.get(key, {})`` only falls back when the key is missing, so a
        # ``dateline`` (or ``located``) stored as None would break the chained
        # lookup; fall back explicitly instead.
        located = (article.get("dateline") or {}).get("located") or {}
        if located:
            ninjs["located"] = located.get("city", "")

        for copy_property in self.direct_copy_properties:
            if article.get(copy_property) is not None:
                ninjs[copy_property] = article[copy_property]

        if "body_text" not in article and "alt_text" in article:
            ninjs["body_text"] = article["alt_text"]

        if "title" in article:
            ninjs["headline"] = article["title"]

        if article.get("body_html"):
            ninjs["body_html"] = self.append_body_footer(article)

        # NOTE(review): description_html is filled from the same
        # append_body_footer(article) call as body_html - confirm this is intended.
        if article.get("description"):
            ninjs["description_html"] = self.append_body_footer(article)

        if recursive:
            if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                ninjs["associations"] = self._get_associations(article, subscriber)
                if "associations" in article:
                    ninjs["associations"].update(self._format_related(article, subscriber))
            elif article.get("associations", {}):
                ninjs["associations"] = self._format_related(article, subscriber)
        elif article.get("associations"):
            ninjs["associations"] = self._format_related(article, subscriber)

        if article.get(EMBARGO):
            ninjs["embargoed"] = get_utc_schedule(article, EMBARGO).isoformat()

        # priority falls back to 5 when the article has none (or a falsy one)
        if article.get("priority"):
            ninjs["priority"] = article["priority"]
        else:
            ninjs["priority"] = 5

        if article.get("subject"):
            ninjs["subject"] = self._get_subject(article)

        if article.get("anpa_category"):
            ninjs["service"] = self._get_service(article)
        if article.get("renditions"):
            ninjs["renditions"] = self._get_renditions(article)
        elif "url" in article:
            ninjs["renditions"] = self._generate_renditions(article)

        # SDPA-317
        # A non-empty abstract overrides any description_html set above; its
        # plain-text form is extracted with BeautifulSoup.
        if article.get("abstract"):
            abstract = article.get("abstract")
            ninjs["description_html"] = abstract
            soup = BeautifulSoup(abstract, "html.parser")
            ninjs["description_text"] = soup.get_text()
        elif article.get("description_text"):
            ninjs["description_text"] = article.get("description_text")

        if article.get("company_codes"):
            ninjs["organisation"] = [
                {
                    "name": c.get("name", ""),
                    "rel": "Securities Identifier",
                    "symbols": [{"ticker": c.get("qcode", ""), "exchange": c.get("security_exchange", "")}],
                }
                for c in article["company_codes"]
            ]
        elif "company" in article:
            ninjs["organisation"] = [{"name": article["company"]}]

        return ninjs
    def _transform_to_ninjs(self, article, subscriber, recursive=True):
        """Build the ninjs representation of ``article``.

        :param dict article: item to format
        :param subscriber: forwarded to ``_get_associations`` / ``_format_related``
        :param bool recursive: when true, a composite item gets its associations
            from ``_get_associations`` (merged with the ``_format_related`` result)
        :return: dict holding the ninjs properties of the item; extra items
            returned by ``_format_related`` are merged under ``EXTRA_ITEMS``
        """
        ninjs = {
            'guid': article.get(GUID_FIELD, article.get('uri')),
            'version': str(article.get(config.VERSION, 1)),
            'type': self._get_type(article)
        }

        if article.get('byline'):
            ninjs['byline'] = article['byline']

        # ``dict.get(key, {})`` only falls back when the key is missing, so a
        # ``dateline`` (or ``located``) stored as None would break the chained
        # lookup; fall back explicitly instead.
        located = (article.get('dateline') or {}).get('located') or {}
        if located:
            ninjs['located'] = located.get('city', '')

        for copy_property in self.direct_copy_properties:
            if article.get(copy_property) is not None:
                ninjs[copy_property] = article[copy_property]

        if 'body_text' not in article and 'alt_text' in article:
            ninjs['body_text'] = article['alt_text']

        if 'title' in article:
            ninjs['headline'] = article['title']

        if article.get('body_html'):
            ninjs['body_html'] = self.append_body_footer(article)

        # NOTE(review): description_html is filled from the same
        # append_body_footer(article) call as body_html - confirm this is intended.
        if article.get('description'):
            ninjs['description_html'] = self.append_body_footer(article)

        if article.get('place'):
            ninjs['place'] = self._format_place(article)

        if article.get('profile'):
            ninjs['profile'] = self._format_profile(article['profile'])

        # ``_format_related`` returns a pair here: the formatted associations and
        # the extra items that go under EXTRA_ITEMS.
        extra_items = None
        if recursive:
            if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                ninjs[ASSOCIATIONS] = self._get_associations(article, subscriber)
                if article.get(ASSOCIATIONS):
                    associations, extra_items = self._format_related(article, subscriber)
                    ninjs[ASSOCIATIONS].update(associations)
            elif article.get(ASSOCIATIONS):
                ninjs[ASSOCIATIONS], extra_items = self._format_related(article, subscriber)
        elif article.get(ASSOCIATIONS):
            ninjs[ASSOCIATIONS], extra_items = self._format_related(article, subscriber)
        if extra_items:
            ninjs.setdefault(EXTRA_ITEMS, {}).update(extra_items)

        if article.get(EMBARGO):
            ninjs['embargoed'] = get_utc_schedule(article, EMBARGO).isoformat()

        # priority falls back to 5 when the article has none (or a falsy one)
        if article.get('priority'):
            ninjs['priority'] = article['priority']
        else:
            ninjs['priority'] = 5

        if article.get('subject'):
            ninjs['subject'] = self._get_subject(article)

        if article.get('anpa_category'):
            ninjs['service'] = self._get_service(article)
        if article.get('renditions'):
            ninjs['renditions'] = self._get_renditions(article)
        elif 'url' in article:
            ninjs['renditions'] = self._generate_renditions(article)

        # SDPA-317
        # An abstract, when present, overrides any description_html set above.
        if 'abstract' in article:
            abstract = article.get('abstract', '')
            ninjs['description_html'] = abstract
            ninjs['description_text'] = text_utils.get_text(abstract)
        elif article.get('description_text'):
            ninjs['description_text'] = article.get('description_text')

        if article.get('company_codes'):
            ninjs['organisation'] = [{'name': c.get('name', ''), 'rel': 'Securities Identifier',
                                      'symbols': [{'ticker': c.get('qcode', ''),
                                                   'exchange': c.get('security_exchange', '')}]}
                                     for c in article['company_codes']]
        elif 'company' in article:
            ninjs['organisation'] = [{'name': article['company']}]

        if article.get('rewrite_of'):
            ninjs['evolvedfrom'] = article['rewrite_of']

        # Rights info from the vocabularies service is merged in only when ninjs
        # has none of the three rights fields set so far.
        if not ninjs.get('copyrightholder') and not ninjs.get('copyrightnotice') and not ninjs.get('usageterms'):
            ninjs.update(superdesk.get_resource_service('vocabularies').get_rightsinfo(article))

        if 'genre' in article:
            ninjs['genre'] = self._get_genre(article)

        # Same None-safety as for ``dateline``: ``flags`` may be present but None.
        if (article.get('flags') or {}).get('marked_for_legal'):
            ninjs['signal'] = self._format_signal_cwarn()

        if article.get('attachments'):
            ninjs['attachments'] = self._format_attachments(article)

        # Counts are computed only for text items that ended up with a body;
        # body_html takes precedence over body_text.
        if ninjs['type'] == CONTENT_TYPE.TEXT and ('body_html' in ninjs or 'body_text' in ninjs):
            if 'body_html' in ninjs:
                body_html = ninjs['body_html']
                word_count = text_utils.get_word_count(body_html)
                char_count = text_utils.get_char_count(body_html)
                readtime = text_utils.get_reading_time(body_html, word_count, article.get('language'))
            else:
                body_text = ninjs['body_text']
                word_count = text_utils.get_text_word_count(body_text)
                char_count = len(body_text)
                readtime = text_utils.get_reading_time(body_text, word_count, article.get('language'))
            ninjs['charcount'] = char_count
            ninjs['wordcount'] = word_count
            ninjs['readtime'] = readtime

        if article.get('authors'):
            ninjs['authors'] = self._format_authors(article)

        return ninjs
Example #45
0
    def _transform_to_ninjs(self, article, subscriber, recursive=True):
        """Build the ninjs representation of ``article``.

        :param dict article: item to format
        :param subscriber: forwarded to ``_get_associations`` / ``_format_related``
        :param bool recursive: when true, a composite item gets its associations
            from ``_get_associations`` (merged with the ``_format_related`` result)
        :return: dict holding the ninjs properties of the item
        """
        ninjs = {
            'guid': article.get(GUID_FIELD, article.get('uri')),
            'version': str(article.get(config.VERSION, 1)),
            'type': self._get_type(article)
        }

        # The byline is best effort: a failure in ``_get_byline`` simply leaves it
        # out. Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        try:
            ninjs['byline'] = self._get_byline(article)
        except Exception:
            pass

        # ``dict.get(key, {})`` only falls back when the key is missing, so a
        # ``dateline`` (or ``located``) stored as None would break the chained
        # lookup; fall back explicitly instead.
        located = (article.get('dateline') or {}).get('located') or {}
        if located:
            ninjs['located'] = located.get('city', '')

        for copy_property in self.direct_copy_properties:
            if article.get(copy_property) is not None:
                ninjs[copy_property] = article[copy_property]

        if 'body_text' not in article and 'alt_text' in article:
            ninjs['body_text'] = article['alt_text']

        if 'title' in article:
            ninjs['headline'] = article['title']

        if article.get('body_html'):
            ninjs['body_html'] = self.append_body_footer(article)

        # NOTE(review): description_html is filled from the same
        # append_body_footer(article) call as body_html - confirm this is intended.
        if article.get('description'):
            ninjs['description_html'] = self.append_body_footer(article)

        if recursive:
            if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                ninjs['associations'] = self._get_associations(article, subscriber)
                if 'associations' in article:
                    ninjs['associations'].update(self._format_related(article, subscriber))
            elif article.get('associations', {}):
                ninjs['associations'] = self._format_related(article, subscriber)
        elif article.get('associations'):
            ninjs['associations'] = self._format_related(article, subscriber)

        if article.get(EMBARGO):
            ninjs['embargoed'] = get_utc_schedule(article, EMBARGO).isoformat()

        # priority falls back to 5 when the article has none (or a falsy one)
        if article.get('priority'):
            ninjs['priority'] = article['priority']
        else:
            ninjs['priority'] = 5

        if article.get('subject'):
            ninjs['subject'] = self._get_subject(article)

        if article.get('anpa_category'):
            ninjs['service'] = self._get_service(article)
        if article.get('renditions'):
            ninjs['renditions'] = self._get_renditions(article)
        elif 'url' in article:
            ninjs['renditions'] = self._generate_renditions(article)

        # SDPA-317
        # A non-empty abstract overrides any description_html set above; its
        # plain-text form is extracted with BeautifulSoup.
        if article.get('abstract'):
            abstract = article.get('abstract')
            ninjs['description_html'] = abstract
            soup = BeautifulSoup(abstract, 'html.parser')
            ninjs['description_text'] = soup.get_text()
        elif article.get('description_text'):
            ninjs['description_text'] = article.get('description_text')

        if article.get('company_codes'):
            ninjs['organisation'] = [{'name': c.get('name', ''), 'rel': 'Securities Identifier',
                                      'symbols': [{'ticker': c.get('qcode', ''),
                                                   'exchange': c.get('security_exchange', '')}]}
                                     for c in article['company_codes']]
        elif 'company' in article:
            ninjs['organisation'] = [{'name': article['company']}]

        return ninjs
    def format(self, article, subscriber):
        """
        Build the parameter dictionaries passed to the IPNews InsertNews stored procedure.

        One entry is produced for every ``anpa_category`` of the article.

        :param dict article: item to format
        :param subscriber: subscriber the sequence numbers are generated for
        :return: list of ``(sequence number, json encoded parameter dictionary)`` tuples
        :raises: the result of ``FormatterError.AAPIpNewsFormatterError(ex, subscriber)``
            for any exception raised while formatting
        """
        def escape(value):
            # every free-text parameter has its single quotes doubled
            return value.replace('\'', '\'\'')

        try:
            formatted = []
            for category in article.get('anpa_category'):
                seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
                # NOTE: keys are inserted in a fixed order; json.dumps below emits them in that order.
                row = {
                    'originator': article.get('source', None),
                    'sequence': seq_num,
                    'category': category.get('qcode'),
                    'headline': escape(article.get('headline', '')),
                    'author': escape(article.get('byline', '')),
                    'keyword': escape(self.append_legal(article=article, truncate=True)),
                    'subject_reference': set_subject(category, article),
                }

                # Expand the subject reference into subject / matter / detail names,
                # or normalise a missing reference to the all-zero code.
                reference = row['subject_reference']
                if reference is None or reference == '00000000':
                    row['subject_reference'] = '00000000'
                else:
                    row['subject'] = subject_codes[reference[:2] + '000000']
                    row['subject_matter'] = subject_codes[reference[:5] + '000'] if reference[2:5] != '000' else ''
                    row['subject_detail'] = '' if reference.endswith('000') else subject_codes[reference]

                row['take_key'] = escape(article.get('anpa_take_key', ''))  # @take_key
                row['usn'] = article.get('unique_id', None)  # @usn

                item_type = article[ITEM_TYPE]
                if item_type == CONTENT_TYPE.PREFORMATTED:  # @article_text
                    row['article_text'] = escape(self.append_body_footer(article))
                elif item_type == CONTENT_TYPE.TEXT:
                    soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
                    buffer = StringIO()
                    for paragraph in soup.findAll('p'):
                        # each paragraph starts with the \x19 marker on its own line
                        buffer.write('\x19\r\n')
                        for line in paragraph.get_text('\n').split('\n'):
                            if len(line) <= 80:
                                buffer.write(line + ' \r\n')
                            else:
                                # lines longer than 80 characters are wrapped
                                buffer.write(textwrap.fill(line, 80).replace('\n', ' \r\n'))
                    row['article_text'] = escape(buffer.getvalue())

                if 'genre' not in article or len(article['genre']) < 1:
                    row['genre'] = 'Current'  # @genre
                else:
                    row['genre'] = article['genre'][0].get('name', None)

                tab_type = article.get(ITEM_TYPE, CONTENT_TYPE.TEXT)
                if tab_type == CONTENT_TYPE.TEXT:
                    row['texttab'] = 'x'
                elif tab_type == CONTENT_TYPE.PREFORMATTED:
                    row['texttab'] = 't'

                row.update({
                    'wordcount': article.get('word_count', None),  # @wordcount
                    'news_item_type': 'News',
                    'priority': map_priority(article.get('priority')),  # @priority
                    'service_level': 'a',  # @service_level
                    'fullStory': 1,
                    'ident': '0',  # @ident
                })

                selector_mapper = SelectorcodeMapper()
                selector_mapper.map(article, category.get('qcode').upper(),
                                    subscriber=subscriber, formatted_item=row)
                locator_prefix = LocatorMapper().map(article, category.get('qcode').upper())
                if locator_prefix:
                    row['headline'] = '{}:{}'.format(locator_prefix, row['headline'])

                if article.get(EMBARGO):
                    stamp = get_utc_schedule(article, EMBARGO).isoformat()
                    notice = '{}{}'.format('Embargo Content. Timestamp: ', stamp)
                    row['article_text'] = notice + row['article_text']

                formatted.append((seq_num, json.dumps(row)))

            return formatted
        except Exception as ex:
            raise FormatterError.AAPIpNewsFormatterError(ex, subscriber)
Example #47
0
    def _validate_associated_items(self, original_item, updates=None, validation_errors=None):
        """Validates associated items.

        This function will ensure that the unpublished content validates and none of
        the content is locked, also do not allow any killed or recalled or spiked content.

        Nothing is returned; problems are reported by appending to ``validation_errors``.

        :param dict original_item: item whose associations (and, for a composite item
            being published, package references) are validated
        :param dict updates: pending updates; their associations override those of
            ``original_item``
        :param list validation_errors: validation errors are appended if there are any.
        """

        if validation_errors is None:
            validation_errors = []

        if updates is None:
            updates = {}

        # merge associations
        associations = deepcopy(original_item.get(ASSOCIATIONS, {}))
        associations.update(updates.get(ASSOCIATIONS, {}))

        items = list(associations.values())
        if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and self.publish_type == ITEM_PUBLISH:
            items.extend(self.package_service.get_residrefs(original_item))

        main_publish_schedule = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(
            original_item, PUBLISH_SCHEDULE
        )

        for item in items:
            orig = None
            # An entry is either an embedded item (a dict carrying an id) or a bare id.
            if isinstance(item, dict) and item.get(config.ID_FIELD):
                doc = item
                orig = super().find_one(req=None, _id=item[config.ID_FIELD])
                if not app.settings.get("COPY_METADATA_FROM_PARENT") and orig:
                    doc = orig
                # carry the stored lock over to the doc; orig may be None or unlocked
                try:
                    doc.update({"lock_user": orig["lock_user"]})
                except (TypeError, KeyError):
                    pass
            elif item:
                doc = super().find_one(req=None, _id=item)
            else:
                continue

            if not doc:
                continue

            if not orig:
                orig = doc.copy()

            if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._validate_associated_items(doc, validation_errors=validation_errors)

            # make sure no items are killed or recalled or spiked
            # using the latest version of the item from archive
            doc_item_state = orig.get(ITEM_STATE, CONTENT_STATE.PUBLISHED)
            if (
                doc_item_state
                in {
                    CONTENT_STATE.KILLED,
                    CONTENT_STATE.RECALLED,
                    CONTENT_STATE.SPIKED,
                }
                or (doc_item_state == CONTENT_STATE.SCHEDULED and main_publish_schedule is None)
            ):
                validation_errors.append(_("Item cannot contain associated {state} item.").format(state=doc_item_state))
            elif doc_item_state == CONTENT_STATE.SCHEDULED:
                item_schedule = get_utc_schedule(orig, PUBLISH_SCHEDULE)
                if main_publish_schedule < item_schedule:
                    validation_errors.append(_("Associated item is scheduled later than current item."))

            if doc.get(EMBARGO):
                validation_errors.append(_("Item cannot have associated items with Embargo"))

            # don't validate items that already have published
            if doc_item_state not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
                validate_item = {"act": self.publish_type, "type": doc[ITEM_TYPE], "validate": doc}
                if isinstance(item, dict):
                    validate_item["embedded"] = True
                errors = get_resource_service("validate").post([validate_item], headline=True, fields=True)[0]
                if errors[0]:
                    pre_errors = [
                        _("Associated item {name} {error}").format(name=doc.get("slugline", ""), error=error)
                        for error in errors[0]
                    ]
                    validation_errors.extend(pre_errors)

            if config.PUBLISH_ASSOCIATED_ITEMS:
                # check the locks on the items
                if doc.get("lock_user"):
                    # The parent may have no ``lock_user`` key at all (e.g. a package
                    # doc reached through the recursive call above); ``.get`` treats
                    # that as "locked by somebody else" instead of raising KeyError.
                    if original_item.get("lock_user") != doc["lock_user"]:
                        validation_errors.append(
                            "{}: {}".format(doc.get("headline", doc["_id"]), _("packaged item is locked by another user"))
                        )
                    else:
                        validation_errors.append(
                            "{}: {}".format(
                                doc.get("headline", doc["_id"]),
                                _("packaged item is locked by you. Unlock it and try again"),
                            )
                        )
Example #48
0
    def format(self, article, subscriber):
        """
        Constructs a dictionary that represents the parameters passed to the IPNews InsertNews stored procedure
        :return: returns the sequence number of the subscriber and the constructed parameter dictionary
        """
        try:
            docs = []
            for category in article.get('anpa_category'):
                pub_seq_num = superdesk.get_resource_service(
                    'subscribers').generate_sequence_number(subscriber)
                odbc_item = {
                    'originator':
                    article.get('source', None),
                    'sequence':
                    pub_seq_num,
                    'category':
                    category.get('qcode'),
                    'headline':
                    article.get('headline', '').replace('\'', '\'\''),
                    'author':
                    article.get('byline', '').replace('\'', '\'\''),
                    'keyword':
                    self.append_legal(article=article,
                                      truncate=True).replace('\'', '\'\''),
                    'subject_reference':
                    set_subject(category, article)
                }

                if 'subject_reference' in odbc_item and odbc_item['subject_reference'] is not None \
                        and odbc_item['subject_reference'] != '00000000':
                    odbc_item['subject'] = subject_codes[
                        odbc_item['subject_reference'][:2] + '000000']
                    if odbc_item['subject_reference'][2:5] != '000':
                        odbc_item['subject_matter'] = subject_codes[
                            odbc_item['subject_reference'][:5] + '000']
                    else:
                        odbc_item['subject_matter'] = ''
                    if not odbc_item['subject_reference'].endswith('000'):
                        odbc_item['subject_detail'] = subject_codes[
                            odbc_item['subject_reference']]
                    else:
                        odbc_item['subject_detail'] = ''
                else:
                    odbc_item['subject_reference'] = '00000000'

                odbc_item['take_key'] = article.get(
                    'anpa_take_key', '').replace('\'', '\'\'')  # @take_key
                odbc_item['usn'] = article.get('unique_id', None)  # @usn
                if article[
                        ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:  # @article_text
                    odbc_item['article_text'] = self.append_body_footer(
                        article).replace('\'', '\'\'')
                elif article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                    soup = BeautifulSoup(self.append_body_footer(article),
                                         "html.parser")
                    text = StringIO()
                    for p in soup.findAll('p'):
                        text.write('\x19\r\n')
                        ptext = p.get_text('\n')
                        for l in ptext.split('\n'):
                            if len(l) > 80:
                                text.write(
                                    textwrap.fill(l,
                                                  80).replace('\n', ' \r\n'))
                            else:
                                text.write(l + ' \r\n')
                    odbc_item['article_text'] = text.getvalue().replace(
                        '\'', '\'\'')

                if 'genre' in article and len(article['genre']) >= 1:
                    odbc_item['genre'] = article['genre'][0].get('name', None)
                else:
                    odbc_item['genre'] = 'Current'  # @genre
                if article.get(ITEM_TYPE,
                               CONTENT_TYPE.TEXT) == CONTENT_TYPE.TEXT:
                    odbc_item['texttab'] = 'x'
                elif article.get(ITEM_TYPE, None) == CONTENT_TYPE.PREFORMATTED:
                    odbc_item['texttab'] = 't'
                odbc_item['wordcount'] = article.get('word_count',
                                                     None)  # @wordcount
                odbc_item['news_item_type'] = 'News'
                odbc_item['priority'] = map_priority(
                    article.get('priority'))  # @priority
                odbc_item['service_level'] = 'a'  # @service_level
                odbc_item['fullStory'] = 1
                odbc_item['ident'] = '0'  # @ident

                SelectorcodeMapper().map(article,
                                         category.get('qcode').upper(),
                                         subscriber=subscriber,
                                         formatted_item=odbc_item)
                headline_prefix = LocatorMapper().map(
                    article,
                    category.get('qcode').upper())
                if headline_prefix:
                    odbc_item['headline'] = '{}:{}'.format(
                        headline_prefix, odbc_item['headline'])

                if article.get(EMBARGO):
                    embargo = '{}{}'.format(
                        'Embargo Content. Timestamp: ',
                        get_utc_schedule(article, EMBARGO).isoformat())
                    odbc_item[
                        'article_text'] = embargo + odbc_item['article_text']

                docs.append((pub_seq_num, json.dumps(odbc_item)))

            return docs
        except Exception as ex:
            raise FormatterError.AAPIpNewsFormatterError(ex, subscriber)