def format(self, article, subscriber, codes=None):
        """
        Create article in NewsML G2 format

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :param list codes: selector codes (not referenced by this method)
        :return [(int, str)]: return a List of tuples. A tuple consist of
            publish sequence number and formatted article string.
        :raises FormatterError: if the formatter fails to format an article
        """
        try:
            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
            is_package = self._is_package(article)
            # Merge the debug attributes into a copy: updating
            # self._message_attrib in place would permanently alter a dict
            # that outlives this call and may be shared between instances.
            message_attrib = dict(self._message_attrib)
            message_attrib.update(self._debug_message_extra)
            news_message = etree.Element('newsMessage', attrib=message_attrib)
            self._format_header(article, news_message, pub_seq_num)
            item_set = self._format_item(news_message)
            if is_package:
                item = self._format_item_set(article, item_set, 'packageItem')
                self._format_groupset(article, item)
            elif article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
                item = self._format_item_set(article, item_set, 'newsItem')
                self._format_contentset(article, item)
            else:
                # every other item type embeds an NITF rendition as content
                nitf_formatter = NITFFormatter()
                nitf = nitf_formatter.get_nitf(article, subscriber, pub_seq_num)
                news_item = self._format_item_set(article, item_set, 'newsItem')
                self._format_content(article, news_item, nitf)

            return [(pub_seq_num, self.XML_ROOT + etree.tostring(news_message).decode('utf-8'))]
        except Exception as ex:
            raise FormatterError.newmsmlG2FormatterError(ex, subscriber)
Example #2
0
    def format(self, article, subscriber, codes=None):
        """
        Render the article as a Belga NewsML 1.2 document.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :param list codes: not referenced by this method
        :return [(int, str)]: single-element list holding the publish
            sequence number and the pretty-printed XML string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            # State kept on the instance; the _format_* calls below take no
            # arguments.
            self._newsml = etree.Element('NewsML')
            self._article = article
            self._now = utcnow()
            self._string_now = self._now.strftime(self.DATETIME_FORMAT)
            # Superdesk has no packages in this structure, but the legacy
            # Belga NewsML 1.2 output expects a package id, so derive one
            # from the item guid.
            self._package_duid = 'pkg_{}'.format(article[GUID_FIELD])

            self._format_catalog()
            self._format_newsenvelope()
            self._format_newsitem()

            serialized = etree.tostring(self._newsml, pretty_print=True).decode('utf-8')
            return [(pub_seq_num, self.XML_ROOT + '\n' + serialized)]
        except Exception as ex:
            raise FormatterError.newml12FormatterError(ex, subscriber)
Example #3
0
    def format(self, article, subscriber, codes=None):
        """
        Build the plain-text payload (keyword, headline, body) for the article.

        Single quotes in every field are doubled, and non-breaking spaces in
        the headline are replaced by regular spaces.

        :param dict article: item to format
        :param dict subscriber: only used when reporting a failure
        :param list codes: not referenced by this method
        :return: ``[(0, json_string)]`` where the JSON object has a single
            ``article_text`` key
        :raises FormatterError: if anything fails while formatting
        """
        try:
            headline_text = get_text(article.get('headline', ''), content='html')
            formatted_doc = {
                'headline': headline_text.replace('\'', '\'\'').replace('\xA0', ' '),
                'keyword': article.get('slugline', '').replace('\'', '\'\''),
            }

            # body formatting
            if article.get(FORMAT) == FORMATS.PRESERVED:
                preserved_body = get_text(self.append_body_footer(article), content='html')
                formatted_doc['article_text'] = preserved_body.replace('\'', '\'\'')
            elif article.get(FORMAT, FORMATS.HTML) == FORMATS.HTML:
                ascii_body = to_ascii(self.append_body_footer(article))
                wrapped_body = self.get_wrapped_text_content(ascii_body)
                formatted_doc['article_text'] = wrapped_body.replace('\'', '\'\'')

            self.refine_article_body(formatted_doc, article)

            # Frame the text output according to AAP requirement
            formatted_output = ''.join([
                'KEYWORD: ', formatted_doc.get('keyword', ''), '\r\n',
                'HEADLINE: ', formatted_doc.get('headline', ''), '\r\n',
                '   ', formatted_doc.get('article_text', ''),
            ])

            return [(0, json.dumps({'article_text': formatted_output}))]
        except Exception as ex:
            raise FormatterError.AAPTextFormatterError(ex, subscriber)
Example #4
0
    def format(self, article, subscriber, codes=None):
        """
        Render the article as a NewsML 1.2 document.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :param list codes: not referenced by this method
        :return [(int, str)]: single-element list holding the publish
            sequence number and the XML string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            subscribers_service = superdesk.get_resource_service("subscribers")
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            # timestamp of this run, kept on the instance in both forms
            self.now = utcnow()
            self.string_now = self.now.strftime("%Y%m%dT%H%M%S+0000")

            root = etree.Element("NewsML")
            catalog_attrib = {
                "Href": "http://www.iptc.org/std/catalog/catalog.IptcMasterCatalog.xml",
            }
            SubElement(root, "Catalog", catalog_attrib)
            envelope = SubElement(root, "NewsEnvelope")
            item = SubElement(root, "NewsItem")

            self._format_news_envelope(article, envelope, pub_seq_num)
            self._format_identification(article, item)
            self._format_news_management(article, item)
            self._format_news_component(article, item)

            xml_bytes = etree.tostring(root, encoding=self.ENCODING)
            return [(pub_seq_num, self.XML_ROOT + xml_bytes.decode(self.ENCODING))]
        except Exception as ex:
            raise FormatterError.newml12FormatterError(ex, subscriber)
    def format(self, article, subscriber):
        """
        Render the article as a NewsML 1.2 document.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :return [(int, str)]: single-element list holding the publish
            sequence number and the XML string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            root = etree.Element("NewsML")
            catalog_attrib = {'Href': 'http://www.iptc.org/std/catalog/catalog.IptcMasterCatalog.xml'}
            SubElement(root, "Catalog", catalog_attrib)
            envelope = SubElement(root, "NewsEnvelope")
            item = SubElement(root, "NewsItem")

            self._format_news_envelope(article, envelope, pub_seq_num)
            self._format_identification(article, item)
            self._format_news_management(article, item)
            self._format_news_component(article, item)

            xml_body = etree.tostring(root).decode('utf-8')
            return [(pub_seq_num, self.XML_ROOT + xml_body)]
        except Exception as ex:
            raise FormatterError.newml12FormatterError(ex, subscriber)
    def format(self, article, subscriber):
        """
        Constructs a dictionary that represents the parameters passed to the SMS InsertAlerts stored procedure

        The incoming article is not modified: the embargo notice is prepended
        to a local copy of the body, so formatting the same article more than
        once does not stack the notice.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :return: returns the sequence number of the subscriber and the constructed parameter dictionary
        :raises FormatterError: if anything fails while formatting
        """
        try:
            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)

            # A missing or empty category list yields a None category rather
            # than an IndexError.
            categories = article.get('anpa_category') or [{}]

            odbc_item = {
                'Sequence': pub_seq_num,
                'Category': categories[0].get('qcode'),
                'Headline': article.get('headline', '').replace('\'', '\'\''),
                'Priority': map_priority(article.get('priority'))
            }

            body_html = article.get('body_html') or ''
            if article.get(EMBARGO):
                embargo = '{}{}'.format('Embargo Content. Timestamp: ',
                                        article.get(EMBARGO).isoformat())
                body_html = embargo + body_html

            # Single quotes are doubled in every text field.
            if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                odbc_item['StoryText'] = body_html.replace('\'', '\'\'')  # @article_text
            elif article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                # Name the parser explicitly so the result does not depend on
                # which optional parsers happen to be installed.
                soup = BeautifulSoup(body_html, 'html.parser')
                odbc_item['StoryText'] = soup.text.replace('\'', '\'\'')

            odbc_item['ident'] = '0'

            return [(pub_seq_num, odbc_item)]
        except Exception as ex:
            raise FormatterError.AAPSMSFormatterError(ex, subscriber)
Example #7
0
    def format(self, article, subscriber):
        """
        Serialise the article as a ninjs JSON string.

        :param dict article: item to format; must contain ``_id``,
            ``_current_version`` and ``type``
        :param dict subscriber: subscriber the item is formatted for
        :return [(int, str)]: single-element list holding the publish
            sequence number and the JSON string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)

            ninjs = {
                '_id': article['_id'],
                'version': str(article['_current_version']),
                'type': self._get_type(article)
            }
            # The byline is best effort: a failure leaves the key out.
            # Catch Exception rather than using a bare except so that
            # SystemExit / KeyboardInterrupt are not swallowed.
            try:
                ninjs['byline'] = self._get_byline(article)
            except Exception:
                pass

            for copy_property in self.direct_copy_properties:
                if copy_property in article:
                    ninjs[copy_property] = article[copy_property]

            if 'description' in article:
                ninjs['description_text'] = article['description']

            if article['type'] == 'composite':
                ninjs['associations'] = self._get_associations(article)

            return [(pub_seq_num, json.dumps(ninjs, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.ninjsFormatterError(ex, subscriber)
Example #8
0
    def format(self, article, subscriber):
        """
        Flatten the article body to text and dump the whole article as JSON.

        The article dict is updated in place: ``body_text`` is always set and
        ``place`` is set when a locator can be derived from the first category.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: single-element list with a tuple (int, str) of the publish
            sequence number and the article serialised as a JSON string
        :raises FormatterError: if anything fails while formatting
        """
        try:
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            soup = BeautifulSoup(article.get('body_html', '').strip('\r\n'))

            # every <br> becomes a space followed by whatever text it held
            for line_break in soup.find_all('br'):
                line_break.replace_with(' {}'.format(line_break.get_text()))

            # every <p> becomes its text followed by two CRLF pairs
            for paragraph in soup.find_all('p'):
                paragraph.replace_with('{}\r\n\r\n'.format(paragraph.get_text()))

            article['body_text'] = soup.get_text()

            # the locator is derived from the first category only
            first_category = next(iter(article.get('anpa_category', [])), None)
            if first_category:
                qcode = first_category.get('qcode').upper()
                locator = LocatorMapper().map(article, qcode)
                if locator:
                    article['place'] = [{'qcode': locator, 'name': locator}]

            payload = superdesk.json.dumps(article, default=json_serialize_datetime_objectId)
            return [(pub_seq_num, payload)]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
    def format(self, article, subscriber, codes=None):
        """
        Render the article as a NewsML 1.2 document.

        The helpers operate on a deep copy of the article.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :param list codes: not referenced by this method
        :return [(int, str)]: single-element list holding the publish
            sequence number and the XML string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            working_copy = deepcopy(article)
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            # timestamp of this run, kept on the instance in both forms
            self.now = utcnow()
            self.string_now = self.now.strftime('%Y%m%dT%H%M%S+0000')

            root = etree.Element("NewsML", {'Version': '1.2'})
            catalog_attrib = {
                'Href': 'http://about.reuters.com/newsml/vocabulary/catalog-reuters-3rdParty-master_catalog.xml',
            }
            SubElement(root, "Catalog", catalog_attrib)
            envelope = SubElement(root, "NewsEnvelope")
            item = SubElement(root, "NewsItem")

            self._format_news_envelope(working_copy, envelope, pub_seq_num)
            self._format_identification(working_copy, item)
            self._format_news_management(working_copy, item)
            self._format_news_component(working_copy, item)

            xml_body = etree.tostring(root).decode('utf-8')
            return [(pub_seq_num, self.XML_ROOT + xml_body)]
        except Exception as ex:
            raise FormatterError.newml12FormatterError(ex, subscriber)
Example #10
0
    def format(self, article, subscriber):
        """
        Serialise the article as a ninjs JSON string.

        :param dict article: item to format; must contain ``_id``,
            ``_current_version`` and the item type
        :param dict subscriber: subscriber the item is formatted for
        :return [(int, str)]: single-element list holding the publish
            sequence number and the JSON string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)

            ninjs = {
                '_id': article['_id'],
                'version': str(article['_current_version']),
                'type': self._get_type(article)
            }
            # The byline is best effort: a failure leaves the key out.
            # Catch Exception rather than using a bare except so that
            # SystemExit / KeyboardInterrupt are not swallowed.
            try:
                ninjs['byline'] = self._get_byline(article)
            except Exception:
                pass

            # reuse the city already looked up instead of walking the
            # dateline a second time
            located = article.get('dateline', {}).get('located', {}).get('city')
            if located:
                ninjs['located'] = located

            for copy_property in self.direct_copy_properties:
                if copy_property in article:
                    ninjs[copy_property] = article[copy_property]

            if 'description' in article:
                ninjs['description_text'] = article['description']

            if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                ninjs['associations'] = self._get_associations(article)

            if article.get(EMBARGO):
                ninjs['embargoed'] = article.get(EMBARGO).isoformat()

            return [(pub_seq_num, json.dumps(ninjs, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.ninjsFormatterError(ex, subscriber)
    def format(self, article, subscriber, codes=None):
        """Render the article as a NewsML G2 message.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for; also
            stored on the instance as ``self.subscriber``
        :param list codes: selector codes (not referenced by this method)
        :return [(int, str)]: single-element list holding the publish
            sequence number and the pretty-printed XML string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            self.subscriber = subscriber
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)
            is_package = self._is_package(article)

            root = etree.Element(
                'newsMessage',
                attrib=self._debug_message_extra,
                nsmap=self._message_nsmap,
            )
            self._format_header(article, root, pub_seq_num)
            item_set = self._format_item(root)

            if is_package:
                package_item = self._format_item_set(article, item_set, 'packageItem')
                self._format_groupset(article, package_item)
            elif article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
                media_item = self._format_item_set(article, item_set, 'newsItem')
                self._format_contentset(article, media_item)
            else:
                # every other item type embeds an NITF rendition as content
                nitf = NITFFormatter().get_nitf(article, subscriber, pub_seq_num)
                news_item = self._format_item_set(article, item_set, 'newsItem')
                self._format_content(article, news_item, nitf)

            sd_etree.fix_html_void_elements(root)
            xml_body = etree.tostring(root, pretty_print=True).decode('utf-8')
            return [(pub_seq_num, self.XML_ROOT + xml_body)]
        except Exception as ex:
            raise FormatterError.newmsmlG2FormatterError(ex, subscriber)
Example #12
0
    def format(self, article, subscriber, codes=None):
        """
        Render the article as a NewsML 1.2 document.

        The helpers operate on a deep copy of the article.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :param list codes: not referenced by this method
        :return [(int, str)]: single-element list holding the publish
            sequence number and the XML string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            working_copy = deepcopy(article)
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            root = etree.Element("NewsML", {'Version': '1.2'})
            catalog_attrib = {
                'Href': 'http://about.reuters.com/newsml/vocabulary/catalog-reuters-3rdParty-master_catalog.xml',
            }
            SubElement(root, "Catalog", catalog_attrib)
            envelope = SubElement(root, "NewsEnvelope")
            item = SubElement(root, "NewsItem")

            self._format_news_envelope(working_copy, envelope, pub_seq_num)
            self._format_identification(working_copy, item)
            self._format_news_management(working_copy, item)
            self._format_news_component(working_copy, item)

            xml_body = etree.tostring(root).decode('utf-8')
            return [(pub_seq_num, self.XML_ROOT + xml_body)]
        except Exception as ex:
            raise FormatterError.newml12FormatterError(ex, subscriber)
    def format(self, article, subscriber):
        """
        Flatten the article body to text and dump the whole article as JSON.

        The article dict is updated in place: ``body_text`` is always set and
        ``place`` is set when a locator can be derived from the first category.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: single-element list with a tuple (int, str) of the publish
            sequence number and the article serialised as a JSON string
        :raises FormatterError: if anything fails while formatting
        """
        try:
            sequence_source = superdesk.get_resource_service('subscribers')
            pub_seq_num = sequence_source.generate_sequence_number(subscriber)

            raw_body = article.get('body_html', '')
            soup = BeautifulSoup(raw_body.strip('\r\n'))

            # <br> tags turn into a leading space plus their own text
            for br_tag in soup.find_all('br'):
                replacement = ' {}'.format(br_tag.get_text())
                br_tag.replace_with(replacement)

            # <p> tags turn into their text terminated by a blank line
            for p_tag in soup.find_all('p'):
                replacement = '{}\r\n\r\n'.format(p_tag.get_text())
                p_tag.replace_with(replacement)

            article['body_text'] = soup.get_text()

            # only the first category is used to look up the locator
            category = next(iter(article.get('anpa_category', [])), None)
            if category:
                locator = LocatorMapper().map(article, category.get('qcode').upper())
                if locator:
                    article['place'] = [{'qcode': locator, 'name': locator}]

            dumped = superdesk.json.dumps(article, default=json_serialize_datetime_objectId)
            return [(pub_seq_num, dumped)]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
Example #14
0
 def format(self, article, subscriber, codes=None):
     """
     Render the e-mail subject, text body and (for HTML items with a
     dateline) HTML body of the article.

     Works on a deep copy, so the incoming article is not modified.

     :param dict article: item to format
     :param dict subscriber: subscriber the item is formatted for
     :param list codes: not referenced by this method
     :return: ``[(pub_seq_num, json_string)]`` where the JSON object has the
         keys ``message_html`` (may be null), ``message_text`` and
         ``message_subject``
     :raises FormatterError: if rendering fails
     """
     formatted_article = deepcopy(article)
     pub_seq_num = superdesk.get_resource_service(
         'subscribers').generate_sequence_number(subscriber)
     doc = {}
     try:
         # If there is a dateline inject it into the body
         if formatted_article.get(
                 FORMAT) == FORMATS.HTML and formatted_article.get(
                     'dateline', {}).get('text'):
             soup = BeautifulSoup(formatted_article.get('body_html'),
                                  "html.parser")
             ptag = soup.find('p')
             if ptag is not None:
                 # the dateline goes at the start of the first paragraph
                 ptag.insert(
                     0,
                     NavigableString('{} '.format(
                         formatted_article.get('dateline').get('text'))))
                 formatted_article['body_html'] = str(soup)
             doc['message_html'] = render_template(
                 'email_article_body.html', article=formatted_article)
         else:
             doc['message_html'] = None
         doc['message_text'] = render_template('email_article_body.txt',
                                               article=formatted_article)
         doc['message_subject'] = render_template(
             'email_article_subject.txt', article=formatted_article)
     except Exception as ex:
         # Report the failure against the subscriber; the original passed
         # the FormatterError class itself as the second argument.
         raise FormatterError.EmailFormatterError(ex, subscriber)
     return [(pub_seq_num, json.dumps(doc))]
Example #15
0
    def format(self, original_article, subscriber, codes=None, encoding="us-ascii"):
        """
        Render the article as NITF, returning both a unicode and an encoded form.

        Works on a deep copy of ``original_article``. The ``codes`` and
        ``encoding`` parameters are not referenced here; the encoded output
        uses the module-level ``ENCODING``.

        :param dict original_article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :return: single-element list with a dict holding ``published_seq_num``,
            ``formatted_item`` (str) and ``encoded_item`` (bytes)
        :raises FormatterError: if anything fails inside the formatting step
        """
        global tz
        article = deepcopy(original_article)
        self._populate_metadata(article)
        if tz is None:
            # lazily resolve the configured default timezone on first use
            tz = pytz.timezone(superdesk.app.config['DEFAULT_TIMEZONE'])
        try:
            body_html = article.get('body_html')
            if body_html:
                article['body_html'] = body_html.replace('<br>', '<br />')

            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)
            nitf = self.get_nitf(article, subscriber, pub_seq_num)

            # a KeyError from the language lookup leaves baselang unset
            try:
                nitf.attrib['baselang'] = _get_language(article)
            except KeyError:
                pass

            encoded_body = etree.tostring(nitf, encoding=ENCODING, xml_declaration=False, pretty_print=True)
            declaration = self.XML_DECLARATION + '\n'
            # formatted_item can be used for preview, so it stays unicode
            unicode_item = declaration + etree.tostring(nitf, encoding="unicode")
            encoded_item = declaration.encode(ENCODING) + encoded_body

            return [{
                'published_seq_num': pub_seq_num,
                'formatted_item': unicode_item,
                'encoded_item': encoded_item,
            }]
        except Exception as ex:
            app.sentry.captureException()
            raise FormatterError.nitfFormatterError(ex, subscriber)
    def format(self, article, subscriber):
        """
        Serialise the article as a ninjs JSON string.

        :param dict article: item to format; must contain ``_id`` and the
            item type
        :param dict subscriber: subscriber the item is formatted for
        :return [(int, str)]: single-element list holding the publish
            sequence number and the JSON string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)

            ninjs = {
                '_id': article['_id'],
                'version': str(article.get(config.VERSION, 1)),
                'type': self._get_type(article)
            }

            # The byline is best effort: a failure leaves the key out.
            # Catch Exception rather than using a bare except so that
            # SystemExit / KeyboardInterrupt are not swallowed.
            try:
                ninjs['byline'] = self._get_byline(article)
            except Exception:
                pass

            located = article.get('dateline', {}).get('located', {})
            if located:
                ninjs['located'] = located.get('city', '')

            for copy_property in self.direct_copy_properties:
                if article.get(copy_property) is not None:
                    ninjs[copy_property] = article[copy_property]

            if article.get('body_html'):
                ninjs['body_html'] = self.append_body_footer(article)

            if article.get('description'):
                # NOTE(review): this stores the same append_body_footer()
                # result as 'body_html' above, not the description itself;
                # confirm that this is intended.
                ninjs['description_html'] = self.append_body_footer(article)

            if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                ninjs['associations'] = self._get_associations(article)
            elif article.get('associations', {}):
                ninjs['associations'] = self._format_related(article, subscriber)

            if article.get(EMBARGO):
                ninjs['embargoed'] = article.get(EMBARGO).isoformat()

            # a missing or falsy priority falls back to 5
            if article.get('priority'):
                ninjs['priority'] = article['priority']
            else:
                ninjs['priority'] = 5

            if article.get('subject'):
                ninjs['subject'] = self._get_subject(article)

            if article.get('anpa_category'):
                ninjs['service'] = self._get_service(article)

            if article.get('renditions'):
                ninjs['renditions'] = self._get_renditions(article)

            # the abstract takes precedence over an existing description_text
            if article.get('abstract'):
                ninjs['description_text'] = article.get('abstract')
            elif article.get('description_text'):
                ninjs['description_text'] = article.get('description_text')

            return [(pub_seq_num, json.dumps(ninjs, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.ninjsFormatterError(ex, subscriber)
Example #17
0
    def format(self, article, subscriber, codes=None):
        """
        Serialise the result of ``self._transform_to_ninjs`` as a JSON string.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :param list codes: not referenced by this method
        :return [(int, str)]: single-element list holding the publish
            sequence number and the JSON string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            payload = json.dumps(
                self._transform_to_ninjs(article, subscriber),
                default=json_serialize_datetime_objectId,
            )
            return [(pub_seq_num, payload)]
        except Exception as ex:
            raise FormatterError.ninjsFormatterError(ex, subscriber)
Example #18
0
    def format(self, article, subscriber):
        """
        Serialise the element returned by ``self.get_nitf`` as an XML string.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :return [(int, str)]: single-element list holding the publish
            sequence number and ``XML_ROOT`` followed by the serialised element.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            nitf_element = self.get_nitf(article, subscriber, pub_seq_num)
            xml_body = etree.tostring(nitf_element).decode('utf-8')
            return [(pub_seq_num, self.XML_ROOT + xml_body)]
        except Exception as ex:
            raise FormatterError.nitfFormatterError(ex, subscriber)
Example #19
0
    def format(self, article, subscriber, codes=None):
        """
        Dump the ninjs structure built by ``self._transform_to_ninjs`` to JSON.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :param list codes: not referenced by this method
        :return [(int, str)]: single-element list holding the publish
            sequence number and the JSON string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            sequence_source = superdesk.get_resource_service('subscribers')
            pub_seq_num = sequence_source.generate_sequence_number(subscriber)

            ninjs_doc = self._transform_to_ninjs(article, subscriber)
            serialized = json.dumps(ninjs_doc, default=json_serialize_datetime_objectId)
            return [(pub_seq_num, serialized)]
        except Exception as ex:
            raise FormatterError.ninjsFormatterError(ex, subscriber)
Example #20
0
    def format(self, article, subscriber, codes=None):
        """
        Serialise the element returned by ``self.get_nitf`` as an XML string.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :param list codes: not referenced by this method
        :return [(int, str)]: single-element list holding the publish
            sequence number and ``XML_ROOT`` followed by the serialised element.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            sequence_source = superdesk.get_resource_service('subscribers')
            pub_seq_num = sequence_source.generate_sequence_number(subscriber)

            nitf_bytes = etree.tostring(self.get_nitf(article, subscriber, pub_seq_num))
            return [(pub_seq_num, self.XML_ROOT + nitf_bytes.decode('utf-8'))]
        except Exception as ex:
            raise FormatterError.nitfFormatterError(ex, subscriber)
Example #21
0
    def format(self, article, subscriber):
        """
        Serialise the article as a ninjs JSON string.

        :param dict article: item to format; must contain ``_id`` and the
            item type
        :param dict subscriber: subscriber the item is formatted for
        :return [(int, str)]: single-element list holding the publish
            sequence number and the JSON string.
        :raises FormatterError: if anything fails while formatting
        """
        try:
            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)

            ninjs = {
                '_id': article['_id'],
                'version': str(article.get(config.VERSION, 1)),
                'type': self._get_type(article)
            }

            # The byline is best effort: a failure leaves the key out.
            # Catch Exception rather than using a bare except so that
            # SystemExit / KeyboardInterrupt are not swallowed.
            try:
                ninjs['byline'] = self._get_byline(article)
            except Exception:
                pass

            located = article.get('dateline', {}).get('located', {})
            if located:
                ninjs['located'] = located.get('city', '')

            for copy_property in self.direct_copy_properties:
                if article.get(copy_property) is not None:
                    ninjs[copy_property] = article[copy_property]

            if article.get('body_html'):
                ninjs['body_html'] = self.append_body_footer(article)

            if article.get('description'):
                # NOTE(review): this stores the same append_body_footer()
                # result as 'body_html' above, not the description itself;
                # confirm that this is intended.
                ninjs['description_html'] = self.append_body_footer(article)

            if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                ninjs['associations'] = self._get_associations(article)
            elif article.get('associations', {}):
                ninjs['associations'] = self._format_related(
                    article, subscriber)

            if article.get(EMBARGO):
                ninjs['embargoed'] = article.get(EMBARGO).isoformat()

            # a missing or falsy priority falls back to 5
            if article.get('priority'):
                ninjs['priority'] = article['priority']
            else:
                ninjs['priority'] = 5

            if article.get('subject'):
                ninjs['subject'] = self._get_subject(article)

            if article.get('anpa_category'):
                ninjs['service'] = self._get_service(article)

            if article.get('renditions'):
                ninjs['renditions'] = self._get_renditions(article)

            return [(pub_seq_num,
                     json.dumps(ninjs,
                                default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.ninjsFormatterError(ex, subscriber)
Example #22
0
    def format(self, article, subscriber, codes=None):
        """
        Serialise the element returned by ``self.get_nitf`` as ASCII XML.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :param list codes: not referenced by this method
        :return: single-element list with a dict holding
            ``published_seq_num``, ``formatted_item`` and ``item_encoding``
        :raises FormatterError: if anything fails while formatting
        """
        try:
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            nitf_element = self.get_nitf(article, subscriber, pub_seq_num)
            ascii_xml = etree.tostring(nitf_element, encoding='ascii').decode('ascii')
            return [{
                'published_seq_num': pub_seq_num,
                'formatted_item': ascii_xml,
                'item_encoding': 'ascii',
            }]
        except Exception as ex:
            raise FormatterError.nitfFormatterError(ex, subscriber)
    def format(self, article, subscriber, codes=None):
        """
        Produce the ASCII-encoded NITF output for the article.

        :param dict article: item to format
        :param dict subscriber: subscriber the item is formatted for
        :param list codes: not referenced by this method
        :return: single-element list with a dict holding
            ``published_seq_num``, ``formatted_item`` and ``item_encoding``
        :raises FormatterError: if anything fails while formatting
        """
        try:
            sequence_source = superdesk.get_resource_service('subscribers')
            pub_seq_num = sequence_source.generate_sequence_number(subscriber)

            nitf_bytes = etree.tostring(
                self.get_nitf(article, subscriber, pub_seq_num),
                encoding='ascii',
            )
            result = {
                'published_seq_num': pub_seq_num,
                'formatted_item': nitf_bytes.decode('ascii'),
                'item_encoding': 'ascii',
            }
            return [result]
        except Exception as ex:
            raise FormatterError.nitfFormatterError(ex, subscriber)
Example #24
0
    def format(self, article, destination, selector_codes=None):
        """
        Serialise the element returned by ``self.get_nitf`` as an XML string.

        :param dict article: item to format
        :param destination: passed to the ``output_channels`` service to
            obtain the sequence number, and to ``get_nitf``
        :param selector_codes: not referenced by this method
        :return: tuple of the publish sequence number and ``XML_ROOT``
            followed by the serialised element (a bare tuple, not a list)
        :raises FormatterError: if anything fails while formatting
        """
        try:
            channels_service = superdesk.get_resource_service('output_channels')
            pub_seq_num = channels_service.generate_sequence_number(destination)

            nitf_element = self.get_nitf(article, destination, pub_seq_num)
            xml_body = etree.tostring(nitf_element).decode('utf-8')

            return pub_seq_num, self.XML_ROOT + xml_body
        except Exception as ex:
            raise FormatterError.nitfFormatterError(ex, destination)
    def format(self, article, subscriber):
        """
        Flatten the article body to text and wrap the article in an ODBC item.

        The article dict is updated in place: ``slugline`` and ``body_text``
        are always set; ``place``, ``first_category`` and ``first_subject``
        are set depending on the first category.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: single-element list with a tuple (int, str) of the publish
            sequence number and the ODBC item serialised as a JSON string
        :raises FormatterError: if anything fails while formatting
        """
        def strip_breaks(node):
            # swap each <br> under ``node`` for a space plus its own text
            for line_break in node.find_all('br'):
                line_break.replace_with(' {}'.format(line_break.get_text()))

        try:
            article['slugline'] = self.append_legal(article=article, truncate=True)
            subscribers_service = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers_service.generate_sequence_number(subscriber)

            body_html = self.append_body_footer(article).strip('\r\n')
            soup = BeautifulSoup(body_html, 'html.parser')

            paragraphs = soup.find_all('p')
            if not paragraphs:
                # no paragraphs at all: line breaks are handled document-wide
                strip_breaks(soup)

            for paragraph in paragraphs:
                strip_breaks(paragraph)
                para_text = paragraph.get_text().strip()
                # non-empty paragraphs end with a blank line, empty ones vanish
                paragraph.replace_with('{}\r\n\r\n'.format(para_text) if para_text != '' else '')

            # collapse runs of spaces into a single space
            article['body_text'] = re.sub(' +', ' ', soup.get_text())

            # get the first category and derive the locator
            first_category = next(iter(article.get('anpa_category', [])), None)
            if first_category:
                locator = LocatorMapper().map(article, first_category.get('qcode').upper())
                if locator:
                    article['place'] = [{'qcode': locator, 'name': locator}]

                article['first_category'] = first_category
                article['first_subject'] = set_subject(first_category, article)

            # single quotes are doubled in every text field
            article_json = superdesk.json.dumps(article, default=json_serialize_datetime_objectId)
            odbc_item = {
                'id': article.get(config.ID_FIELD),
                'version': article.get(config.VERSION),
                ITEM_TYPE: article.get(ITEM_TYPE),
                PACKAGE_TYPE: article.get(PACKAGE_TYPE, ''),
                'headline': article.get('headline', '').replace('\'', '\'\''),
                'slugline': article.get('slugline', '').replace('\'', '\'\''),
                'data': article_json.replace('\'', '\'\''),
            }

            return [(pub_seq_num, json.dumps(odbc_item, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
Example #26
0
    def format(self, article, subscriber, codes=None):
        """
        Serialise the article as a pretty printed NITF document.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param list codes: not used by this formatter
        :return: list holding one (publish sequence number, XML string) tuple
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            subscribers = superdesk.get_resource_service('subscribers')
            sequence = subscribers.generate_sequence_number(subscriber)
            document = self.get_nitf(article, subscriber, sequence)

            # serialise and decode with the same encoding
            raw_xml = etree.tostring(document, pretty_print=True, encoding=self.ENCODING)
            return [(sequence, self.XML_ROOT + raw_xml.decode(self.ENCODING))]
        except Exception as ex:
            raise FormatterError.nitfFormatterError(ex, subscriber)
Example #27
0
    def format(self, article, subscriber, codes=None):
        """
        Convert the article to IDML.

        :param dict article: article to be converted
        :param dict subscriber: subscriber receiving the article
        :param list codes: not used by this formatter
        :return: list holding one dict with the sequence number, the IDML
            payload under ``encoded_item`` and an empty ``formatted_item``
        :raises FormatterError: if the sequence number or the conversion fails
        """
        try:
            subscribers = superdesk.get_resource_service("subscribers")
            publish_seq_num = subscribers.generate_sequence_number(subscriber)
            idml_bytes = Converter().create_idml(article)
        except Exception as e:
            raise FormatterError.IDMLFormatterError(e, subscriber)

        formatted = dict(
            published_seq_num=publish_seq_num,
            encoded_item=idml_bytes,
            formatted_item="",
        )
        return [formatted]
    def format_for_source(self, article, subscriber, source, codes=None):
        """
        Build one formatted document per category of the article.

        ``article['source']`` is overwritten with ``source`` as a side effect.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param str source: source stamped on the article and appended to the last take
        :param list codes: selector codes handed on to ``get_odbc_item``
        :return: list of (publish sequence number, JSON string) tuples
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            pass_through = article.get('auto_publish', False)
            docs = []
            categories = self._get_category_list(article.get('anpa_category'))
            for category in categories:
                article['source'] = source
                pub_seq_num, odbc_item = self.get_odbc_item(
                    article, subscriber, category, codes, pass_through)
                is_last_take = self.is_last_take(article)

                preserved = article.get(FORMAT) == FORMATS.PRESERVED
                # the body footer is only appended on the last take
                if is_last_take:
                    html = self.append_body_footer(article)
                else:
                    html = article.get('body_html', '')

                if preserved:  # @article_text
                    text = BeautifulSoup(html, "html.parser").get_text()
                else:
                    text = self.get_text_content(to_ascii(html))
                    inject_dateline = (
                        self.is_first_part(article)
                        and 'dateline' in article
                        and 'text' in article.get('dateline', {})
                        and not pass_through
                        and text.startswith('   ')
                    )
                    if inject_dateline:
                        # the dateline goes right after the three space indent
                        text = '   {} {}'.format(
                            article.get('dateline').get('text'), text[3:])
                odbc_item['article_text'] = text.replace('\'', '\'\'')

                if self.is_first_part(article) and not pass_through:
                    for decorate in (self.add_ednote, self.add_embargo, self.add_byline):
                        decorate(odbc_item, article)

                trailer = '\r\n' + source if is_last_take else '\r\nMORE'
                odbc_item['article_text'] += trailer

                sign_off = article.get('sign_off', '') or ''
                if len(sign_off) > 0:
                    odbc_item['article_text'] += ' ' + sign_off

                for key in ('category', 'selector_codes'):
                    odbc_item[key] = odbc_item.get(key, '').upper()

                docs.append((pub_seq_num, json.dumps(odbc_item)))

            return docs
        except Exception as ex:
            raise FormatterError.AAPNewscentreFormatterError(ex, subscriber)
 def format(self, article, subscriber, codes=None):
     """
     Serialise the article as NITF without ``body.end`` and with a custom prolog.

     :param dict article: article to be formatted
     :param dict subscriber: subscriber receiving the article
     :param list codes: not used by this formatter
     :return: list holding one dict with ``published_seq_num`` and ``formatted_item``
     :raises FormatterError: if any step of the formatting fails
     """
     try:
         sequence = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
         document = self.get_nitf(article, subscriber, sequence)
         strip_elements(document, 'body.end')
         body_xml = etree.tostring(document, encoding='utf-8').decode()

         prolog = "\r\n".join([
             '<?xml version=\"1.0\" encoding=\"UTF-8\"?>',
             '<!-- <!DOCTYPE nitf SYSTEM \"./nitf-3-3.dtd\"> -->',
         ])
         # escaped carriage return + newline pairs become the configured line ender
         payload = '{}\r\n{}'.format(prolog, body_xml).replace('&#13;\n', self.line_ender)

         return [{'published_seq_num': sequence, 'formatted_item': payload}]
     except Exception as ex:
         raise FormatterError.nitfFormatterError(ex, subscriber)
Example #30
0
 def format(self, article, destination):
     """
     Build a NITF document for the article and return it as an XML string.

     :param dict article: article to format; ``body_html`` and ``guid`` are read
         with item access, so both keys must be present
     :param destination: destination handed to the error factory on failure
     :return: ``XML_ROOT`` followed by the serialised NITF document
     :raises FormatterError: if any step of the formatting fails
     """
     try:
         nitf = etree.Element("nitf")
         head = SubElement(nitf, "head")
         body = SubElement(nitf, "body")
         body_head = SubElement(body, "body.head")
         body_content = SubElement(body, "body.content")
         body_content.text = article['body_html']
         body_end = SubElement(body, "body.end")
         # NOTE(review): this element is never attached to the tree, so it does
         # not reach the output; kept because a missing ``guid`` still fails the
         # formatting here - confirm whether it was meant to be added to ``head``
         etree.Element('doc-id', attrib={'id-string': article['guid']})
         self.__format_head(article, head)
         self.__format_body_head(article, body_head)
         self.__format_body_end(article, body_end)
         # tostring() yields bytes; decode instead of str() so the payload is
         # the XML text rather than the repr of a bytes object ("b'...'")
         return self.XML_ROOT + etree.tostring(nitf).decode('utf-8')
     except Exception as ex:
         raise FormatterError.nitfFormatterError(ex, destination)
    def format_for_source(self, article, subscriber, source, codes=None):
        """
        Build the IPNews InsertNews stored procedure parameters, one set per category.

        ``article['source']`` is overwritten with ``source`` as a side effect.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param str source: source stamped on the article and appended to the text
        :param list codes: selector codes handed on to ``get_odbc_item``
        :return: list of (publish sequence number, JSON string) tuples
        :raises FormatterError: if any step of the formatting fails
        """
        pass_through = article.get('auto_publish', False)
        try:
            docs = []
            for category in self._get_category_list(article.get('anpa_category')):
                # every document carries the source that was asked for
                article['source'] = source
                pub_seq_num, odbc_item = self.get_odbc_item(article, subscriber, category, codes, pass_through)

                if article.get(FORMAT) == FORMATS.PRESERVED:  # @article_text
                    plain = get_text(self.append_body_footer(article))
                    odbc_item['article_text'] = plain.replace('\'', '\'\'')
                    odbc_item['texttab'] = 't'
                elif article.get(FORMAT, FORMATS.HTML) == FORMATS.HTML:
                    ascii_html = to_ascii(self.append_body_footer(article))
                    wrapped = self.get_wrapped_text_content(ascii_html).replace('\'', '\'\'')

                    has_dateline = 'dateline' in article and 'text' in article.get('dateline', {})
                    if has_dateline and not pass_through and wrapped.startswith('   '):
                        # the dateline goes right after the three space indent
                        dateline_text = article.get('dateline').get('text').replace('\'', '\'\'')
                        wrapped = '   {} {}'.format(dateline_text, wrapped[3:])

                    odbc_item['article_text'] = wrapped
                    odbc_item['texttab'] = 'x'

                if not pass_through:
                    self.add_ednote(odbc_item, article)
                    self.add_byline(odbc_item, article)

                odbc_item['article_text'] += '\r\n' + article.get('source', '')
                sign_off = article.get('sign_off', '') or ''
                if len(sign_off) > 0:
                    odbc_item['article_text'] += ' ' + sign_off

                odbc_item['service_level'] = get_service_level(category, article)  # @service_level
                odbc_item['wordcount'] = article.get('word_count') or 0   # @wordcount
                odbc_item['priority'] = map_priority(article.get('priority'))  # @priority

                docs.append((pub_seq_num, json.dumps(odbc_item)))
            return docs
        except Exception as ex:
            raise FormatterError.AAPIpNewsFormatterError(ex, subscriber)
Example #32
0
    def format(self, article, subscriber):
        """
        Serialise the article as a NewsML 1.2 document.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: list holding one (publish sequence number, XML string) tuple
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            sequence = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)

            root = etree.Element("NewsML")
            SubElement(root, "Catalog", {'Href': 'http://www.aap.com.au/xml-res/aap-master-catalog.xml'})
            envelope = SubElement(root, "NewsEnvelope")
            item = SubElement(root, "NewsItem")

            self._format_news_envelope(article, envelope, sequence)
            # the remaining sections all hang off the NewsItem element
            item_sections = (
                self._format_identification,
                self._format_news_management,
                self._format_news_component,
            )
            for build_section in item_sections:
                build_section(article, item)

            xml_text = etree.tostring(root).decode('utf-8')
            return [(sequence, self.XML_ROOT + xml_text)]
        except Exception as ex:
            raise FormatterError.newml12FormatterError(ex, subscriber)
    def format(self, article, destination, selector_codes=None):
        """
        Serialise the article as a NewsML G2 news message for an output channel.

        A news item is only added for ``text`` and ``preformatted`` articles.

        :param dict article: article to be formatted
        :param dict destination: output channel the sequence number is generated for
        :param selector_codes: not used by this formatter
        :return: tuple of publish sequence number and the XML string
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            channels = superdesk.get_resource_service('output_channels')
            sequence = channels.generate_sequence_number(destination)

            # the NITF body is always generated, even when no news item is added
            nitf = NITFFormatter().get_nitf(article, destination, sequence)

            self._message_attrib.update(self._debug_message_extra)
            message = etree.Element('newsMessage', attrib=self._message_attrib)
            self._format_header(article, message, sequence)
            item_set = self._format_item(message)
            if article['type'] in ('text', 'preformatted'):
                self._format_newsitem(article, item_set, nitf)

            xml_text = etree.tostring(message).decode('utf-8')
            return sequence, self.XML_ROOT + xml_text
        except Exception as ex:
            raise FormatterError.newmsmlG2FormatterError(ex, destination)
Example #34
0
 def format(self, article, subscriber, codes=None):
     """
     Render the e-mail subject, text body and (for HTML articles) HTML body.

     The article is deep copied first, so injecting the dateline does not
     touch the caller's article.

     :param dict article: article to be formatted
     :param dict subscriber: subscriber receiving the article
     :param list codes: not used by this formatter
     :return: list holding one (publish sequence number, JSON string) tuple
     :raises FormatterError: if rendering any of the templates fails
     """
     formatted_article = deepcopy(article)
     pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
     doc = {}
     try:
         if formatted_article.get(FORMAT) == FORMATS.HTML:
             if formatted_article.get('dateline', {}).get('text'):
                 # If there is a dateline inject it into the body
                 self._inject_dateline(formatted_article)
             doc['message_html'] = render_template('email_article_body.html', article=formatted_article)
         else:
             doc['message_html'] = None
         doc['message_text'] = render_template('email_article_body.txt', article=formatted_article)
         doc['message_subject'] = render_template('email_article_subject.txt', article=formatted_article)
     except Exception as ex:
         # report the failing subscriber; the FormatterError class itself was
         # being passed as the destination here
         raise FormatterError.EmailFormatterError(ex, subscriber)
     return [(pub_seq_num, json.dumps(doc))]
    def format(self, article, subscriber):
        """
        Add a plain text ``body_text`` to the article and serialise it as JSON.

        The article is modified in place (``body_text`` is set).

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: list holding one (publish sequence number, JSON string) tuple
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            pub_seq_num = superdesk.get_resource_service("subscribers").generate_sequence_number(subscriber)
            body_html = article.get("body_html", "").strip("\r\n")
            # name the parser explicitly: without it BeautifulSoup picks whichever
            # parser is installed, so the extracted text can differ between hosts
            soup = BeautifulSoup(body_html, "html.parser")
            for br in soup.find_all("br"):
                # remove the <br> tag
                br.replace_with(" {}".format(br.get_text()))

            for p in soup.find_all("p"):
                # replace <p> tag with two carriage return
                p.replace_with("{}\r\n\r\n".format(p.get_text()))

            article["body_text"] = soup.get_text()

            return [(pub_seq_num, superdesk.json.dumps(article, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
    def format(self, article, destination, selector_codes=None):
        """
        Serialise the article as a NewsML 1.2 document for an output channel.

        :param dict article: article to be formatted
        :param dict destination: output channel the sequence number is generated for
        :param selector_codes: not used by this formatter
        :return: tuple of publish sequence number and the XML string
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            channels = superdesk.get_resource_service("output_channels")
            sequence = channels.generate_sequence_number(destination)

            document = etree.Element("NewsML")
            catalog_attrs = {"Href": "http://www.aap.com.au/xml-res/aap-master-catalog.xml"}
            SubElement(document, "Catalog", catalog_attrs)
            envelope = SubElement(document, "NewsEnvelope")
            item = SubElement(document, "NewsItem")

            # the envelope needs the sequence number, the item sections do not
            self._format_news_envelope(article, envelope, sequence)
            self._format_identification(article, item)
            self._format_news_management(article, item)
            self._format_news_component(article, item)

            serialised = etree.tostring(document).decode("utf-8")
            return sequence, self.XML_ROOT + serialised
        except Exception as ex:
            raise FormatterError.newml12FormatterError(ex, destination)
Example #37
0
    def format(self, article, subscriber, codes=None):
        """
        Serialise the article as ninjs, tagging it with the published id.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param list codes: not used by this formatter
        :return: list holding one (publish sequence number, JSON string) tuple
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            subscribers = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers.generate_sequence_number(subscriber)

            merged = self._merge_versions(article)
            ninjs = self._transform_to_ninjs(merged, subscriber)
            ninjs['extra'] = dict(published_id=article.get('_id'))

            # Auto-published ingested items carry the ingest id as guid, which for
            # file feeds is the path of the ingested file. [STTNHUB-58]
            # (https://github.com/superdesk/superdesk-core/pull/1579)
            # Such a guid is swapped back to the family id of the item.
            current_guid = ninjs.get(GUID_FIELD) or ''
            if current_guid.startswith('/mnt/'):
                ninjs[GUID_FIELD] = article.get(FAMILY_ID)

            payload = json.dumps(ninjs, default=json_serialize_datetime_objectId)
            return [(pub_seq_num, payload)]
        except Exception as ex:
            raise FormatterError.ninjsFormatterError(ex, subscriber)
    def format(self, article, subscriber, codes=None):
        """
        Serialise the article as ninjs, restoring picture description text.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param list codes: not used by this formatter
        :return: list holding one (publish sequence number, JSON string) tuple
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)

            ninjs = self._transform_to_ninjs(article, subscriber)

            # if the article has an abstract then the description text has been over written by the abstract;
            # if it is a picture then put it back
            if article.get('abstract') and article.get('type') == 'picture':
                ninjs['description_text'] = article.get('description_text', '')

            # Same restoration for the feature media. The target has to be the
            # ninjs copy: it used to be looked up on ``article`` again, which only
            # wrote the article's own value back onto itself.
            media = (article.get('associations') or {}).get('featuremedia')
            ninjs_media = (ninjs.get('associations') or {}).get('featuremedia')
            if media and ninjs_media and media.get('type') == 'picture':
                ninjs_media['description_text'] = media.get('description_text')

            return [(pub_seq_num, json.dumps(ninjs, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.ninjsFormatterError(ex, subscriber)
Example #39
0
    def format(self, article, destination, selector_codes=None):
        """
        Build a ninjs document from the article for an output channel.

        :param dict article: article to format; ``_id``, ``version`` and ``type``
            are read with item access, so these keys must be present
        :param dict destination: output channel the sequence number is generated for
        :param selector_codes: not used by this formatter
        :return: tuple of publish sequence number and the ninjs JSON string
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            pub_seq_num = superdesk.get_resource_service('output_channels').generate_sequence_number(destination)

            ninjs = {}
            ninjs['_id'] = article['_id']
            ninjs['version'] = str(article['version'])
            ninjs['type'] = self._get_type(article)
            try:
                ninjs['byline'] = self._get_byline(article)
            except Exception:
                # the byline is best effort: leave it out when it cannot be derived
                pass
            for copy_property in self.direct_copy_properties:
                if copy_property in article:
                    ninjs[copy_property] = article[copy_property]
            if article['type'] == 'composite':
                # store the associations on the output document; they used to be
                # written back onto ``article`` and never reached the ninjs
                ninjs['associations'] = self._get_associations(article)

            return pub_seq_num, json.dumps(ninjs, default=json_util.default)
        except Exception as ex:
            raise FormatterError.ninjsFormatterError(ex, destination)
Example #40
0
    def format(self, article, subscriber, codes=None):
        """
        Build the parameters passed to the SMS InsertAlerts stored procedure.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param list codes: not used by this formatter
        :return: list holding one (publish sequence number, JSON string) tuple
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            subscribers = superdesk.get_resource_service('subscribers')
            pub_seq_num = subscribers.generate_sequence_number(subscriber)

            # the abstract stands in when the article has no sms_message key
            raw_message = article.get('sms_message', article.get('abstract', ''))
            sms_message = raw_message.replace('\'', '\'\'')

            # category = 1 is used to indicate a test message
            if superdesk.app.config.get('TEST_SMS_OUTPUT', True) is True:
                category = '1'
            else:
                category = article.get('anpa_category', [{}])[0].get('qcode').upper()

            odbc_item = {
                'Sequence': pub_seq_num,
                'Category': category,
                'Headline': BeautifulSoup(sms_message, 'html.parser').text,
                'Priority': map_priority(article.get('priority'))
            }

            story = self.append_body_footer(article)
            if article.get(EMBARGO):
                embargo_note = 'Embargo Content. Timestamp: ' + get_utc_schedule(article, EMBARGO).isoformat()
                story = embargo_note + story

            if article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                # text items are reduced to their text content
                story = BeautifulSoup(story, "html.parser").text

            odbc_item['StoryText'] = story.replace('\'', '\'\'')  # @article_text
            odbc_item['ident'] = '0'

            return [(pub_seq_num, json.dumps(odbc_item))]
        except Exception as ex:
            raise FormatterError.AAPSMSFormatterError(ex, subscriber)
    def format(self, article, subscriber):
        """
        Serialise the article as a NewsML G2 news message.

        Packages become a ``packageItem`` with a group set, pictures a
        ``newsItem`` with a content set, and everything else a ``newsItem``
        carrying the NITF rendering of the article.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: list holding one (publish sequence number, XML string) tuple
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            sequence = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
            package = self._is_package(article)
            self._message_attrib.update(self._debug_message_extra)
            message = etree.Element('newsMessage', attrib=self._message_attrib)
            self._format_header(article, message, sequence)
            item_set = self._format_item(message)

            # pick the item tag and the filler for the two non-NITF cases
            if package:
                tag, fill = 'packageItem', self._format_groupset
            elif article[ITEM_TYPE] == CONTENT_TYPE.PICTURE:
                tag, fill = 'newsItem', self._format_contentset
            else:
                tag, fill = None, None

            if fill is not None:
                fill(article, self._format_item_set(article, item_set, tag))
            else:
                nitf = NITFFormatter().get_nitf(article, subscriber, sequence)
                news_item = self._format_item_set(article, item_set, 'newsItem')
                self._format_content(article, news_item, nitf)

            xml_text = etree.tostring(message).decode('utf-8')
            return [(sequence, self.XML_ROOT + xml_text)]
        except Exception as ex:
            raise FormatterError.newmsmlG2FormatterError(ex, subscriber)
Example #42
0
    def format(self, article, destination, selector_codes=None):
        """
        Build a NITF document for the article for an output channel.

        :param dict article: article to format; ``body_html`` and ``guid`` are read
            with item access, so both keys must be present
        :param dict destination: output channel the sequence number is generated for
        :param selector_codes: not used by this formatter
        :return: tuple of publish sequence number and the NITF XML string
        :raises FormatterError: if any step of the formatting fails
        """
        try:

            pub_seq_num = superdesk.get_resource_service('output_channels').generate_sequence_number(destination)

            nitf = etree.Element("nitf")
            head = SubElement(nitf, "head")
            body = SubElement(nitf, "body")
            body_head = SubElement(body, "body.head")
            body_content = SubElement(body, "body.content")
            body_content.text = article['body_html']
            body_end = SubElement(body, "body.end")

            # NOTE(review): this element is never attached to the tree, so it does
            # not reach the output; kept because a missing ``guid`` still fails the
            # formatting here - confirm whether it was meant to be added to ``head``
            etree.Element('doc-id', attrib={'id-string': article['guid']})

            self.__append_meta(article, head, destination, pub_seq_num)
            self.__format_head(article, head)
            self.__format_body_head(article, body_head)
            self.__format_body_end(article, body_end)

            # tostring() yields bytes; decode instead of str() so the payload is
            # the XML text rather than the repr of a bytes object ("b'...'")
            return pub_seq_num, self.XML_ROOT + etree.tostring(nitf).decode('utf-8')
        except Exception as ex:
            raise FormatterError.nitfFormatterError(ex, destination)
Example #43
0
    def format(self, article, destination):
        """
        Build a NITF document for the article.

        :param dict article: article to format; ``body_html`` and ``guid`` are read
            with item access, so both keys must be present
        :param destination: destination the sequence number is generated for
        :return: tuple of publish sequence number and the NITF XML string
        :raises FormatterError: if any step of the formatting fails
        """
        try:

            pub_seq_num = self.generate_sequence_number(destination)

            nitf = etree.Element("nitf")
            head = SubElement(nitf, "head")
            body = SubElement(nitf, "body")
            body_head = SubElement(body, "body.head")
            body_content = SubElement(body, "body.content")
            body_content.text = article['body_html']
            body_end = SubElement(body, "body.end")

            # NOTE(review): this element is never attached to the tree, so it does
            # not reach the output; kept because a missing ``guid`` still fails the
            # formatting here - confirm whether it was meant to be added to ``head``
            etree.Element('doc-id', attrib={'id-string': article['guid']})

            self.__append_meta(article, head, destination, pub_seq_num)
            self.__format_head(article, head)
            self.__format_body_head(article, body_head)
            self.__format_body_end(article, body_end)

            # tostring() yields bytes; decode instead of str() so the payload is
            # the XML text rather than the repr of a bytes object ("b'...'")
            return pub_seq_num, self.XML_ROOT + etree.tostring(nitf).decode('utf-8')
        except Exception as ex:
            raise FormatterError.nitfFormatterError(ex, destination)
Example #44
0
 def format(self, article, subscriber, codes=None):
     """
     Render the e-mail subject and bodies and attach the feature media renditions.

     The article is deep copied first, so injecting the dateline does not
     touch the caller's article.

     :param dict article: article to be formatted
     :param dict subscriber: subscriber receiving the article
     :param list codes: not used by this formatter
     :return: list holding one (publish sequence number, JSON string) tuple
     :raises FormatterError: if building the document fails
     """
     formatted_article = deepcopy(article)
     pub_seq_num = superdesk.get_resource_service(
         "subscribers").generate_sequence_number(subscriber)
     doc = {}
     try:
         if formatted_article.get(FORMAT) == FORMATS.HTML:
             if formatted_article.get("dateline", {}).get("text"):
                 # If there is a dateline inject it into the body
                 self._inject_dateline(formatted_article)
             doc["message_html"] = render_template(
                 "email_article_body.html", article=formatted_article)
         else:
             doc["message_html"] = None
         doc["message_text"] = render_template("email_article_body.txt",
                                               article=formatted_article)
         doc["message_subject"] = render_template(
             "email_article_subject.txt", article=formatted_article)
         # associations and featuremedia may each be missing or None
         associations = formatted_article.get("associations", {}) or {}
         feature_media = associations.get("featuremedia", {}) or {}
         doc["renditions"] = feature_media.get("renditions")
     except Exception as ex:
         # report the failing subscriber; the FormatterError class itself was
         # being passed as the destination here
         raise FormatterError.EmailFormatterError(ex, subscriber)
     return [(pub_seq_num, json.dumps(doc))]
    def format(self, article, subscriber, codes=None):
        """
        Serialise the article as ninjs, restoring picture description text.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param list codes: not used by this formatter
        :return: list holding one (publish sequence number, JSON string) tuple
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)

            ninjs = self._transform_to_ninjs(article, subscriber)

            # if the article has an abstract then the description text has been over written by the abstract;
            # if it is a picture then put it back
            if article.get('abstract') and article.get('type') == 'picture':
                ninjs['description_text'] = article.get(
                    'description_text', '')

            # Same restoration for the feature media. The target has to be the
            # ninjs copy: it used to be looked up on ``article`` again, which only
            # wrote the article's own value back onto itself.
            media = (article.get('associations') or {}).get('featuremedia')
            ninjs_media = (ninjs.get('associations') or {}).get('featuremedia')
            if media and ninjs_media and media.get('type') == 'picture':
                ninjs_media['description_text'] = media.get('description_text')

            return [(pub_seq_num,
                     json.dumps(ninjs,
                                default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.ninjsFormatterError(ex, subscriber)
    def format(self, article, subscriber, codes=None):
        """
        Formats the article as required by the subscriber.

        Works on a deep copy of the article, so the caller's article is left
        untouched.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param list codes: selector codes (not read by this method)
        :return: list containing a single tuple (int, str) of publish sequence of the subscriber
            and the formatted article as a JSON string
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            formatted_article = deepcopy(article)

            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
            body_html = to_ascii(self.append_body_footer(formatted_article)).strip('\r\n')

            # get the desk name
            desk_name = superdesk.get_resource_service('desks').\
                get_desk_name(formatted_article.get('task', {}).get('desk'))

            # force the content to source 'NZN' if the desk name contains 'new zealand'
            if 'new zealand' in desk_name.lower().strip():
                formatted_article['source'] = 'NZN'

            # this is temporary fix for bulletin builder formatter
            if formatted_article.get(ITEM_STATE, '') == CONTENT_STATE.SCHEDULED:
                formatted_article['versioncreated'] = utcnow()

            # body, abstract, headline and byline all go through to_ascii and get_text_content
            formatted_article['body_text'] = self.get_text_content(body_html)
            formatted_article['abstract'] = self.get_text_content(
                to_ascii(formatted_article.get('abstract', '') or '')).strip()
            formatted_article['headline'] = self.get_text_content(
                to_ascii(formatted_article.get('headline', ''))).strip()
            formatted_article['byline'] = self.get_text_content(
                to_ascii(formatted_article.get('byline', '') or '')).strip()

            # with more than one category, the ones with qcode 'c' are dropped
            if len(formatted_article.get('anpa_category') or []) > 1:
                formatted_article['anpa_category'] = [cat for cat in (formatted_article.get('anpa_category') or [])
                                                      if cat.get('qcode') != 'c']

            self._handle_auto_publish(formatted_article)

            # get the first category and derive the locator
            category = next((iter((formatted_article.get('anpa_category') or []))), None)

            if category:
                locator = LocatorMapper().map(formatted_article, category.get('qcode').upper())
                if locator:
                    formatted_article['place'] = [{'qcode': locator, 'name': locator}]

                formatted_article['first_category'] = category
                formatted_article['first_subject'] = set_subject(category, formatted_article)
                # the slugline is only truncated when the article is not auto published
                formatted_article['slugline'] = self.get_text_content(
                    to_ascii(SluglineMapper().map(article=formatted_article,
                                                  category=category.get('qcode').upper(),
                                                  truncate=(not formatted_article.get('auto_publish')))).strip())

            self.format_associated_item(formatted_article)

            # single quotes are doubled in every string value of the payload
            odbc_item = {
                'id': formatted_article.get(config.ID_FIELD),
                'version': formatted_article.get(config.VERSION),
                ITEM_TYPE: formatted_article.get(ITEM_TYPE),
                PACKAGE_TYPE: formatted_article.get(PACKAGE_TYPE, ''),
                'headline': formatted_article.get('headline', '').replace('\'', '\'\''),
                'slugline': formatted_article.get('slugline', '').replace('\'', '\'\''),
                'data': superdesk.json.dumps(formatted_article,
                                             default=json_serialize_datetime_objectId).replace('\'', '\'\'')
            }

            return [(pub_seq_num, json.dumps(odbc_item, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
    def format(self, article, subscriber):
        """
        Build one ANPA wire message per category of the article.

        Each message is assembled as a list of byte strings and joined at the
        end. A separate publish sequence number is generated per category.

        :param dict article: article to be formatted; ``anpa_category`` must be
            iterable and ``_updated`` must support ``strftime``
        :param dict subscriber: subscriber receiving the article
        :return: list of (publish sequence number, message bytes) tuples
        :raises FormatterError: if any step of the formatting fails
        """
        try:
            docs = []
            for category in article.get('anpa_category'):
                pub_seq_num = superdesk.get_resource_service(
                    'subscribers').generate_sequence_number(subscriber)
                anpa = []

                # selector codes are only injected for those subscribers that are defined
                # in the mapper
                selectors = dict()
                SelectorcodeMapper().map(article,
                                         category.get('qcode').upper(),
                                         subscriber=subscriber,
                                         formatted_item=selectors)
                if 'selector_codes' in selectors and selectors[
                        'selector_codes']:
                    anpa.append(b'\x05')
                    anpa.append(selectors['selector_codes'].encode('ascii'))
                    anpa.append(b'\x0D\x0A')

                # start of message header (syn syn soh)
                anpa.append(b'\x16\x16\x01')
                anpa.append(
                    article.get('service_level', 'a').lower().encode('ascii'))

                # story number
                anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

                # field separator
                anpa.append(b'\x0A')  # -LF
                anpa.append(
                    map_priority(article.get('priority')).encode('ascii'))
                anpa.append(b'\x20')

                anpa.append(category['qcode'].encode('ascii'))

                anpa.append(b'\x13')
                # format identifier
                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                    anpa.append(b'\x12')
                else:
                    anpa.append(b'\x11')
                anpa.append(b'\x20')

                # keyword
                keyword = 'bc-{}'.format(
                    self.append_legal(article=article,
                                      truncate=True)).replace(' ', '-')
                keyword = keyword[:24] if len(keyword) > 24 else keyword
                anpa.append(keyword.encode('ascii'))
                anpa.append(b'\x20')

                # version field
                anpa.append(b'\x20')

                # reference field
                anpa.append(b'\x20')

                # filing date
                anpa.append('{}-{}'.format(
                    article['_updated'].strftime('%m'),
                    article['_updated'].strftime('%d')).encode('ascii'))
                anpa.append(b'\x20')

                # add the word count
                anpa.append(
                    str(article.get('word_count',
                                    '0000')).zfill(4).encode('ascii'))
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x02')  # STX

                self._process_headline(anpa, article,
                                       category['qcode'].encode('ascii'))

                keyword = self.append_legal(article=article,
                                            truncate=True).encode(
                                                'ascii', 'ignore')
                anpa.append(keyword)
                # a take key stored as None is treated like a missing one
                take_key = (article.get('anpa_take_key', '') or '').encode(
                    'ascii', 'ignore')
                anpa.append((b'\x20' + take_key) if len(take_key) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                if BYLINE in article:
                    # a byline stored as None still yields the (empty) byline line
                    anpa.append(
                        (article.get(BYLINE) or '').encode('ascii', 'ignore'))
                    anpa.append(b'\x0D\x0A')

                if article.get('dateline', {}).get('text'):
                    anpa.append(
                        article.get('dateline').get('text').encode(
                            'ascii', 'ignore'))

                body = self.append_body_footer(article)
                if article.get(EMBARGO):
                    embargo = '{}{}'.format(
                        'Embargo Content. Timestamp: ',
                        get_utc_schedule(article, EMBARGO).isoformat())
                    body = embargo + body

                # preformatted bodies are sent as they are, anything else is
                # reduced to its text content first
                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                    anpa.append(body.encode('ascii', 'replace'))
                else:
                    anpa.append(
                        BeautifulSoup(body, "html.parser").text.encode(
                            'ascii', 'replace'))

                anpa.append(b'\x0D\x0A')
                if article.get('more_coming', False):
                    anpa.append('MORE'.encode('ascii'))
                else:
                    anpa.append(article.get('source', '').encode('ascii'))
                # a sign off stored as None is treated like a missing one
                sign_off = (article.get('sign_off', '') or '').encode('ascii')
                anpa.append((b'\x20' + sign_off) if len(sign_off) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x03')  # ETX

                # time and date
                anpa.append(datetime.datetime.now().strftime(
                    '%d-%m-%y %H-%M-%S').encode('ascii'))

                anpa.append(b'\x04')  # EOT
                anpa.append(
                    b'\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A'
                )

                docs.append((pub_seq_num, b''.join(anpa)))

            return docs
        except Exception as ex:
            raise FormatterError.AnpaFormatterError(ex, subscriber)
Example #48
0
    def format(self, article, subscriber, codes=None):
        """
        Create the article in ANPA wire format.

        The article is deep-copied before any processing, so the dict passed in
        by the caller is left untouched. One message is produced for each entry
        returned by ``self._get_category_list`` and each message consumes its
        own publish sequence number.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param list codes: selector codes; when given they are written, space
            separated, in front of the message header
        :return [dict]: one dict per message with the keys
            ``published_seq_num``, ``encoded_item`` (bytes) and
            ``formatted_item`` (the same bytes decoded as ascii)
        :raises FormatterError: if the formatter fails to format the article
        """
        try:
            docs = []
            formatted_article = deepcopy(article)
            for category in self._get_category_list(
                    formatted_article.get('anpa_category')):
                mapped_source = self._get_mapped_source(formatted_article)
                formatted_article[config.ID_FIELD] = formatted_article.get(
                    'item_id', formatted_article.get(config.ID_FIELD))
                pub_seq_num = superdesk.get_resource_service(
                    'subscribers').generate_sequence_number(subscriber)
                anpa = []

                if codes:
                    anpa.append(b'\x05')
                    anpa.append(' '.join(codes).encode('ascii'))
                    anpa.append(b'\x0D\x0A')

                # start of message header (syn syn soh)
                anpa.append(b'\x16\x16\x01')
                anpa.append(
                    get_service_level(category,
                                      formatted_article).encode('ascii'))

                # story number
                anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

                # field separator
                anpa.append(b'\x0A')  # -LF
                anpa.append(
                    map_priority(
                        formatted_article.get('priority')).encode('ascii'))
                anpa.append(b'\x20')

                anpa.append(category['qcode'].lower().encode('ascii'))

                anpa.append(b'\x13')
                # format identifier
                if formatted_article.get(FORMAT,
                                         FORMATS.HTML) == FORMATS.PRESERVED:
                    anpa.append(b'\x12')
                else:
                    anpa.append(b'\x11')
                anpa.append(b'\x20')

                # keyword, limited to 24 characters
                keyword = 'bc-{}'.format(
                    self.append_legal(article=formatted_article,
                                      truncate=True)).replace(' ', '-')
                anpa.append(keyword[:24].encode('ascii'))
                anpa.append(b'\x20')

                # version field
                anpa.append(b'\x20')

                # reference field
                anpa.append(b'\x20')

                # filing date (month-day of the last update)
                anpa.append(
                    formatted_article['_updated'].strftime('%m-%d').encode(
                        'ascii'))
                anpa.append(b'\x20')

                # add the word count
                anpa.append(
                    str(formatted_article.get(
                        'word_count', '0000')).zfill(4).encode('ascii'))
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x02')  # STX

                self._process_headline(anpa, formatted_article,
                                       category['qcode'].encode('ascii'))

                keyword = SluglineMapper().map(
                    article=formatted_article,
                    category=category['qcode'].upper(),
                    truncate=True).encode('ascii', 'ignore')
                anpa.append(keyword)
                take_key = (formatted_article.get('anpa_take_key', '')
                            or '').encode('ascii', 'ignore')
                anpa.append((b'\x20' + take_key) if len(take_key) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                # a missing, empty or None ednote is skipped (same tolerance
                # as the take key and sign off fields)
                ednote = formatted_article.get('ednote')
                if ednote:
                    anpa.append('{}\r\n'.format(to_ascii(ednote)).encode(
                        'ascii', 'replace'))

                if formatted_article.get(BYLINE):
                    anpa.append(
                        get_text(formatted_article.get(BYLINE)).encode(
                            'ascii', 'replace'))
                    anpa.append(b'\x0D\x0A')

                if formatted_article.get(FORMAT) == FORMATS.PRESERVED:
                    anpa.append(
                        get_text(self.append_body_footer(formatted_article),
                                 content='html').encode('ascii', 'replace'))
                else:
                    body = to_ascii(formatted_article.get('body_html', ''))
                    # we need to inject the dateline into the first paragraph,
                    # except for auto published content; a None dateline is
                    # treated like a missing one
                    dateline_text = (formatted_article.get('dateline')
                                     or {}).get('text')
                    if dateline_text and not formatted_article.get(
                            'auto_publish', False):
                        body_html_elem = parse_html(
                            formatted_article.get('body_html'))
                        ptag = body_html_elem.find('.//p')
                        if ptag is not None:
                            ptag.text = dateline_text + ' ' + (ptag.text or '')
                            body = to_string(body_html_elem)
                    anpa.append(self.get_text_content(body))
                    if formatted_article.get('body_footer'):
                        anpa.append(
                            self.get_text_content(
                                to_ascii(
                                    formatted_article.get('body_footer', ''))))

                anpa.append(b'\x0D\x0A')
                anpa.append(mapped_source.encode('ascii'))
                sign_off = (formatted_article.get('sign_off', '')
                            or '').encode('ascii')
                anpa.append((b'\x20' + sign_off) if len(sign_off) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x03')  # ETX

                # time and date
                anpa.append(datetime.datetime.now().strftime(
                    '%d-%m-%y %H-%M-%S').encode('ascii'))

                anpa.append(b'\x04')  # EOT
                anpa.append(b'\x0D\x0A' * 8)

                # join once and reuse the bytes for both representations
                encoded_item = b''.join(anpa)
                docs.append({
                    'published_seq_num': pub_seq_num,
                    'encoded_item': encoded_item,
                    'formatted_item': encoded_item.decode('ascii')
                })

            return docs
        except Exception as ex:
            raise FormatterError.AnpaFormatterError(ex, subscriber)
    def format(self, article, subscriber):
        """
        Formats the article as required by the subscriber

        The article is deep-copied first and all derived fields (``slugline``,
        ``body_text``, ``place``, ``first_category``, ``first_subject``) are
        written to the copy, so the caller's dict is not modified and
        formatting the same article again starts from the original values.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return [(int, str)]: a list holding one tuple of the subscriber's
            publish sequence number and the formatted article as a JSON string
        :raises FormatterError: if the formatter fails to format the article
        """
        from copy import deepcopy

        try:
            formatted_article = deepcopy(article)

            formatted_article['slugline'] = self.append_legal(
                article=formatted_article, truncate=True)
            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)
            body_html = self.append_body_footer(formatted_article).strip(
                '\r\n')
            soup = BeautifulSoup(body_html, 'html.parser')

            paragraphs = soup.find_all('p')
            if not paragraphs:
                # no paragraphs at all: turn every <br> into a space
                for br in soup.find_all('br'):
                    br.replace_with(' {}'.format(br.get_text()))

            for p in paragraphs:
                # remove the <br> tags inside the paragraph
                for br in p.find_all('br'):
                    br.replace_with(' {}'.format(br.get_text()))

                # replace the <p> tag with its text followed by two line
                # breaks; empty paragraphs are dropped
                para_text = p.get_text().strip()
                if para_text != '':
                    p.replace_with('{}\r\n\r\n'.format(para_text))
                else:
                    p.replace_with('')

            # collapse runs of spaces into a single space
            formatted_article['body_text'] = re.sub(' +', ' ', soup.get_text())

            # get the first category and derive the locator
            category = next(
                iter(formatted_article.get('anpa_category') or []), None)
            if category:
                locator = LocatorMapper().map(formatted_article,
                                              category.get('qcode').upper())
                if locator:
                    formatted_article['place'] = [{
                        'qcode': locator,
                        'name': locator
                    }]

                formatted_article['first_category'] = category
                formatted_article['first_subject'] = set_subject(
                    category, formatted_article)

            # single quotes are doubled in the text fields
            odbc_item = {
                'id':
                formatted_article.get(config.ID_FIELD),
                'version':
                formatted_article.get(config.VERSION),
                ITEM_TYPE:
                formatted_article.get(ITEM_TYPE),
                PACKAGE_TYPE:
                formatted_article.get(PACKAGE_TYPE, ''),
                'headline':
                (formatted_article.get('headline') or '').replace(
                    '\'', '\'\''),
                'slugline':
                (formatted_article.get('slugline') or '').replace(
                    '\'', '\'\''),
                'data':
                superdesk.json.dumps(
                    formatted_article,
                    default=json_serialize_datetime_objectId).replace(
                        '\'', '\'\'')
            }

            return [(pub_seq_num,
                     json.dumps(odbc_item,
                                default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
    def format(self, article, subscriber, codes=None):
        """
        Formats the article as required by the subscriber

        All changes are made on a deep copy of the article; the caller's dict
        is not modified.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param list codes: selector codes (not used by this formatter)
        :return [(int, str)]: a list holding one tuple of the subscriber's
            publish sequence number and the formatted article as a JSON string
        :raises FormatterError: if the formatter fails to format the article
        """
        try:
            formatted_article = deepcopy(article)

            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)
            body_html = to_ascii(
                self.append_body_footer(formatted_article)).strip('\r\n')

            # get the desk name
            desk_name = superdesk.get_resource_service('desks').\
                get_desk_name(formatted_article.get('task', {}).get('desk'))

            # force the content to source 'NZN' if the desk name contains
            # 'new zealand'
            if 'new zealand' in desk_name.lower().strip():
                formatted_article['source'] = 'NZN'

            # this is temporary fix for bulletin builder formatter
            if formatted_article.get(ITEM_STATE,
                                     '') == CONTENT_STATE.SCHEDULED:
                formatted_article['versioncreated'] = utcnow()

            formatted_article['body_text'] = self.get_text_content(body_html)
            formatted_article['abstract'] = self.get_text_content(
                to_ascii(formatted_article.get('abstract', '') or '')).strip()
            # headline gets the same None guard as abstract and byline
            formatted_article['headline'] = self.get_text_content(
                to_ascii(formatted_article.get('headline', '') or '')).strip()
            formatted_article['byline'] = self.get_text_content(
                to_ascii(formatted_article.get('byline', '') or '')).strip()

            # when there are several categories, drop those with qcode 'c'
            if len(formatted_article.get('anpa_category') or []) > 1:
                formatted_article['anpa_category'] = [
                    cat
                    for cat in (formatted_article.get('anpa_category') or [])
                    if cat.get('qcode') != 'c'
                ]

            self._handle_auto_publish(formatted_article)

            # get the first category and derive the locator
            category = next((iter(
                (formatted_article.get('anpa_category') or []))), None)

            if category:
                locator = LocatorMapper().map(formatted_article,
                                              category.get('qcode').upper())
                if locator:
                    formatted_article['place'] = [{
                        'qcode': locator,
                        'name': locator
                    }]

                formatted_article['first_category'] = category
                formatted_article['first_subject'] = set_subject(
                    category, formatted_article)
                formatted_article['slugline'] = self.get_text_content(
                    to_ascii(SluglineMapper().map(
                        article=formatted_article,
                        category=category.get('qcode').upper(),
                        truncate=(not formatted_article.get('auto_publish')
                                  )))).strip()

            self.format_associated_item(formatted_article)

            # single quotes are doubled in the text fields; the slugline is
            # only rewritten above when a category exists, so it may still be
            # None here
            odbc_item = {
                'id':
                formatted_article.get(config.ID_FIELD),
                'version':
                formatted_article.get(config.VERSION),
                ITEM_TYPE:
                formatted_article.get(ITEM_TYPE),
                'package_type':
                '',
                'headline':
                (formatted_article.get('headline') or '').replace(
                    '\'', '\'\''),
                'slugline':
                (formatted_article.get('slugline') or '').replace(
                    '\'', '\'\''),
                'data':
                superdesk.json.dumps(
                    formatted_article,
                    default=json_serialize_datetime_objectId).replace(
                        '\'', '\'\'')
            }

            return [(pub_seq_num,
                     json.dumps(odbc_item,
                                default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)