コード例 #1
0
    def format(self, article, subscriber):
        """
        Format the article as required by the subscriber.

        A plain-text rendering of ``body_html`` is stored on the article as ``body_text`` and,
        when ``LocatorMapper`` yields a locator for the first ``anpa_category`` entry, ``place``
        is replaced by that locator. The ``article`` dict is modified in place.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: one-element list holding a tuple (publish sequence number of the subscriber,
            article serialized as a JSON string)
        :raises: the error built by ``FormatterError.bulletinBuilderFormatterError`` when any step fails
        """
        try:
            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
            # ``body_html`` may be present but None; treat that like a missing body instead of failing
            body_html = (article.get('body_html') or '').strip('\r\n')
            # name the parser explicitly so the result does not depend on which parsers are installed
            soup = BeautifulSoup(body_html, 'html.parser')
            for br in soup.find_all('br'):
                # swap the <br> tag for a space (plus whatever text the tag holds)
                br.replace_with(' {}'.format(br.get_text()))

            for p in soup.find_all('p'):
                # swap the <p> tag for its text followed by two CRLF line breaks
                p.replace_with('{}\r\n\r\n'.format(p.get_text()))

            article['body_text'] = soup.get_text()

            # get the first category and derive the locator
            category = next((iter(article.get('anpa_category', []))), None)
            if category:
                locator = LocatorMapper().map(article, category.get('qcode').upper())
                if locator:
                    article['place'] = [{'qcode': locator, 'name': locator}]

            return [(pub_seq_num, superdesk.json.dumps(article, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
コード例 #2
0
    def format(self, article, subscriber):
        """
        Format the article as required by the subscriber.

        The HTML body is flattened to plain text and stored on the article as
        ``body_text``; if a locator can be mapped from the first ``anpa_category``
        entry, ``place`` is overwritten with it. The ``article`` dict is modified
        in place.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: one-element list holding a tuple (publish sequence number of
            the subscriber, article serialized as a JSON string)
        :raises: the error built by
            ``FormatterError.bulletinBuilderFormatterError`` when any step fails
        """
        try:
            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)
            # a None ``body_html`` is handled like an absent one rather than
            # failing on ``None.strip``
            body_html = (article.get('body_html') or '').strip('\r\n')
            # explicit parser: without it bs4 picks whichever parser happens
            # to be installed and emits a warning
            soup = BeautifulSoup(body_html, 'html.parser')
            for br in soup.find_all('br'):
                # swap the <br> tag for a space (plus any text the tag holds)
                br.replace_with(' {}'.format(br.get_text()))

            for p in soup.find_all('p'):
                # swap the <p> tag for its text plus two CRLF line breaks
                p.replace_with('{}\r\n\r\n'.format(p.get_text()))

            article['body_text'] = soup.get_text()

            # get the first category and derive the locator
            category = next((iter(article.get('anpa_category', []))), None)
            if category:
                locator = LocatorMapper().map(article,
                                              category.get('qcode').upper())
                if locator:
                    article['place'] = [{'qcode': locator, 'name': locator}]

            return [(pub_seq_num,
                     superdesk.json.dumps(
                         article, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
コード例 #3
0
    def format(self, article, subscriber):
        """
        Build the JSON payload of the article for the subscriber.

        The article is updated in place: ``slugline`` and ``body_text`` are always set,
        and ``place`` / ``first_category`` / ``first_subject`` are set when the article
        has a first ``anpa_category`` entry.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: one-element list holding a tuple (publish sequence number of the subscriber,
            JSON string of the wrapped item)
        """
        try:
            article['slugline'] = self.append_legal(article=article, truncate=True)
            sequence_number = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
            markup = self.append_body_footer(article).strip('\r\n')
            soup = BeautifulSoup(markup, 'html.parser')

            paragraphs = soup.find_all('p')
            if not paragraphs:
                # no paragraphs at all: every <br> in the document becomes a space
                for line_break in soup.find_all('br'):
                    line_break.replace_with(' {}'.format(line_break.get_text()))

            for paragraph in paragraphs:
                # inside a paragraph each <br> becomes a space
                for line_break in paragraph.find_all('br'):
                    line_break.replace_with(' {}'.format(line_break.get_text()))

                text = paragraph.get_text().strip()
                # a paragraph with text ends in two CRLF line breaks, an empty one disappears
                paragraph.replace_with('{}\r\n\r\n'.format(text) if text else '')

            # collapse runs of spaces left behind by the tag replacement
            article['body_text'] = re.sub(' +', ' ', soup.get_text())

            # the first category drives the locator and the first_* fields
            first_category = next(iter(article.get('anpa_category', [])), None)
            if first_category:
                locator = LocatorMapper().map(article, first_category.get('qcode').upper())
                if locator:
                    article['place'] = [{'qcode': locator, 'name': locator}]

                article['first_category'] = first_category
                article['first_subject'] = set_subject(first_category, article)

            # single quotes are doubled in the textual fields of the payload
            headline = article.get('headline', '').replace('\'', '\'\'')
            slugline = article.get('slugline', '').replace('\'', '\'\'')
            serialized_article = superdesk.json.dumps(article, default=json_serialize_datetime_objectId)

            odbc_item = {
                'id': article.get(config.ID_FIELD),
                'version': article.get(config.VERSION),
                ITEM_TYPE: article.get(ITEM_TYPE),
                PACKAGE_TYPE: article.get(PACKAGE_TYPE, ''),
                'headline': headline,
                'slugline': slugline,
                'data': serialized_article.replace('\'', '\'\''),
            }

            payload = json.dumps(odbc_item, default=json_serialize_datetime_objectId)
            return [(sequence_number, payload)]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
コード例 #4
0
    def format(self, article, subscriber):
        """
        Format the article as required by the subscriber.

        The HTML body is flattened to plain text and stored on the article as
        ``body_text``; the ``article`` dict is modified in place.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: one-element list holding a tuple (sequence number generated for the subscriber,
            article serialized as a JSON string)
        :raises: the error built by ``FormatterError.bulletinBuilderFormatterError`` when any step fails
        """
        try:
            pub_seq_num = superdesk.get_resource_service("subscribers").generate_sequence_number(subscriber)
            # ``body_html`` may be present but None; treat that like a missing body instead of failing
            body_html = (article.get("body_html") or "").strip("\r\n")
            # name the parser explicitly so the result does not depend on which parsers are installed
            soup = BeautifulSoup(body_html, "html.parser")
            for br in soup.find_all("br"):
                # swap the <br> tag for a space (plus whatever text the tag holds)
                br.replace_with(" {}".format(br.get_text()))

            for p in soup.find_all("p"):
                # swap the <p> tag for its text followed by two CRLF line breaks
                p.replace_with("{}\r\n\r\n".format(p.get_text()))

            article["body_text"] = soup.get_text()

            return [(pub_seq_num, superdesk.json.dumps(article, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
コード例 #5
0
    def format(self, article, subscriber, codes=None):
        """
        Build the JSON payload of the article for the subscriber.

        All changes are applied to a deep copy, so the ``article`` passed in is left untouched.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param list codes: selector codes (not read by this method)
        :return: one-element list holding a tuple (publish sequence number of the subscriber,
            JSON string of the wrapped item)
        """
        try:
            item = deepcopy(article)

            sequence_number = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
            body_with_footer = self.append_body_footer(item)
            body_html = to_ascii(body_with_footer).strip('\r\n')

            # look up the name of the desk the item sits on
            desks_service = superdesk.get_resource_service('desks')
            desk_name = desks_service.get_desk_name(item.get('task', {}).get('desk'))

            # a desk whose name contains 'new zealand' forces the source to 'NZN'
            if 'new zealand' in desk_name.lower().strip():
                item['source'] = 'NZN'

            # temporary fix for the bulletin builder formatter: scheduled items get a fresh versioncreated
            if item.get(ITEM_STATE, '') == CONTENT_STATE.SCHEDULED:
                item['versioncreated'] = utcnow()

            item['body_text'] = self.get_text_content(body_html)

            abstract_ascii = to_ascii(item.get('abstract', '') or '')
            item['abstract'] = self.get_text_content(abstract_ascii).strip()

            headline_ascii = to_ascii(item.get('headline', ''))
            item['headline'] = self.get_text_content(headline_ascii).strip()

            byline_ascii = to_ascii(item.get('byline', '') or '')
            item['byline'] = self.get_text_content(byline_ascii).strip()

            # with more than one category, entries with qcode 'c' are dropped
            categories = item.get('anpa_category') or []
            if len(categories) > 1:
                item['anpa_category'] = [entry for entry in categories if entry.get('qcode') != 'c']

            self._handle_auto_publish(item)

            # the first category drives the locator, the first_* fields and the slugline
            category = next(iter(item.get('anpa_category') or []), None)

            if category:
                locator = LocatorMapper().map(item, category.get('qcode').upper())
                if locator:
                    item['place'] = [{'qcode': locator, 'name': locator}]

                item['first_category'] = category
                item['first_subject'] = set_subject(category, item)

                mapped_slugline = SluglineMapper().map(article=item,
                                                       category=category.get('qcode').upper(),
                                                       truncate=(not item.get('auto_publish')))
                # the ASCII slugline is stripped before its text content is extracted
                item['slugline'] = self.get_text_content(to_ascii(mapped_slugline).strip())

            self.format_associated_item(item)

            # single quotes are doubled in the textual fields of the payload
            headline = item.get('headline', '').replace('\'', '\'\'')
            slugline = item.get('slugline', '').replace('\'', '\'\'')
            serialized_item = superdesk.json.dumps(item, default=json_serialize_datetime_objectId)

            odbc_item = {
                'id': item.get(config.ID_FIELD),
                'version': item.get(config.VERSION),
                ITEM_TYPE: item.get(ITEM_TYPE),
                PACKAGE_TYPE: item.get(PACKAGE_TYPE, ''),
                'headline': headline,
                'slugline': slugline,
                'data': serialized_item.replace('\'', '\'\''),
            }

            payload = json.dumps(odbc_item, default=json_serialize_datetime_objectId)
            return [(sequence_number, payload)]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
コード例 #6
0
    def format(self, article, subscriber):
        """
        Build the JSON payload of the article for the subscriber.

        The article is updated in place: ``slugline`` and ``body_text`` are
        always set, and ``place`` / ``first_category`` / ``first_subject`` are
        set when the article has a first ``anpa_category`` entry.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: one-element list holding a tuple (publish sequence number of
            the subscriber, JSON string of the wrapped item)
        """

        def blank_out_breaks(node):
            # every <br> below ``node`` is swapped for a space (plus its text)
            for br_tag in node.find_all('br'):
                br_tag.replace_with(' {}'.format(br_tag.get_text()))

        try:
            article['slugline'] = self.append_legal(article=article,
                                                    truncate=True)
            subscribers = superdesk.get_resource_service('subscribers')
            seq_num = subscribers.generate_sequence_number(subscriber)
            html = self.append_body_footer(article).strip('\r\n')
            soup = BeautifulSoup(html, 'html.parser')

            p_tags = soup.find_all('p')
            if not p_tags:
                # a body without paragraphs has its <br> tags handled globally
                blank_out_breaks(soup)

            for p_tag in p_tags:
                blank_out_breaks(p_tag)

                stripped = p_tag.get_text().strip()
                if stripped:
                    # keep the text and close it with two CRLF line breaks
                    p_tag.replace_with('{}\r\n\r\n'.format(stripped))
                else:
                    # empty paragraphs are dropped
                    p_tag.replace_with('')

            # squeeze runs of spaces into a single space
            article['body_text'] = re.sub(' +', ' ', soup.get_text())

            # first category -> locator and first_* fields
            category = next(iter(article.get('anpa_category', [])), None)
            if category:
                qcode = category.get('qcode').upper()
                locator = LocatorMapper().map(article, qcode)
                if locator:
                    article['place'] = [{'qcode': locator, 'name': locator}]

                article['first_category'] = category
                article['first_subject'] = set_subject(category, article)

            # single quotes are doubled in the textual fields of the payload
            odbc_item = {}
            odbc_item['id'] = article.get(config.ID_FIELD)
            odbc_item['version'] = article.get(config.VERSION)
            odbc_item[ITEM_TYPE] = article.get(ITEM_TYPE)
            odbc_item[PACKAGE_TYPE] = article.get(PACKAGE_TYPE, '')
            odbc_item['headline'] = article.get('headline', '').replace(
                '\'', '\'\'')
            odbc_item['slugline'] = article.get('slugline', '').replace(
                '\'', '\'\'')
            article_json = superdesk.json.dumps(
                article, default=json_serialize_datetime_objectId)
            odbc_item['data'] = article_json.replace('\'', '\'\'')

            item_json = json.dumps(odbc_item,
                                   default=json_serialize_datetime_objectId)
            return [(seq_num, item_json)]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
コード例 #7
0
    def format(self, article, subscriber, codes=None):
        """
        Build the JSON payload of the article for the subscriber.

        All changes are applied to a deep copy, so the ``article`` passed in
        is left untouched.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :param list codes: selector codes (not read by this method)
        :return: one-element list holding a tuple (publish sequence number of
            the subscriber, JSON string of the wrapped item)
        """
        try:
            doc = deepcopy(article)

            subscribers = superdesk.get_resource_service('subscribers')
            seq_num = subscribers.generate_sequence_number(subscriber)
            body_html = to_ascii(self.append_body_footer(doc)).strip('\r\n')

            # look up the name of the desk the item sits on
            desks = superdesk.get_resource_service('desks')
            desk_name = desks.get_desk_name(doc.get('task', {}).get('desk'))

            # a desk whose name contains 'new zealand' forces source 'NZN'
            if 'new zealand' in desk_name.lower().strip():
                doc['source'] = 'NZN'

            # temporary fix for the bulletin builder formatter: scheduled
            # items get a fresh versioncreated
            if doc.get(ITEM_STATE, '') == CONTENT_STATE.SCHEDULED:
                doc['versioncreated'] = utcnow()

            doc['body_text'] = self.get_text_content(body_html)

            abstract = to_ascii(doc.get('abstract', '') or '')
            doc['abstract'] = self.get_text_content(abstract).strip()

            headline = to_ascii(doc.get('headline', ''))
            doc['headline'] = self.get_text_content(headline).strip()

            byline = to_ascii(doc.get('byline', '') or '')
            doc['byline'] = self.get_text_content(byline).strip()

            # with more than one category, entries with qcode 'c' are dropped
            all_categories = doc.get('anpa_category') or []
            if len(all_categories) > 1:
                doc['anpa_category'] = [
                    entry for entry in all_categories
                    if entry.get('qcode') != 'c'
                ]

            self._handle_auto_publish(doc)

            # the first category drives locator, first_* fields and slugline
            category = next(iter(doc.get('anpa_category') or []), None)

            if category:
                locator = LocatorMapper().map(doc,
                                              category.get('qcode').upper())
                if locator:
                    doc['place'] = [{'qcode': locator, 'name': locator}]

                doc['first_category'] = category
                doc['first_subject'] = set_subject(category, doc)

                mapped_slugline = SluglineMapper().map(
                    article=doc,
                    category=category.get('qcode').upper(),
                    truncate=(not doc.get('auto_publish')))
                # the text content is extracted first and stripped afterwards
                doc['slugline'] = self.get_text_content(
                    to_ascii(mapped_slugline)).strip()

            self.format_associated_item(doc)

            # single quotes are doubled in the textual fields of the payload
            odbc_item = {}
            odbc_item['id'] = doc.get(config.ID_FIELD)
            odbc_item['version'] = doc.get(config.VERSION)
            odbc_item[ITEM_TYPE] = doc.get(ITEM_TYPE)
            odbc_item['package_type'] = ''
            odbc_item['headline'] = doc.get('headline', '').replace(
                '\'', '\'\'')
            odbc_item['slugline'] = doc.get('slugline', '').replace(
                '\'', '\'\'')
            doc_json = superdesk.json.dumps(
                doc, default=json_serialize_datetime_objectId)
            odbc_item['data'] = doc_json.replace('\'', '\'\'')

            item_json = json.dumps(odbc_item,
                                   default=json_serialize_datetime_objectId)
            return [(seq_num, item_json)]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)