Esempio n. 1
0
    def _map_locator_code(self, article, category, locators):
        """
        Based on the category and subject code it returns the locator
        :param dict article: original article
        :param str category: category of the article
        :param dict locators: subject code locator mapping dictionary
        :return: if found then the locator as string else None
        """
        subjects = article.get('subject') or []

        # for sports category
        if category == 'S' or category == 'T':
            subject = set_subject({'qcode': category}, article) or ''
            feature = locators.get('{}000'.format(
                subject[:5])) or locators.get(subject)
            if feature:
                return feature

        for subject in subjects:
            qcode = subject.get('qcode', '')
            feature = locators.get(qcode)
            if feature:
                if qcode == '10006000':
                    return '{}{}'.format(feature,
                                         'I' if category == 'I' else 'D')
                else:
                    return feature
        return None
Esempio n. 2
0
    def _map_locator_code(self, article, category, locators):
        """
        Based on the category and subject code it returns the locator
        :param dict article: original article
        :param str category: category of the article
        :param dict locators: subject code locator mapping dictionary
        :return: if found then the locator as string else None
        """
        subjects = article.get('subject') or []

        # for sports category
        if category == 'S' or category == 'T':
            subject = set_subject({'qcode': category}, article) or ''
            feature = locators.get('{}000'.format(subject[:5])) or locators.get(subject)
            if feature:
                return feature

        for subject in subjects:
            qcode = subject.get('qcode', '')
            feature = locators.get(qcode)
            if feature:
                if qcode == '10006000':
                    return '{}{}'.format(feature, 'I' if category == 'I' else 'D')
                else:
                    return feature
        return None
    def format(self, article, subscriber):
        """
        Formats the article as required by the subscriber.

        The article dict is modified in place: ``slugline`` and ``body_text`` are
        always set, and ``place``, ``first_category`` and ``first_subject`` are set
        when the article has at least one category. The modified article is then
        serialised into the ``data`` field of the result.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: list holding one tuple (publish sequence number of the subscriber,
            formatted article as JSON string)
        :raises: the error built by FormatterError.bulletinBuilderFormatterError for
            any exception raised while formatting
        """
        try:
            article['slugline'] = self.append_legal(article=article, truncate=True)
            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
            body_html = self.append_body_footer(article).strip('\r\n')
            soup = BeautifulSoup(body_html, 'html.parser')

            if not soup.find_all('p'):
                # no paragraphs at all: turn every <br> into a space
                for br in soup.find_all('br'):
                    br.replace_with(' {}'.format(br.get_text()))

            for p in soup.find_all('p'):
                # <br> inside a paragraph becomes a space
                for br in p.find_all('br'):
                    br.replace_with(' {}'.format(br.get_text()))

                # a non-empty paragraph becomes its text followed by two CRLF, an empty one is dropped
                para_text = p.get_text().strip()
                if para_text != '':
                    p.replace_with('{}\r\n\r\n'.format(para_text))
                else:
                    p.replace_with('')

            # collapse runs of spaces
            article['body_text'] = re.sub(' +', ' ', soup.get_text())

            # get the first category and derive the locator;
            # `or []` also covers an article whose anpa_category is present but None
            category = next(iter(article.get('anpa_category') or []), None)
            if category:
                locator = LocatorMapper().map(article, category.get('qcode').upper())
                if locator:
                    article['place'] = [{'qcode': locator, 'name': locator}]

                article['first_category'] = category
                article['first_subject'] = set_subject(category, article)

            # single quotes are doubled in the text fields; `or ''` covers None values
            odbc_item = {
                'id': article.get(config.ID_FIELD),
                'version': article.get(config.VERSION),
                ITEM_TYPE: article.get(ITEM_TYPE),
                PACKAGE_TYPE: article.get(PACKAGE_TYPE, ''),
                'headline': (article.get('headline') or '').replace('\'', '\'\''),
                'slugline': (article.get('slugline') or '').replace('\'', '\'\''),
                'data': superdesk.json.dumps(article, default=json_serialize_datetime_objectId).replace('\'', '\'\'')
            }

            return [(pub_seq_num, json.dumps(odbc_item, default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
    def test_subject(self):
        """Check the subject code set_subject returns for three article shapes."""
        # two categories and two subjects
        with_subjects = dict(
            anpa_category=[dict(qcode='a'), dict(qcode='s')],
            subject=[dict(qcode='04001005'), dict(qcode='15011002')],
        )
        self.assertEqual(set_subject(dict(qcode='a'), with_subjects), '04001005')
        self.assertEqual(set_subject(dict(qcode='s'), with_subjects), '15011002')

        # subject is None: None is expected for both categories
        without_subjects = dict(
            anpa_category=[dict(qcode='a'), dict(qcode='s')],
            subject=None,
        )
        self.assertEqual(set_subject(dict(qcode='a'), without_subjects), None)
        self.assertEqual(set_subject(dict(qcode='s'), without_subjects), None)

        # no category at all: the first subject is expected
        without_categories = dict(
            anpa_category=None,
            subject=[dict(qcode='04001005'), dict(qcode='15011002')],
        )
        self.assertEqual(set_subject(None, without_categories), '04001005')
    def test_subject(self):
        """Run set_subject over a table of (category, article, expected subject code)."""
        full_article = {
            'anpa_category': [{'qcode': 'a'}, {'qcode': 's'}],
            'subject': [{'qcode': '04001005'}, {'qcode': '15011002'}],
        }
        no_subject_article = {
            'anpa_category': [{'qcode': 'a'}, {'qcode': 's'}],
            'subject': None,
        }
        no_category_article = {
            'anpa_category': None,
            'subject': [{'qcode': '04001005'}, {'qcode': '15011002'}],
        }

        # checked in this order; the first failing row stops the test
        expectations = (
            ({'qcode': 'a'}, full_article, '04001005'),
            ({'qcode': 's'}, full_article, '15011002'),
            ({'qcode': 'a'}, no_subject_article, None),
            ({'qcode': 's'}, no_subject_article, None),
            (None, no_category_article, '04001005'),
        )
        for category, article, expected in expectations:
            self.assertEqual(set_subject(category, article), expected)
    def format(self, article, subscriber):
        """
        Constructs a dictionary that represents the parameters passed to the IPNews InsertNews stored procedure

        One dictionary is built per entry in the article's ``anpa_category``, each with
        its own sequence number. Single quotes are doubled in the headline, author,
        keyword and article text values.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: list of tuples (sequence number of the subscriber, constructed parameter dictionary)
        :raises: the error built by FormatterError.AAPIpNewsFormatterError for any
            exception raised while formatting
        """
        try:
            docs = []
            for category in article.get('anpa_category'):
                pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
                # `or ''` covers both a missing key and a key that is present but None
                odbc_item = {'originator': article.get('source', None), 'sequence': pub_seq_num,
                             'category': category.get('qcode'),
                             'headline': (article.get('headline') or '').replace('\'', '\'\''),
                             'author': (article.get('byline') or '').replace('\'', '\'\''),
                             'keyword': (article.get('slugline') or '').replace('\'', '\'\''),
                             'subject_reference': set_subject(category, article)}

                subject_reference = odbc_item['subject_reference']
                if subject_reference is not None and subject_reference != '00000000':
                    # the first two digits padded with zeros are looked up as the subject
                    odbc_item['subject'] = subject_codes[subject_reference[:2] + '000000']
                    if subject_reference[2:5] != '000':
                        odbc_item['subject_matter'] = subject_codes[subject_reference[:5] + '000']
                    else:
                        odbc_item['subject_matter'] = ''
                    if not subject_reference.endswith('000'):
                        odbc_item['subject_detail'] = subject_codes[subject_reference]
                    else:
                        odbc_item['subject_detail'] = ''
                else:
                    odbc_item['subject_reference'] = '00000000'

                odbc_item['take_key'] = article.get('anpa_take_key', None)  # @take_key
                odbc_item['usn'] = article.get('unique_id', None)  # @usn
                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                    odbc_item['article_text'] = article.get('body_html', '').replace('\'', '\'\'')  # @article_text
                elif article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                    # name the parser explicitly so the result does not depend on which
                    # optional parsers happen to be installed
                    soup = BeautifulSoup(article.get('body_html', ''), 'html.parser')
                    text = StringIO()
                    for p in soup.findAll('p'):
                        # every paragraph is introduced by \x19 and a CRLF
                        text.write('\x19\r\n')
                        ptext = p.get_text('\n')
                        for line in ptext.split('\n'):
                            if len(line) > 80:
                                # NOTE(review): unlike the short-line branch, nothing is written
                                # after the last wrapped segment, so a following line of the same
                                # paragraph is appended directly - confirm this is intended
                                text.write(textwrap.fill(line, 80).replace('\n', ' \r\n'))
                            else:
                                text.write(line + ' \r\n')
                    odbc_item['article_text'] = text.getvalue().replace('\'', '\'\'')

                # an empty or None genre falls back to 'Current', like a missing one
                genre = article.get('genre')
                if genre:
                    odbc_item['genre'] = genre[0].get('name', None)
                else:
                    odbc_item['genre'] = 'Current'  # @genre
                if article.get(ITEM_TYPE, CONTENT_TYPE.TEXT) == CONTENT_TYPE.TEXT:
                    odbc_item['texttab'] = 'x'
                elif article.get(ITEM_TYPE, None) == CONTENT_TYPE.PREFORMATTED:
                    odbc_item['texttab'] = 't'
                odbc_item['wordcount'] = article.get('word_count', None)  # @wordcount
                odbc_item['news_item_type'] = 'News'
                odbc_item['priority'] = map_priority(article.get('priority'))  # @priority
                odbc_item['service_level'] = 'a'  # @service_level
                odbc_item['fullStory'] = 1
                odbc_item['ident'] = '0'  # @ident

                category_code = category.get('qcode').upper()
                SelectorcodeMapper().map(article, category_code,
                                         subscriber=subscriber,
                                         formatted_item=odbc_item)
                headline_prefix = LocatorMapper().map(article, category_code)
                if headline_prefix:
                    odbc_item['headline'] = '{}:{}'.format(headline_prefix, odbc_item['headline'])

                if article.get(EMBARGO):
                    embargo = '{}{}'.format('Embargo Content. Timestamp: ', article.get(EMBARGO).isoformat())
                    odbc_item['article_text'] = embargo + odbc_item['article_text']

                docs.append((pub_seq_num, odbc_item))

            return docs
        except Exception as ex:
            raise FormatterError.AAPIpNewsFormatterError(ex, subscriber)
Esempio n. 7
0
    def format(self, article, subscriber):
        """
        Constructs a dictionary that represents the parameters passed to the IPNews InsertNews stored procedure

        One dictionary is built per entry in the article's ``anpa_category``, each with
        its own sequence number. Single quotes are doubled in the headline, author,
        keyword and article text values.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: list of tuples (sequence number of the subscriber, constructed parameter dictionary)
        :raises: the error built by FormatterError.AAPIpNewsFormatterError for any
            exception raised while formatting
        """
        try:
            docs = []
            for category in article.get("anpa_category"):
                pub_seq_num = superdesk.get_resource_service("subscribers").generate_sequence_number(subscriber)
                # `or ""` covers both a missing key and a key that is present but None
                odbc_item = {
                    "originator": article.get("source", None),
                    "sequence": pub_seq_num,
                    "category": category.get("qcode"),
                    "headline": (article.get("headline") or "").replace("'", "''"),
                    "author": (article.get("byline") or "").replace("'", "''"),
                    "keyword": self.append_legal(article=article, truncate=True).replace("'", "''"),
                    "subject_reference": set_subject(category, article),
                }

                subject_reference = odbc_item["subject_reference"]
                if subject_reference is not None and subject_reference != "00000000":
                    # the first two digits padded with zeros are looked up as the subject
                    odbc_item["subject"] = subject_codes[subject_reference[:2] + "000000"]
                    if subject_reference[2:5] != "000":
                        odbc_item["subject_matter"] = subject_codes[subject_reference[:5] + "000"]
                    else:
                        odbc_item["subject_matter"] = ""
                    if not subject_reference.endswith("000"):
                        odbc_item["subject_detail"] = subject_codes[subject_reference]
                    else:
                        odbc_item["subject_detail"] = ""
                else:
                    odbc_item["subject_reference"] = "00000000"

                odbc_item["take_key"] = article.get("anpa_take_key", None)  # @take_key
                odbc_item["usn"] = article.get("unique_id", None)  # @usn
                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:  # @article_text
                    odbc_item["article_text"] = self.append_body_footer(article).replace("'", "''")
                elif article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                    soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
                    text = StringIO()
                    for p in soup.findAll("p"):
                        # every paragraph is introduced by \x19 and a CRLF
                        text.write("\x19\r\n")
                        ptext = p.get_text("\n")
                        for line in ptext.split("\n"):
                            if len(line) > 80:
                                # NOTE(review): unlike the short-line branch, nothing is written
                                # after the last wrapped segment, so a following line of the same
                                # paragraph is appended directly - confirm this is intended
                                text.write(textwrap.fill(line, 80).replace("\n", " \r\n"))
                            else:
                                text.write(line + " \r\n")
                    odbc_item["article_text"] = text.getvalue().replace("'", "''")

                # an empty or None genre falls back to "Current", like a missing one
                genre = article.get("genre")
                if genre:
                    odbc_item["genre"] = genre[0].get("name", None)
                else:
                    odbc_item["genre"] = "Current"  # @genre
                if article.get(ITEM_TYPE, CONTENT_TYPE.TEXT) == CONTENT_TYPE.TEXT:
                    odbc_item["texttab"] = "x"
                elif article.get(ITEM_TYPE, None) == CONTENT_TYPE.PREFORMATTED:
                    odbc_item["texttab"] = "t"
                odbc_item["wordcount"] = article.get("word_count", None)  # @wordcount
                odbc_item["news_item_type"] = "News"
                odbc_item["priority"] = map_priority(article.get("priority"))  # @priority
                odbc_item["service_level"] = "a"  # @service_level
                odbc_item["fullStory"] = 1
                odbc_item["ident"] = "0"  # @ident

                category_code = category.get("qcode").upper()
                SelectorcodeMapper().map(article, category_code, subscriber=subscriber, formatted_item=odbc_item)
                headline_prefix = LocatorMapper().map(article, category_code)
                if headline_prefix:
                    odbc_item["headline"] = "{}:{}".format(headline_prefix, odbc_item["headline"])

                if article.get(EMBARGO):
                    embargo = "{}{}".format("Embargo Content. Timestamp: ", article.get(EMBARGO).isoformat())
                    odbc_item["article_text"] = embargo + odbc_item["article_text"]

                docs.append((pub_seq_num, odbc_item))

            return docs
        except Exception as ex:
            raise FormatterError.AAPIpNewsFormatterError(ex, subscriber)
    def format(self, article, subscriber):
        """
        Formats the article as required by the subscriber.

        The article dict is modified in place: ``slugline`` and ``body_text`` are
        always set, and ``place``, ``first_category`` and ``first_subject`` are set
        when the article has at least one category. The modified article is then
        serialised into the ``data`` field of the result.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: list holding one tuple (publish sequence number of the subscriber,
            formatted article as JSON string)
        :raises: the error built by FormatterError.bulletinBuilderFormatterError for
            any exception raised while formatting
        """
        try:
            article['slugline'] = self.append_legal(article=article,
                                                    truncate=True)
            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)
            body_html = self.append_body_footer(article).strip('\r\n')
            soup = BeautifulSoup(body_html, 'html.parser')

            if not soup.find_all('p'):
                # no paragraphs at all: turn every <br> into a space
                for br in soup.find_all('br'):
                    br.replace_with(' {}'.format(br.get_text()))

            for p in soup.find_all('p'):
                # <br> inside a paragraph becomes a space
                for br in p.find_all('br'):
                    br.replace_with(' {}'.format(br.get_text()))

                # a non-empty paragraph becomes its text followed by two CRLF,
                # an empty one is dropped
                para_text = p.get_text().strip()
                if para_text != '':
                    p.replace_with('{}\r\n\r\n'.format(para_text))
                else:
                    p.replace_with('')

            # collapse runs of spaces
            article['body_text'] = re.sub(' +', ' ', soup.get_text())

            # get the first category and derive the locator; `or []` also
            # covers an article whose anpa_category is present but None
            category = next(iter(article.get('anpa_category') or []), None)
            if category:
                locator = LocatorMapper().map(article,
                                              category.get('qcode').upper())
                if locator:
                    article['place'] = [{'qcode': locator, 'name': locator}]

                article['first_category'] = category
                article['first_subject'] = set_subject(category, article)

            # single quotes are doubled in the text fields; `or ''` covers
            # None values
            headline = (article.get('headline') or '').replace('\'', '\'\'')
            slugline = (article.get('slugline') or '').replace('\'', '\'\'')
            data = superdesk.json.dumps(
                article,
                default=json_serialize_datetime_objectId).replace('\'', '\'\'')

            odbc_item = {
                'id': article.get(config.ID_FIELD),
                'version': article.get(config.VERSION),
                ITEM_TYPE: article.get(ITEM_TYPE),
                PACKAGE_TYPE: article.get(PACKAGE_TYPE, ''),
                'headline': headline,
                'slugline': slugline,
                'data': data,
            }

            return [(pub_seq_num,
                     json.dumps(odbc_item,
                                default=json_serialize_datetime_objectId))]
        except Exception as ex:
            raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
Esempio n. 9
0
    def format(self, article, subscriber):
        """
        Constructs a dictionary that represents the parameters passed to the IPNews InsertNews stored procedure

        One dictionary is built per entry in the article's ``anpa_category``, each with
        its own sequence number. Single quotes are doubled in the headline, author,
        keyword and article text values.

        :param dict article: article to be formatted
        :param dict subscriber: subscriber receiving the article
        :return: list of tuples (sequence number of the subscriber, constructed parameter dictionary)
        :raises: the error built by FormatterError.AAPIpNewsFormatterError for any
            exception raised while formatting
        """
        try:
            docs = []
            for category in article.get('anpa_category'):
                pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
                # `or ''` covers both a missing key and a key that is present but None
                odbc_item = {'originator': article.get('source', None), 'sequence': pub_seq_num,
                             'category': category.get('qcode'),
                             'headline': (article.get('headline') or '').replace('\'', '\'\''),
                             'author': (article.get('byline') or '').replace('\'', '\'\''),
                             'keyword': self.append_legal(article=article, truncate=True).replace('\'', '\'\''),
                             'subject_reference': set_subject(category, article)}

                subject_reference = odbc_item['subject_reference']
                if subject_reference is not None and subject_reference != '00000000':
                    # the first two digits padded with zeros are looked up as the subject
                    odbc_item['subject'] = subject_codes[subject_reference[:2] + '000000']
                    if subject_reference[2:5] != '000':
                        odbc_item['subject_matter'] = subject_codes[subject_reference[:5] + '000']
                    else:
                        odbc_item['subject_matter'] = ''
                    if not subject_reference.endswith('000'):
                        odbc_item['subject_detail'] = subject_codes[subject_reference]
                    else:
                        odbc_item['subject_detail'] = ''
                else:
                    odbc_item['subject_reference'] = '00000000'

                odbc_item['take_key'] = article.get('anpa_take_key', None)  # @take_key
                odbc_item['usn'] = article.get('unique_id', None)  # @usn
                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:  # @article_text
                    odbc_item['article_text'] = self.append_body_footer(article).replace('\'', '\'\'')
                elif article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                    soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
                    text = StringIO()
                    for p in soup.findAll('p'):
                        # every paragraph is introduced by \x19 and a CRLF
                        text.write('\x19\r\n')
                        ptext = p.get_text('\n')
                        for line in ptext.split('\n'):
                            if len(line) > 80:
                                # NOTE(review): unlike the short-line branch, nothing is written
                                # after the last wrapped segment, so a following line of the same
                                # paragraph is appended directly - confirm this is intended
                                text.write(textwrap.fill(line, 80).replace('\n', ' \r\n'))
                            else:
                                text.write(line + ' \r\n')
                    odbc_item['article_text'] = text.getvalue().replace('\'', '\'\'')

                # an empty or None genre falls back to 'Current', like a missing one
                genre = article.get('genre')
                if genre:
                    odbc_item['genre'] = genre[0].get('name', None)
                else:
                    odbc_item['genre'] = 'Current'  # @genre
                if article.get(ITEM_TYPE, CONTENT_TYPE.TEXT) == CONTENT_TYPE.TEXT:
                    odbc_item['texttab'] = 'x'
                elif article.get(ITEM_TYPE, None) == CONTENT_TYPE.PREFORMATTED:
                    odbc_item['texttab'] = 't'
                odbc_item['wordcount'] = article.get('word_count', None)  # @wordcount
                odbc_item['news_item_type'] = 'News'
                odbc_item['priority'] = map_priority(article.get('priority'))  # @priority
                odbc_item['service_level'] = 'a'  # @service_level
                odbc_item['fullStory'] = 1
                odbc_item['ident'] = '0'  # @ident

                category_code = category.get('qcode').upper()
                SelectorcodeMapper().map(article, category_code,
                                         subscriber=subscriber,
                                         formatted_item=odbc_item)
                headline_prefix = LocatorMapper().map(article, category_code)
                if headline_prefix:
                    odbc_item['headline'] = '{}:{}'.format(headline_prefix, odbc_item['headline'])

                if article.get(EMBARGO):
                    embargo = '{}{}'.format('Embargo Content. Timestamp: ', article.get(EMBARGO).isoformat())
                    odbc_item['article_text'] = embargo + odbc_item['article_text']

                docs.append((pub_seq_num, odbc_item))

            return docs
        except Exception as ex:
            raise FormatterError.AAPIpNewsFormatterError(ex, subscriber)