def _map_locator_code(self, article, category, locators):
    """
    Resolve the locator for an article from its category and subject codes

    :param dict article: original article
    :param str category: category of the article
    :param dict locators: subject code locator mapping dictionary
    :return: the locator as string when one is found, otherwise None
    """
    subjects = article.get('subject') or []

    # for sports category: try the subject returned by set_subject first
    if category in ('S', 'T'):
        sport_code = set_subject({'qcode': category}, article) or ''
        sport_locator = locators.get('{}000'.format(sport_code[:5])) or locators.get(sport_code)
        if sport_locator:
            return sport_locator

    for entry in subjects:
        code = entry.get('qcode', '')
        locator = locators.get(code)
        if not locator:
            continue
        if code != '10006000':
            return locator
        # qcode 10006000 gets a suffix: 'I' for category 'I', 'D' for every other category
        suffix = 'I' if category == 'I' else 'D'
        return '{}{}'.format(locator, suffix)

    return None
def _map_locator_code(self, article, category, locators):
    """
    Look up the locator matching the article's category and subject codes

    :param dict article: original article
    :param str category: category of the article
    :param dict locators: subject code locator mapping dictionary
    :return: the matching locator as string, or None when nothing matches
    """
    subjects = article.get('subject') or []

    # for sports category
    is_sport = category == 'S' or category == 'T'
    if is_sport:
        category_subject = set_subject({'qcode': category}, article) or ''
        # first five characters of the subject padded with '000', then the subject itself
        padded_key = '{}000'.format(category_subject[:5])
        feature = locators.get(padded_key) or locators.get(category_subject)
        if feature:
            return feature

    for item in subjects:
        qcode = item.get('qcode', '')
        feature = locators.get(qcode)
        if feature and qcode == '10006000':
            # this qcode is suffixed with 'I' for category 'I' and with 'D' otherwise
            return '{}{}'.format(feature, 'I' if category == 'I' else 'D')
        if feature:
            return feature

    return None
def format(self, article, subscriber):
    """
    Formats the article as required by the subscriber

    The article is updated in place: ``slugline`` and ``body_text`` are always set and, when the
    article has at least one category, ``first_category`` and ``first_subject`` are set as well
    (plus ``place`` when a locator is found).

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return: list holding a single tuple (int, str) of publish sequence of the subscriber,
        formatted article as string
    :raises: the error built by FormatterError.bulletinBuilderFormatterError, wrapping any
        exception raised while formatting
    """
    try:
        article['slugline'] = self.append_legal(article=article, truncate=True)
        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
        body_html = self.append_body_footer(article).strip('\r\n')
        soup = BeautifulSoup(body_html, 'html.parser')

        paragraphs = soup.find_all('p')
        if not paragraphs:
            # body without any <p> tag: replace every <br> tag with a space
            for br in soup.find_all('br'):
                br.replace_with(' {}'.format(br.get_text()))

        for p in paragraphs:
            # replace <p> tag with its text followed by two CRLF line breaks
            for br in p.find_all('br'):
                # remove the <br> tag
                br.replace_with(' {}'.format(br.get_text()))

            para_text = p.get_text().strip()
            # paragraphs without text are dropped
            p.replace_with('{}\r\n\r\n'.format(para_text) if para_text else '')

        # collapse runs of spaces into a single space
        article['body_text'] = re.sub(' +', ' ', soup.get_text())

        # get the first category and derive the locator; an anpa_category that is
        # missing or explicitly None is treated as "no category" instead of failing
        category = next(iter(article.get('anpa_category') or []), None)
        if category:
            locator = LocatorMapper().map(article, category.get('qcode').upper())
            if locator:
                article['place'] = [{'qcode': locator, 'name': locator}]

            article['first_category'] = category
            article['first_subject'] = set_subject(category, article)

        # single quotes are doubled in every text value
        # NOTE(review): presumably for embedding in a SQL string literal - confirm
        odbc_item = {
            'id': article.get(config.ID_FIELD),
            'version': article.get(config.VERSION),
            ITEM_TYPE: article.get(ITEM_TYPE),
            PACKAGE_TYPE: article.get(PACKAGE_TYPE, ''),
            # `or ''` also covers keys that are present with a None value
            'headline': (article.get('headline') or '').replace('\'', '\'\''),
            'slugline': (article.get('slugline') or '').replace('\'', '\'\''),
            'data': superdesk.json.dumps(article, default=json_serialize_datetime_objectId).replace('\'', '\'\'')
        }

        return [(pub_seq_num, json.dumps(odbc_item, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def test_subject(self):
    """Check the subject code set_subject returns for different category/article combinations"""

    def categories():
        return [{'qcode': 'a'}, {'qcode': 's'}]

    def subjects():
        return [{'qcode': '04001005'}, {'qcode': '15011002'}]

    # article with both categories and subjects
    article = {'anpa_category': categories(), 'subject': subjects()}
    self.assertEqual(set_subject({'qcode': 'a'}, article), '04001005')
    self.assertEqual(set_subject({'qcode': 's'}, article), '15011002')

    # article without subjects
    article = {'anpa_category': categories(), 'subject': None}
    self.assertIsNone(set_subject({'qcode': 'a'}, article))
    self.assertIsNone(set_subject({'qcode': 's'}, article))

    # no category at all: the first subject is expected
    article = {'anpa_category': None, 'subject': subjects()}
    self.assertEqual(set_subject(None, article), '04001005')
def test_subject(self):
    """Verify set_subject for articles with subjects, without subjects and without categories"""
    with_subjects = {
        'anpa_category': [{'qcode': 'a'}, {'qcode': 's'}],
        'subject': [{'qcode': '04001005'}, {'qcode': '15011002'}],
    }
    expectations = (
        ({'qcode': 'a'}, '04001005'),
        ({'qcode': 's'}, '15011002'),
    )
    for category, expected in expectations:
        self.assertEqual(set_subject(category, with_subjects), expected)

    # a None subject list yields None for every category
    without_subjects = {
        'anpa_category': [{'qcode': 'a'}, {'qcode': 's'}],
        'subject': None,
    }
    for qcode in ('a', 's'):
        self.assertIsNone(set_subject({'qcode': qcode}, without_subjects))

    # a None category is expected to give the first subject of the article
    without_categories = {
        'anpa_category': None,
        'subject': [{'qcode': '04001005'}, {'qcode': '15011002'}],
    }
    self.assertEqual(set_subject(None, without_categories), '04001005')
def format(self, article, subscriber):
    """
    Constructs a dictionary that represents the parameters passed to the IPNews InsertNews stored procedure

    One dictionary is built for every entry of the article's ``anpa_category`` list, each with its
    own subscriber sequence number.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return: list of tuples, each holding the sequence number of the subscriber and the
        constructed parameter dictionary
    :raises: the error built by FormatterError.AAPIpNewsFormatterError, wrapping any exception
        raised while formatting (including a missing ``anpa_category``)
    """
    def escape_quotes(value):
        # Double every single quote; a missing or None value becomes an empty string.
        # NOTE(review): presumably for embedding in a SQL string literal - confirm
        return (value or '').replace('\'', '\'\'')

    try:
        docs = []
        for category in article.get('anpa_category'):
            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
            odbc_item = {'originator': article.get('source', None), 'sequence': pub_seq_num,
                         'category': category.get('qcode'),
                         'headline': escape_quotes(article.get('headline')),
                         'author': escape_quotes(article.get('byline')),
                         'keyword': escape_quotes(article.get('slugline')),
                         'subject_reference': set_subject(category, article)}

            subject_reference = odbc_item['subject_reference']
            if subject_reference is not None and subject_reference != '00000000':
                # look up the code with its last six characters zeroed
                odbc_item['subject'] = subject_codes[subject_reference[:2] + '000000']
                # look up the code with its last three characters zeroed, when characters 3-5 are set
                if subject_reference[2:5] != '000':
                    odbc_item['subject_matter'] = subject_codes[subject_reference[:5] + '000']
                else:
                    odbc_item['subject_matter'] = ''
                # look up the full code, when its last three characters are set
                if not subject_reference.endswith('000'):
                    odbc_item['subject_detail'] = subject_codes[subject_reference]
                else:
                    odbc_item['subject_detail'] = ''
            else:
                odbc_item['subject_reference'] = '00000000'

            odbc_item['take_key'] = article.get('anpa_take_key', None)  # @take_key
            odbc_item['usn'] = article.get('unique_id', None)  # @usn

            if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                odbc_item['article_text'] = escape_quotes(article.get('body_html'))  # @article_text
            elif article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                # name the parser explicitly so the result does not depend on which parsers are installed
                soup = BeautifulSoup(article.get('body_html') or '', 'html.parser')
                text = StringIO()
                for p in soup.findAll('p'):
                    # every paragraph starts with \x19 followed by CRLF
                    text.write('\x19\r\n')
                    for line in p.get_text('\n').split('\n'):
                        if len(line) > 80:
                            # wrap at 80 columns; no terminator is written after the last wrapped line
                            text.write(textwrap.fill(line, 80).replace('\n', ' \r\n'))
                        else:
                            text.write(line + ' \r\n')
                odbc_item['article_text'] = text.getvalue().replace('\'', '\'\'')

            # fall back to 'Current' when genre is missing, None or an empty list
            genre = article.get('genre')
            odbc_item['genre'] = genre[0].get('name', None) if genre else 'Current'  # @genre

            if article.get(ITEM_TYPE, CONTENT_TYPE.TEXT) == CONTENT_TYPE.TEXT:
                odbc_item['texttab'] = 'x'
            elif article.get(ITEM_TYPE, None) == CONTENT_TYPE.PREFORMATTED:
                odbc_item['texttab'] = 't'
            odbc_item['wordcount'] = article.get('word_count', None)  # @wordcount
            odbc_item['news_item_type'] = 'News'
            odbc_item['priority'] = map_priority(article.get('priority'))  # @priority
            odbc_item['service_level'] = 'a'  # @service_level
            odbc_item['fullStory'] = 1
            odbc_item['ident'] = '0'  # @ident

            SelectorcodeMapper().map(article, category.get('qcode').upper(),
                                     subscriber=subscriber,
                                     formatted_item=odbc_item)
            headline_prefix = LocatorMapper().map(article, category.get('qcode').upper())
            if headline_prefix:
                odbc_item['headline'] = '{}:{}'.format(headline_prefix, odbc_item['headline'])

            if article.get(EMBARGO):
                # the embargo notice is put in front of the article text
                embargo = '{}{}'.format('Embargo Content. Timestamp: ', article.get(EMBARGO).isoformat())
                odbc_item['article_text'] = embargo + odbc_item['article_text']

            docs.append((pub_seq_num, odbc_item))

        return docs
    except Exception as ex:
        raise FormatterError.AAPIpNewsFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """
    Constructs a dictionary that represents the parameters passed to the IPNews InsertNews stored procedure

    One dictionary is built for every entry of the article's ``anpa_category`` list, each with its
    own subscriber sequence number.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return: list of tuples, each holding the sequence number of the subscriber and the
        constructed parameter dictionary
    :raises: the error built by FormatterError.AAPIpNewsFormatterError, wrapping any exception
        raised while formatting (including a missing ``anpa_category``)
    """
    try:
        docs = []
        for category in article.get("anpa_category"):
            pub_seq_num = superdesk.get_resource_service("subscribers").generate_sequence_number(subscriber)
            # single quotes are doubled in every text value
            # NOTE(review): presumably for embedding in a SQL string literal - confirm
            odbc_item = {
                "originator": article.get("source", None),
                "sequence": pub_seq_num,
                "category": category.get("qcode"),
                # `or ""` also covers keys that are present with a None value
                "headline": (article.get("headline") or "").replace("'", "''"),
                "author": (article.get("byline") or "").replace("'", "''"),
                "keyword": self.append_legal(article=article, truncate=True).replace("'", "''"),
                "subject_reference": set_subject(category, article),
            }

            subject_reference = odbc_item["subject_reference"]
            if subject_reference is not None and subject_reference != "00000000":
                # look up the code with its last six characters zeroed
                odbc_item["subject"] = subject_codes[subject_reference[:2] + "000000"]
                # look up the code with its last three characters zeroed, when characters 3-5 are set
                if subject_reference[2:5] != "000":
                    odbc_item["subject_matter"] = subject_codes[subject_reference[:5] + "000"]
                else:
                    odbc_item["subject_matter"] = ""
                # look up the full code, when its last three characters are set
                if not subject_reference.endswith("000"):
                    odbc_item["subject_detail"] = subject_codes[subject_reference]
                else:
                    odbc_item["subject_detail"] = ""
            else:
                odbc_item["subject_reference"] = "00000000"

            odbc_item["take_key"] = article.get("anpa_take_key", None)  # @take_key
            odbc_item["usn"] = article.get("unique_id", None)  # @usn

            if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:  # @article_text
                odbc_item["article_text"] = self.append_body_footer(article).replace("'", "''")
            elif article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
                text = StringIO()
                for p in soup.findAll("p"):
                    # every paragraph starts with \x19 followed by CRLF
                    text.write("\x19\r\n")
                    for line in p.get_text("\n").split("\n"):
                        if len(line) > 80:
                            # wrap at 80 columns; no terminator is written after the last wrapped line
                            text.write(textwrap.fill(line, 80).replace("\n", " \r\n"))
                        else:
                            text.write(line + " \r\n")
                odbc_item["article_text"] = text.getvalue().replace("'", "''")

            # fall back to "Current" when genre is missing, None or an empty list
            genre = article.get("genre")
            if genre:
                odbc_item["genre"] = genre[0].get("name", None)
            else:
                odbc_item["genre"] = "Current"  # @genre

            if article.get(ITEM_TYPE, CONTENT_TYPE.TEXT) == CONTENT_TYPE.TEXT:
                odbc_item["texttab"] = "x"
            elif article.get(ITEM_TYPE, None) == CONTENT_TYPE.PREFORMATTED:
                odbc_item["texttab"] = "t"
            odbc_item["wordcount"] = article.get("word_count", None)  # @wordcount
            odbc_item["news_item_type"] = "News"
            odbc_item["priority"] = map_priority(article.get("priority"))  # @priority
            odbc_item["service_level"] = "a"  # @service_level
            odbc_item["fullStory"] = 1
            odbc_item["ident"] = "0"  # @ident

            SelectorcodeMapper().map(
                article, category.get("qcode").upper(), subscriber=subscriber, formatted_item=odbc_item
            )
            headline_prefix = LocatorMapper().map(article, category.get("qcode").upper())
            if headline_prefix:
                odbc_item["headline"] = "{}:{}".format(headline_prefix, odbc_item["headline"])

            if article.get(EMBARGO):
                # the embargo notice is put in front of the article text
                embargo = "{}{}".format("Embargo Content. Timestamp: ", article.get(EMBARGO).isoformat())
                odbc_item["article_text"] = embargo + odbc_item["article_text"]

            docs.append((pub_seq_num, odbc_item))

        return docs
    except Exception as ex:
        raise FormatterError.AAPIpNewsFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """
    Formats the article as required by the subscriber

    The article is updated in place: ``slugline`` and ``body_text`` are always set and, when the
    article has at least one category, ``first_category`` and ``first_subject`` are set as well
    (plus ``place`` when a locator is found).

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return: list holding a single tuple (int, str) of publish sequence of the subscriber,
        formatted article as string
    :raises: the error built by FormatterError.bulletinBuilderFormatterError, wrapping any
        exception raised while formatting
    """
    try:
        article['slugline'] = self.append_legal(article=article, truncate=True)
        pub_seq_num = superdesk.get_resource_service(
            'subscribers').generate_sequence_number(subscriber)
        body_html = self.append_body_footer(article).strip('\r\n')
        soup = BeautifulSoup(body_html, 'html.parser')

        paragraphs = soup.find_all('p')
        if not paragraphs:
            # no <p> tag in the body: every <br> tag is replaced with a space
            for br in soup.find_all('br'):
                br.replace_with(' {}'.format(br.get_text()))

        for p in paragraphs:
            # replace <p> tag with its text followed by two CRLF line breaks
            for br in p.find_all('br'):
                # remove the <br> tag
                br.replace_with(' {}'.format(br.get_text()))

            para_text = p.get_text().strip()
            if para_text:
                p.replace_with('{}\r\n\r\n'.format(para_text))
            else:
                # paragraphs without text are dropped
                p.replace_with('')

        # collapse runs of spaces into a single space
        article['body_text'] = re.sub(' +', ' ', soup.get_text())

        # get the first category and derive the locator; `or []` keeps a None
        # anpa_category from raising, it is handled like an article without categories
        category = next(iter(article.get('anpa_category') or []), None)
        if category:
            locator = LocatorMapper().map(article, category.get('qcode').upper())
            if locator:
                article['place'] = [{'qcode': locator, 'name': locator}]

            article['first_category'] = category
            article['first_subject'] = set_subject(category, article)

        # single quotes are doubled in every text value
        # NOTE(review): presumably for embedding in a SQL string literal - confirm
        odbc_item = {
            'id': article.get(config.ID_FIELD),
            'version': article.get(config.VERSION),
            ITEM_TYPE: article.get(ITEM_TYPE),
            PACKAGE_TYPE: article.get(PACKAGE_TYPE, ''),
            # `or ''` also covers keys that are present with a None value
            'headline': (article.get('headline') or '').replace('\'', '\'\''),
            'slugline': (article.get('slugline') or '').replace('\'', '\'\''),
            'data': superdesk.json.dumps(
                article, default=json_serialize_datetime_objectId).replace('\'', '\'\'')
        }

        return [(pub_seq_num, json.dumps(odbc_item, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """
    Constructs a dictionary that represents the parameters passed to the IPNews InsertNews stored procedure

    One dictionary is built for every entry of the article's ``anpa_category`` list, each with its
    own subscriber sequence number.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return: list of tuples, each holding the sequence number of the subscriber and the
        constructed parameter dictionary
    :raises: the error built by FormatterError.AAPIpNewsFormatterError, wrapping any exception
        raised while formatting (including a missing ``anpa_category``)
    """
    try:
        docs = []
        for category in article.get('anpa_category'):
            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
            # single quotes are doubled in every text value
            # NOTE(review): presumably for embedding in a SQL string literal - confirm
            odbc_item = {'originator': article.get('source', None), 'sequence': pub_seq_num,
                         'category': category.get('qcode'),
                         # `or ''` also covers keys that are present with a None value
                         'headline': (article.get('headline') or '').replace('\'', '\'\''),
                         'author': (article.get('byline') or '').replace('\'', '\'\''),
                         'keyword': self.append_legal(article=article, truncate=True).replace('\'', '\'\''),
                         'subject_reference': set_subject(category, article)}

            subject_reference = odbc_item['subject_reference']
            if subject_reference is not None and subject_reference != '00000000':
                # look up the code with its last six characters zeroed
                odbc_item['subject'] = subject_codes[subject_reference[:2] + '000000']
                # look up the code with its last three characters zeroed, when characters 3-5 are set
                if subject_reference[2:5] != '000':
                    odbc_item['subject_matter'] = subject_codes[subject_reference[:5] + '000']
                else:
                    odbc_item['subject_matter'] = ''
                # look up the full code, when its last three characters are set
                if not subject_reference.endswith('000'):
                    odbc_item['subject_detail'] = subject_codes[subject_reference]
                else:
                    odbc_item['subject_detail'] = ''
            else:
                odbc_item['subject_reference'] = '00000000'

            odbc_item['take_key'] = article.get('anpa_take_key', None)  # @take_key
            odbc_item['usn'] = article.get('unique_id', None)  # @usn

            if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:  # @article_text
                odbc_item['article_text'] = self.append_body_footer(article).replace('\'', '\'\'')
            elif article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
                soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
                text = StringIO()
                for p in soup.findAll('p'):
                    # every paragraph starts with \x19 followed by CRLF
                    text.write('\x19\r\n')
                    for line in p.get_text('\n').split('\n'):
                        if len(line) > 80:
                            # wrap at 80 columns; no terminator is written after the last wrapped line
                            text.write(textwrap.fill(line, 80).replace('\n', ' \r\n'))
                        else:
                            text.write(line + ' \r\n')
                odbc_item['article_text'] = text.getvalue().replace('\'', '\'\'')

            # fall back to 'Current' when genre is missing, None or an empty list
            genre = article.get('genre')
            if genre:
                odbc_item['genre'] = genre[0].get('name', None)
            else:
                odbc_item['genre'] = 'Current'  # @genre

            if article.get(ITEM_TYPE, CONTENT_TYPE.TEXT) == CONTENT_TYPE.TEXT:
                odbc_item['texttab'] = 'x'
            elif article.get(ITEM_TYPE, None) == CONTENT_TYPE.PREFORMATTED:
                odbc_item['texttab'] = 't'
            odbc_item['wordcount'] = article.get('word_count', None)  # @wordcount
            odbc_item['news_item_type'] = 'News'
            odbc_item['priority'] = map_priority(article.get('priority'))  # @priority
            odbc_item['service_level'] = 'a'  # @service_level
            odbc_item['fullStory'] = 1
            odbc_item['ident'] = '0'  # @ident

            SelectorcodeMapper().map(article, category.get('qcode').upper(),
                                     subscriber=subscriber,
                                     formatted_item=odbc_item)
            headline_prefix = LocatorMapper().map(article, category.get('qcode').upper())
            if headline_prefix:
                odbc_item['headline'] = '{}:{}'.format(headline_prefix, odbc_item['headline'])

            if article.get(EMBARGO):
                # the embargo notice is put in front of the article text
                embargo = '{}{}'.format('Embargo Content. Timestamp: ', article.get(EMBARGO).isoformat())
                odbc_item['article_text'] = embargo + odbc_item['article_text']

            docs.append((pub_seq_num, odbc_item))

        return docs
    except Exception as ex:
        raise FormatterError.AAPIpNewsFormatterError(ex, subscriber)