def format(self, article, subscriber, codes=None):
    """Build the NewsML G2 document for an article.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes
    :return [(int, str)]: one-element list holding the publish sequence
        number and the serialised ``newsMessage`` document
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscribers.generate_sequence_number(subscriber)
        is_package = self._is_package(article)

        self._message_attrib.update(self._debug_message_extra)
        news_message = etree.Element('newsMessage', attrib=self._message_attrib)
        self._format_header(article, news_message, pub_seq_num)
        item_set = self._format_item(news_message)

        if is_package:
            package_item = self._format_item_set(article, item_set, 'packageItem')
            self._format_groupset(article, package_item)
        elif article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
            media_item = self._format_item_set(article, item_set, 'newsItem')
            self._format_contentset(article, media_item)
        else:
            # Everything else carries its body as an embedded NITF document.
            nitf = NITFFormatter().get_nitf(article, subscriber, pub_seq_num)
            news_item = self._format_item_set(article, item_set, 'newsItem')
            self._format_content(article, news_item, nitf)

        serialised = etree.tostring(news_message).decode('utf-8')
        return [(pub_seq_num, self.XML_ROOT + serialised)]
    except Exception as ex:
        raise FormatterError.newmsmlG2FormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Build the Belga NewsML 1.2 document for an article.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes
    :return [(int, str)]: one-element list holding the publish sequence
        number and the pretty-printed XML string
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscribers.generate_sequence_number(subscriber)

        # State shared with the ``_format_*`` helpers called below.
        self._newsml = etree.Element('NewsML')
        self._article = article
        self._now = utcnow()
        self._string_now = self._now.strftime(self.DATETIME_FORMAT)
        # SD has no packages, but the legacy Belga NewsML 1.2 output
        # expects a package identifier, so one is derived from the guid.
        self._package_duid = 'pkg_{}'.format(self._article[GUID_FIELD])

        self._format_catalog()
        self._format_newsenvelope()
        self._format_newsitem()

        body = etree.tostring(self._newsml, pretty_print=True).decode('utf-8')
        return [(pub_seq_num, self.XML_ROOT + '\n' + body)]
    except Exception as ex:
        raise FormatterError.newml12FormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Render the article as a KEYWORD/HEADLINE/body text block.

    Single quotes in headline, keyword and body are doubled.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber, only used when reporting an error
    :param list codes: selector codes (not used here)
    :return [(int, str)]: one-element list ``(0, json)`` where the JSON
        object has a single ``article_text`` key
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        headline = get_text(article.get('headline', ''), content='html')
        formatted_doc = {
            'headline': headline.replace("'", "''").replace('\xA0', ' '),
            'keyword': article.get('slugline', '').replace("'", "''"),
        }

        # body formatting
        if article.get(FORMAT) == FORMATS.PRESERVED:
            preserved = get_text(self.append_body_footer(article), content='html')
            formatted_doc['article_text'] = preserved.replace("'", "''")
        elif article.get(FORMAT, FORMATS.HTML) == FORMATS.HTML:
            ascii_body = to_ascii(self.append_body_footer(article))
            wrapped = self.get_wrapped_text_content(ascii_body)
            formatted_doc['article_text'] = wrapped.replace("'", "''")

        self.refine_article_body(formatted_doc, article)

        # Frame the text output according to AAP requirement
        formatted_output = ''.join([
            'KEYWORD: ', formatted_doc.get('keyword', ''), '\r\n',
            'HEADLINE: ', formatted_doc.get('headline', ''), '\r\n',
            ' ', formatted_doc.get('article_text', ''),
        ])
        return [(0, json.dumps({'article_text': formatted_output}))]
    except Exception as ex:
        raise FormatterError.AAPTextFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Build the NewsML 1.2 document for an article.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes
    :return [(int, str)]: one-element list holding the publish sequence
        number and the XML string, serialised using ``self.ENCODING``
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service("subscribers")
        pub_seq_num = subscribers.generate_sequence_number(subscriber)

        # Timestamps are stored on the instance for the helper methods.
        self.now = utcnow()
        self.string_now = self.now.strftime("%Y%m%dT%H%M%S+0000")

        newsml = etree.Element("NewsML")
        catalog_attrs = {"Href": "http://www.iptc.org/std/catalog/catalog.IptcMasterCatalog.xml"}
        SubElement(newsml, "Catalog", catalog_attrs)
        news_envelope = SubElement(newsml, "NewsEnvelope")
        news_item = SubElement(newsml, "NewsItem")

        self._format_news_envelope(article, news_envelope, pub_seq_num)
        self._format_identification(article, news_item)
        self._format_news_management(article, news_item)
        self._format_news_component(article, news_item)

        raw = etree.tostring(newsml, encoding=self.ENCODING)
        return [(pub_seq_num, self.XML_ROOT + raw.decode(self.ENCODING))]
    except Exception as ex:
        raise FormatterError.newml12FormatterError(ex, subscriber)
def format(self, article, subscriber):
    """Build the NewsML 1.2 document for an article.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :return [(int, str)]: one-element list holding the publish sequence
        number and the XML string
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscribers.generate_sequence_number(subscriber)

        root = etree.Element("NewsML")
        SubElement(root, "Catalog",
                   {'Href': 'http://www.iptc.org/std/catalog/catalog.IptcMasterCatalog.xml'})
        envelope = SubElement(root, "NewsEnvelope")
        item = SubElement(root, "NewsItem")

        self._format_news_envelope(article, envelope, pub_seq_num)
        self._format_identification(article, item)
        self._format_news_management(article, item)
        self._format_news_component(article, item)

        xml_text = etree.tostring(root).decode('utf-8')
        return [(pub_seq_num, self.XML_ROOT + xml_text)]
    except Exception as ex:
        raise FormatterError.newml12FormatterError(ex, subscriber)
def format(self, article, subscriber):
    """Build the parameter dictionary for the SMS InsertAlerts stored procedure.

    Single quotes in the headline and story text are doubled. When the
    article carries an embargo, an embargo notice is prefixed to the story
    text; the caller's ``article`` is left untouched so that formatting the
    same article again does not stack notices.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :return [(int, dict)]: one-element list holding the publish sequence
        number and the constructed parameter dictionary
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service(
            'subscribers').generate_sequence_number(subscriber)

        # A missing, ``None`` or empty category list yields a ``None`` category
        # rather than an IndexError/TypeError.
        categories = article.get('anpa_category') or [{}]
        odbc_item = {
            'Sequence': pub_seq_num,
            'Category': categories[0].get('qcode'),
            'Headline': article.get('headline', '').replace('\'', '\'\''),
            'Priority': map_priority(article.get('priority')),
        }

        # Work on a local copy of the body instead of writing back into
        # the article that was passed in.
        body_html = article.get('body_html') or ''
        if article.get(EMBARGO):
            embargo = '{}{}'.format('Embargo Content. Timestamp: ',
                                    article.get(EMBARGO).isoformat())
            body_html = embargo + body_html

        if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
            odbc_item['StoryText'] = body_html.replace('\'', '\'\'')  # @article_text
        elif article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
            # Name the parser so the result does not depend on which
            # parsers happen to be installed.
            soup = BeautifulSoup(body_html, 'html.parser')
            odbc_item['StoryText'] = soup.text.replace('\'', '\'\'')

        odbc_item['ident'] = '0'
        return [(pub_seq_num, odbc_item)]
    except Exception as ex:
        raise FormatterError.AAPSMSFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """Serialise the article as a ninjs JSON document.

    :param dict article: item to be formatted; must contain ``_id``,
        ``_current_version`` and ``type``
    :param dict subscriber: subscriber the output is generated for
    :return [(int, str)]: one-element list holding the publish sequence
        number and the JSON string
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)

        ninjs = {
            '_id': article['_id'],
            'version': str(article['_current_version']),
            'type': self._get_type(article),
        }

        # The byline is best effort: a failure here must not abort the
        # whole format, but interpreter-exit signals are not swallowed.
        try:
            ninjs['byline'] = self._get_byline(article)
        except Exception:
            pass

        for copy_property in self.direct_copy_properties:
            if copy_property in article:
                ninjs[copy_property] = article[copy_property]

        if 'description' in article:
            ninjs['description_text'] = article['description']

        if article['type'] == 'composite':
            ninjs['associations'] = self._get_associations(article)

        return [(pub_seq_num, json.dumps(ninjs, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.ninjsFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """Format the article as required by the subscriber.

    Adds a plain-text ``body_text`` derived from ``body_html`` to the
    article and, when a locator can be mapped from the first category,
    sets ``place``. Both are written into the ``article`` that was passed in.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return [(int, str)]: one-element list holding the publish sequence
        number and the article serialised as JSON
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service(
            'subscribers').generate_sequence_number(subscriber)

        body_html = article.get('body_html', '').strip('\r\n')
        # Name the parser so the result does not depend on which parsers
        # happen to be installed.
        soup = BeautifulSoup(body_html, 'html.parser')

        for br in soup.find_all('br'):
            # remove the <br> tag
            br.replace_with(' {}'.format(br.get_text()))

        for p in soup.find_all('p'):
            # replace <p> tag with two carriage return
            p.replace_with('{}\r\n\r\n'.format(p.get_text()))

        article['body_text'] = soup.get_text()

        # get the first category and derive the locator
        category = next((iter(article.get('anpa_category', []))), None)
        if category:
            locator = LocatorMapper().map(article, category.get('qcode').upper())
            if locator:
                article['place'] = [{'qcode': locator, 'name': locator}]

        return [(pub_seq_num, superdesk.json.dumps(
            article, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Build the NewsML 1.2 document for an article.

    The helpers operate on a deep copy, so the caller's article is not
    modified by this method.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes
    :return [(int, str)]: one-element list holding the publish sequence
        number and the XML string
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        working_copy = deepcopy(article)
        subscribers = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscribers.generate_sequence_number(subscriber)

        # Timestamps are stored on the instance for the helper methods.
        self.now = utcnow()
        self.string_now = self.now.strftime('%Y%m%dT%H%M%S+0000')

        root = etree.Element("NewsML", {'Version': '1.2'})
        catalog_href = 'http://about.reuters.com/newsml/vocabulary/catalog-reuters-3rdParty-master_catalog.xml'
        SubElement(root, "Catalog", {'Href': catalog_href})
        envelope = SubElement(root, "NewsEnvelope")
        item = SubElement(root, "NewsItem")

        self._format_news_envelope(working_copy, envelope, pub_seq_num)
        self._format_identification(working_copy, item)
        self._format_news_management(working_copy, item)
        self._format_news_component(working_copy, item)

        xml_text = etree.tostring(root).decode('utf-8')
        return [(pub_seq_num, self.XML_ROOT + xml_text)]
    except Exception as ex:
        raise FormatterError.newml12FormatterError(ex, subscriber)
def format(self, article, subscriber):
    """Serialise the article as a ninjs JSON document.

    :param dict article: item to be formatted; must contain ``_id``,
        ``_current_version`` and the item type
    :param dict subscriber: subscriber the output is generated for
    :return [(int, str)]: one-element list holding the publish sequence
        number and the JSON string
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)

        ninjs = {
            '_id': article['_id'],
            'version': str(article['_current_version']),
            'type': self._get_type(article),
        }

        # The byline is best effort: a failure here must not abort the
        # whole format, but interpreter-exit signals are not swallowed.
        try:
            ninjs['byline'] = self._get_byline(article)
        except Exception:
            pass

        # Only emit ``located`` when the dateline carries a non-empty city.
        city = article.get('dateline', {}).get('located', {}).get('city')
        if city:
            ninjs['located'] = city

        for copy_property in self.direct_copy_properties:
            if copy_property in article:
                ninjs[copy_property] = article[copy_property]

        if 'description' in article:
            ninjs['description_text'] = article['description']

        if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            ninjs['associations'] = self._get_associations(article)

        if article.get(EMBARGO):
            ninjs['embargoed'] = article.get(EMBARGO).isoformat()

        return [(pub_seq_num, json.dumps(ninjs, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.ninjsFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Build the NewsML G2 document for an article.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for; also
        stored on ``self.subscriber``
    :param list codes: selector codes
    :return [(int, str)]: one-element list holding the publish sequence
        number and the pretty-printed ``newsMessage`` document
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        self.subscriber = subscriber
        subscribers = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscribers.generate_sequence_number(subscriber)
        is_package = self._is_package(article)

        news_message = etree.Element(
            'newsMessage',
            attrib=self._debug_message_extra,
            nsmap=self._message_nsmap,
        )
        self._format_header(article, news_message, pub_seq_num)
        item_set = self._format_item(news_message)

        if is_package:
            package_item = self._format_item_set(article, item_set, 'packageItem')
            self._format_groupset(article, package_item)
        elif article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
            media_item = self._format_item_set(article, item_set, 'newsItem')
            self._format_contentset(article, media_item)
        else:
            # Everything else carries its body as an embedded NITF document.
            nitf = NITFFormatter().get_nitf(article, subscriber, pub_seq_num)
            news_item = self._format_item_set(article, item_set, 'newsItem')
            self._format_content(article, news_item, nitf)

        sd_etree.fix_html_void_elements(news_message)
        serialised = etree.tostring(news_message, pretty_print=True).decode('utf-8')
        return [(pub_seq_num, self.XML_ROOT + serialised)]
    except Exception as ex:
        raise FormatterError.newmsmlG2FormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Build the NewsML 1.2 document for an article.

    The helpers operate on a deep copy, so the caller's article is not
    modified by this method.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes
    :return [(int, str)]: one-element list holding the publish sequence
        number and the XML string
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        working_copy = deepcopy(article)
        subscribers = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscribers.generate_sequence_number(subscriber)

        root = etree.Element("NewsML", {'Version': '1.2'})
        catalog_href = 'http://about.reuters.com/newsml/vocabulary/catalog-reuters-3rdParty-master_catalog.xml'
        SubElement(root, "Catalog", {'Href': catalog_href})
        envelope = SubElement(root, "NewsEnvelope")
        item = SubElement(root, "NewsItem")

        self._format_news_envelope(working_copy, envelope, pub_seq_num)
        self._format_identification(working_copy, item)
        self._format_news_management(working_copy, item)
        self._format_news_component(working_copy, item)

        xml_text = etree.tostring(root).decode('utf-8')
        return [(pub_seq_num, self.XML_ROOT + xml_text)]
    except Exception as ex:
        raise FormatterError.newml12FormatterError(ex, subscriber)
def format(self, article, subscriber):
    """Format the article as required by the subscriber.

    Adds a plain-text ``body_text`` derived from ``body_html`` to the
    article and, when a locator can be mapped from the first category,
    sets ``place``. Both are written into the ``article`` that was passed in.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return [(int, str)]: one-element list holding the publish sequence
        number and the article serialised as JSON
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)

        body_html = article.get('body_html', '').strip('\r\n')
        # Name the parser so the result does not depend on which parsers
        # happen to be installed.
        soup = BeautifulSoup(body_html, 'html.parser')

        for br in soup.find_all('br'):
            # remove the <br> tag
            br.replace_with(' {}'.format(br.get_text()))

        for p in soup.find_all('p'):
            # replace <p> tag with two carriage return
            p.replace_with('{}\r\n\r\n'.format(p.get_text()))

        article['body_text'] = soup.get_text()

        # get the first category and derive the locator
        category = next((iter(article.get('anpa_category', []))), None)
        if category:
            locator = LocatorMapper().map(article, category.get('qcode').upper())
            if locator:
                article['place'] = [{'qcode': locator, 'name': locator}]

        return [(pub_seq_num, superdesk.json.dumps(article, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Render the e-mail subject, text body and (optionally) HTML body.

    Works on a deep copy of ``article``. ``message_html`` is rendered only
    for HTML-format articles that carry a dateline text; otherwise it is
    ``None``.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes (not used here)
    :return [(int, str)]: one-element list holding the publish sequence
        number and a JSON object with ``message_html``, ``message_text``
        and ``message_subject``
    :raises FormatterError: if rendering fails
    """
    formatted_article = deepcopy(article)
    pub_seq_num = superdesk.get_resource_service(
        'subscribers').generate_sequence_number(subscriber)
    doc = {}
    try:
        # If there is a dateline inject it into the body
        # NOTE(review): an HTML article without a dateline text gets
        # ``message_html = None`` here - confirm that is intended.
        if formatted_article.get(FORMAT) == FORMATS.HTML and \
                formatted_article.get('dateline', {}).get('text'):
            soup = BeautifulSoup(formatted_article.get('body_html'), "html.parser")
            ptag = soup.find('p')
            if ptag is not None:
                ptag.insert(0, NavigableString(
                    '{} '.format(formatted_article.get('dateline').get('text'))))
                formatted_article['body_html'] = str(soup)
            doc['message_html'] = render_template(
                'email_article_body.html', article=formatted_article)
        else:
            doc['message_html'] = None

        doc['message_text'] = render_template('email_article_body.txt',
                                              article=formatted_article)
        doc['message_subject'] = render_template(
            'email_article_subject.txt', article=formatted_article)
    except Exception as ex:
        # Report the failing subscriber as the destination, as the other
        # formatters do; the original passed the FormatterError class itself.
        raise FormatterError.EmailFormatterError(ex, subscriber)
    return [(pub_seq_num, json.dumps(doc))]
def format(self, original_article, subscriber, codes=None, encoding="us-ascii"):
    """Build the NITF document for an article in text and encoded form.

    The work is done on a deep copy of ``original_article``. The
    ``encoding`` argument is not referenced in this method; the
    module-level ``ENCODING`` is what gets used for the encoded output.

    :param dict original_article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes (not used here)
    :param str encoding: see note above
    :return [dict]: one-element list with ``published_seq_num``,
        ``formatted_item`` (unicode) and ``encoded_item`` (bytes)
    :raises FormatterError: if any step of the formatting fails
    """
    global tz

    article = deepcopy(original_article)
    self._populate_metadata(article)
    if tz is None:
        # first time this method is launched
        # we set timezone and NTB specific filter
        tz = pytz.timezone(superdesk.app.config['DEFAULT_TIMEZONE'])

    try:
        if article.get('body_html'):
            article['body_html'] = article['body_html'].replace('<br>', '<br />')

        subscribers = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscribers.generate_sequence_number(subscriber)
        nitf = self.get_nitf(article, subscriber, pub_seq_num)

        try:
            nitf.attrib['baselang'] = _get_language(article)
        except KeyError:
            pass

        declaration = self.XML_DECLARATION + '\n'
        encoded = etree.tostring(nitf, encoding=ENCODING, xml_declaration=False, pretty_print=True)
        # formatted_item can be used for preview, so we keep unicode version there
        unicode_xml = etree.tostring(nitf, encoding="unicode")

        return [{
            'published_seq_num': pub_seq_num,
            'formatted_item': declaration + unicode_xml,
            'encoded_item': declaration.encode(ENCODING) + encoded,
        }]
    except Exception as ex:
        app.sentry.captureException()
        raise FormatterError.nitfFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """Serialise the article as a ninjs JSON document.

    :param dict article: item to be formatted; must contain ``_id`` and
        the item type
    :param dict subscriber: subscriber the output is generated for
    :return [(int, str)]: one-element list holding the publish sequence
        number and the JSON string
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)

        ninjs = {
            '_id': article['_id'],
            'version': str(article.get(config.VERSION, 1)),
            'type': self._get_type(article),
        }

        # The byline is best effort: a failure here must not abort the
        # whole format, but interpreter-exit signals are not swallowed.
        try:
            ninjs['byline'] = self._get_byline(article)
        except Exception:
            pass

        located = article.get('dateline', {}).get('located', {})
        if located:
            ninjs['located'] = located.get('city', '')

        for copy_property in self.direct_copy_properties:
            if article.get(copy_property) is not None:
                ninjs[copy_property] = article[copy_property]

        if article.get('body_html'):
            ninjs['body_html'] = self.append_body_footer(article)

        if article.get('description'):
            # NOTE(review): this stores the footer-appended body, not the
            # description itself - confirm that is intended.
            ninjs['description_html'] = self.append_body_footer(article)

        if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            ninjs['associations'] = self._get_associations(article)
        elif article.get('associations', {}):
            ninjs['associations'] = self._format_related(article, subscriber)

        if article.get(EMBARGO):
            ninjs['embargoed'] = article.get(EMBARGO).isoformat()

        # A missing or falsy priority falls back to 5.
        ninjs['priority'] = article.get('priority') or 5

        if article.get('subject'):
            ninjs['subject'] = self._get_subject(article)

        if article.get('anpa_category'):
            ninjs['service'] = self._get_service(article)

        if article.get('renditions'):
            ninjs['renditions'] = self._get_renditions(article)

        # The abstract wins over an explicit description_text.
        if article.get('abstract'):
            ninjs['description_text'] = article.get('abstract')
        elif article.get('description_text'):
            ninjs['description_text'] = article.get('description_text')

        return [(pub_seq_num, json.dumps(ninjs, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.ninjsFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Serialise the result of ``_transform_to_ninjs`` as JSON.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes (not used here)
    :return [(int, str)]: one-element list holding the publish sequence
        number and the JSON string
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service('subscribers')
        seq_num = subscribers.generate_sequence_number(subscriber)
        document = self._transform_to_ninjs(article, subscriber)
        payload = json.dumps(document, default=json_serialize_datetime_objectId)
        return [(seq_num, payload)]
    except Exception as ex:
        raise FormatterError.ninjsFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """Serialise the NITF tree built by ``get_nitf`` as an XML string.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :return [(int, str)]: one-element list holding the publish sequence
        number and ``XML_ROOT`` followed by the serialised document
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service('subscribers')
        seq_num = subscribers.generate_sequence_number(subscriber)
        tree = self.get_nitf(article, subscriber, seq_num)
        xml_text = etree.tostring(tree).decode('utf-8')
        return [(seq_num, self.XML_ROOT + xml_text)]
    except Exception as ex:
        raise FormatterError.nitfFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Serialise the NITF tree built by ``get_nitf`` as an XML string.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes (not used here)
    :return [(int, str)]: one-element list holding the publish sequence
        number and ``XML_ROOT`` followed by the serialised document
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service('subscribers')
        seq_num = subscribers.generate_sequence_number(subscriber)
        tree = self.get_nitf(article, subscriber, seq_num)
        xml_text = etree.tostring(tree).decode('utf-8')
        return [(seq_num, self.XML_ROOT + xml_text)]
    except Exception as ex:
        raise FormatterError.nitfFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """Serialise the article as a ninjs JSON document.

    :param dict article: item to be formatted; must contain ``_id`` and
        the item type
    :param dict subscriber: subscriber the output is generated for
    :return [(int, str)]: one-element list holding the publish sequence
        number and the JSON string
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service(
            'subscribers').generate_sequence_number(subscriber)

        ninjs = {
            '_id': article['_id'],
            'version': str(article.get(config.VERSION, 1)),
            'type': self._get_type(article),
        }

        # The byline is best effort: a failure here must not abort the
        # whole format, but interpreter-exit signals are not swallowed.
        try:
            ninjs['byline'] = self._get_byline(article)
        except Exception:
            pass

        located = article.get('dateline', {}).get('located', {})
        if located:
            ninjs['located'] = located.get('city', '')

        for copy_property in self.direct_copy_properties:
            if article.get(copy_property) is not None:
                ninjs[copy_property] = article[copy_property]

        if article.get('body_html'):
            ninjs['body_html'] = self.append_body_footer(article)

        if article.get('description'):
            # NOTE(review): this stores the footer-appended body, not the
            # description itself - confirm that is intended.
            ninjs['description_html'] = self.append_body_footer(article)

        if article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            ninjs['associations'] = self._get_associations(article)
        elif article.get('associations', {}):
            ninjs['associations'] = self._format_related(article, subscriber)

        if article.get(EMBARGO):
            ninjs['embargoed'] = article.get(EMBARGO).isoformat()

        # A missing or falsy priority falls back to 5.
        ninjs['priority'] = article.get('priority') or 5

        if article.get('subject'):
            ninjs['subject'] = self._get_subject(article)

        if article.get('anpa_category'):
            ninjs['service'] = self._get_service(article)

        if article.get('renditions'):
            ninjs['renditions'] = self._get_renditions(article)

        return [(pub_seq_num,
                 json.dumps(ninjs, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.ninjsFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Serialise the NITF tree built by ``get_nitf`` as ASCII text.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes (not used here)
    :return [dict]: one-element list with ``published_seq_num``,
        ``formatted_item`` and ``item_encoding`` (``'ascii'``)
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service('subscribers')
        seq_num = subscribers.generate_sequence_number(subscriber)
        tree = self.get_nitf(article, subscriber, seq_num)
        ascii_xml = etree.tostring(tree, encoding='ascii').decode('ascii')
        return [{
            'published_seq_num': seq_num,
            'formatted_item': ascii_xml,
            'item_encoding': 'ascii',
        }]
    except Exception as ex:
        raise FormatterError.nitfFormatterError(ex, subscriber)
def format(self, article, destination, selector_codes=None):
    """Serialise the NITF tree built by ``get_nitf`` as an XML string.

    :param dict article: item to be formatted
    :param dict destination: output channel the sequence number is
        generated for
    :param selector_codes: not used here
    :return (int, str): publish sequence number and ``XML_ROOT`` followed
        by the serialised document
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        channels = superdesk.get_resource_service('output_channels')
        seq_num = channels.generate_sequence_number(destination)
        tree = self.get_nitf(article, destination, seq_num)
        xml_text = etree.tostring(tree).decode('utf-8')
        return seq_num, self.XML_ROOT + xml_text
    except Exception as ex:
        raise FormatterError.nitfFormatterError(ex, destination)
def format(self, article, subscriber):
    """Format the article as required by the subscriber.

    Writes ``slugline``, ``body_text`` and, when a first category exists,
    ``place`` / ``first_category`` / ``first_subject`` into the article
    that was passed in, then wraps the serialised article in an item
    whose text fields have single quotes doubled.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return [(int, str)]: one-element list holding the publish sequence
        number and the item serialised as JSON
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        article['slugline'] = self.append_legal(article=article, truncate=True)
        subscribers = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscribers.generate_sequence_number(subscriber)

        body_html = self.append_body_footer(article).strip('\r\n')
        soup = BeautifulSoup(body_html, 'html.parser')
        paragraphs = soup.find_all('p')

        if not paragraphs:
            # No paragraphs at all: flatten the <br> tags of the whole body.
            for line_break in soup.find_all('br'):
                line_break.replace_with(' {}'.format(line_break.get_text()))

        for paragraph in paragraphs:
            # Flatten the <br> tags inside the paragraph first ...
            for line_break in paragraph.find_all('br'):
                line_break.replace_with(' {}'.format(line_break.get_text()))

            # ... then replace the <p> with its text plus two carriage
            # returns, dropping paragraphs that are empty.
            para_text = paragraph.get_text().strip()
            replacement = '{}\r\n\r\n'.format(para_text) if para_text != '' else ''
            paragraph.replace_with(replacement)

        article['body_text'] = re.sub(' +', ' ', soup.get_text())

        # get the first category and derive the locator
        category = next(iter(article.get('anpa_category', [])), None)
        if category:
            locator = LocatorMapper().map(article, category.get('qcode').upper())
            if locator:
                article['place'] = [{'qcode': locator, 'name': locator}]
            article['first_category'] = category
            article['first_subject'] = set_subject(category, article)

        serialised_article = superdesk.json.dumps(article, default=json_serialize_datetime_objectId)
        odbc_item = {
            'id': article.get(config.ID_FIELD),
            'version': article.get(config.VERSION),
            ITEM_TYPE: article.get(ITEM_TYPE),
            PACKAGE_TYPE: article.get(PACKAGE_TYPE, ''),
            'headline': article.get('headline', '').replace('\'', '\'\''),
            'slugline': article.get('slugline', '').replace('\'', '\'\''),
            'data': serialised_article.replace('\'', '\'\''),
        }

        return [(pub_seq_num, json.dumps(odbc_item, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Serialise the NITF tree built by ``get_nitf``, pretty-printed.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes (not used here)
    :return [(int, str)]: one-element list holding the publish sequence
        number and the XML string, serialised using ``self.ENCODING``
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service('subscribers')
        seq_num = subscribers.generate_sequence_number(subscriber)
        tree = self.get_nitf(article, subscriber, seq_num)
        raw = etree.tostring(tree, pretty_print=True, encoding=self.ENCODING)
        return [(seq_num, self.XML_ROOT + raw.decode(self.ENCODING))]
    except Exception as ex:
        raise FormatterError.nitfFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Convert the article with ``Converter().create_idml``.

    :param dict article: item to be converted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes (not used here)
    :return [dict]: one-element list with ``published_seq_num``,
        ``encoded_item`` (the converter output) and an empty
        ``formatted_item``
    :raises FormatterError: if sequence generation or conversion fails
    """
    try:
        subscribers = superdesk.get_resource_service("subscribers")
        publish_seq_num = subscribers.generate_sequence_number(subscriber)
        idml_bytes = Converter().create_idml(article)
    except Exception as e:
        raise FormatterError.IDMLFormatterError(e, subscriber)

    result = {
        "published_seq_num": publish_seq_num,
        "encoded_item": idml_bytes,
        "formatted_item": "",
    }
    return [result]
def format_for_source(self, article, subscriber, source, codes=None):
    """Build one JSON-encoded item per category of the article.

    ``article['source']`` is overwritten with ``source`` for every
    category processed. Single quotes in the article text are doubled.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param str source: source written into the article and appended to
        the text of a last take
    :param list codes: selector codes, forwarded to ``get_odbc_item``
    :return [(int, str)]: one ``(sequence number, json)`` tuple per category
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        pass_through = article.get('auto_publish', False)
        docs = []
        for category in self._get_category_list(article.get('anpa_category')):
            article['source'] = source
            pub_seq_num, odbc_item = self.get_odbc_item(
                article, subscriber, category, codes, pass_through)
            is_last_take = self.is_last_take(article)

            # Only the last take gets the body footer appended.
            if is_last_take:
                body_source = self.append_body_footer(article)
            else:
                body_source = article.get('body_html', '')

            if article.get(FORMAT) == FORMATS.PRESERVED:  # @article_text
                soup = BeautifulSoup(body_source, "html.parser")
                odbc_item['article_text'] = soup.get_text().replace('\'', '\'\'')
            else:
                body = self.get_text_content(to_ascii(body_source))
                inject_dateline = (
                    self.is_first_part(article)
                    and 'dateline' in article
                    and 'text' in article.get('dateline', {})
                    and not pass_through
                )
                # NOTE(review): the prefix test is one character while the
                # slice drops three - confirm against the upstream file.
                if inject_dateline and body.startswith(' '):
                    body = ' {} {}'.format(article.get('dateline').get('text'), body[3:])
                odbc_item['article_text'] = body.replace('\'', '\'\'')

            if self.is_first_part(article) and not pass_through:
                self.add_ednote(odbc_item, article)
                self.add_embargo(odbc_item, article)
                self.add_byline(odbc_item, article)

            if is_last_take:
                odbc_item['article_text'] += '\r\n' + source
            else:
                odbc_item['article_text'] += '\r\nMORE'

            sign_off = article.get('sign_off', '') or ''
            if len(sign_off) > 0:
                odbc_item['article_text'] += ' ' + sign_off

            for upper_key in ('category', 'selector_codes'):
                odbc_item[upper_key] = odbc_item.get(upper_key, '').upper()

            docs.append((pub_seq_num, json.dumps(odbc_item)))
        return docs
    except Exception as ex:
        raise FormatterError.AAPNewscentreFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """Serialise the NITF tree with an XML declaration and DOCTYPE comment.

    ``body.end`` elements are stripped from the tree before it is
    serialised.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes (not used here)
    :return [dict]: one-element list with ``published_seq_num`` and
        ``formatted_item``
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscribers.generate_sequence_number(subscriber)
        nitf = self.get_nitf(article, subscriber, pub_seq_num)
        strip_elements(nitf, 'body.end')
        nitf_string = etree.tostring(nitf, encoding='utf-8').decode()

        header_block = "\r\n".join([
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<!-- <!DOCTYPE nitf SYSTEM "./nitf-3-3.dtd"> -->',
        ])
        document = '{}\r\n{}'.format(header_block, nitf_string)
        return [{
            'published_seq_num': pub_seq_num,
            'formatted_item': document.replace(' \n', self.line_ender),
        }]
    except Exception as ex:
        raise FormatterError.nitfFormatterError(ex, subscriber)
def format(self, article, destination):
    """Build a NITF document for the article and return it as a string.

    :param dict article: item to be formatted; ``body_html`` and ``guid``
        are read directly and must be present
    :param dict destination: destination, only used when reporting an error
    :return str: ``XML_ROOT`` followed by the serialised ``nitf`` element
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        nitf = etree.Element("nitf")
        head = SubElement(nitf, "head")
        body = SubElement(nitf, "body")
        body_head = SubElement(body, "body.head")
        body_content = SubElement(body, "body.content")
        body_content.text = article['body_html']
        body_end = SubElement(body, "body.end")

        # NOTE(review): this element is never attached to the tree, so it
        # does not appear in the output - confirm where ``doc-id`` belongs.
        etree.Element('doc-id', attrib={'id-string': article['guid']})

        self.__format_head(article, head)
        self.__format_body_head(article, body_head)
        self.__format_body_end(article, body_end)

        # ``tostring`` returns bytes; decode them instead of calling
        # ``str()``, which on Python 3 would embed the ``b'...'`` repr.
        return self.XML_ROOT + etree.tostring(nitf).decode('utf-8')
    except Exception as ex:
        raise FormatterError.nitfFormatterError(ex, destination)
def format_for_source(self, article, subscriber, source, codes=None):
    """Build the parameters passed to the IPNews InsertNews stored procedure.

    One JSON-encoded parameter dictionary is produced per category of the
    article. ``article['source']`` is overwritten with ``source`` for
    every category processed.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param str source: source written into the article
    :param list codes: selector codes, forwarded to ``get_odbc_item``
    :return [(int, str)]: one ``(sequence number, json)`` tuple per category
    :raises FormatterError: if any step of the formatting fails
    """
    pass_through = article.get('auto_publish', False)
    try:
        docs = []
        for category in self._get_category_list(article.get('anpa_category')):
            # All NZN sourced content is AAP content for the AAP output formatted
            article['source'] = source
            pub_seq_num, odbc_item = self.get_odbc_item(article, subscriber, category, codes, pass_through)

            if article.get(FORMAT) == FORMATS.PRESERVED:  # @article_text
                preserved = get_text(self.append_body_footer(article))
                odbc_item['article_text'] = preserved.replace('\'', '\'\'')
                odbc_item['texttab'] = 't'
            elif article.get(FORMAT, FORMATS.HTML) == FORMATS.HTML:
                ascii_body = to_ascii(self.append_body_footer(article))
                body = self.get_wrapped_text_content(ascii_body).replace('\'', '\'\'')

                # if we have a dateline inject it
                has_dateline = 'dateline' in article and 'text' in article.get('dateline', {})
                # NOTE(review): the prefix test is one character while the
                # slice drops three - confirm against the upstream file.
                if has_dateline and not pass_through and body.startswith(' '):
                    dateline_text = article.get('dateline').get('text').replace('\'', '\'\'')
                    body = ' {} {}'.format(dateline_text, body[3:])

                odbc_item['article_text'] = body
                odbc_item['texttab'] = 'x'

            if not pass_through:
                self.add_ednote(odbc_item, article)
                self.add_byline(odbc_item, article)

            odbc_item['article_text'] += '\r\n' + article.get('source', '')
            sign_off = article.get('sign_off', '') or ''
            if len(sign_off) > 0:
                odbc_item['article_text'] += ' ' + sign_off

            odbc_item['service_level'] = get_service_level(category, article)  # @service_level
            odbc_item['wordcount'] = article.get('word_count') or 0  # @wordcount
            odbc_item['priority'] = map_priority(article.get('priority'))  # @priority

            docs.append((pub_seq_num, json.dumps(odbc_item)))
        return docs
    except Exception as ex:
        raise FormatterError.AAPIpNewsFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """Build the NewsML document for an article.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :return [(int, str)]: one-element list holding the publish sequence
        number and the XML string
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        subscribers = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscribers.generate_sequence_number(subscriber)

        root = etree.Element("NewsML")
        SubElement(root, "Catalog",
                   {'Href': 'http://www.aap.com.au/xml-res/aap-master-catalog.xml'})
        envelope = SubElement(root, "NewsEnvelope")
        item = SubElement(root, "NewsItem")

        self._format_news_envelope(article, envelope, pub_seq_num)
        self._format_identification(article, item)
        self._format_news_management(article, item)
        self._format_news_component(article, item)

        xml_text = etree.tostring(root).decode('utf-8')
        return [(pub_seq_num, self.XML_ROOT + xml_text)]
    except Exception as ex:
        raise FormatterError.newml12FormatterError(ex, subscriber)
def format(self, article, destination, selector_codes=None):
    """Build the NewsML G2 document for an article.

    A news item is only added for ``text`` and ``preformatted`` articles.

    :param dict article: item to be formatted; ``type`` must be present
    :param dict destination: output channel the sequence number is
        generated for
    :param selector_codes: not used here
    :return (int, str): publish sequence number and the serialised
        ``newsMessage`` document
    :raises FormatterError: if any step of the formatting fails
    """
    try:
        channels = superdesk.get_resource_service('output_channels')
        pub_seq_num = channels.generate_sequence_number(destination)
        nitf = NITFFormatter().get_nitf(article, destination, pub_seq_num)

        self._message_attrib.update(self._debug_message_extra)
        news_message = etree.Element('newsMessage', attrib=self._message_attrib)
        self._format_header(article, news_message, pub_seq_num)
        item_set = self._format_item(news_message)

        if article['type'] in ('text', 'preformatted'):
            self._format_newsitem(article, item_set, nitf)

        serialised = etree.tostring(news_message).decode('utf-8')
        return pub_seq_num, self.XML_ROOT + serialised
    except Exception as ex:
        raise FormatterError.newmsmlG2FormatterError(ex, destination)
def format(self, article, subscriber, codes=None):
    """Render the e-mail subject, text body and (optionally) HTML body.

    Works on a deep copy of ``article``. ``message_html`` is rendered for
    HTML-format articles and is ``None`` otherwise.

    :param dict article: item to be formatted
    :param dict subscriber: subscriber the output is generated for
    :param list codes: selector codes (not used here)
    :return [(int, str)]: one-element list holding the publish sequence
        number and a JSON object with ``message_html``, ``message_text``
        and ``message_subject``
    :raises FormatterError: if rendering fails
    """
    formatted_article = deepcopy(article)
    pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
    doc = {}
    try:
        if formatted_article.get(FORMAT) == FORMATS.HTML:
            if formatted_article.get('dateline', {}).get('text'):
                # If there is a dateline inject it into the body
                self._inject_dateline(formatted_article)
            doc['message_html'] = render_template('email_article_body.html', article=formatted_article)
        else:
            doc['message_html'] = None

        doc['message_text'] = render_template('email_article_body.txt', article=formatted_article)
        doc['message_subject'] = render_template('email_article_subject.txt', article=formatted_article)
    except Exception as ex:
        # Report the failing subscriber as the destination, as the other
        # formatters do; the original passed the FormatterError class itself.
        raise FormatterError.EmailFormatterError(ex, subscriber)
    return [(pub_seq_num, json.dumps(doc))]
def format(self, article, subscriber):
    """
    Serialise the article to JSON after deriving a plain-text ``body_text`` from ``body_html``

    Note that ``article`` is modified in place: ``body_text`` is written onto it.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return [(int, str)]: one-element list holding the publish sequence number and the JSON string
    :raises FormatterError: if any formatting step fails
    """
    try:
        subscriber_service = superdesk.get_resource_service("subscribers")
        pub_seq_num = subscriber_service.generate_sequence_number(subscriber)

        markup = article.get("body_html", "").strip("\r\n")
        soup = BeautifulSoup(markup)

        # Each <br> becomes a space followed by whatever text the tag carried.
        for line_break in soup.find_all("br"):
            line_break.replace_with(" {}".format(line_break.get_text()))

        # Each <p> becomes its text followed by two CRLF pairs.
        for paragraph in soup.find_all("p"):
            paragraph.replace_with("{}\r\n\r\n".format(paragraph.get_text()))

        article["body_text"] = soup.get_text()

        payload = superdesk.json.dumps(article, default=json_serialize_datetime_objectId)
        return [(pub_seq_num, payload)]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, destination, selector_codes=None):
    """
    Create article in NewsML format

    :param dict article: article to be formatted
    :param dict destination: output channel the article is formatted for
    :param selector_codes: accepted for interface compatibility, not read here
    :return (int, str): publish sequence number and the serialised document
        prefixed with ``XML_ROOT``
    :raises FormatterError: if any formatting step fails
    """
    try:
        channel_service = superdesk.get_resource_service("output_channels")
        pub_seq_num = channel_service.generate_sequence_number(destination)

        # Skeleton first: Catalog, NewsEnvelope and NewsItem are siblings under the root.
        root = etree.Element("NewsML")
        SubElement(root, "Catalog", {"Href": "http://www.aap.com.au/xml-res/aap-master-catalog.xml"})
        envelope = SubElement(root, "NewsEnvelope")
        item = SubElement(root, "NewsItem")

        self._format_news_envelope(article, envelope, pub_seq_num)
        self._format_identification(article, item)
        self._format_news_management(article, item)
        self._format_news_component(article, item)

        xml_text = etree.tostring(root).decode("utf-8")
        return pub_seq_num, self.XML_ROOT + xml_text
    except Exception as ex:
        raise FormatterError.newml12FormatterError(ex, destination)
def format(self, article, subscriber, codes=None):
    """
    Create article in ninjs format from the merged versions of the item

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :param list codes: selector codes, not read by this formatter
    :return [(int, str)]: one-element list holding the publish sequence number and the JSON string
    :raises FormatterError: if any formatting step fails
    """
    try:
        subscriber_service = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscriber_service.generate_sequence_number(subscriber)

        merged = self._merge_versions(article)
        ninjs = self._transform_to_ninjs(merged, subscriber)
        ninjs['extra'] = {'published_id': article.get('_id')}

        # If the item was ingested and auto-published, the guid is set to the ingest_id
        # which in FileFeeds will be the path to the file that was ingested
        # [STTNHUB-58] - Auto published ingested items should preserve id
        # (https://github.com/superdesk/superdesk-core/pull/1579)
        # Change the guid back to using the family_id of the item
        guid = ninjs.get(GUID_FIELD) or ''
        if guid.startswith('/mnt/'):
            ninjs[GUID_FIELD] = article.get(FAMILY_ID)

        payload = json.dumps(ninjs, default=json_serialize_datetime_objectId)
        return [(pub_seq_num, payload)]
    except Exception as ex:
        raise FormatterError.ninjsFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """
    Create article in ninjs format, restoring picture descriptions in the output

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :param list codes: selector codes, not read by this formatter
    :return [(int, str)]: one-element list holding the publish sequence number and the JSON string
    :raises FormatterError: if any formatting step fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
        ninjs = self._transform_to_ninjs(article, subscriber)

        # if the article has an abstract then the description text has been over written by the abstract
        if article.get('abstract'):
            # if it is a picture then put it back
            if article.get('type') == 'picture':
                ninjs['description_text'] = article.get('description_text', '')

        # `or {}` also covers an explicit ``associations: None`` on either side.
        media = (article.get('associations') or {}).get('featuremedia')
        # The write target has to be the feature media of the *formatted* item. The original
        # fetched it from the source article again, so the assignment below never reached
        # the output and only mutated the caller's article.
        ninjs_media = (ninjs.get('associations') or {}).get('featuremedia')
        if media and ninjs_media and media.get('type') == 'picture':
            ninjs_media['description_text'] = media.get('description_text')

        return [(pub_seq_num, json.dumps(ninjs, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.ninjsFormatterError(ex, subscriber)
def format(self, article, destination, selector_codes=None):
    """
    Create article in ninjs format

    :param dict article: article to be formatted; ``_id``, ``version`` and ``type`` are required keys
    :param dict destination: output channel the article is formatted for
    :param selector_codes: accepted for interface compatibility, not read here
    :return (int, str): publish sequence number and the ninjs document as a JSON string
    :raises FormatterError: if any formatting step fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service('output_channels').generate_sequence_number(destination)

        ninjs = {}
        ninjs['_id'] = article['_id']
        ninjs['version'] = str(article['version'])
        ninjs['type'] = self._get_type(article)

        # The byline is best effort: a failure to derive it must not fail the whole item.
        # `except Exception` keeps that contract without also swallowing KeyboardInterrupt
        # and SystemExit, which the original bare `except:` did.
        try:
            ninjs['byline'] = self._get_byline(article)
        except Exception:
            pass

        for copy_property in self.direct_copy_properties:
            if copy_property in article:
                ninjs[copy_property] = article[copy_property]

        if article['type'] == 'composite':
            # The associations belong in the output document. The original stored them on
            # the input article, so they were computed but never serialised.
            ninjs['associations'] = self._get_associations(article)

        return pub_seq_num, json.dumps(ninjs, default=json_util.default)
    except Exception as ex:
        raise FormatterError.ninjsFormatterError(ex, destination)
def format(self, article, subscriber, codes=None):
    """
    Build the parameter dictionary handed to the SMS InsertAlerts stored procedure

    Single quotes in the headline and story text are doubled before serialisation.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :param list codes: selector codes, not read by this formatter
    :return [(int, str)]: one-element list holding the subscriber's sequence number and
        the parameter dictionary as a JSON string
    :raises FormatterError: if any formatting step fails
    """
    try:
        subscriber_service = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscriber_service.generate_sequence_number(subscriber)

        raw_message = article.get('sms_message', article.get('abstract', ''))
        sms_message = raw_message.replace("'", "''")

        # category = 1 is used to indicate a test message
        if superdesk.app.config.get('TEST_SMS_OUTPUT', True) is True:
            category = '1'
        else:
            category = article.get('anpa_category', [{}])[0].get('qcode').upper()

        odbc_item = {
            'Sequence': pub_seq_num,
            'Category': category,
            'Headline': BeautifulSoup(sms_message, 'html.parser').text,
            'Priority': map_priority(article.get('priority')),
        }

        story = self.append_body_footer(article)
        if article.get(EMBARGO):
            notice = '{}{}'.format('Embargo Content. Timestamp: ',
                                   get_utc_schedule(article, EMBARGO).isoformat())
            story = notice + story

        # Markup is stripped for text items only.
        if article[ITEM_TYPE] == CONTENT_TYPE.TEXT:
            story = BeautifulSoup(story, "html.parser").text

        odbc_item['StoryText'] = story.replace("'", "''")  # @article_text
        odbc_item['ident'] = '0'

        return [(pub_seq_num, json.dumps(odbc_item))]
    except Exception as ex:
        raise FormatterError.AAPSMSFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """
    Create article in NewsML G2 format

    Packages become a ``packageItem`` with a group set, pictures a ``newsItem`` with a
    content set, and everything else a ``newsItem`` wrapping NITF content.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return [(int, str)]: one-element list holding the publish sequence number and the
        serialised newsMessage prefixed with ``XML_ROOT``
    :raises FormatterError: if any formatting step fails
    """
    try:
        subscriber_service = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscriber_service.generate_sequence_number(subscriber)
        is_package = self._is_package(article)

        self._message_attrib.update(self._debug_message_extra)
        message = etree.Element('newsMessage', attrib=self._message_attrib)
        self._format_header(article, message, pub_seq_num)
        item_set = self._format_item(message)

        if is_package:
            package_item = self._format_item_set(article, item_set, 'packageItem')
            self._format_groupset(article, package_item)
        elif article[ITEM_TYPE] == CONTENT_TYPE.PICTURE:
            picture_item = self._format_item_set(article, item_set, 'newsItem')
            self._format_contentset(article, picture_item)
        else:
            # The NITF body is produced before the newsItem wrapper is created.
            nitf = NITFFormatter().get_nitf(article, subscriber, pub_seq_num)
            news_item = self._format_item_set(article, item_set, 'newsItem')
            self._format_content(article, news_item, nitf)

        xml_text = etree.tostring(message).decode('utf-8')
        return [(pub_seq_num, self.XML_ROOT + xml_text)]
    except Exception as ex:
        raise FormatterError.newmsmlG2FormatterError(ex, subscriber)
def format(self, article, destination, selector_codes=None):
    """
    Create article in NITF format

    :param dict article: article to be formatted; ``body_html`` and ``guid`` are required keys
    :param dict destination: output channel the article is formatted for
    :param selector_codes: accepted for interface compatibility, not read here
    :return (int, str): publish sequence number and the NITF document as text,
        prefixed with ``XML_ROOT``
    :raises FormatterError: if any formatting step fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service('output_channels').generate_sequence_number(destination)

        nitf = etree.Element("nitf")
        head = SubElement(nitf, "head")
        body = SubElement(nitf, "body")
        body_head = SubElement(body, "body.head")
        body_content = SubElement(body, "body.content")
        body_content.text = article['body_html']
        body_end = SubElement(body, "body.end")

        # NOTE(review): this element is built but never attached to the tree, so it does
        # not appear in the output. Kept as-is because it still makes a missing 'guid'
        # fail the formatting - confirm whether it was meant to go under <head>.
        etree.Element('doc-id', attrib={'id-string': article['guid']})

        self.__append_meta(article, head, destination, pub_seq_num)
        self.__format_head(article, head)
        self.__format_body_head(article, body_head)
        self.__format_body_end(article, body_end)

        # tostring() returns bytes. Decoding yields the XML text itself, whereas str() on
        # bytes produces the "b'...'" representation on Python 3.
        return pub_seq_num, self.XML_ROOT + etree.tostring(nitf).decode('utf-8')
    except Exception as ex:
        raise FormatterError.nitfFormatterError(ex, destination)
def format(self, article, destination):
    """
    Create article in NITF format

    :param dict article: article to be formatted; ``body_html`` and ``guid`` are required keys
    :param dict destination: destination the article is formatted for
    :return (int, str): publish sequence number and the NITF document as text,
        prefixed with ``XML_ROOT``
    :raises FormatterError: if any formatting step fails
    """
    try:
        pub_seq_num = self.generate_sequence_number(destination)

        nitf = etree.Element("nitf")
        head = SubElement(nitf, "head")
        body = SubElement(nitf, "body")
        body_head = SubElement(body, "body.head")
        body_content = SubElement(body, "body.content")
        body_content.text = article['body_html']
        body_end = SubElement(body, "body.end")

        # NOTE(review): this element is built but never attached to the tree, so it does
        # not appear in the output. Kept as-is because it still makes a missing 'guid'
        # fail the formatting - confirm whether it was meant to go under <head>.
        etree.Element('doc-id', attrib={'id-string': article['guid']})

        self.__append_meta(article, head, destination, pub_seq_num)
        self.__format_head(article, head)
        self.__format_body_head(article, body_head)
        self.__format_body_end(article, body_end)

        # tostring() returns bytes. Decoding yields the XML text itself, whereas str() on
        # bytes produces the "b'...'" representation on Python 3.
        return pub_seq_num, self.XML_ROOT + etree.tostring(nitf).decode('utf-8')
    except Exception as ex:
        raise FormatterError.nitfFormatterError(ex, destination)
def format(self, article, subscriber, codes=None):
    """
    Render the article as an email document (subject, bodies and feature media renditions)

    The article is deep-copied first, so dateline injection never touches the caller's dict.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :param list codes: selector codes, not read by this formatter
    :return [(int, str)]: one-element list holding the publish sequence number and a JSON
        string with the keys ``message_html``, ``message_text``, ``message_subject``
        and ``renditions``
    :raises FormatterError: if building the document fails
    """
    formatted_article = deepcopy(article)
    pub_seq_num = superdesk.get_resource_service("subscribers").generate_sequence_number(subscriber)
    doc = {}
    try:
        if formatted_article.get(FORMAT) == FORMATS.HTML:
            if formatted_article.get("dateline", {}).get("text"):
                # If there is a dateline inject it into the body
                self._inject_dateline(formatted_article)
            doc["message_html"] = render_template("email_article_body.html", article=formatted_article)
        else:
            # Non-HTML content has no HTML alternative; only the text part is sent.
            doc["message_html"] = None
        doc["message_text"] = render_template("email_article_body.txt", article=formatted_article)
        doc["message_subject"] = render_template("email_article_subject.txt", article=formatted_article)
        # Each `or {}` guards against a key that is present but set to None.
        doc["renditions"] = ((formatted_article.get("associations", {}) or {}).get("featuremedia", {})
                             or {}).get("renditions")
    except Exception as ex:
        # The second argument is the destination of the failed publish. The original passed
        # the FormatterError class itself here, unlike every sibling formatter.
        raise FormatterError.EmailFormatterError(ex, subscriber)
    return [(pub_seq_num, json.dumps(doc))]
def format(self, article, subscriber, codes=None):
    """
    Create article in ninjs format, restoring picture descriptions in the output

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :param list codes: selector codes, not read by this formatter
    :return [(int, str)]: one-element list holding the publish sequence number and the JSON string
    :raises FormatterError: if any formatting step fails
    """
    try:
        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
        ninjs = self._transform_to_ninjs(article, subscriber)

        # if the article has an abstract then the description text has been over written by the abstract
        if article.get('abstract'):
            # if it is a picture then put it back
            if article.get('type') == 'picture':
                ninjs['description_text'] = article.get('description_text', '')

        # `or {}` also covers an explicit ``associations: None`` on either side.
        media = (article.get('associations') or {}).get('featuremedia')
        # The write target has to be the feature media of the *formatted* item. The original
        # fetched it from the source article again, so the assignment below never reached
        # the output and only mutated the caller's article.
        ninjs_media = (ninjs.get('associations') or {}).get('featuremedia')
        if media and ninjs_media and media.get('type') == 'picture':
            ninjs_media['description_text'] = media.get('description_text')

        return [(pub_seq_num, json.dumps(ninjs, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.ninjsFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """
    Build the bulletin builder payload for the article

    All changes are made on a deep copy, so the caller's article is left untouched.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :param list codes: selector codes, not read by this formatter
    :return [(int, str)]: one-element list holding the subscriber's publish sequence number
        and the formatted item as a JSON string
    :raises FormatterError: if any formatting step fails
    """
    try:
        formatted_article = deepcopy(article)
        subscriber_service = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscriber_service.generate_sequence_number(subscriber)
        body_html = to_ascii(self.append_body_footer(formatted_article)).strip('\r\n')

        # get the desk name
        desk_service = superdesk.get_resource_service('desks')
        desk_name = desk_service.get_desk_name(formatted_article.get('task', {}).get('desk'))

        # force the content to source 'NZN' if desk is 'NZN'
        if 'new zealand' in desk_name.lower().strip():
            formatted_article['source'] = 'NZN'

        # this is temporary fix for bulletin builder formatter
        if formatted_article.get(ITEM_STATE, '') == CONTENT_STATE.SCHEDULED:
            formatted_article['versioncreated'] = utcnow()

        def plain(value):
            # ASCII-fold, reduce to text content, then trim surrounding whitespace.
            return self.get_text_content(to_ascii(value)).strip()

        formatted_article['body_text'] = self.get_text_content(body_html)
        formatted_article['abstract'] = plain(formatted_article.get('abstract', '') or '')
        formatted_article['headline'] = plain(formatted_article.get('headline', ''))
        formatted_article['byline'] = plain(formatted_article.get('byline', '') or '')

        # With more than one category, entries with qcode 'c' are dropped.
        if len(formatted_article.get('anpa_category') or []) > 1:
            formatted_article['anpa_category'] = [
                entry for entry in (formatted_article.get('anpa_category') or [])
                if entry.get('qcode') != 'c'
            ]

        self._handle_auto_publish(formatted_article)

        # get the first category and derive the locator
        category = next(iter(formatted_article.get('anpa_category') or []), None)
        if category:
            locator = LocatorMapper().map(formatted_article, category.get('qcode').upper())
            if locator:
                formatted_article['place'] = [{'qcode': locator, 'name': locator}]

            formatted_article['first_category'] = category
            formatted_article['first_subject'] = set_subject(category, formatted_article)

            mapped_slugline = SluglineMapper().map(
                article=formatted_article,
                category=category.get('qcode').upper(),
                truncate=(not formatted_article.get('auto_publish')))
            # Here the strip happens before the text extraction, not after it.
            formatted_article['slugline'] = self.get_text_content(to_ascii(mapped_slugline).strip())

        self.format_associated_item(formatted_article)

        # Single quotes are doubled in every string value of the item.
        odbc_item = {
            'id': formatted_article.get(config.ID_FIELD),
            'version': formatted_article.get(config.VERSION),
            ITEM_TYPE: formatted_article.get(ITEM_TYPE),
            PACKAGE_TYPE: formatted_article.get(PACKAGE_TYPE, ''),
            'headline': formatted_article.get('headline', '').replace("'", "''"),
            'slugline': formatted_article.get('slugline', '').replace("'", "''"),
            'data': superdesk.json.dumps(
                formatted_article, default=json_serialize_datetime_objectId).replace("'", "''"),
        }

        payload = json.dumps(odbc_item, default=json_serialize_datetime_objectId)
        return [(pub_seq_num, payload)]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """
    Create one ANPA-style byte document per category of the article

    A new publish sequence number is generated for every category, and each document is
    assembled as a list of byte fragments that is joined at the end.

    :param dict article: article to be formatted; ``anpa_category`` is iterated and
        ``_updated`` must support ``strftime``
    :param dict subscriber: subscriber receiving the article
    :return [(int, bytes)]: one tuple of publish sequence number and encoded document per category
    :raises FormatterError: if any formatting step fails
    """
    try:
        docs = []
        for category in article.get('anpa_category'):
            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)
            anpa = []

            # selector codes are only injected for those subscribers that are defined
            # in the mapper
            selectors = dict()
            SelectorcodeMapper().map(article, category.get('qcode').upper(),
                                     subscriber=subscriber,
                                     formatted_item=selectors)
            if 'selector_codes' in selectors and selectors[
                    'selector_codes']:
                anpa.append(b'\x05')
                anpa.append(selectors['selector_codes'].encode('ascii'))
                anpa.append(b'\x0D\x0A')

            # start of message header (syn syn soh)
            anpa.append(b'\x16\x16\x01')
            anpa.append(
                article.get('service_level', 'a').lower().encode('ascii'))

            # story number, zero-padded to at least four digits
            anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

            # field separator
            anpa.append(b'\x0A')  # -LF
            anpa.append(
                map_priority(article.get('priority')).encode('ascii'))
            anpa.append(b'\x20')

            anpa.append(category['qcode'].encode('ascii'))

            anpa.append(b'\x13')
            # format identifier
            if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                anpa.append(b'\x12')
            else:
                anpa.append(b'\x11')
            anpa.append(b'\x20')

            # keyword: 'bc-' + slugline, spaces replaced by dashes, capped at 24 characters
            keyword = 'bc-{}'.format(
                self.append_legal(article=article, truncate=True)).replace(' ', '-')
            keyword = keyword[:24] if len(keyword) > 24 else keyword
            anpa.append(keyword.encode('ascii'))
            anpa.append(b'\x20')

            # version field
            anpa.append(b'\x20')

            # reference field
            anpa.append(b'\x20')

            # filing date, written as month-day of the article's '_updated' value
            anpa.append('{}-{}'.format(
                article['_updated'].strftime('%m'),
                article['_updated'].strftime('%d')).encode('ascii'))
            anpa.append(b'\x20')

            # add the word count
            anpa.append(
                str(article.get('word_count', '0000')).zfill(4).encode('ascii'))
            anpa.append(b'\x0D\x0A')

            anpa.append(b'\x02')  # STX

            self._process_headline(anpa, article, category['qcode'].encode('ascii'))

            # slugline line: non-ASCII characters are dropped ('ignore'), then the optional take key
            keyword = self.append_legal(article=article, truncate=True).encode(
                'ascii', 'ignore')
            anpa.append(keyword)
            take_key = article.get('anpa_take_key', '').encode('ascii',
                                                               'ignore')
            anpa.append((b'\x20' + take_key) if len(take_key) > 0 else b'')
            anpa.append(b'\x0D\x0A')

            if BYLINE in article:
                anpa.append(article.get(BYLINE).encode('ascii', 'ignore'))
                anpa.append(b'\x0D\x0A')

            # the dateline text is appended directly in front of the body, with no separator
            if article.get('dateline', {}).get('text'):
                anpa.append(
                    article.get('dateline').get('text').encode(
                        'ascii', 'ignore'))

            body = self.append_body_footer(article)
            if article.get(EMBARGO):
                embargo = '{}{}'.format(
                    'Embargo Content. Timestamp: ',
                    get_utc_schedule(article, EMBARGO).isoformat())
                body = embargo + body

            # preformatted bodies are sent as they are; anything else is reduced to its text.
            # Unencodable characters are substituted ('replace') rather than dropped.
            if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                anpa.append(body.encode('ascii', 'replace'))
            else:
                anpa.append(
                    BeautifulSoup(body, "html.parser").text.encode(
                        'ascii', 'replace'))

            anpa.append(b'\x0D\x0A')
            # trailer: either the literal MORE or the source, followed by the optional sign off
            if article.get('more_coming', False):
                anpa.append('MORE'.encode('ascii'))
            else:
                anpa.append(article.get('source', '').encode('ascii'))
            sign_off = article.get('sign_off', '').encode('ascii')
            anpa.append((b'\x20' + sign_off) if len(sign_off) > 0 else b'')
            anpa.append(b'\x0D\x0A')

            anpa.append(b'\x03')  # ETX

            # time and date, taken from the naive local clock at formatting time
            anpa.append(datetime.datetime.now().strftime(
                '%d-%m-%y %H-%M-%S').encode('ascii'))

            anpa.append(b'\x04')  # EOT
            anpa.append(
                b'\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A'
            )
            docs.append((pub_seq_num, b''.join(anpa)))

        return docs
    except Exception as ex:
        raise FormatterError.AnpaFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """
    Create one ANPA-style document per category of the article

    The article is deep-copied once and the copy is shared by all categories. A new publish
    sequence number is generated for every category, and each document is assembled as a
    list of byte fragments that is joined at the end.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :param list codes: selector codes; when given they are space-joined into a leading header line
    :return [dict]: one dict per category with the keys ``published_seq_num``,
        ``encoded_item`` (bytes) and ``formatted_item`` (the same bytes decoded as ASCII)
    :raises FormatterError: if any formatting step fails
    """
    try:
        docs = []
        formatted_article = deepcopy(article)
        for category in self._get_category_list(
                formatted_article.get('anpa_category')):
            mapped_source = self._get_mapped_source(formatted_article)
            # prefer 'item_id' as the id when the article carries one
            formatted_article[config.ID_FIELD] = formatted_article.get(
                'item_id', formatted_article.get(config.ID_FIELD))
            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)
            anpa = []

            if codes:
                anpa.append(b'\x05')
                anpa.append(' '.join(codes).encode('ascii'))
                anpa.append(b'\x0D\x0A')

            # start of message header (syn syn soh)
            anpa.append(b'\x16\x16\x01')
            anpa.append(
                get_service_level(category, formatted_article).encode('ascii'))

            # story number, zero-padded to at least four digits
            anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

            # field separator
            anpa.append(b'\x0A')  # -LF
            anpa.append(
                map_priority(
                    formatted_article.get('priority')).encode('ascii'))
            anpa.append(b'\x20')

            anpa.append(category['qcode'].lower().encode('ascii'))

            anpa.append(b'\x13')
            # format identifier
            if formatted_article.get(FORMAT, FORMATS.HTML) == FORMATS.PRESERVED:
                anpa.append(b'\x12')
            else:
                anpa.append(b'\x11')
            anpa.append(b'\x20')

            # keyword: 'bc-' + slugline, spaces replaced by dashes, capped at 24 characters
            keyword = 'bc-{}'.format(
                self.append_legal(article=formatted_article,
                                  truncate=True)).replace(' ', '-')
            keyword = keyword[:24] if len(keyword) > 24 else keyword
            anpa.append(keyword.encode('ascii'))
            anpa.append(b'\x20')

            # version field
            anpa.append(b'\x20')

            # reference field
            anpa.append(b'\x20')

            # filing date, written as month-day of the article's '_updated' value
            anpa.append('{}-{}'.format(
                formatted_article['_updated'].strftime('%m'),
                formatted_article['_updated'].strftime('%d')).encode(
                    'ascii'))
            anpa.append(b'\x20')

            # add the word count
            anpa.append(
                str(formatted_article.get(
                    'word_count', '0000')).zfill(4).encode('ascii'))
            anpa.append(b'\x0D\x0A')

            anpa.append(b'\x02')  # STX

            self._process_headline(anpa, formatted_article,
                                   category['qcode'].encode('ascii'))

            # slugline line: unlike the 'bc-' keyword above, this one comes from SluglineMapper
            keyword = SluglineMapper().map(
                article=formatted_article,
                category=category['qcode'].upper(),
                truncate=True).encode('ascii', 'ignore')
            anpa.append(keyword)
            take_key = (formatted_article.get('anpa_take_key', '')
                        or '').encode('ascii', 'ignore')
            anpa.append((b'\x20' + take_key) if len(take_key) > 0 else b'')
            anpa.append(b'\x0D\x0A')

            if formatted_article.get('ednote', '') != '':
                ednote = '{}\r\n'.format(
                    to_ascii(formatted_article.get('ednote')))
                anpa.append(ednote.encode('ascii', 'replace'))

            if formatted_article.get(BYLINE):
                anpa.append(
                    get_text(formatted_article.get(BYLINE)).encode(
                        'ascii', 'replace'))
                anpa.append(b'\x0D\x0A')

            if formatted_article.get(FORMAT) == FORMATS.PRESERVED:
                # preserved content: body plus footer in one call, reduced to text
                anpa.append(
                    get_text(self.append_body_footer(formatted_article),
                             content='html').encode('ascii', 'replace'))
            else:
                body = to_ascii(formatted_article.get('body_html', ''))
                # we need to inject the dateline
                # (skipped for auto-published items; the flag is read from the original article)
                if formatted_article.get(
                        'dateline', {}).get('text') and not article.get(
                            'auto_publish', False):
                    body_html_elem = parse_html(
                        formatted_article.get('body_html'))
                    ptag = body_html_elem.find('.//p')
                    if ptag is not None:
                        # prefix the first paragraph with the dateline; the body is then
                        # re-serialised from the parsed tree, replacing the to_ascii() result
                        ptag.text = formatted_article['dateline'][
                            'text'] + ' ' + (ptag.text or '')
                        body = to_string(body_html_elem)
                # NOTE(review): appended without an explicit encode, so get_text_content
                # presumably returns bytes - confirm, since b''.join below requires it
                anpa.append(self.get_text_content(body))
                if formatted_article.get('body_footer'):
                    anpa.append(
                        self.get_text_content(
                            to_ascii(
                                formatted_article.get('body_footer', ''))))

            anpa.append(b'\x0D\x0A')
            anpa.append(mapped_source.encode('ascii'))
            sign_off = (formatted_article.get('sign_off', '')
                        or '').encode('ascii')
            anpa.append((b'\x20' + sign_off) if len(sign_off) > 0 else b'')
            anpa.append(b'\x0D\x0A')

            anpa.append(b'\x03')  # ETX

            # time and date, taken from the naive local clock at formatting time
            anpa.append(datetime.datetime.now().strftime(
                '%d-%m-%y %H-%M-%S').encode('ascii'))

            anpa.append(b'\x04')  # EOT
            anpa.append(
                b'\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A'
            )

            docs.append({
                'published_seq_num': pub_seq_num,
                'encoded_item': b''.join(anpa),
                'formatted_item': b''.join(anpa).decode('ascii')
            })

        return docs
    except Exception as ex:
        raise FormatterError.AnpaFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """
    Build the bulletin builder payload for the article

    The article is modified in place: ``slugline`` and ``body_text`` are always written,
    and ``place``, ``first_category`` and ``first_subject`` are written when applicable.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return [(int, str)]: one-element list holding the subscriber's publish sequence number
        and the formatted item as a JSON string
    :raises FormatterError: if any formatting step fails
    """
    try:
        article['slugline'] = self.append_legal(article=article, truncate=True)
        subscriber_service = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscriber_service.generate_sequence_number(subscriber)

        markup = self.append_body_footer(article).strip('\r\n')
        soup = BeautifulSoup(markup, 'html.parser')

        # Without any paragraphs, line breaks are flattened across the whole document.
        if not soup.find_all('p'):
            for line_break in soup.find_all('br'):
                # remove the <br> tag
                line_break.replace_with(' {}'.format(line_break.get_text()))

        for paragraph in soup.find_all('p'):
            # replace <p> tag with two carriage return
            for line_break in paragraph.find_all('br'):
                # remove the <br> tag
                line_break.replace_with(' {}'.format(line_break.get_text()))

            para_text = paragraph.get_text().strip()
            # Paragraphs that are empty after stripping vanish entirely.
            replacement = '{}\r\n\r\n'.format(para_text) if para_text != '' else ''
            paragraph.replace_with(replacement)

        # Runs of spaces are collapsed to a single space.
        article['body_text'] = re.sub(' +', ' ', soup.get_text())

        # get the first category and derive the locator
        category = next(iter(article.get('anpa_category', [])), None)
        if category:
            locator = LocatorMapper().map(article, category.get('qcode').upper())
            if locator:
                article['place'] = [{'qcode': locator, 'name': locator}]

            article['first_category'] = category
            article['first_subject'] = set_subject(category, article)

        # Single quotes are doubled in every string value of the item.
        odbc_item = {
            'id': article.get(config.ID_FIELD),
            'version': article.get(config.VERSION),
            ITEM_TYPE: article.get(ITEM_TYPE),
            PACKAGE_TYPE: article.get(PACKAGE_TYPE, ''),
            'headline': article.get('headline', '').replace("'", "''"),
            'slugline': article.get('slugline', '').replace("'", "''"),
            'data': superdesk.json.dumps(
                article, default=json_serialize_datetime_objectId).replace("'", "''"),
        }

        payload = json.dumps(odbc_item, default=json_serialize_datetime_objectId)
        return [(pub_seq_num, payload)]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """
    Build the bulletin builder payload for the article

    All changes are made on a deep copy, so the caller's article is left untouched.
    The ``package_type`` of the resulting item is always the empty string.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :param list codes: selector codes, not read by this formatter
    :return [(int, str)]: one-element list holding the subscriber's publish sequence number
        and the formatted item as a JSON string
    :raises FormatterError: if any formatting step fails
    """
    try:
        formatted_article = deepcopy(article)
        subscriber_service = superdesk.get_resource_service('subscribers')
        pub_seq_num = subscriber_service.generate_sequence_number(subscriber)

        body_with_footer = self.append_body_footer(formatted_article)
        body_html = to_ascii(body_with_footer).strip('\r\n')

        # get the desk name
        desk_service = superdesk.get_resource_service('desks')
        desk_name = desk_service.get_desk_name(formatted_article.get('task', {}).get('desk'))

        # force the content to source 'NZN' if desk is 'NZN'
        if 'new zealand' in desk_name.lower().strip():
            formatted_article['source'] = 'NZN'

        # this is temporary fix for bulletin builder formatter
        if formatted_article.get(ITEM_STATE, '') == CONTENT_STATE.SCHEDULED:
            formatted_article['versioncreated'] = utcnow()

        formatted_article['body_text'] = self.get_text_content(body_html)

        # Text fields are ASCII-folded, reduced to text content and trimmed.
        abstract = to_ascii(formatted_article.get('abstract', '') or '')
        formatted_article['abstract'] = self.get_text_content(abstract).strip()
        headline = to_ascii(formatted_article.get('headline', ''))
        formatted_article['headline'] = self.get_text_content(headline).strip()
        byline = to_ascii(formatted_article.get('byline', '') or '')
        formatted_article['byline'] = self.get_text_content(byline).strip()

        # With more than one category, entries with qcode 'c' are dropped.
        if len(formatted_article.get('anpa_category') or []) > 1:
            kept = []
            for cat in (formatted_article.get('anpa_category') or []):
                if cat.get('qcode') != 'c':
                    kept.append(cat)
            formatted_article['anpa_category'] = kept

        self._handle_auto_publish(formatted_article)

        # get the first category and derive the locator
        category = next(iter(formatted_article.get('anpa_category') or []), None)
        if category:
            locator = LocatorMapper().map(formatted_article, category.get('qcode').upper())
            if locator:
                formatted_article['place'] = [{'qcode': locator, 'name': locator}]

            formatted_article['first_category'] = category
            formatted_article['first_subject'] = set_subject(category, formatted_article)

            mapped_slugline = SluglineMapper().map(
                article=formatted_article,
                category=category.get('qcode').upper(),
                truncate=(not formatted_article.get('auto_publish')))
            # Here the strip is applied after the text extraction.
            formatted_article['slugline'] = self.get_text_content(to_ascii(mapped_slugline)).strip()

        self.format_associated_item(formatted_article)

        # Single quotes are doubled in every string value of the item.
        odbc_item = {
            'id': formatted_article.get(config.ID_FIELD),
            'version': formatted_article.get(config.VERSION),
            ITEM_TYPE: formatted_article.get(ITEM_TYPE),
            'package_type': '',
            'headline': formatted_article.get('headline', '').replace("'", "''"),
            'slugline': formatted_article.get('slugline', '').replace("'", "''"),
            'data': superdesk.json.dumps(
                formatted_article, default=json_serialize_datetime_objectId).replace("'", "''"),
        }

        payload = json.dumps(odbc_item, default=json_serialize_datetime_objectId)
        return [(pub_seq_num, payload)]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)