def format(self, article, subscriber):
    """
    Formats the article as required by the subscriber.

    The article dict is modified in place: ``body_text`` is set to the plain
    text extracted from ``body_html`` and, when a locator can be derived from
    the first entry of ``anpa_category``, ``place`` is replaced with it.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return: single-element list holding a tuple (int, str) of the publish
        sequence number of the subscriber and the article dumped as a JSON string
    :raises: the error built by ``FormatterError.bulletinBuilderFormatterError``
        for any exception raised while formatting
    """
    try:
        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)

        # dict.get only uses its default when the key is missing, so guard
        # against a key that is present but holds None
        body_html = (article.get('body_html') or '').strip('\r\n')

        # name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed
        soup = BeautifulSoup(body_html, 'html.parser')

        for br in soup.find_all('br'):
            # swap the <br> tag for a space followed by whatever text it holds
            br.replace_with(' {}'.format(br.get_text()))

        for p in soup.find_all('p'):
            # swap the <p> tag for its text followed by two CRLF pairs
            p.replace_with('{}\r\n\r\n'.format(p.get_text()))

        article['body_text'] = soup.get_text()

        # get the first category and derive the locator
        category = next(iter(article.get('anpa_category') or []), None)
        if category:
            locator = LocatorMapper().map(article, category.get('qcode').upper())
            if locator:
                article['place'] = [{'qcode': locator, 'name': locator}]

        return [(pub_seq_num, superdesk.json.dumps(article, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """
    Formats the article as required by the subscriber.

    Side effects on ``article`` (it is mutated in place): ``body_text`` is
    filled with the text content of ``body_html``; ``place`` is overwritten
    when the first ``anpa_category`` entry maps to a locator.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return: list with one tuple (int, str): publish sequence number of the
        subscriber, article dumped as a JSON string
    :raises: the error built by ``FormatterError.bulletinBuilderFormatterError``
        for any exception raised while formatting
    """
    try:
        pub_seq_num = superdesk.get_resource_service(
            'subscribers').generate_sequence_number(subscriber)

        # a present-but-None 'body_html' would bypass the .get() default
        raw_html = article.get('body_html') or ''
        body_html = raw_html.strip('\r\n')

        # explicit parser: without it bs4 picks whichever parser is installed
        # and the extracted text can differ between environments
        soup = BeautifulSoup(body_html, 'html.parser')

        for br in soup.find_all('br'):
            # swap the <br> tag for a space followed by whatever text it holds
            br.replace_with(' {}'.format(br.get_text()))

        for p in soup.find_all('p'):
            # swap the <p> tag for its text followed by two CRLF pairs
            p.replace_with('{}\r\n\r\n'.format(p.get_text()))

        article['body_text'] = soup.get_text()

        # get the first category and derive the locator
        categories = article.get('anpa_category') or []
        category = next(iter(categories), None)
        if category:
            locator = LocatorMapper().map(article,
                                          category.get('qcode').upper())
            if locator:
                article['place'] = [{'qcode': locator, 'name': locator}]

        return [(pub_seq_num, superdesk.json.dumps(
            article, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """
    Formats the article as required by the subscriber.

    The article dict is modified in place: ``slugline`` is replaced with the
    value returned by ``self.append_legal``, ``body_text`` is set to the plain
    text of the body returned by ``self.append_body_footer``, and, when the
    article has a first ``anpa_category`` entry, ``first_category``,
    ``first_subject`` and (if a locator is found) ``place`` are set.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return: single-element list holding a tuple (int, str) of the publish
        sequence number of the subscriber and the JSON string of the envelope
        dict (id, version, type, package type, headline, slugline, data)
    :raises: the error built by ``FormatterError.bulletinBuilderFormatterError``
        for any exception raised while formatting
    """
    try:
        article['slugline'] = self.append_legal(article=article, truncate=True)

        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
        body_html = self.append_body_footer(article).strip('\r\n')
        soup = BeautifulSoup(body_html, 'html.parser')

        # looked up once: the tree is not changed between here and the
        # paragraph loop whenever paragraphs exist
        paragraphs = soup.find_all('p')

        if not paragraphs:
            # no paragraphs at all: flatten every <br> in the document
            for br in soup.find_all('br'):
                br.replace_with(' {}'.format(br.get_text()))

        for p in paragraphs:
            # flatten the <br> tags inside the paragraph first
            for br in p.find_all('br'):
                br.replace_with(' {}'.format(br.get_text()))

            # non-empty paragraphs become text plus two CRLF pairs,
            # empty ones are dropped
            para_text = p.get_text().strip()
            if para_text:
                p.replace_with('{}\r\n\r\n'.format(para_text))
            else:
                p.replace_with('')

        # collapse runs of spaces into a single space
        article['body_text'] = re.sub(' +', ' ', soup.get_text())

        # get the first category and derive the locator;
        # 'anpa_category' may be present but None, so do not rely on the .get() default
        category = next(iter(article.get('anpa_category') or []), None)
        if category:
            locator = LocatorMapper().map(article, category.get('qcode').upper())
            if locator:
                article['place'] = [{'qcode': locator, 'name': locator}]

            article['first_category'] = category
            article['first_subject'] = set_subject(category, article)

        # single quotes are doubled in the text fields
        # NOTE(review): presumably SQL-style escaping for the consumer - confirm
        odbc_item = {
            'id': article.get(config.ID_FIELD),
            'version': article.get(config.VERSION),
            ITEM_TYPE: article.get(ITEM_TYPE),
            PACKAGE_TYPE: article.get(PACKAGE_TYPE, ''),
            'headline': (article.get('headline') or '').replace('\'', '\'\''),
            'slugline': (article.get('slugline') or '').replace('\'', '\'\''),
            'data': superdesk.json.dumps(article, default=json_serialize_datetime_objectId).replace('\'', '\'\''),
        }

        return [(pub_seq_num, json.dumps(odbc_item, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """
    Formats the article as required by the subscriber.

    The article dict is modified in place: ``body_text`` is set to the plain
    text extracted from ``body_html``.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return: single-element list holding a tuple (int, str) of the publish
        sequence number of the subscriber and the article dumped as a JSON string
    :raises: the error built by ``FormatterError.bulletinBuilderFormatterError``
        for any exception raised while formatting
    """
    try:
        pub_seq_num = superdesk.get_resource_service("subscribers").generate_sequence_number(subscriber)

        # dict.get only uses its default when the key is missing, so guard
        # against a key that is present but holds None
        body_html = (article.get("body_html") or "").strip("\r\n")

        # name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed
        soup = BeautifulSoup(body_html, "html.parser")

        for br in soup.find_all("br"):
            # swap the <br> tag for a space followed by whatever text it holds
            br.replace_with(" {}".format(br.get_text()))

        for p in soup.find_all("p"):
            # swap the <p> tag for its text followed by two CRLF pairs
            p.replace_with("{}\r\n\r\n".format(p.get_text()))

        article["body_text"] = soup.get_text()

        return [(pub_seq_num, superdesk.json.dumps(article, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """
    Formats the article as required by the subscriber.

    All changes are made on a deep copy, so the ``article`` passed in is left
    untouched.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :param list codes: selector codes (not used by this formatter)
    :return: single-element list holding a tuple (int, str) of the publish
        sequence number of the subscriber and the JSON string of the envelope
        dict (id, version, type, package type, headline, slugline, data)
    :raises: the error built by ``FormatterError.bulletinBuilderFormatterError``
        for any exception raised while formatting
    """
    try:
        formatted_article = deepcopy(article)

        pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
        body_html = to_ascii(self.append_body_footer(formatted_article)).strip('\r\n')

        # get the desk name ('task' may be present but None)
        desk_id = (formatted_article.get('task') or {}).get('desk')
        desk_name = superdesk.get_resource_service('desks').get_desk_name(desk_id)

        # force the content to source 'NZN' if desk is 'NZN'
        if 'new zealand' in desk_name.lower().strip():
            formatted_article['source'] = 'NZN'

        # this is temporary fix for bulletin builder formatter
        if formatted_article.get(ITEM_STATE, '') == CONTENT_STATE.SCHEDULED:
            formatted_article['versioncreated'] = utcnow()

        formatted_article['body_text'] = self.get_text_content(body_html)

        # text fields are treated alike: None falls back to '' before conversion
        formatted_article['abstract'] = self.get_text_content(
            to_ascii(formatted_article.get('abstract') or '')).strip()
        formatted_article['headline'] = self.get_text_content(
            to_ascii(formatted_article.get('headline') or '')).strip()
        formatted_article['byline'] = self.get_text_content(
            to_ascii(formatted_article.get('byline') or '')).strip()

        # with more than one category, drop those whose qcode is 'c'
        categories = formatted_article.get('anpa_category') or []
        if len(categories) > 1:
            formatted_article['anpa_category'] = [cat for cat in categories if cat.get('qcode') != 'c']

        self._handle_auto_publish(formatted_article)

        # get the first category and derive the locator
        category = next(iter(formatted_article.get('anpa_category') or []), None)
        if category:
            category_code = category.get('qcode').upper()

            locator = LocatorMapper().map(formatted_article, category_code)
            if locator:
                formatted_article['place'] = [{'qcode': locator, 'name': locator}]

            formatted_article['first_category'] = category
            formatted_article['first_subject'] = set_subject(category, formatted_article)

            # the slugline is truncated unless the article is flagged 'auto_publish'
            mapped_slugline = SluglineMapper().map(
                article=formatted_article,
                category=category_code,
                truncate=(not formatted_article.get('auto_publish')))
            formatted_article['slugline'] = self.get_text_content(to_ascii(mapped_slugline).strip())

        self.format_associated_item(formatted_article)

        # single quotes are doubled in the text fields
        # NOTE(review): presumably SQL-style escaping for the consumer - confirm
        odbc_item = {
            'id': formatted_article.get(config.ID_FIELD),
            'version': formatted_article.get(config.VERSION),
            ITEM_TYPE: formatted_article.get(ITEM_TYPE),
            PACKAGE_TYPE: formatted_article.get(PACKAGE_TYPE, ''),
            'headline': (formatted_article.get('headline') or '').replace('\'', '\'\''),
            # without a category the slugline is left as received and may be None
            'slugline': (formatted_article.get('slugline') or '').replace('\'', '\'\''),
            'data': superdesk.json.dumps(formatted_article,
                                         default=json_serialize_datetime_objectId).replace('\'', '\'\''),
        }

        return [(pub_seq_num, json.dumps(odbc_item, default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber):
    """
    Formats the article as required by the subscriber.

    Side effects on ``article`` (it is mutated in place): ``slugline`` takes
    the value returned by ``self.append_legal``; ``body_text`` receives the
    plain text of the body returned by ``self.append_body_footer``; when a
    first ``anpa_category`` entry exists, ``first_category`` and
    ``first_subject`` are set, plus ``place`` if a locator is found.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :return: list with one tuple (int, str): publish sequence number of the
        subscriber, JSON string of the envelope dict (id, version, type,
        package type, headline, slugline, data)
    :raises: the error built by ``FormatterError.bulletinBuilderFormatterError``
        for any exception raised while formatting
    """
    try:
        article['slugline'] = self.append_legal(article=article,
                                                truncate=True)

        pub_seq_num = superdesk.get_resource_service(
            'subscribers').generate_sequence_number(subscriber)
        body_html = self.append_body_footer(article).strip('\r\n')
        soup = BeautifulSoup(body_html, 'html.parser')

        # one lookup is enough: nothing alters the tree before the paragraph
        # loop when at least one <p> is present
        paragraphs = soup.find_all('p')

        if not paragraphs:
            # paragraph-less body: flatten every <br> in the document
            for br in soup.find_all('br'):
                br.replace_with(' {}'.format(br.get_text()))

        for p in paragraphs:
            # flatten the <br> tags nested in this paragraph
            for br in p.find_all('br'):
                br.replace_with(' {}'.format(br.get_text()))

            # keep non-empty paragraphs as text plus two CRLF pairs,
            # remove empty ones
            para_text = p.get_text().strip()
            if para_text:
                p.replace_with('{}\r\n\r\n'.format(para_text))
            else:
                p.replace_with('')

        # squeeze runs of spaces down to one
        article['body_text'] = re.sub(' +', ' ', soup.get_text())

        # get the first category and derive the locator; the .get() default
        # is skipped when the key exists with a None value, hence the `or`
        categories = article.get('anpa_category') or []
        category = next(iter(categories), None)
        if category:
            locator = LocatorMapper().map(article,
                                          category.get('qcode').upper())
            if locator:
                article['place'] = [{'qcode': locator, 'name': locator}]

            article['first_category'] = category
            article['first_subject'] = set_subject(category, article)

        # single quotes are doubled in the text fields
        # NOTE(review): presumably SQL-style escaping for the consumer - confirm
        headline = article.get('headline') or ''
        slugline = article.get('slugline') or ''
        odbc_item = {
            'id': article.get(config.ID_FIELD),
            'version': article.get(config.VERSION),
            ITEM_TYPE: article.get(ITEM_TYPE),
            PACKAGE_TYPE: article.get(PACKAGE_TYPE, ''),
            'headline': headline.replace('\'', '\'\''),
            'slugline': slugline.replace('\'', '\'\''),
            'data': superdesk.json.dumps(
                article,
                default=json_serialize_datetime_objectId).replace(
                    '\'', '\'\''),
        }

        return [(pub_seq_num,
                 json.dumps(odbc_item,
                            default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)
def format(self, article, subscriber, codes=None):
    """
    Formats the article as required by the subscriber.

    Works on a deep copy of ``article``; the dict passed in is not modified.

    :param dict article: article to be formatted
    :param dict subscriber: subscriber receiving the article
    :param list codes: selector codes (not used by this formatter)
    :return: list with one tuple (int, str): publish sequence number of the
        subscriber, JSON string of the envelope dict (id, version, type,
        package_type, headline, slugline, data)
    :raises: the error built by ``FormatterError.bulletinBuilderFormatterError``
        for any exception raised while formatting
    """
    try:
        formatted_article = deepcopy(article)

        pub_seq_num = superdesk.get_resource_service(
            'subscribers').generate_sequence_number(subscriber)
        body_html = to_ascii(
            self.append_body_footer(formatted_article)).strip('\r\n')

        # get the desk name ('task' may be present but None)
        task = formatted_article.get('task') or {}
        desk_name = superdesk.get_resource_service('desks').get_desk_name(
            task.get('desk'))

        # force the content to source 'NZN' if desk is 'NZN'
        if 'new zealand' in desk_name.lower().strip():
            formatted_article['source'] = 'NZN'

        # this is temporary fix for bulletin builder formatter
        if formatted_article.get(ITEM_STATE, '') == CONTENT_STATE.SCHEDULED:
            formatted_article['versioncreated'] = utcnow()

        formatted_article['body_text'] = self.get_text_content(body_html)

        # every text field falls back to '' when missing or None
        formatted_article['abstract'] = self.get_text_content(
            to_ascii(formatted_article.get('abstract') or '')).strip()
        formatted_article['headline'] = self.get_text_content(
            to_ascii(formatted_article.get('headline') or '')).strip()
        formatted_article['byline'] = self.get_text_content(
            to_ascii(formatted_article.get('byline') or '')).strip()

        # when several categories are present, drop those with qcode 'c'
        categories = formatted_article.get('anpa_category') or []
        if len(categories) > 1:
            formatted_article['anpa_category'] = [
                cat for cat in categories if cat.get('qcode') != 'c'
            ]

        self._handle_auto_publish(formatted_article)

        # get the first category and derive the locator
        category = next(
            iter(formatted_article.get('anpa_category') or []), None)
        if category:
            category_code = category.get('qcode').upper()

            locator = LocatorMapper().map(formatted_article, category_code)
            if locator:
                formatted_article['place'] = [{
                    'qcode': locator,
                    'name': locator
                }]

            formatted_article['first_category'] = category
            formatted_article['first_subject'] = set_subject(
                category, formatted_article)

            # truncation is skipped for articles flagged 'auto_publish'
            mapped_slugline = SluglineMapper().map(
                article=formatted_article,
                category=category_code,
                truncate=(not formatted_article.get('auto_publish')))
            formatted_article['slugline'] = self.get_text_content(
                to_ascii(mapped_slugline).strip())

        self.format_associated_item(formatted_article)

        # single quotes are doubled in the text fields
        # NOTE(review): presumably SQL-style escaping for the consumer - confirm
        odbc_item = {
            'id': formatted_article.get(config.ID_FIELD),
            'version': formatted_article.get(config.VERSION),
            ITEM_TYPE: formatted_article.get(ITEM_TYPE),
            'package_type': '',
            'headline':
            (formatted_article.get('headline') or '').replace('\'', '\'\''),
            # without a category the slugline is left as received and may be None
            'slugline':
            (formatted_article.get('slugline') or '').replace('\'', '\'\''),
            'data': superdesk.json.dumps(
                formatted_article,
                default=json_serialize_datetime_objectId).replace(
                    '\'', '\'\''),
        }

        return [(pub_seq_num,
                 json.dumps(odbc_item,
                            default=json_serialize_datetime_objectId))]
    except Exception as ex:
        raise FormatterError.bulletinBuilderFormatterError(ex, subscriber)