def test_remove_takes_package(self):
    """
    Check remove_expired() when only the takes package has expired.

    The first article is published and then killed, each step being recorded
    in the versions collection. The published repo is expected to hold four
    items afterwards; the published and the killed takes packages are then
    expired one after the other.
    """
    def expire(package):
        # Back-date the expiry by an hour so the removal job picks the package up.
        published_service.update(
            package[config.ID_FIELD],
            {'expiry': utcnow() - timedelta(minutes=60)},
            package)
        RemoveExpiredPublishContent().run()

        state = package[ITEM_STATE]
        if state == CONTENT_STATE.PUBLISHED:
            self.assertEqual(package[ITEM_OPERATION], 'publish')
        elif state == CONTENT_STATE.KILLED:
            self.assertEqual(package[ITEM_OPERATION], 'kill')

    def takes_packages_in(state):
        # All takes packages from the fetched published items that are in ``state``.
        return [item for item in items_in_published_repo
                if is_takes_package(item) and item[ITEM_STATE] == state]

    doc = self.articles[0].copy()
    self._create_and_insert_into_versions(doc, False)

    # Publish, then kill; every step bumps the version by one and is versioned.
    version = doc[config.VERSION]
    for resource, state in ((ARCHIVE_PUBLISH, CONTENT_STATE.PUBLISHED),
                            (ARCHIVE_KILL, CONTENT_STATE.KILLED)):
        version += 1
        get_resource_service(resource).patch(
            id=doc[config.ID_FIELD],
            updates={ITEM_STATE: state, config.VERSION: version})
        insert_into_versions(id_=doc[config.ID_FIELD])

    published_service = get_resource_service(PUBLISHED)
    items_in_published_repo = list(published_service.get_from_mongo(req=None, lookup=None))
    self.assertEqual(len(items_in_published_repo), 4)

    # Expiring the Takes Package whose state is Published
    expire(takes_packages_in(CONTENT_STATE.PUBLISHED)[0])

    # Expiring the Takes Package whose state is Killed
    expire(takes_packages_in(CONTENT_STATE.KILLED)[0])
def append_body_footer(self, article):
    """
    Build the article body with its public service announcements
    (``body_footer``) appended.

    Text, preformatted and takes-package items use ``body_html``; audio,
    picture and video items use ``description``; every other type yields ''.
    When the article format is FORMATS.PRESERVED, line endings become
    '\\r\\n', ``<br>`` tags are replaced by '\\r\\n' and the footer is
    appended as plain text on a new line. Otherwise the footer is
    concatenated unchanged.

    :param dict article: item being formatted
    :return: body with public service announcements.
    """
    item_type = article[ITEM_TYPE]
    if item_type in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED) or is_takes_package(article):
        content = article.get('body_html', '')
    elif item_type in (CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO):
        content = article.get('description', '')
    else:
        content = ''

    if not content:
        return content

    preserved = article.get(FORMAT, '') == FORMATS.PRESERVED
    if preserved:
        # '\n' -> '\r\n'; the second replace repairs input that already used '\r\n'.
        content = content.replace('\n', '\r\n').replace('\r\r', '\r')
        markup = BeautifulSoup(content, 'html.parser')
        for line_break in markup.find_all('br'):
            line_break.replace_with('\r\n')
        content = str(markup)
        if not content:
            return content

    footer = article.get('body_footer')
    if not footer:
        return content

    if preserved:
        footer_text = BeautifulSoup(footer, 'html.parser').get_text()
        return '{}\r\n{}'.format(content, footer_text)
    return '{}{}'.format(content, footer)
def append_body_footer(self, article):
    """
    Build the article body with its public service announcements
    (``body_footer``) appended.

    Text, preformatted and takes-package items use ``body_html``; audio,
    picture and video items use ``description``; every other type yields ''.
    For articles whose 'type' is 'preformatted' the footer is reduced to
    plain text and appended on a new '\\r\\n' line. Otherwise the footer is
    concatenated unchanged.

    :param dict article: item being formatted
    :return: body with public service announcements.
    """
    item_type = article[ITEM_TYPE]
    if item_type in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED) or is_takes_package(article):
        content = article.get('body_html', '')
    elif item_type in (CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO):
        content = article.get('description', '')
    else:
        content = ''

    if not content:
        return content

    footer = article.get('body_footer')
    if not footer:
        return content

    # NOTE(review): the literals 'type' / 'preformatted' presumably mirror
    # ITEM_TYPE / CONTENT_TYPE.PREFORMATTED -- confirm before unifying them.
    if article.get('type', '') == 'preformatted':
        footer_text = BeautifulSoup(footer, 'html.parser').get_text()
        return '{}\r\n{}'.format(content, footer_text)
    return '{}{}'.format(content, footer)
def append_body_footer(self, article):
    """
    Return the body of ``article`` followed by its public service
    announcements (``body_footer``).

    The body is ``body_html`` for text, preformatted and takes-package items
    and ``description`` for audio, picture and video items; any other type
    gives ''. With FORMATS.PRESERVED the body's newlines are normalised to
    '\\r\\n', ``<br>`` tags are turned into '\\r\\n', and the footer is added
    as plain text after a '\\r\\n'. In every other format the footer is
    appended as is.

    :param dict article: item being formatted
    :return: body with public service announcements.
    """
    kind = article[ITEM_TYPE]
    if kind in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED) or is_takes_package(article):
        text = article.get('body_html', '')
    elif kind in (CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO):
        text = article.get('description', '')
    else:
        text = ''

    if not text:
        return text

    keep_layout = article.get(FORMAT, '') == FORMATS.PRESERVED
    if keep_layout:
        # Collapse the '\r\r\n' produced when the input already used '\r\n'.
        text = text.replace('\n', '\r\n').replace('\r\r', '\r')
        parsed = BeautifulSoup(text, 'html.parser')
        for tag in parsed.find_all('br'):
            tag.replace_with('\r\n')
        text = str(parsed)
        if not text:
            return text

    footer = article.get('body_footer')
    if not footer:
        return text

    if not keep_layout:
        return '{}{}'.format(text, footer)
    plain_footer = BeautifulSoup(footer, 'html.parser').get_text()
    return '{}\r\n{}'.format(text, plain_footer)
def test_remove_takes_package(self):
    """
    Exercise remove_expired() for the case where just the takes package expires.

    Publishes and then kills the first article (versioning each step),
    asserts that the published repo contains four items, and finally expires
    the published takes package followed by the killed one.
    """
    def expire(pkg):
        # Move the expiry one hour into the past, then run the removal job.
        one_hour_ago = utcnow() - timedelta(minutes=60)
        published_service.update(pkg[config.ID_FIELD], {'expiry': one_hour_ago}, pkg)
        RemoveExpiredPublishContent().run()

        current_state = pkg[ITEM_STATE]
        if current_state == CONTENT_STATE.PUBLISHED:
            self.assertEqual(pkg[ITEM_OPERATION], 'publish')
        elif current_state == CONTENT_STATE.KILLED:
            self.assertEqual(pkg[ITEM_OPERATION], 'kill')

    def first_takes_package(state):
        # First takes package, among the fetched published items, in ``state``.
        matches = [entry for entry in items_in_published_repo
                   if is_takes_package(entry) and entry[ITEM_STATE] == state]
        return matches[0]

    doc = self.articles[0].copy()
    self._create_and_insert_into_versions(doc, False)

    steps = (
        (ARCHIVE_PUBLISH, CONTENT_STATE.PUBLISHED),
        (ARCHIVE_KILL, CONTENT_STATE.KILLED),
    )
    # Each step uses the next version number after the document's own.
    for version, (resource, state) in enumerate(steps, start=doc[config.VERSION] + 1):
        get_resource_service(resource).patch(
            id=doc[config.ID_FIELD],
            updates={ITEM_STATE: state, config.VERSION: version})
        insert_into_versions(id_=doc[config.ID_FIELD])

    published_service = get_resource_service(PUBLISHED)
    items_in_published_repo = list(published_service.get_from_mongo(req=None, lookup=None))
    self.assertEqual(len(items_in_published_repo), 4)

    # Expiring the Takes Package whose state is Published
    expire(first_takes_package(CONTENT_STATE.PUBLISHED))

    # Expiring the Takes Package whose state is Killed
    expire(first_takes_package(CONTENT_STATE.KILLED))
def append_body_footer(self, article):
    """
    Build the article body with its public service announcements
    (``body_footer``) appended after a ``<br>``.

    Text, preformatted and takes-package items use ``body_html``; audio,
    picture and video items use ``description``; every other type yields ''.
    The body is returned untouched when it is empty or no footer is set.

    :param dict article: item being formatted
    :return: body with public service announcements.
    """
    item_type = article[ITEM_TYPE]
    if item_type in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED) or is_takes_package(article):
        content = article.get('body_html', '')
    elif item_type in (CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO):
        content = article.get('description', '')
    else:
        content = ''

    if not content:
        return content

    footer = article.get('body_footer')
    if not footer:
        return content
    return '{}<br>{}'.format(content, footer)
def append_body_footer(self, article):
    """
    Return the body of ``article`` followed by ``<br>`` and its public
    service announcements (``body_footer``).

    The body is ``body_html`` for text, preformatted and takes-package items
    and ``description`` for audio, picture and video items; any other type
    gives ''. Nothing is appended when the body is empty or there is no
    footer.

    :param dict article: item being formatted
    :return: body with public service announcements.
    """
    kind = article[ITEM_TYPE]
    if kind in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED) or is_takes_package(article):
        text = article.get('body_html', '')
    elif kind in (CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO):
        text = article.get('description', '')
    else:
        text = ''

    footer = article.get('body_footer') if text else None
    return '{}<br>{}'.format(text, footer) if footer else text
def append_body_footer(self, article):
    """
    Return the body of ``article`` followed by its public service
    announcements (``body_footer``).

    The body is ``body_html`` for text, preformatted and takes-package items
    and ``description`` for audio, picture and video items; any other type
    gives ''. When the article's 'type' is 'preformatted' the footer is
    stripped to plain text and added after a '\\r\\n'. Otherwise it is
    appended as is.

    :param dict article: item being formatted
    :return: body with public service announcements.
    """
    kind = article[ITEM_TYPE]
    if kind in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED) or is_takes_package(article):
        text = article.get('body_html', '')
    elif kind in (CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO):
        text = article.get('description', '')
    else:
        text = ''

    footer = article.get('body_footer') if text else None
    if not footer:
        return text

    # NOTE(review): 'type' / 'preformatted' are hard-coded here while the
    # checks above go through ITEM_TYPE / CONTENT_TYPE -- confirm they match.
    if article.get('type', '') != 'preformatted':
        return '{}{}'.format(text, footer)
    plain_footer = BeautifulSoup(footer, 'html.parser').get_text()
    return '{}\r\n{}'.format(text, plain_footer)
def append_body_footer(self, article):
    """
    Checks if the article has any Public Service Announcements and if available appends each of them to the body.

    Text, preformatted and takes-package items use ``body_html``; audio,
    picture and video items use ``description``; every other type yields ''.
    When the article format is FORMATS.PRESERVED, line endings become
    '\\r\\n', ``<br>`` elements are replaced by '\\r\\n' and the footer is
    appended as plain text on a new line. Otherwise the footer is
    concatenated unchanged.

    Side effect: ``<br>`` in ``article['body_html']`` is rewritten to
    ``<br/>`` in place before the body is built.

    :param dict article: item being formatted
    :return: body with public service announcements.
    """
    # str.replace() returns a new string, so the result has to be stored back;
    # the previous code discarded it, which made the normalisation a no-op.
    # Missing or non-string values are left untouched.
    body_html = article.get('body_html')
    if isinstance(body_html, str):
        article['body_html'] = body_html.replace('<br>', '<br/>')

    body = ''
    if article[ITEM_TYPE] in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] or is_takes_package(article):
        body = article.get('body_html', '')
    elif article[ITEM_TYPE] in [CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO]:
        body = article.get('description', '')

    if body and article.get(FORMAT, '') == FORMATS.PRESERVED:
        # '\n' -> '\r\n'; the second replace repairs input that already used '\r\n'.
        body = body.replace('\n', '\r\n').replace('\r\r', '\r')
        parsed = parse_html(body, content='html')

        for br in parsed.xpath('//br'):
            # Put the line break into the tail so it survives removing the element.
            br.tail = ('\r\n' + br.tail) if br.tail else '\r\n'

        # with_tail=False keeps the tail text that now carries the '\r\n'.
        etree.strip_elements(parsed, 'br', with_tail=False)
        body = etree.tostring(parsed, encoding="unicode")

    if body and article.get('body_footer'):
        footer = article.get('body_footer')
        if article.get(FORMAT, '') == FORMATS.PRESERVED:
            body = '{}\r\n{}'.format(body, get_text(footer))
        else:
            body = '{}{}'.format(body, footer)

    return body
def get_subscribers(self, doc, target_media_type):
    """Work out which subscribers should receive ``doc`` for the given target_media_type.

    1. Get all active subscribers.
        a. Get the list of takes subscribers if Takes Package
    2. If takes package then subsequent takes are sent to same wire subscriber as first take.
    3. Filter the subscriber list based on the publish filter and global filters (if configured).
        a. Publish to takes package subscribers if the takes package is received by the subscriber.
        b. Rewrites are sent to subscribers that received the original item or the previous rewrite.

    :param dict doc: Document to publish/correct/kill
    :param str target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
            Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
            then the doc being queues is an Individual Article.
    :return: (list, dict, dict) List of filtered subscribers, product codes per subscriber,
            associations per subscriber
    """
    def previously_sent(item_id_filter, *actions):
        # Subscribers (plus codes and associations) that already received the
        # matching item(s) through one of the given publishing actions.
        lookup = {'$and': [{'item_id': item_id_filter},
                           {'publishing_action': {'$in': list(actions)}}]}
        return self._get_subscribers_for_previously_sent_items(lookup)

    takes_subscribers, rewrite_subscribers = [], []
    subscriber_codes, take_codes, codes, rewrite_codes = {}, {}, {}, {}
    associations, takes_associations, rewrite_associations = {}, {}, {}
    first_take = None

    # Step 3b: resolve the takes package of the item being rewritten, if any.
    rewrite_of = doc.get('rewrite_of')
    rewrite_take_package = None
    if rewrite_of:
        rewritten_item = get_resource_service('archive').find_one(req=None, _id=rewrite_of)
        if rewritten_item and is_takes_package(rewritten_item):
            rewrite_take_package = rewritten_item
        elif rewritten_item:
            rewrite_take_package = self.takes_package_service.get_take_package(rewritten_item)

    # Step 1
    subscribers = list(get_resource_service('subscribers').get(req=None, lookup={'is_active': True}))

    item_type = doc.get(ITEM_TYPE)

    if item_type == CONTENT_TYPE.COMPOSITE and doc.get(PACKAGE_TYPE) == TAKES_PACKAGE:
        # Step 1a
        takes_subscribers, take_codes, takes_associations = previously_sent(
            doc['item_id'], CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED)

        if rewrite_of and rewrite_take_package:
            # Step 3b
            rewrite_subscribers, rewrite_codes, rewrite_associations = previously_sent(
                rewrite_take_package.get(config.ID_FIELD),
                CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED)

    # Step 2
    if item_type in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED):
        first_take = self.takes_package_service.get_take_by_take_no(doc, 1)
        if str(doc['item_id']) == str(first_take):
            # the current document is itself the first take
            first_take = None

        if first_take:
            # if first take is published then subsequent takes go to the same subscribers.
            subscribers, subscriber_codes, takes_associations = previously_sent(
                first_take, CONTENT_STATE.PUBLISHED)

        if rewrite_of:
            # Step 3b
            if rewrite_take_package and rewrite_take_package.get(config.ID_FIELD) == rewrite_of:
                item_ids = self.package_service.get_residrefs(rewrite_take_package)
            else:
                item_ids = [rewrite_of]

            rewrite_subscribers, rewrite_codes, rewrite_associations = previously_sent(
                {'$in': item_ids}, CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED)

    # Step 3
    if not first_take:
        subscribers, codes = self.filter_subscribers(doc, subscribers, target_media_type)

    # Steps 3a and 3b: earlier recipients come first, followed by the
    # remaining subscribers that are not already among them.
    for earlier_recipients in (takes_subscribers, rewrite_subscribers):
        if earlier_recipients:
            known_ids = {s[config.ID_FIELD] for s in earlier_recipients}
            subscribers = earlier_recipients + [
                s for s in subscribers if s[config.ID_FIELD] not in known_ids]

    # join the codes
    for extra_codes in (take_codes, rewrite_codes, codes):
        if extra_codes:
            subscriber_codes.update(extra_codes)

    # update associations
    self._update_associations(associations, rewrite_associations)
    self._update_associations(associations, takes_associations)

    # handle associations
    associations = self._filter_subscribers_for_associations(
        subscribers, doc, target_media_type, associations)

    return subscribers, subscriber_codes, associations
def get_subscribers(self, doc, target_media_type):
    """
    Work out which subscribers should receive ``doc`` for the given target_media_type.

    1. If the item has embargo and is a future date then fetch active Wire Subscribers.
       Otherwise get all active subscribers.
        a. Get the list of takes subscribers if Takes Package
    2. If takes package then subsequent takes are sent to same wire subscriber as first take.
    3. Filter the subscriber list based on the publish filter and global filters (if configured).
        a. Publish to takes package subscribers if the takes package is received by the subscriber.
        b. Rewrites are sent to subscribers that received the original item or the previous rewrite.

    :param dict doc: Document to publish/correct/kill
    :param str target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
            Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
            then the doc being queues is an Individual Article.
    :return: (list, list, dict) List of filtered subscriber,
            List of subscribers that have not received item previously (empty list in this case).
            List of product codes per subscriber
    """
    def previously_sent(item_id_filter, *actions):
        # Subscribers (plus codes) that already received the matching item(s)
        # through one of the given publishing actions.
        lookup = {'$and': [{'item_id': item_id_filter},
                           {'publishing_action': {'$in': list(actions)}}]}
        return self._get_subscribers_for_previously_sent_items(lookup)

    subscribers_yet_to_receive, takes_subscribers, rewrite_subscribers = [], [], []
    subscriber_codes, take_codes, codes, rewrite_codes = {}, {}, {}, {}
    first_take = None

    # Step 3b: resolve the takes package of the item being rewritten, if any.
    rewrite_of = doc.get('rewrite_of')
    rewrite_take_package = None
    if rewrite_of:
        rewritten_item = get_resource_service('archive').find_one(req=None, _id=rewrite_of)
        if rewritten_item and is_takes_package(rewritten_item):
            rewrite_take_package = rewritten_item
        elif rewritten_item:
            rewrite_take_package = self.takes_package_service.get_take_package(rewritten_item)

    # Step 1
    lookup = {'is_active': True}
    if doc.get(EMBARGO) and get_utc_schedule(doc, EMBARGO) > utcnow():
        # Embargo still in the future: restrict to wire subscribers.
        lookup['subscriber_type'] = SUBSCRIBER_TYPES.WIRE
        # Ta 04/05/16: Commenting out this section for ticket SD-4465
        # query['media_type'] = SUBSCRIBER_MEDIA_TYPES.MEDIA

    subscribers = list(get_resource_service('subscribers').get(req=None, lookup=lookup))

    item_type = doc.get(ITEM_TYPE)

    if item_type == CONTENT_TYPE.COMPOSITE and doc.get(PACKAGE_TYPE) == TAKES_PACKAGE:
        # Step 1a
        takes_subscribers, take_codes = previously_sent(
            doc['item_id'], CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED)

        if rewrite_of and rewrite_take_package:
            # Step 3b
            rewrite_subscribers, rewrite_codes = previously_sent(
                rewrite_take_package.get(config.ID_FIELD),
                CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED)

    # Step 2
    if item_type in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED):
        first_take = self.takes_package_service.get_take_by_take_no(doc, 1)
        if str(doc['item_id']) == str(first_take):
            # the current document is itself the first take
            first_take = None

        if first_take:
            # if first take is published then subsequent takes go to the same subscribers.
            subscribers, subscriber_codes = previously_sent(first_take, CONTENT_STATE.PUBLISHED)

        if rewrite_of:
            # Step 3b
            if rewrite_take_package and rewrite_take_package.get(config.ID_FIELD) == rewrite_of:
                item_ids = self.package_service.get_residrefs(rewrite_take_package)
            else:
                item_ids = [rewrite_of]

            rewrite_subscribers, rewrite_codes = previously_sent(
                {'$in': item_ids}, CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED)

    # Step 3
    if not first_take:
        subscribers, codes = self.filter_subscribers(doc, subscribers, target_media_type)

    # Steps 3a and 3b: earlier recipients come first, followed by the
    # remaining subscribers that are not already among them.
    for earlier_recipients in (takes_subscribers, rewrite_subscribers):
        if earlier_recipients:
            known_ids = {s[config.ID_FIELD] for s in earlier_recipients}
            subscribers = earlier_recipients + [
                s for s in subscribers if s[config.ID_FIELD] not in known_ids]

    # join the codes
    for extra_codes in (take_codes, rewrite_codes, codes):
        if extra_codes:
            subscriber_codes.update(extra_codes)

    return subscribers, subscribers_yet_to_receive, subscriber_codes