Example #1
0
    def test_remove_takes_package(self):
        """
        Check remove_expired() when only the takes package itself expires.

        A copy of the first fixture article is inserted, then patched through
        the publish and the kill services (a version is recorded after each
        patch).  The published and the killed takes packages found in the
        published collection are then expired one after the other and the
        operation stored on each of them is verified.
        """
        def expire(package):
            # Set the expiry one hour in the past before running the command.
            an_hour_ago = utcnow() + timedelta(minutes=-60)
            published_service.update(
                package[config.ID_FIELD], {'expiry': an_hour_ago}, package)

            RemoveExpiredPublishContent().run()

            state = package[ITEM_STATE]
            if state == CONTENT_STATE.PUBLISHED:
                self.assertEqual(package[ITEM_OPERATION], 'publish')
            elif state == CONTENT_STATE.KILLED:
                self.assertEqual(package[ITEM_OPERATION], 'kill')

        doc = self.articles[0].copy()
        self._create_and_insert_into_versions(doc, False)

        doc_id = doc[config.ID_FIELD]
        base_version = doc[config.VERSION]
        transitions = (
            (ARCHIVE_PUBLISH, CONTENT_STATE.PUBLISHED),
            (ARCHIVE_KILL, CONTENT_STATE.KILLED),
        )
        # Each transition bumps the version by one and records it.
        for bump, (resource, new_state) in enumerate(transitions, start=1):
            get_resource_service(resource).patch(
                id=doc_id,
                updates={
                    ITEM_STATE: new_state,
                    config.VERSION: base_version + bump
                })
            insert_into_versions(id_=doc_id)

        published_service = get_resource_service(PUBLISHED)
        items_in_published_repo = list(
            published_service.get_from_mongo(req=None, lookup=None))
        self.assertEqual(len(items_in_published_repo), 4)

        # Expire the published takes package first, then the killed one.
        for wanted_state in (CONTENT_STATE.PUBLISHED, CONTENT_STATE.KILLED):
            matching = [
                item for item in items_in_published_repo
                if is_takes_package(item) and item[ITEM_STATE] == wanted_state
            ]
            expire(matching[0])
Example #2
0
    def append_body_footer(self, article):
        """
        Build the article body and append its body footer (the public service announcements).

        Text and preformatted items, as well as takes packages, take the body from ``body_html``;
        audio, picture and video items take it from ``description``; any other item has an empty body.
        For articles whose format is ``FORMATS.PRESERVED`` every LF is expanded to CR LF, ``br`` tags
        are replaced by CR LF and the footer is appended as plain text on a new line. Otherwise the
        footer is concatenated to the body unchanged.

        :param article: item whose body and footer are combined
        :return: body with public service announcements (the body alone when body or footer is empty).
        """
        item_type = article[ITEM_TYPE]
        if item_type in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED) or is_takes_package(article):
            body = article.get('body_html', '')
        elif item_type in (CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO):
            body = article.get('description', '')
        else:
            body = ''

        if not body:
            return body

        preserved = article.get(FORMAT, '') == FORMATS.PRESERVED
        if preserved:
            # The second replace undoes the doubled CR produced for line endings that were already CR LF.
            with_crlf = body.replace('\n', '\r\n').replace('\r\r', '\r')
            markup = BeautifulSoup(with_crlf, 'html.parser')
            for line_break in markup.find_all('br'):
                line_break.replace_with('\r\n')
            body = str(markup)

        footer = article.get('body_footer')
        if not (body and footer):
            return body

        if preserved:
            footer_text = BeautifulSoup(footer, 'html.parser').get_text()
            return '{}\r\n{}'.format(body, footer_text)
        return '{}{}'.format(body, footer)
Example #3
0
    def append_body_footer(self, article):
        """
        Return the article body followed by its body footer (the public service announcements).

        The body is read from ``body_html`` for text and preformatted items and for takes packages,
        and from ``description`` for audio, picture and video items; other items have an empty body.
        When the article ``type`` is ``preformatted`` the footer is reduced to its text and placed on
        a new line, otherwise it is concatenated to the body as is.

        :param article: item whose body and footer are combined
        :return: body with public service announcements.
        """
        if article[ITEM_TYPE] in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] \
                or is_takes_package(article):
            source_field = 'body_html'
        elif article[ITEM_TYPE] in [CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO]:
            source_field = 'description'
        else:
            source_field = None

        body = article.get(source_field, '') if source_field else ''

        footer = article.get('body_footer')
        if body and footer:
            if article.get('type', '') == 'preformatted':
                # Preformatted output carries no markup: keep only the footer's text.
                template = '{}\r\n{}'
                footer = BeautifulSoup(footer, 'html.parser').get_text()
            else:
                template = '{}{}'
            body = template.format(body, footer)
        return body
Example #4
0
    def append_body_footer(self, article):
        """
        Combine the article body with its body footer (the public service announcements).

        ``body_html`` is used for text and preformatted items and for takes packages,
        ``description`` for audio, picture and video items; anything else gives an empty body.
        If the article format is ``FORMATS.PRESERVED`` the body gets CR LF line endings, its ``br``
        tags are turned into CR LF and the footer is appended as plain text on a new line;
        in every other case the footer is appended verbatim.

        :param article: item whose body and footer are combined
        :return: body with public service announcements.
        """
        body_html_types = [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]
        description_types = [
            CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO
        ]

        body = ''
        if article[ITEM_TYPE] in body_html_types or is_takes_package(article):
            body = article.get('body_html', '')
        elif article[ITEM_TYPE] in description_types:
            body = article.get('description', '')

        # The format only matters when there is a body to work on.
        keep_format = bool(body) and article.get(FORMAT, '') == FORMATS.PRESERVED

        if keep_format:
            normalised = body.replace('\n', '\r\n').replace('\r\r', '\r')
            tree = BeautifulSoup(normalised, 'html.parser')
            for tag in tree.find_all('br'):
                tag.replace_with('\r\n')
            body = str(tree)

        footer = article.get('body_footer')
        if body and footer:
            if keep_format:
                plain_footer = BeautifulSoup(footer, 'html.parser').get_text()
                body = '{}\r\n{}'.format(body, plain_footer)
            else:
                body = '{}{}'.format(body, footer)
        return body
Example #5
0
    def test_remove_takes_package(self):
        """
        Exercise remove_expired() for the case where just the takes package expires.

        The first fixture article is copied and inserted, then moved to the published
        and to the killed state with a version bump each time. Afterwards the published
        takes package and the killed takes package are expired in turn.
        """

        def expire(takes_pkg):
            pkg_id = takes_pkg[config.ID_FIELD]
            expiry_update = {'expiry': utcnow() + timedelta(minutes=-60)}
            published_service.update(pkg_id, expiry_update, takes_pkg)

            RemoveExpiredPublishContent().run()

            if takes_pkg[ITEM_STATE] == CONTENT_STATE.PUBLISHED:
                expected_operation = 'publish'
            elif takes_pkg[ITEM_STATE] == CONTENT_STATE.KILLED:
                expected_operation = 'kill'
            else:
                # Nothing is asserted for any other state.
                return
            self.assertEqual(takes_pkg[ITEM_OPERATION], expected_operation)

        def change_state(resource_name, new_state, version):
            updates = {ITEM_STATE: new_state, config.VERSION: version}
            get_resource_service(resource_name).patch(id=doc[config.ID_FIELD], updates=updates)
            insert_into_versions(id_=doc[config.ID_FIELD])

        def first_takes_package(state):
            found = []
            for candidate in items_in_published_repo:
                if is_takes_package(candidate) and candidate[ITEM_STATE] == state:
                    found.append(candidate)
            return found[0]

        doc = self.articles[0].copy()
        self._create_and_insert_into_versions(doc, False)

        published_version_number = doc[config.VERSION] + 1
        change_state(ARCHIVE_PUBLISH, CONTENT_STATE.PUBLISHED, published_version_number)
        change_state(ARCHIVE_KILL, CONTENT_STATE.KILLED, published_version_number + 1)

        published_service = get_resource_service(PUBLISHED)
        items_in_published_repo = list(published_service.get_from_mongo(req=None, lookup=None))
        self.assertEqual(len(items_in_published_repo), 4)

        # Expiring the Takes Package whose state is Published
        expire(first_takes_package(CONTENT_STATE.PUBLISHED))

        # Expiring the Takes Package whose state is Killed
        expire(first_takes_package(CONTENT_STATE.KILLED))
Example #6
0
    def append_body_footer(self, article):
        """
        Return the article body with the body footer (the public service announcements) appended.

        The body is ``body_html`` for text and preformatted items and for takes packages,
        ``description`` for audio, picture and video items, and empty for anything else.
        Body and footer are joined by a ``<br>`` tag.

        :param article: item whose body and footer are combined
        :return: body with public service announcements.
        """
        if article[ITEM_TYPE] in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED) or is_takes_package(article):
            body = article.get('body_html', '')
        elif article[ITEM_TYPE] in (CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO):
            body = article.get('description', '')
        else:
            body = ''

        footer = article.get('body_footer')
        return '{}<br>{}'.format(body, footer) if body and footer else body
Example #7
0
    def append_body_footer(self, article):
        """
        Join the article body and its body footer (the public service announcements) with ``<br>``.

        Text and preformatted items and takes packages use ``body_html`` as body; audio,
        picture and video items use ``description``; other items have no body.

        :param article: item whose body and footer are combined
        :return: body with public service announcements.
        """
        item_type = article[ITEM_TYPE]
        reads_body_html = item_type in [
            CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED
        ] or is_takes_package(article)

        if reads_body_html:
            body = article.get('body_html', '')
        elif item_type in [
                CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO
        ]:
            body = article.get('description', '')
        else:
            body = ''

        # Without a body there is nothing to append the footer to.
        if not body:
            return body

        footer = article.get('body_footer')
        if not footer:
            return body

        return '{}<br>{}'.format(body, footer)
Example #8
0
    def append_body_footer(self, article):
        """
        Append the body footer (the public service announcements) to the article body.

        ``body_html`` supplies the body for text and preformatted items and for takes packages,
        ``description`` supplies it for audio, picture and video items; other items get an empty body.
        For articles of ``type`` ``preformatted`` only the text of the footer is appended, on a new
        line; for all others the footer is appended unchanged.

        :param article: item whose body and footer are combined
        :return: body with public service announcements.
        """
        kind = article[ITEM_TYPE]
        if kind in (CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED) or is_takes_package(article):
            body = article.get('body_html', '')
        elif kind in (CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO):
            body = article.get('description', '')
        else:
            body = ''

        footer = article.get('body_footer')
        if not (body and footer):
            return body

        if article.get('type', '') != 'preformatted':
            return '{}{}'.format(body, footer)

        footer_text = BeautifulSoup(footer, 'html.parser').get_text()
        return '{}\r\n{}'.format(body, footer_text)
Example #9
0
    def append_body_footer(self, article):
        """
        Checks if the article has any Public Service Announcements and if available appends each of them to the body.

        The body is ``body_html`` for text and preformatted items and for takes packages,
        ``description`` for audio, picture and video items, and empty otherwise. When the
        article format is ``FORMATS.PRESERVED`` every LF of the body is expanded to CR LF,
        ``br`` elements are replaced by CR LF and the footer is appended as text on a new
        line; otherwise the footer is concatenated unchanged.

        Side effect: ``<br>`` tags in ``article['body_html']`` are rewritten to the
        self-closing ``<br/>`` form in place.

        :param dict article: item whose body and footer are combined
        :return: body with public service announcements.
        """
        # str.replace() returns a new string, so the result has to be stored back for the
        # normalisation to take effect. A missing or empty body_html is left untouched.
        body_html = article.get('body_html')
        if body_html:
            article['body_html'] = body_html.replace('<br>', '<br/>')

        body = ''
        if article[ITEM_TYPE] in [
                CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED
        ] or is_takes_package(article):
            body = article.get('body_html', '')
        elif article[ITEM_TYPE] in [
                CONTENT_TYPE.AUDIO, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO
        ]:
            body = article.get('description', '')

        if body and article.get(FORMAT, '') == FORMATS.PRESERVED:
            # The second replace undoes the doubled CR produced for existing CR LF endings.
            body = body.replace('\n', '\r\n').replace('\r\r', '\r')
            parsed = parse_html(body, content='html')

            # Put the line break in front of whatever text follows each <br>, so the
            # text survives when the elements themselves are stripped below.
            for br in parsed.xpath('//br'):
                br.tail = ('\r\n' + br.tail) if br.tail else '\r\n'

            etree.strip_elements(parsed, 'br', with_tail=False)
            body = etree.tostring(parsed, encoding="unicode")

        if body and article.get('body_footer'):
            footer = article.get('body_footer')
            if article.get(FORMAT, '') == FORMATS.PRESERVED:
                body = '{}\r\n{}'.format(body, get_text(footer))
            else:
                body = '{}{}'.format(body, footer)
        return body
Example #10
0
    def get_subscribers(self, doc, target_media_type):
        """Get the subscribers for this document based on the target_media_type for publishing.

        1. Get all active subscribers.
            a. Get the list of takes subscribers if Takes Package
        2. If takes package then subsequent takes are sent to same wire subscriber as first take.
        3. Filter the subscriber list based on the publish filter and global filters (if configured).
            a. Publish to takes package subscribers if the takes package is received by the subscriber.
            b. Rewrites are sent to subscribers that received the original item or the previous rewrite.

        :param dict doc: Document to publish/correct/kill
        :param str target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queued is an Individual Article.
        :return: (list, dict, dict) List of filtered subscribers, product codes per subscriber,
                associations per subscriber
        """
        subscribers, takes_subscribers, rewrite_subscribers = [], [], []
        subscriber_codes, take_codes, codes, rewrite_codes = {}, {}, {}, {}
        associations, takes_associations, rewrite_associations = {}, {}, {}
        first_take = None

        # Step 3b
        # Resolve the takes package related to the rewritten item: the item itself when it
        # is a takes package, otherwise whatever get_take_package() returns for it.
        rewrite_of = doc.get('rewrite_of')
        rewrite_take_package = None
        if rewrite_of:
            rewrite_of_item = get_resource_service('archive').find_one(
                req=None, _id=rewrite_of)
            if rewrite_of_item:
                if is_takes_package(rewrite_of_item):
                    rewrite_take_package = rewrite_of_item
                else:
                    rewrite_take_package = self.takes_package_service.get_take_package(
                        rewrite_of_item)

        # Step 1
        query = {'is_active': True}
        subscribers = list(
            get_resource_service('subscribers').get(req=None, lookup=query))

        if doc.get(ITEM_TYPE) in [CONTENT_TYPE.COMPOSITE
                                  ] and doc.get(PACKAGE_TYPE) == TAKES_PACKAGE:
            # Step 1a
            # Subscribers recorded against a published/corrected send of this package.
            query = {
                '$and': [{
                    'item_id': doc['item_id']
                }, {
                    'publishing_action': {
                        '$in':
                        [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]
                    }
                }]
            }
            takes_subscribers, take_codes, takes_associations = self._get_subscribers_for_previously_sent_items(
                query)

            if rewrite_of and rewrite_take_package:
                # Step 3b
                query = {
                    '$and': [{
                        'item_id':
                        rewrite_take_package.get(config.ID_FIELD)
                    }, {
                        'publishing_action': {
                            '$in':
                            [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]
                        }
                    }]
                }
                rewrite_subscribers, rewrite_codes, rewrite_associations = \
                    self._get_subscribers_for_previously_sent_items(query)

        # Step 2
        if doc.get(ITEM_TYPE) in [
                CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED
        ]:
            # get first take
            first_take = self.takes_package_service.get_take_by_take_no(doc, 1)
            if str(doc['item_id']) == str(first_take):
                # if the current document is the first take then continue
                first_take = None

            if first_take:
                # if first take is published then subsequent takes should go to the same subscribers.
                # This replaces the active-subscriber list fetched in Step 1.
                query = {
                    '$and': [{
                        'item_id': first_take
                    }, {
                        'publishing_action': {
                            '$in': [CONTENT_STATE.PUBLISHED]
                        }
                    }]
                }
                subscribers, subscriber_codes, takes_associations = \
                    self._get_subscribers_for_previously_sent_items(query)

            if rewrite_of:
                # Step 3b
                # When the rewritten item is itself the takes package, look at every item
                # the package references; otherwise only at the rewritten item.
                if rewrite_take_package and rewrite_take_package.get(
                        config.ID_FIELD) == rewrite_of:
                    item_ids = self.package_service.get_residrefs(
                        rewrite_take_package)
                else:
                    item_ids = [rewrite_of]

                query = {
                    '$and': [{
                        'item_id': {
                            '$in': item_ids
                        }
                    }, {
                        'publishing_action': {
                            '$in':
                            [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]
                        }
                    }]
                }
                rewrite_subscribers, rewrite_codes, rewrite_associations = \
                    self._get_subscribers_for_previously_sent_items(query)

        # Step 3
        # Filtering is skipped for subsequent takes: their subscriber list was already
        # replaced in Step 2 by the subscribers recorded for the first take.
        if not first_take:
            subscribers, codes = self.filter_subscribers(
                doc, subscribers, target_media_type)

        if takes_subscribers:
            # Step 3a
            # Put the takes subscribers first and drop duplicates (by id) from the rest.
            subscribers_ids = set(s[config.ID_FIELD]
                                  for s in takes_subscribers)
            subscribers = takes_subscribers + [
                s for s in subscribers
                if s[config.ID_FIELD] not in subscribers_ids
            ]

        if rewrite_subscribers:
            # Step 3b
            # Same merge as above, with the rewrite subscribers placed first.
            subscribers_ids = set(s[config.ID_FIELD]
                                  for s in rewrite_subscribers)
            subscribers = rewrite_subscribers + [
                s for s in subscribers
                if s[config.ID_FIELD] not in subscribers_ids
            ]

        # The code mappings are merged in the order takes, rewrites, filter results;
        # on a key clash the later update replaces the earlier entry.
        if take_codes:
            # join the codes
            subscriber_codes.update(take_codes)

        if rewrite_codes:
            # join the codes
            subscriber_codes.update(rewrite_codes)

        if codes:
            # join the codes
            subscriber_codes.update(codes)

        # update associations
        self._update_associations(associations, rewrite_associations)
        self._update_associations(associations, takes_associations)

        # handle associations
        associations = self._filter_subscribers_for_associations(
            subscribers, doc, target_media_type, associations)

        return subscribers, subscriber_codes, associations
    def get_subscribers(self, doc, target_media_type):
        """
        Get the subscribers for this document based on the target_media_type for publishing.

        1. If the item has embargo and is a future date then fetch active Wire Subscribers.
           Otherwise get all active subscribers.
            a. Get the list of takes subscribers if Takes Package
        2. If takes package then subsequent takes are sent to same wire subscriber as first take.
        3. Filter the subscriber list based on the publish filter and global filters (if configured).
            a. Publish to takes package subscribers if the takes package is received by the subscriber.
            b. Rewrites are sent to subscribers that received the original item or the previous rewrite.

        :param dict doc: Document to publish/correct/kill
        :param str target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queued is an Individual Article.
        :return: (list, list, dict) List of filtered subscriber,
                List of subscribers that have not received item previously (empty list in this case).
                List of product codes per subscriber
        """
        subscribers, subscribers_yet_to_receive, takes_subscribers, rewrite_subscribers = [], [], [], []
        subscriber_codes, take_codes, codes, rewrite_codes = {}, {}, {}, {}
        first_take = None

        # Step 3b
        # Resolve the takes package related to the rewritten item: the item itself when it
        # is a takes package, otherwise whatever get_take_package() returns for it.
        rewrite_of = doc.get('rewrite_of')
        rewrite_take_package = None
        if rewrite_of:
            rewrite_of_item = get_resource_service('archive').find_one(req=None, _id=rewrite_of)
            if rewrite_of_item:
                if is_takes_package(rewrite_of_item):
                    rewrite_take_package = rewrite_of_item
                else:
                    rewrite_take_package = self.takes_package_service.get_take_package(rewrite_of_item)

        # Step 1
        query = {'is_active': True}
        # An embargo that has not passed yet restricts the lookup to wire subscribers.
        if doc.get(EMBARGO) and get_utc_schedule(doc, EMBARGO) > utcnow():
            query['subscriber_type'] = SUBSCRIBER_TYPES.WIRE
            # Ta 04/05/16: Commenting out this section for ticket SD-4465
            # query['media_type'] = SUBSCRIBER_MEDIA_TYPES.MEDIA

        subscribers = list(get_resource_service('subscribers').get(req=None, lookup=query))

        if doc.get(ITEM_TYPE) in [CONTENT_TYPE.COMPOSITE] and doc.get(PACKAGE_TYPE) == TAKES_PACKAGE:
            # Step 1a
            # Subscribers recorded against a published/corrected send of this package.
            query = {'$and': [{'item_id': doc['item_id']},
                              {'publishing_action': {'$in': [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]}}]}
            takes_subscribers, take_codes = self._get_subscribers_for_previously_sent_items(query)

            if rewrite_of and rewrite_take_package:
                # Step 3b
                query = {'$and': [{'item_id': rewrite_take_package.get(config.ID_FIELD)},
                                  {'publishing_action': {'$in': [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]}}]}
                rewrite_subscribers, rewrite_codes = self._get_subscribers_for_previously_sent_items(query)

        # Step 2
        if doc.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            # get first take
            first_take = self.takes_package_service.get_take_by_take_no(doc, 1)
            if str(doc['item_id']) == str(first_take):
                # if the current document is the first take then continue
                first_take = None

            if first_take:
                # if first take is published then subsequent takes should go to the same subscribers.
                # This replaces the active-subscriber list fetched in Step 1.
                query = {'$and': [{'item_id': first_take},
                                  {'publishing_action': {'$in': [CONTENT_STATE.PUBLISHED]}}]}
                subscribers, subscriber_codes = self._get_subscribers_for_previously_sent_items(query)

            if rewrite_of:
                # Step 3b
                # When the rewritten item is itself the takes package, look at every item
                # the package references; otherwise only at the rewritten item.
                if rewrite_take_package and rewrite_take_package.get(config.ID_FIELD) == rewrite_of:
                    item_ids = self.package_service.get_residrefs(rewrite_take_package)
                else:
                    item_ids = [rewrite_of]

                query = {'$and': [{'item_id': {'$in': item_ids}},
                                  {'publishing_action': {'$in': [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]}}]}
                rewrite_subscribers, rewrite_codes = self._get_subscribers_for_previously_sent_items(query)

        # Step 3
        # Filtering is skipped for subsequent takes: their subscriber list was already
        # replaced in Step 2 by the subscribers recorded for the first take.
        if not first_take:
            subscribers, codes = self.filter_subscribers(doc, subscribers, target_media_type)

        if takes_subscribers:
            # Step 3a
            # Put the takes subscribers first and drop duplicates (by id) from the rest.
            subscribers_ids = set(s[config.ID_FIELD] for s in takes_subscribers)
            subscribers = takes_subscribers + [s for s in subscribers if s[config.ID_FIELD] not in subscribers_ids]

        if rewrite_subscribers:
            # Step 3b
            # Same merge as above, with the rewrite subscribers placed first.
            subscribers_ids = set(s[config.ID_FIELD] for s in rewrite_subscribers)
            subscribers = rewrite_subscribers + [s for s in subscribers if s[config.ID_FIELD] not in subscribers_ids]

        # The code mappings are merged in the order takes, rewrites, filter results;
        # on a key clash the later update replaces the earlier entry.
        if take_codes:
            # join the codes
            subscriber_codes.update(take_codes)

        if rewrite_codes:
            # join the codes
            subscriber_codes.update(rewrite_codes)

        if codes:
            # join the codes
            subscriber_codes.update(codes)

        # subscribers_yet_to_receive is never populated in this method and is returned empty.
        return subscribers, subscribers_yet_to_receive, subscriber_codes