    def test_remove_two_refs_from_package(self):
        anything_left1 = PackageService().remove_ref_from_inmem_package(self.package1, "456")
        anything_left2 = PackageService().remove_ref_from_inmem_package(self.package1, "123")
        self.assertEqual(len(self.package1.get("groups", [])), 2)
        root_group = self.package1.get("groups", [])[0]
        self.assertEqual(len(root_group.get("refs", [])), 1)
        self.assertTrue(anything_left1)
        self.assertTrue(anything_left2)
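
The assertions above imply a fixture where the root group references two sub-groups, one holding ref "456" and the other holding refs "123" and "789"; removing the last ref of a group drops the group itself. A hypothetical sketch of such a fixture (field names follow the usual Superdesk package convention; this is not the actual fixture from the test class):

    # Hypothetical fixture consistent with the assertions above.
    self.package1 = {
        "type": "composite",
        "groups": [
            # root group: its refs point at the sub-groups below
            {"id": "root", "refs": [{"idRef": "main"}, {"idRef": "story"}]},
            # sub-groups: their refs carry the item ids removed by the tests
            {"id": "main", "refs": [{"residRef": "123"}, {"residRef": "789"}]},
            {"id": "story", "refs": [{"residRef": "456"}]},
        ],
    }
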
Example 2
    def test_remove_all_refs_from_package(self):
        anything_left1 = PackageService().remove_ref_from_inmem_package(self.package1, "456")
        anything_left2 = PackageService().remove_ref_from_inmem_package(self.package1, "789")
        anything_left3 = PackageService().remove_ref_from_inmem_package(self.package1, "123")
        self.assertEqual(len(self.package1.get("groups", [])), 1)
        root_group = self.package1.get("groups", [])[0]
        self.assertEqual(len(root_group.get("refs", [])), 0)
        self.assertTrue(anything_left1)
        self.assertTrue(anything_left2)
        self.assertFalse(anything_left3)
Example 3
    def setUp(self):
        super().setUp()
        self._init_data()

        self.app.data.insert('users', self.users)
        self.app.data.insert('desks', self.desks)
        self.app.data.insert('vocabularies', self.vocabularies)
        self.app.data.insert('subscribers', self.subscribers)
        self.app.data.insert(ARCHIVE, self.articles)

        self.filename = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), "validators.json")
        self.json_data = [
            {
                "_id": "kill_text",
                "act": "kill",
                "type": "text",
                "schema": {
                    "headline": {
                        "type": "string"
                    }
                }
            },
            {
                "_id": "publish_text",
                "act": "publish",
                "type": "text",
                "schema": {}
            },
            {
                "_id": "correct_text",
                "act": "correct",
                "type": "text",
                "schema": {}
            },
            {
                "_id": "publish_composite",
                "act": "publish",
                "type": "composite",
                "schema": {}
            },
        ]

        with open(self.filename, "w+") as file:
            json.dump(self.json_data, file)
        init_app(self.app)
        ValidatorsPopulateCommand().run(self.filename)

        self.package_service = PackageService()
Example 4
    def test_remove_ref_from_package(self):
        with self.app.app_context():
            anything_left = PackageService().remove_ref_from_inmem_package(self.package1, "456")
            self.assertEqual(len(self.package1.get("groups", [])), 2)
            root_group = self.package1.get("groups", [])[0]
            self.assertEqual(len(root_group.get("refs", [])), 1)
            self.assertTrue(anything_left)
Example 5
    def _can_remove_package_from_production(self, package):
        """
        Recursively checks if the package can be removed from production.

        :param package: the package to check
        :return: True if item can be removed from production, False otherwise.
        """

        item_refs = PackageService().get_residrefs(package)
        archived_items = list(
            get_resource_service('archived').find_by_item_ids(item_refs))
        is_removable = (len(item_refs) == len(archived_items))

        if is_removable:
            packages_in_archived_items = (
                p for p in archived_items
                if p[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE)

            for package in packages_in_archived_items:
                is_removable = self._can_remove_package_from_production(
                    package)
                if not is_removable:
                    break

        return is_removable
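
The recursion above can be restated as a small self-contained sketch (data and helper names are hypothetical; `get_residrefs` and the archived lookup are stubbed with plain dicts and sets):

    # Standalone sketch of the same recursion, with hypothetical data.
    def can_remove(package_id, residrefs, archived_ids):
        refs = residrefs.get(package_id, [])
        # every referenced item must already be archived
        if not all(ref in archived_ids for ref in refs):
            return False
        # recurse into referenced items that are themselves packages
        return all(can_remove(ref, residrefs, archived_ids)
                   for ref in refs if ref in residrefs)

    residrefs = {"P": ["a", "b", "Q"], "Q": ["c"]}  # P contains sub-package Q
    archived_ids = {"a", "b", "Q", "c"}
    print(can_remove("P", residrefs, archived_ids))  # True
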
Example 6
    def unlock(self, item_filter, user_id, session_id, etag):
        item_model = get_model(ItemModel)
        item = item_model.find_one(item_filter)

        if not item:
            raise SuperdeskApiError.notFoundError()

        if not item.get(LOCK_USER):
            raise SuperdeskApiError.badRequestError(
                message=_("Item is not locked."))

        can_user_unlock, error_message = self.can_unlock(item, user_id)

        if can_user_unlock:
            self.app.on_item_unlock(item, user_id)
            updates = {}

            # delete the item if nothing is saved so far
            # version 0 created on lock item
            if item.get(config.VERSION,
                        0) == 0 and item[ITEM_STATE] == CONTENT_STATE.DRAFT:
                if item.get(ITEM_TYPE) == CONTENT_TYPE.COMPOSITE:
                    # if item is composite then update referenced items in package.
                    PackageService().update_groups({}, item)

                superdesk.get_resource_service("archive").delete_action(
                    lookup={"_id": item["_id"]})
                push_content_notification([item])
            else:
                updates = {}
                set_unlock_updates(updates)
                autosave = superdesk.get_resource_service(
                    "archive_autosave").find_one(req=None, _id=item["_id"])
                if autosave and item[ITEM_STATE] not in PUBLISH_STATES:
                    if not hasattr(
                            flask.g,
                            "user"):  # user is not set when session expires
                        flask.g.user = superdesk.get_resource_service(
                            "users").find_one(req=None, _id=user_id)
                    autosave.update(updates)
                    resolve_document_version(autosave, "archive", "PATCH",
                                             item)
                    superdesk.get_resource_service("archive").patch(
                        item["_id"], autosave)
                    item = superdesk.get_resource_service("archive").find_one(
                        req=None, _id=item["_id"])
                    insert_versioning_documents("archive", item)
                else:
                    item_model.update(item_filter, updates)
                    item = item_model.find_one(item_filter)
                self.app.on_item_unlocked(item, user_id)

            push_unlock_notification(item, user_id, session_id)
        else:
            raise SuperdeskApiError.forbiddenError(message=error_message)

        return item
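
The delete branch above only fires when the lock itself created the item, i.e. nothing was ever saved. A minimal restatement of that condition, assuming the same superdesk constants as the code above:

    # Sketch: unlock deletes the item only if it carries just the version
    # created on lock (version 0) and is still a draft.
    def deletes_on_unlock(item):
        return (item.get(config.VERSION, 0) == 0
                and item[ITEM_STATE] == CONTENT_STATE.DRAFT)
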
Example 7
    def unlock(self, item_filter, user_id, session_id, etag):
        item_model = get_model(ItemModel)
        item = item_model.find_one(item_filter)

        if not item:
            raise SuperdeskApiError.notFoundError()

        if not item.get(LOCK_USER):
            raise SuperdeskApiError.badRequestError(message="Item is not locked.")

        can_user_unlock, error_message = self.can_unlock(item, user_id)

        if can_user_unlock:
            self.app.on_item_unlock(item, user_id)
            updates = {}

            # delete the item if nothing is saved so far
            # version 0 created on lock item
            if item.get(config.VERSION, 0) == 0 and item[ITEM_STATE] == CONTENT_STATE.DRAFT:
                if item.get(ITEM_TYPE) == CONTENT_TYPE.COMPOSITE:
                    # if item is composite then update referenced items in package.
                    PackageService().update_groups({}, item)

                superdesk.get_resource_service('archive').delete_action(lookup={'_id': item['_id']})
                push_content_notification([item])
            else:
                updates = {LOCK_USER: None, LOCK_SESSION: None, 'lock_time': None,
                           'lock_action': None, 'force_unlock': True}
                autosave = superdesk.get_resource_service('archive_autosave').find_one(req=None, _id=item['_id'])
                if autosave and item[ITEM_STATE] not in PUBLISH_STATES:
                    if not hasattr(flask.g, 'user'):  # user is not set when session expires
                        flask.g.user = superdesk.get_resource_service('users').find_one(req=None, _id=user_id)
                    autosave.update(updates)
                    resolve_document_version(autosave, 'archive', 'PATCH', item)
                    superdesk.get_resource_service('archive').patch(item['_id'], autosave)
                    item = superdesk.get_resource_service('archive').find_one(req=None, _id=item['_id'])
                    insert_versioning_documents('archive', item)
                else:
                    item_model.update(item_filter, updates)
                    item = item_model.find_one(item_filter)
                self.app.on_item_unlocked(item, user_id)

            push_notification('item:unlock',
                              item=str(item_filter.get(config.ID_FIELD)),
                              item_version=str(item.get(config.VERSION)),
                              state=item.get(ITEM_STATE),
                              user=str(user_id), lock_session=str(session_id),
                              _etag=item.get(config.ETAG))
        else:
            raise SuperdeskApiError.forbiddenError(message=error_message)

        return item
Example 8
    def unlock(self, item_filter, user_id, session_id, etag):
        item_model = get_model(ItemModel)
        item = item_model.find_one(item_filter)

        if not item:
            raise SuperdeskApiError.notFoundError()

        if not item.get(LOCK_USER):
            raise SuperdeskApiError.badRequestError(message="Item is not locked.")

        can_user_unlock, error_message = self.can_unlock(item, user_id)

        if can_user_unlock:
            self.app.on_item_unlock(item, user_id)

            # delete the item if nothing is saved so far
            # version 0 created on lock item
            if item.get(config.VERSION, 0) == 0 and item[ITEM_STATE] == CONTENT_STATE.DRAFT:
                if item.get(ITEM_TYPE) == CONTENT_TYPE.COMPOSITE:
                    # if item is composite then update referenced items in package.
                    PackageService().update_groups({}, item)

                superdesk.get_resource_service('archive').delete_action(lookup={'_id': item['_id']})
                push_content_notification([item])
            else:
                updates = {LOCK_USER: None, LOCK_SESSION: None, 'lock_time': None, 'force_unlock': True}
                item_model.update(item_filter, updates)
                self.app.on_item_unlocked(item, user_id)

            push_notification('item:unlock',
                              item=str(item_filter.get(config.ID_FIELD)),
                              item_version=str(item.get(config.VERSION)),
                              state=item.get(ITEM_STATE),
                              user=str(user_id), lock_session=str(session_id))
        else:
            raise SuperdeskApiError.forbiddenError(message=error_message)

        item = item_model.find_one(item_filter)
        return item
Example 9
    def setUp(self):
        super().setUp()
        self._init_data()

        self.app.data.insert('vocabularies', self.vocabularies)
        self.app.data.insert('subscribers', self.subscribers)
        self.app.data.insert(ARCHIVE, self.articles)

        self.filename = os.path.join(os.path.abspath(os.path.dirname(__file__)), "validators.json")
        self.json_data = [
            {"_id": "kill_text", "act": "kill", "type": "text", "schema": {"headline": {"type": "string"}}},
            {"_id": "publish_text", "act": "publish", "type": "text", "schema": {}},
            {"_id": "correct_text", "act": "correct", "type": "text", "schema": {}},
            {"_id": "publish_composite", "act": "publish", "type": "composite", "schema": {}},
        ]

        with open(self.filename, "w+") as file:
            json.dump(self.json_data, file)
        init_app(self.app)
        ValidatorsPopulateCommand().run(self.filename)

        self.package_service = PackageService()
Example 10
    def can_remove_from_production(self, doc):
        """
        Returns True if the doc in the published collection can be removed from production, otherwise False.
        1. Returns False if the item has been published more than once
        2. Returns False if the item is referenced by a package
        3. Returns False if the item is a package and not all of its items are found in the archived collection.

        :param doc: article in published collection
        :return: True if item can be removed from production, False otherwise.
        """

        items = self.get_other_published_items(doc['item_id'])
        is_removable = (items.count() == 0)

        if is_removable:
            is_removable = (PackageService().get_packages(
                doc['item_id']).count() == 0)

            if is_removable and doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                return self._can_remove_package_from_production(doc)

        return is_removable
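
The three docstring conditions map onto the code roughly as follows; this is a hypothetical restatement, not project code:

    # Sketch of the decision order (names hypothetical).
    def removable(doc, other_published_count, referencing_packages, package_check):
        if other_published_count:           # 1. published more than once
            return False
        if referencing_packages:            # 2. still referenced by a package
            return False
        if doc.get("type") == "composite":  # 3. packages need all items archived
            return package_check(doc)
        return True
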
Example 11
class EnqueueService:
    """
    Creates the corresponding entries in the publish queue for items marked for publishing
    """

    publish_type = "publish"
    published_state = "published"

    non_digital = partial(
        filter,
        lambda s: s.get("subscriber_type", "") == SUBSCRIBER_TYPES.WIRE)
    digital = partial(
        filter, lambda s: (s.get("subscriber_type", "") in
                           {SUBSCRIBER_TYPES.DIGITAL, SUBSCRIBER_TYPES.ALL}))
    package_service = PackageService()

    filters = None

    def __init__(self, published_state=None):
        if published_state is not None:
            self.published_state = published_state

    def get_filters(self):
        """Retrieve all of the available filter conditions and content filters if they have not yet been retrieved or
        they have been updated. This avoids the filtering functions having to repeatedly retireve the individual filter
        records.

        :return:
        """

        # Get the most recent update time to the filter conditions and content_filters
        req = ParsedRequest()
        req.sort = "-_updated"
        req.max_results = 1
        mindate = datetime.min.replace(tzinfo=pytz.UTC)
        latest_fc = next(
            get_resource_service("filter_conditions").get_from_mongo(
                req=req, lookup=None), {}).get("_updated", mindate)
        latest_cf = next(
            get_resource_service("content_filters").get_from_mongo(
                req=req, lookup=None), {}).get("_updated", mindate)

        if (not self.filters or latest_fc > self.filters.get(
                "latest_filter_conditions", mindate) or latest_fc == mindate or
                latest_cf > self.filters.get("latest_content_filters", mindate)
                or latest_cf == mindate):
            logger.debug("Getting content filters and filter conditions")
            self.filters = dict()
            self.filters["filter_conditions"] = dict()
            self.filters["content_filters"] = dict()
            for fc in get_resource_service("filter_conditions").get(req=None,
                                                                    lookup={}):
                self.filters["filter_conditions"][fc.get("_id")] = {"fc": fc}
                self.filters["latest_filter_conditions"] = (
                    fc.get("_updated") if fc.get("_updated") >
                    self.filters.get("latest_filter_conditions", mindate) else
                    self.filters.get("latest_filter_conditions", mindate))
            for cf in get_resource_service("content_filters").get(req=None,
                                                                  lookup={}):
                self.filters["content_filters"][cf.get("_id")] = {"cf": cf}
                self.filters["latest_content_filters"] = (
                    cf.get("_updated") if cf.get("_updated") >
                    self.filters.get("latest_content_filters", mindate) else
                    self.filters.get("latest_content_filters", mindate))
        else:
            logger.debug(
                "Using cached content filters and filter conditions")

    def _enqueue_item(self, item, content_type=None):
        item_to_queue = deepcopy(item)
        if item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            queued = self._publish_package_items(item_to_queue)
            if not queued:  # this was only published to subscribers with config.packaged on
                return self.publish(doc=item_to_queue,
                                    target_media_type=SUBSCRIBER_TYPES.DIGITAL)
            else:
                return queued
        elif content_type:
            return self.publish(item_to_queue, None, content_type)
        elif item[ITEM_TYPE] not in [
                CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED
        ]:
            return self.publish(item_to_queue, SUBSCRIBER_TYPES.DIGITAL)
        else:
            return self.publish(item_to_queue, None)

    def _publish_package_items(self, package):
        """Publishes all items of a package recursively then publishes the package itself

        :param package: package to publish
        :return: True if the package was queued, else False
        """
        items = self.package_service.get_residrefs(package)
        subscriber_items = {}
        queued = False
        removed_items = []
        if self.publish_type in ["correct", "kill"]:
            removed_items, added_items = self._get_changed_items(
                items, package)
            # we raise an error if a correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(
                    added_items) == 0 and self.publish_type == "correct":
                raise SuperdeskApiError.badRequestError(
                    _("Corrected package cannot be empty!"))
            items.extend(added_items)

        if items:
            archive_service = get_resource_service("archive")
            for guid in items:
                package_item = archive_service.find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        _("Package item with id: {guid} has not been published."
                          ).format(guid=guid))

                subscribers, subscriber_codes, associations = self._get_subscribers_for_package_item(
                    package_item)
                package_item_id = package_item[config.ID_FIELD]
                self._extend_subscriber_items(subscriber_items, subscribers,
                                              package_item, package_item_id,
                                              subscriber_codes)

            for removed_id in removed_items:
                package_item = archive_service.find_one(req=None,
                                                        _id=removed_id)
                subscribers, subscriber_codes, associations = self._get_subscribers_for_package_item(
                    package_item)
                package_item_id = None
                self._extend_subscriber_items(subscriber_items, subscribers,
                                              package_item, package_item_id,
                                              subscriber_codes)

            queued = self.publish_package(package,
                                          target_subscribers=subscriber_items)

        return queued

    def _get_changed_items(self, existing_items, package):
        """Returns the added and removed items from existing_items

        :param existing_items: Existing list of item ids
        :param package: package being published
        :return: list of removed items and list of added items
        """
        published_service = get_resource_service("published")
        req = ParsedRequest()
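        # The elastic query below fetches the most recently queued version of
        # this package from the published collection (sorted by
        # publish_sequence_no, one result) so that the previous and current
        # ref lists can be diffed.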
        query = {
            "query": {
                "filtered": {
                    "filter": {
                        "and": [
                            {
                                "terms": {
                                    QUEUE_STATE: [
                                        PUBLISH_STATE.QUEUED,
                                        PUBLISH_STATE.QUEUED_NOT_TRANSMITTED
                                    ]
                                }
                            },
                            {
                                "term": {
                                    "item_id": package["item_id"]
                                }
                            },
                        ]
                    }
                }
            },
            "sort": [{
                "publish_sequence_no": "desc"
            }],
        }
        req.args = {"source": json.dumps(query)}
        req.max_results = 1
        previously_published_packages = published_service.get(req=req,
                                                              lookup=None)

        if not previously_published_packages.count():
            return [], []

        previously_published_package = previously_published_packages[0]

        if "groups" in previously_published_package:
            old_items = self.package_service.get_residrefs(
                previously_published_package)
            added_items = list(set(existing_items) - set(old_items))
            removed_items = list(set(old_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []

    def enqueue_item(self, item, content_type=None):
        """Creates the corresponding entries in the publish queue for the given item

        :param item: Item to enqueue
        :param content_type: item content type
        :return bool: True if the item is queued, else False.
        """
        try:
            return self._enqueue_item(item, content_type)
        except SuperdeskApiError as e:
            raise e
        except KeyError as e:
            raise SuperdeskApiError.badRequestError(message=_(
                "Key is missing on article to be published: {exception}").
                                                    format(exception=str(e)))
        except Exception as e:
            logger.exception(
                "Something bad happened while publishing {}".format(
                    item.get(config.ID_FIELD)))
            raise SuperdeskApiError.internalError(
                message=_("Failed to publish the item: {exception}").format(
                    exception=str(e)),
                exception=e)

    def get_subscribers(self, doc, target_media_type):
        """Get subscribers for doc based on target_media_type.

        Override this method in the ArchivePublishService, ArchiveCorrectService and ArchiveKillService

        :param doc: Document to publish/correct/kill
        :param target_media_type: Valid values are - Wire, Digital.
        :return: (list, list) List of filtered subscriber,
                List of subscribers that have not received item previously (empty list in this case).
        """
        raise NotImplementedError()

    def publish(self, doc, target_media_type=None, content_type=None):
        """Queue the content for publishing.

        1. Get the subscribers.
        2. Queue the content for the subscribers.
        3. Send a notification if no formatter is found for any of the formats configured for a subscriber.
        4. If nothing was queued and no target media type was given, log it.
        5. Publish the content to the content api.

        :param dict doc: document to publish
        :param str target_media_type: Valid values are - Wire, Digital.
        :param str content_type: doc content type, None for content
        :return bool: if content is queued then True else False
        :raises PublishQueueError.item_not_queued_error:
                If nothing is queued.
        """
        sent = False

        # Step 1
        subscribers, subscriber_codes, associations = self.get_subscribers(
            doc, target_media_type)
        # Step 2
        no_formatters, queued = self.queue_transmission(
            deepcopy(doc), subscribers, subscriber_codes, associations, sent)

        # Step 3
        self._push_formatter_notification(doc, no_formatters)

        # Step 4
        if not target_media_type and not queued:
            level = logging.INFO
            if app.config["PUBLISH_NOT_QUEUED_ERROR"] and not app.config.get(
                    "SUPERDESK_TESTING"):
                level = logging.ERROR
            logger.log(
                level,
                "Nothing is saved to publish queue for story: {} for action: {}"
                .format(doc[config.ID_FIELD], self.publish_type),
            )

        # Step 5
        if not content_type:
            self.publish_content_api(
                doc, [s for s in subscribers if s.get("api_enabled")])

        return queued

    def publish_content_api(self, doc, subscribers):
        """
        Publish item to content api
        :param dict doc: content api item
        :param list subscribers: list of subscribers
        """
        try:
            if content_api.is_enabled():
                get_resource_service("content_api").publish(doc, subscribers)
        except Exception:
            logger.exception(
                "Failed to queue item to API for item: {} for action {}".
                format(doc[config.ID_FIELD], self.publish_type))

    def _push_formatter_notification(self, doc, no_formatters=None):
        if no_formatters is None:
            no_formatters = []

        if len(no_formatters) > 0:
            user = get_user()
            push_notification(
                "item:publish:wrong:format",
                item=str(doc[config.ID_FIELD]),
                unique_name=doc.get("unique_name"),
                desk=str(doc.get("task", {}).get("desk", "")),
                user=str(user.get(config.ID_FIELD, "")),
                formats=no_formatters,
            )

    def _get_subscriber_codes(self, subscribers):
        subscriber_codes = {}
        all_products = list(
            get_resource_service("products").get(req=None, lookup=None))

        for subscriber in subscribers:
            codes = self._get_codes(subscriber)
            products = [
                p for p in all_products
                if p[config.ID_FIELD] in subscriber.get("products", [])
            ]

            for product in products:
                codes.extend(self._get_codes(product))
                subscriber_codes[subscriber[config.ID_FIELD]] = list(
                    set(codes))

        return subscriber_codes

    def resend(self, doc, subscribers):
        """Resend doc to subscribers

        :param dict doc: doc to resend
        :param list subscribers: list of subscribers
        :return:
        """
        subscriber_codes = self._get_subscriber_codes(subscribers)
        wire_subscribers = list(self.non_digital(subscribers))
        digital_subscribers = list(self.digital(subscribers))

        for subscriber in wire_subscribers:
            subscriber["api_enabled"] = len(
                subscriber.get("api_products") or []) > 0

        for subscriber in digital_subscribers:
            subscriber["api_enabled"] = len(
                subscriber.get("api_products") or []) > 0

        doc["item_id"] = doc[config.ID_FIELD]
        associations = self._resend_associations_to_subscribers(
            doc, subscribers)
        if len(wire_subscribers) > 0:
            self._resend_to_subscribers(doc, wire_subscribers,
                                        subscriber_codes, associations)
            self.publish_content_api(doc, [
                subscriber for subscriber in wire_subscribers
                if subscriber.get("api_enabled")
            ])

        if len(digital_subscribers) > 0:
            package = None
            self._resend_to_subscribers(doc, digital_subscribers,
                                        subscriber_codes, associations)

            self.publish_content_api(package or doc, [
                subscriber for subscriber in digital_subscribers
                if subscriber.get("api_enabled")
            ])

    def _resend_associations_to_subscribers(self, doc, subscribers):
        """
        On resend, associations are also sent to the subscribers.
        :param dict doc: item to resend
        :param list subscribers: list of subscribers
        :return dict: associations
        """
        if not doc.get(ASSOCIATIONS):
            return {}

        associations = {}

        for assoc_id, item in doc.get(ASSOCIATIONS).items():
            if not item:
                continue

            item["subscribers"] = []

            for s in subscribers:
                item["subscribers"].append(s.get(config.ID_FIELD))
                if not associations.get(s.get(config.ID_FIELD)):
                    associations[s.get(config.ID_FIELD)] = []

                associations[s.get(config.ID_FIELD)].append(
                    item.get(config.ID_FIELD))
        return associations

    def _resend_to_subscribers(self,
                               doc,
                               subscribers,
                               subscriber_codes,
                               associations=None):
        if associations is None:
            associations = {}
        formatter_messages, queued = self.queue_transmission(
            doc, subscribers, subscriber_codes, associations)
        self._push_formatter_notification(doc, formatter_messages)
        if not queued:
            logger.exception(
                "Nothing is saved to publish queue for story: {} for action: {}"
                .format(doc[config.ID_FIELD], "resend"))

    def publish_package(self, package, target_subscribers):
        """Publishes a given package to given subscribers.

        For each subscriber, updates the package definition with the wanted_items for that subscriber
        and removes the unwanted_items that are not supposed to go to that subscriber.
        Text stories are replaced by their digital versions.

        :param package: Package to be published
        :param target_subscribers: List of subscriber and items-per-subscriber
        """
        all_items = self.package_service.get_residrefs(package)
        no_formatters, queued = [], False
        subscribers = []
        for items in target_subscribers.values():
            updated = deepcopy(package)
            subscriber = items["subscriber"]
            codes = items["codes"]
            wanted_items = [
                item for item in items["items"]
                if items["items"].get(item, None)
            ]
            unwanted_items = [
                item for item in all_items if item not in wanted_items
            ]
            for i in unwanted_items:
                still_items_left = self.package_service.remove_ref_from_inmem_package(
                    updated, i)
                if not still_items_left and self.publish_type != "correct":
                    # if nothing left in the package to be published and
                    # if not correcting then don't send the package
                    return
            for key in wanted_items:
                try:
                    self.package_service.replace_ref_in_package(
                        updated, key, items["items"][key])
                except KeyError:
                    continue

            formatters, temp_queued = self.queue_transmission(
                updated, [subscriber], {subscriber[config.ID_FIELD]: codes},
                sent=True)

            subscribers.append(subscriber)
            no_formatters.extend(formatters)
            if temp_queued:
                queued = temp_queued

            delivery_types = [
                d["delivery_type"] for d in self.get_destinations(subscriber)
            ]
            is_content_api_delivery = "content_api" in delivery_types
            # packages for content_api will not be transmitted
            # so we need to publish them here
            if is_content_api_delivery and subscriber.get("api_enabled"):
                self.publish_content_api(package, [subscriber])

        return queued

    def get_destinations(self, subscriber):
        destinations = subscriber.get("destinations") or []
        if subscriber.get("api_enabled"):
            destinations.append({
                "name": "content api",
                "delivery_type": "content_api",
                "format": "ninjs"
            })
        return destinations

    def queue_transmission(self,
                           doc,
                           subscribers,
                           subscriber_codes=None,
                           associations=None,
                           sent=False):
        """Method formats and then queues the article for transmission to the passed subscribers.

        ::Important Note:: Format types can repeat across subscribers, but a formatted item cannot be
        generated once and shared across all subscribers, because each formatted item must carry a
        publish sequence number generated per subscriber.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        if associations is None:
            associations = {}
        if subscriber_codes is None:
            subscriber_codes = {}

        try:
            if config.PUBLISH_ASSOCIATIONS_RESEND and not sent:
                is_correction = doc.get("state") in [
                    "corrected", "being_corrected"
                ]
                is_update = doc.get("rewrite_of")
                is_new = not is_correction and not is_update

                if config.PUBLISH_ASSOCIATIONS_RESEND == "new" and is_new:
                    self.resend_association_items(doc)
                elif config.PUBLISH_ASSOCIATIONS_RESEND == "corrections":
                    self.resend_association_items(doc)
                elif config.PUBLISH_ASSOCIATIONS_RESEND == "updates" and not is_correction:
                    self.resend_association_items(doc)

            queued = False
            no_formatters = []
            for subscriber in subscribers:

                try:
                    if (doc[ITEM_TYPE] not in [
                            CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED
                    ] and subscriber.get("subscriber_type", "")
                            == SUBSCRIBER_TYPES.WIRE):
                        # wire subscribers can get only text and preformatted stories
                        continue

                    for destination in self.get_destinations(subscriber):
                        embed_package_items = doc[
                            ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and (
                                destination.get("config") or {}).get(
                                    "packaged", False)
                        if embed_package_items:
                            doc = self._embed_package_items(doc)

                        if doc.get(PUBLISHED_IN_PACKAGE) and (
                                destination.get("config") or {}).get(
                                    "packaged", False):
                            continue

                        # Step 2(a)
                        formatter = get_formatter(destination["format"], doc)

                        if not formatter:  # if formatter not found then record it
                            no_formatters.append(destination["format"])
                            continue

                        formatter.set_destination(destination, subscriber)
                        formatted_docs = formatter.format(
                            self.filter_document(doc), subscriber,
                            subscriber_codes.get(subscriber[config.ID_FIELD]))

                        for idx, publish_data in enumerate(formatted_docs):
                            if not isinstance(publish_data, dict):
                                pub_seq_num, formatted_doc = publish_data
                                formatted_docs[idx] = {
                                    "published_seq_num": pub_seq_num,
                                    "formatted_item": formatted_doc,
                                }
                            else:
                                assert ("published_seq_num" in publish_data
                                        and "formatted_item" in publish_data
                                        ), "missing keys in publish_data"

                        for publish_queue_item in formatted_docs:
                            publish_queue_item["item_id"] = doc["item_id"]
                            publish_queue_item["item_version"] = doc[
                                config.VERSION]
                            publish_queue_item["subscriber_id"] = subscriber[
                                config.ID_FIELD]
                            publish_queue_item["codes"] = subscriber_codes.get(
                                subscriber[config.ID_FIELD])
                            publish_queue_item["destination"] = destination
                            # publish_schedule indicates that the queue item was created via a scheduled item
                            publish_queue_item[
                                PUBLISH_SCHEDULE] = get_utc_schedule(
                                    doc, PUBLISH_SCHEDULE) or None
                            publish_queue_item["unique_name"] = doc.get(
                                "unique_name", None)
                            publish_queue_item["content_type"] = doc.get(
                                "type", None)
                            publish_queue_item["headline"] = doc.get(
                                "headline", None)
                            publish_queue_item[
                                "publishing_action"] = self.published_state
                            publish_queue_item["ingest_provider"] = (
                                ObjectId(doc.get("ingest_provider"))
                                if doc.get("ingest_provider") else None)
                            publish_queue_item[
                                "associated_items"] = associations.get(
                                    subscriber[config.ID_FIELD], [])
                            publish_queue_item["priority"] = subscriber.get(
                                "priority")

                            if doc.get(PUBLISHED_IN_PACKAGE):
                                publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[
                                    PUBLISHED_IN_PACKAGE]
                            try:
                                encoded_item = publish_queue_item.pop(
                                    "encoded_item")
                            except KeyError:
                                pass
                            else:
                                binary = io.BytesIO(encoded_item)
                                publish_queue_item[
                                    "encoded_item_id"] = app.storage.put(
                                        binary)
                            publish_queue_item.pop(ITEM_STATE, None)

                            # content api delivery will be marked as SUCCESS in queue
                            get_resource_service("publish_queue").post(
                                [publish_queue_item])
                            queued = True

                except Exception:
                    logger.exception(
                        "Failed to queue item for id {} with headline {} for subscriber {}."
                        .format(doc.get(config.ID_FIELD), doc.get("headline"),
                                subscriber.get("name")))

            return no_formatters, queued
        except Exception:
            raise

    def get_unique_associations(self, associated_items):
        """This method is used for the removing duplicate associate items
        :param dict associated_items: all the associate item
        """
        associations = {}
        for association in associated_items.values():
            if not association:
                continue
            item_id = association.get("_id")
            if item_id and item_id not in associations.keys():
                associations[item_id] = association
        return associations.values()

    def resend_association_items(self, doc):
        """This method is used to resend assciation items.
        :param dict doc: document
        """
        associated_items = doc.get(ASSOCIATIONS)
        if associated_items:
            for association in self.get_unique_associations(associated_items):
                # resend only media associations

                if association.get(
                        "type") not in MEDIA_TYPES or association.get(
                            "is_queued"):
                    continue

                archive_article = get_resource_service("archive").find_one(
                    req=None, _id=association.get("_id"))
                if not archive_article:
                    continue

                associated_article = get_resource_service(
                    "published").find_one(
                        req=None,
                        item_id=archive_article["_id"],
                        _current_version=archive_article["_current_version"])
                if associated_article and associated_article.get(
                        "state") not in ["unpublished", "killed"]:
                    from apps.publish.enqueue import get_enqueue_service

                    get_enqueue_service(associated_article.get(
                        "operation")).publish(associated_article)

    def _embed_package_items(self, package):
        """Embeds all package items in the package document."""
        for group in package.get(GROUPS, []):
            if group[GROUP_ID] == ROOT_GROUP:
                continue
            for ref in group[REFS]:
                if RESIDREF not in ref:
                    continue
                package_item = get_resource_service("published").find_one(
                    req=None,
                    item_id=ref[RESIDREF],
                    _current_version=ref[config.VERSION])
                if not package_item:
                    msg = _(
                        "Can not find package {package} published item {item}"
                    ).format(package=package["item_id"], item=ref["residRef"])
                    raise SuperdeskPublishError(500, msg)
                package_item[config.ID_FIELD] = package_item["item_id"]
                ref["package_item"] = package_item
        return package

    def _get_subscribers_for_package_item(self, package_item):
        """Finds the list of subscribers for a given item in a package

        :param package_item: item in a package
        :return: (subscribers, subscriber_codes, associations) for the item
        """
        query = {
            "$and": [{
                "item_id": package_item[config.ID_FIELD]
            }, {
                "publishing_action": package_item[ITEM_STATE]
            }]
        }

        return self._get_subscribers_for_previously_sent_items(query)

    def _get_subscribers_for_previously_sent_items(self, lookup):
        """Returns list of subscribers that have previously received the item.

        :param dict lookup: elastic query to filter the publish queue
        :return: list of subscribers, product codes per subscriber and associated item ids per subscriber
        """
        req = ParsedRequest()
        subscribers = []
        subscriber_codes = {}
        associations = {}
        queued_items = list(
            get_resource_service("publish_queue").get(req=req, lookup=lookup))

        if len(queued_items) > 0:
            subscriber_ids = {}
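            # subscriber_ids maps subscriber_id -> True when any previous
            # delivery of this item used a content_api destination; the flag
            # later becomes the subscriber's api_enabled value.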
            for queue_item in queued_items:
                subscriber_id = queue_item["subscriber_id"]
                if not subscriber_ids.get(subscriber_id):
                    subscriber_ids[subscriber_id] = False
                    if queue_item.get(
                            "destination",
                        {}).get("delivery_type") == "content_api":
                        subscriber_ids[subscriber_id] = True

                subscriber_codes[subscriber_id] = queue_item.get("codes", [])
                if queue_item.get("associated_items"):
                    associations[subscriber_id] = list(
                        set(associations.get(subscriber_id, []))
                        | set(queue_item.get("associated_items", [])))

            query = {
                "$and": [{
                    config.ID_FIELD: {
                        "$in": list(subscriber_ids.keys())
                    }
                }]
            }
            subscribers = list(
                get_resource_service("subscribers").get(req=None,
                                                        lookup=query))
            for s in subscribers:
                s["api_enabled"] = subscriber_ids.get(s.get(config.ID_FIELD))

        return subscribers, subscriber_codes, associations

    def filter_subscribers(self, doc, subscribers, target_media_type):
        """Filter subscribers to whom the current document is going to be delivered.

        :param doc: Document to publish/kill/correct
        :param subscribers: List of Subscribers that might potentially get this document
        :param target_media_type: Valid values are - Wire, Digital.
        :return: List of filtered subscribers and list of product codes per subscriber.
        """
        filtered_subscribers = []
        subscriber_codes = {}
        existing_products = {
            p[config.ID_FIELD]: p
            for p in list(
                get_resource_service("products").get(req=None, lookup=None))
        }
        global_filters = deepcopy([
            gf["cf"]
            for gf in self.filters.get("content_filters", {}).values()
            if gf["cf"].get("is_global", True)
        ])

        # apply global filters
        self.conforms_global_filter(global_filters, doc)

        for subscriber in subscribers:
            if target_media_type and subscriber.get(
                    "subscriber_type", "") != SUBSCRIBER_TYPES.ALL:
                can_send_digital = subscriber[
                    "subscriber_type"] == SUBSCRIBER_TYPES.DIGITAL
                if (target_media_type == SUBSCRIBER_TYPES.WIRE
                        and can_send_digital
                        or target_media_type == SUBSCRIBER_TYPES.DIGITAL
                        and not can_send_digital):
                    continue

            conforms, skip_filters = self.conforms_subscriber_targets(
                subscriber, doc)
            if not conforms:
                continue

            if not self.conforms_subscriber_global_filter(
                    subscriber, global_filters):
                continue

            product_codes = self._get_codes(subscriber)
            subscriber_added = False
            subscriber["api_enabled"] = False
            # validate against direct products
            result, codes = self._validate_article_for_subscriber(
                doc, subscriber.get("products"), existing_products)
            if result:
                product_codes.extend(codes)
                if not subscriber_added:
                    filtered_subscribers.append(subscriber)
                    subscriber_added = True

            if content_api.is_enabled():
                # validate against api products
                result, codes = self._validate_article_for_subscriber(
                    doc, subscriber.get("api_products"), existing_products)
                if result:
                    product_codes.extend(codes)
                    subscriber["api_enabled"] = True
                    if not subscriber_added:
                        filtered_subscribers.append(subscriber)
                        subscriber_added = True

            if skip_filters and not subscriber_added:
                # if targeted subscriber and has api products then send it to api.
                if subscriber.get("api_products"):
                    subscriber["api_enabled"] = True
                filtered_subscribers.append(subscriber)
                subscriber_added = True

            # unify the list of codes by removing duplicates
            if subscriber_added:
                subscriber_codes[subscriber[config.ID_FIELD]] = list(
                    set(product_codes))

        return filtered_subscribers, subscriber_codes

    def _validate_article_for_subscriber(self, doc, products,
                                         existing_products):
        """Validate the article for subscriber

        :param dict doc: Document to be validated
        :param list products: list of product ids
        :param dict existing_products: Product lookup
        :return tuple bool, list: Boolean flag to add subscriber or not and list of product codes.
        """
        add_subscriber, product_codes = False, []

        if not products:
            return add_subscriber, product_codes

        for product_id in products:
            # check if the product filter conforms with the story
            product = existing_products.get(product_id)

            if not product:
                continue

            if not self.conforms_product_targets(product, doc):
                continue

            if self.conforms_content_filter(product, doc):
                # gather the codes of products
                product_codes.extend(self._get_codes(product))
                add_subscriber = True

        return add_subscriber, product_codes

    def _filter_subscribers_for_associations(self, subscribers, doc,
                                             target_media_type,
                                             existing_associations):
        """Filter the subscriber for associations.

        :param list subscribers: list of subscriber that are going to receive parent item.
        :param dict doc: item with associations
        :param dict existing_associations: existing associations
        :param target_media_type: Valid values are - Wire, Digital.
        """
        associations = {}

        if not doc.get(ASSOCIATIONS) or not subscribers:
            return associations

        for assoc, item in doc.get(ASSOCIATIONS).items():
            if not item:
                continue

            assoc_subscribers = set()
            assoc_id = item.get(config.ID_FIELD)
            filtered_subscribers, subscriber_codes = self.filter_subscribers(
                item, deepcopy(subscribers), target_media_type)

            for subscriber in filtered_subscribers:
                # for the validated subscribers
                subscriber_id = subscriber.get(config.ID_FIELD)
                if not associations.get(subscriber_id):
                    associations[subscriber_id] = []

                associations[subscriber_id].append(assoc_id)
                assoc_subscribers.add(subscriber_id)

            for subscriber_id, items in existing_associations.items():
                # for the not validated associated item but previously published to the subscriber.
                if assoc_id in items and assoc_id not in associations.get(
                        subscriber_id, []):
                    if not associations.get(subscriber_id):
                        associations[subscriber_id] = []

                    associations[subscriber_id].append(assoc_id)
                    assoc_subscribers.add(subscriber_id)

            item["subscribers"] = list(assoc_subscribers)

        return associations

    def _update_associations(self, original, updates):
        """Update the associations

        :param dict original: original associations per subscriber
        :param dict updates: updated associations to merge in
        """
        if not updates:
            return

        for subscriber, items in updates.items():
            if items:
                original[subscriber] = list(
                    set(original.get(subscriber, []))
                    | set(updates.get(subscriber, [])))

    def conforms_product_targets(self, product, article):
        """Check product targets.

        Checks if the given article has any target information and if it does
        it checks if the product satisfies any of the target information

        :param product: Product to test
        :param article: article
        :return:
            bool: True if the article conforms to the targets for the given product
        """
        geo_restrictions = product.get("geo_restrictions")

        # If not targeted at all then Return true
        if not BasePublishService().is_targeted(article, "target_regions"):
            return geo_restrictions is None

        if geo_restrictions:
            for region in article.get("target_regions", []):
                if region["qcode"] == geo_restrictions and region["allow"]:
                    return True
                if region["qcode"] != geo_restrictions and not region["allow"]:
                    return True
        return False

    def conforms_subscriber_targets(self, subscriber, article):
        """Check subscriber targets.

        Checks if the given article has any target information and if it does
        it checks if the subscriber satisfies any of the target information

        :param subscriber: Subscriber to test
        :param article: article
        :return:
            bool: True/False whether the article conforms to the targets
            bool: True if the given subscriber is specifically targeted, False otherwise
        """
        # If not targeted at all then Return true
        if not BasePublishService().is_targeted(
                article,
                "target_subscribers") and not BasePublishService().is_targeted(
                    article, "target_types"):
            return True, False

        subscriber_type = subscriber.get("subscriber_type")

        for t in article.get("target_subscribers", []):
            if str(t.get("_id")) == str(subscriber["_id"]):
                return True, True

        if subscriber_type:
            for t in article.get("target_types", []):
                if t["qcode"] == subscriber_type and t["allow"]:
                    return True, False
                if t["qcode"] != subscriber_type and not t["allow"]:
                    return True, False

        # If there's a region target then continue with the subscriber to check products
        if BasePublishService().is_targeted(article, "target_regions"):
            return True, False

        # Nothing matches so this subscriber doesn't conform
        return False, False

    def conforms_content_filter(self, product, doc):
        """Checks if the document matches the subscriber filter

        :param product: Product where the filter is used
        :param doc: Document to test the filter against
        :return:
        True if there's no filter
        True if matches and permitting
        False if matches and blocking
        False if doesn't match and permitting
        True if doesn't match and blocking
        """
        content_filter = product.get("content_filter")

        if content_filter is None or "filter_id" not in content_filter or content_filter[
                "filter_id"] is None:
            return True

        service = get_resource_service("content_filters")
        filter = self.filters.get("content_filters",
                                  {}).get(content_filter["filter_id"],
                                          {}).get("cf")
        does_match = service.does_match(filter, doc, self.filters)

        if does_match:
            return content_filter["filter_type"] == "permitting"
        else:
            return content_filter["filter_type"] == "blocking"

    def conforms_global_filter(self, global_filters, doc):
        """Check global filter

        Checks if document matches the global filter

        :param global_filters: List of all global filters
        :param doc: Document to test the global filter against
        """
        service = get_resource_service("content_filters")
        for global_filter in global_filters:
            global_filter["does_match"] = service.does_match(
                global_filter, doc, self.filters)

    def conforms_subscriber_global_filter(self, subscriber, global_filters):
        """Check global filter for subscriber

        Checks if the subscriber has an override rule for each of the
        global filters and, if not, checks if the document matches the global filter

        :param subscriber: Subscriber to check for overridden global filters
        :param global_filters: List of all global filters
        :return: False if any global filter that is not overridden matches
        the document, True otherwise
        """

        gfs = subscriber.get("global_filters", {})
        for global_filter in global_filters:
            if gfs.get(str(global_filter[config.ID_FIELD]), True):
                # Global filter applies to this subscriber
                if global_filter.get("does_match"):
                    return False
        return True

    def _extend_subscriber_items(self, subscriber_items, subscribers, item,
                                 package_item_id, subscriber_codes):
        """Extends the subscriber_items with the given list of subscribers for the item

        :param subscriber_items: The existing list of subscribers
        :param subscribers: New subscribers that item has been published to - to be added
        :param item: item that has been published
        :param package_item_id: package_item_id
        :param subscriber_codes: Mapping of subscriber id to the codes for that subscriber
        """
        item_id = item[config.ID_FIELD]
        for subscriber in subscribers:
            sid = subscriber[config.ID_FIELD]
            item_list = subscriber_items.get(sid, {}).get("items", {})
            item_list[item_id] = package_item_id
            subscriber_items[sid] = {
                "subscriber": subscriber,
                "items": item_list,
                "codes": subscriber_codes.get(sid, []),
            }

    def _get_codes(self, item):
        if item.get("codes"):
            return [c.strip() for c in item.get("codes").split(",") if c]
        else:
            return []

    @staticmethod
    def filter_document(doc):
        """
        Filter document:
        1. Remove fields that should not be there given its profile.
        2. Remove `None` valued renditions.

        :param dict doc: document to filter
        :return: dict filtered document
        """

        # remove fields that should not be there given its profile.
        doc = apply_schema(doc)

        # remove `None` valued renditions.
        for association_key in doc.get(ASSOCIATIONS, {}):
            association = doc[ASSOCIATIONS][association_key]
            if not association:
                continue

            renditions = association.get("renditions", {})
            for null_rendition_key in [
                    k for k in renditions if not renditions[k]
            ]:
                del doc[ASSOCIATIONS][association_key]["renditions"][
                    null_rendition_key]

        return doc
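
Below is a minimal, self-contained sketch of the null-rendition cleanup that filter_document performs, using plain dicts so it runs without the superdesk imports (the "featuremedia" key and rendition names are made up for illustration):

doc = {
    "associations": {
        "featuremedia": {
            "renditions": {
                "original": {"href": "http://example.com/a.jpg"},
                "thumbnail": None,
            }
        }
    }
}

for association in (doc.get("associations") or {}).values():
    if not association:
        continue
    renditions = association.get("renditions", {})
    # collect the keys first so we don't mutate the dict while iterating over it
    for key in [k for k in renditions if not renditions[k]]:
        del renditions[key]

assert doc["associations"]["featuremedia"]["renditions"] == {
    "original": {"href": "http://example.com/a.jpg"}
}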
Example #12
class BasePublishService(BaseService):
    """
    Base service class for "publish" endpoint
    """

    publish_type = 'publish'
    published_state = 'published'

    non_digital = partial(filter, lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE)
    digital = partial(filter, lambda s: (s.get('subscriber_type', '') in {SUBSCRIBER_TYPES.DIGITAL,
                                                                          SUBSCRIBER_TYPES.ALL}))
    takes_package_service = TakesPackageService()
    package_service = PackageService()

    def raise_if_not_marked_for_publication(self, original):
        if original.get('flags', {}).get('marked_for_not_publication', False):
            raise SuperdeskApiError.badRequestError('Cannot publish an item which is marked as Not for Publication')

    def raise_if_invalid_state_transition(self, original):
        if not is_workflow_state_transition_valid(self.publish_type, original[ITEM_STATE]):
            error_message = "Can't {} as item state is {}" if original[ITEM_TYPE] == CONTENT_TYPE.TEXT else \
                "Can't {} as either package state or one of the items state is {}"
            raise InvalidStateTransitionError(error_message.format(self.publish_type, original[ITEM_STATE]))

    def on_update(self, updates, original):
        self.raise_if_not_marked_for_publication(original)
        self.raise_if_invalid_state_transition(original)

        updated = original.copy()
        updated.update(updates)

        takes_package = self.takes_package_service.get_take_package(original)

        if self.publish_type == 'publish':
            # validate if take can be published
            if takes_package and not self.takes_package_service.can_publish_take(
                    takes_package, updates.get(SEQUENCE, original.get(SEQUENCE, 1))):
                raise PublishQueueError.previous_take_not_published_error(
                    Exception("Previous takes are not published."))

            validate_schedule(updated.get('publish_schedule'), takes_package.get(SEQUENCE, 1) if takes_package else 1)

            if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and updates.get(EMBARGO):
                get_resource_service(ARCHIVE).validate_embargo(updated)

        if self.publish_type in ['correct', 'kill']:
            if updates.get(EMBARGO):
                raise SuperdeskApiError.badRequestError("Embargo can't be set after publishing")

            if updates.get('dateline'):
                raise SuperdeskApiError.badRequestError("Dateline can't be modified after publishing")

        validate_item = {'act': self.publish_type, 'type': original['type'], 'validate': updated}
        validation_errors = get_resource_service('validate').post([validate_item])
        if validation_errors[0]:
            raise ValidationError(validation_errors)

        # validate the package if it is one
        package_validation_errors = []
        self._validate_package_contents(original, takes_package, package_validation_errors)
        if len(package_validation_errors) > 0:
            raise ValidationError(package_validation_errors)

        self._set_updates(original, updates, updates.get(config.LAST_UPDATED, utcnow()))
        updates[ITEM_OPERATION] = ITEM_PUBLISH
        convert_task_attributes_to_objectId(updates)

    def on_updated(self, updates, original):
        self.update_published_collection(published_item_id=original[config.ID_FIELD])
        original = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        updates.update(original)
        user = get_user()

        if updates[ITEM_OPERATION] != ITEM_KILL and \
                original.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            get_resource_service('archive_broadcast').on_broadcast_master_updated(updates[ITEM_OPERATION], original)

        get_resource_service('archive_broadcast').reset_broadcast_status(updates, original)
        push_notification('item:updated', item=str(original[config.ID_FIELD]), user=str(user.get(config.ID_FIELD)))
        self._import_into_legal_archive(updates)

    def update(self, id, updates, original):
        """
        Handles workflow of each Publish, Corrected and Killed.
        """
        try:
            user = get_user()
            last_updated = updates.get(config.LAST_UPDATED, utcnow())
            auto_publish = updates.pop('auto_publish', False)

            if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._publish_package_items(original, updates)

            queued_digital = False
            package = None

            if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE:
                # if targeted_for is set then we don't send to digital subscribers.
                if not (updates.get('targeted_for', original.get('targeted_for')) or
                        is_genre(original, BROADCAST_GENRE)):
                    # check if item is in a digital package
                    package = self.takes_package_service.get_take_package(original)

                    if package:
                        queued_digital = self._publish_takes_package(package, updates, original, last_updated)
                    else:
                        '''
                        If the type of the item is text or preformatted
                        then the item needs to be sent to digital subscribers.
                        So, package the item as a take.
                        '''
                        updated = copy(original)
                        updated.update(updates)

                        if original[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED} and \
                                self.sending_to_digital_subscribers(updated):
                            # create a takes package
                            package_id = self.takes_package_service.package_story_as_a_take(updated, {}, None)
                            updates[LINKED_IN_PACKAGES] = updated[LINKED_IN_PACKAGES]
                            package = get_resource_service(ARCHIVE).find_one(req=None, _id=package_id)
                            queued_digital = self._publish_takes_package(package, updates, original, last_updated)

                # queue only text items
                media_type = None
                updated = deepcopy(original)
                updated.update(updates)
                if package:
                    media_type = SUBSCRIBER_TYPES.WIRE

                queued_wire = self.publish(doc=original, updates=updates, target_media_type=media_type)

                queued = queued_digital or queued_wire
                if not queued:
                    logger.error('Nothing is saved to publish queue for story: {} for action: {}'.
                                 format(original[config.ID_FIELD], self.publish_type))

            self._update_archive(original=original, updates=updates, should_insert_into_versions=auto_publish)
            push_notification('item:publish', item=str(id), unique_name=original['unique_name'],
                              desk=str(original.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')))
        except SuperdeskApiError as e:
            raise e
        except KeyError as e:
            raise SuperdeskApiError.badRequestError(
                message="Key is missing on article to be published: {}".format(str(e)))
        except Exception as e:
            logger.exception("Something bad happened while publishing %s".format(id))
            raise SuperdeskApiError.internalError(message="Failed to publish the item: {}".format(str(e)))

    def _publish_takes_package(self, package, updates, original, last_updated):
        """
        Process the takes to form digital master file content and publish.
        :param dict package: Takes package
        :param dict updates: updates for the take
        :param dict original: original of the take
        :param datetime.datetime last_updated: datetime for the updates
        :return bool: boolean flag indicating takes package is queued or not
        """

        package_updates = self.process_takes(updates_of_take_to_be_published=updates,
                                             original_of_take_to_be_published=original,
                                             package=package)

        self._set_updates(package, package_updates, last_updated)
        package_updates.setdefault(ITEM_OPERATION, updates.get(ITEM_OPERATION, ITEM_PUBLISH))
        self._update_archive(package, package_updates)
        '''
        When the embargo lapses and the article should go to digital subscribers, the BasePublishService creates a
        Takes Package whose state is draft. In this case, we can't initiate post-publish actions on the Takes Package
        as the package hasn't been published, and the post-publish service's get_subscribers() would return an empty
        list. Also, logically, post-publish actions on an unpublished package don't make sense.
        That's the reason for checking the Takes Package state and invoking the appropriate publish service.
        '''
        if package[ITEM_STATE] in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
            package.update(package_updates)
            queued_digital = self.publish(doc=package, updates=None, target_media_type=SUBSCRIBER_TYPES.DIGITAL)
        else:
            package.update(package_updates)
            queued_digital = get_resource_service('archive_publish').publish(doc=package,
                                                                             updates=None,
                                                                             target_media_type=SUBSCRIBER_TYPES.DIGITAL)

        self.update_published_collection(published_item_id=package[config.ID_FIELD])
        self._import_into_legal_archive(package)
        return queued_digital

    def _import_into_legal_archive(self, doc):
        """
        Import into legal archive async
        :param {dict} doc: document to be imported
        """

        if doc.get(ITEM_STATE) != CONTENT_STATE.SCHEDULED:
            kwargs = {
                'doc': doc
            }
            import_into_legal_archive.apply_async(kwargs=kwargs)

    def _publish_package_items(self, package, updates):
        """
        Publishes all items of a package recursively then publishes the package itself
        :param package: package to publish
        :param updates: payload
        """
        items = self.package_service.get_residrefs(package)

        if len(items) == 0 and self.publish_type == ITEM_PUBLISH:
            raise SuperdeskApiError.badRequestError("Empty package cannot be published!")

        removed_items = []
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if a correction is done on an empty package; kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!")
            items.extend(added_items)

        subscriber_items = {}

        if items:
            archive_publish = get_resource_service('archive_publish')
            for guid in items:
                package_item = super().find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        "Package item with id: {} does not exist.".format(guid))

                if package_item[ITEM_STATE] not in PUBLISH_STATES:  # if the item is not published then publish it
                    if package_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        # if the item is a package do recursion to publish
                        sub_updates = {i: updates[i] for i in ['state', 'operation'] if i in updates}
                        sub_updates['groups'] = list(package_item['groups'])
                        self._publish_package_items(package_item, sub_updates)
                        self._update_archive(original=package_item, updates=sub_updates,
                                             should_insert_into_versions=False)
                        self.update_published_collection(published_item_id=package_item[config.ID_FIELD])
                    else:
                        # publish the item
                        archive_publish.patch(id=package_item.pop(config.ID_FIELD), updates=package_item)

                    insert_into_versions(id_=guid)

                elif guid in removed_items:
                    # remove the package information from the package item.
                    linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES, [])
                                          if linked.get(PACKAGE) != package.get(config.ID_FIELD)]
                    super().system_update(guid, {LINKED_IN_PACKAGES: linked_in_packages}, package_item)

                package_item = super().find_one(req=None, _id=guid)
                subscribers = self._get_subscribers_for_package_item(package_item)
                self.package_service.update_field_in_package(updates, package_item[config.ID_FIELD],
                                                             config.VERSION, package_item[config.VERSION])

                if package_item[config.ID_FIELD] in removed_items:
                    digital_item_id = None
                else:
                    digital_item_id = self._get_digital_id_for_package_item(package_item)

                self._extend_subscriber_items(subscriber_items, subscribers, package_item, digital_item_id)

            self.publish_package(package, updates, target_subscribers=subscriber_items)

    def _extend_subscriber_items(self, subscriber_items, subscribers, item, digital_item_id):
        """
        Extends the subscriber_items with the given list of subscribers for the item
        :param subscriber_items: The existing list of subscribers
        :param subscribers: New subscribers that item has been published to - to be added
        :param item: item that has been published
        :param digital_item_id: digital_item_id
        """
        item_id = item[config.ID_FIELD]
        for subscriber in subscribers:
            sid = subscriber[config.ID_FIELD]
            item_list = subscriber_items.get(sid, {}).get('items', {})
            item_list[item_id] = digital_item_id
            subscriber_items[sid] = {'subscriber': subscriber, 'items': item_list}

    def _get_changed_items(self, existing_items, updates):
        """
        Returns the added and removed items from existing_items
        :param existing_items: Existing list
        :param updates: Changes
        :return: list of removed items and list of added items
        """
        if 'groups' in updates:
            new_items = self.package_service.get_residrefs(updates)
            removed_items = list(set(existing_items) - set(new_items))
            added_items = list(set(new_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []

    def _get_digital_id_for_package_item(self, package_item):
        """
        Finds the digital item id for a given item in a package
        :param package_item: item in a package
        :return string: Digital item id if there's one, otherwise the id of package_item
        """
        if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            return package_item[config.ID_FIELD]
        else:
            package_item_takes_package_id = self.takes_package_service.get_take_package_id(package_item)
            if not package_item_takes_package_id:
                return package_item[config.ID_FIELD]
            return package_item_takes_package_id

    def _get_subscribers_for_package_item(self, package_item):
        """
        Finds the list of subscribers for a given item in a package
        :param package_item: item in a package
        :return list: List of subscribers
        """
        if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            query = {'$and': [{'item_id': package_item[config.ID_FIELD]},
                              {'publishing_action': package_item[ITEM_STATE]}]}
        else:
            package_item_takes_package = self.takes_package_service.get_take_package(package_item)
            if not package_item_takes_package:
                # this item has not been published to digital subscribers so
                # the list of subscribers is empty
                return []

            query = {'$and': [{'item_id': package_item_takes_package[config.ID_FIELD]},
                              {'publishing_action': package_item_takes_package[ITEM_STATE]}]}

        return self._get_subscribers_for_previously_sent_items(query)

    def _set_updates(self, original, updates, last_updated):
        """
        Sets config.VERSION, config.LAST_UPDATED, ITEM_STATE in updates document.
        If item is being published and embargo is available then append Editorial Note with 'Embargoed'.

        :param dict original: original document
        :param dict updates: updates related to the original document
        :param datetime last_updated: datetime of the updates.
        """

        self.set_state(original, updates)
        updates.setdefault(config.LAST_UPDATED, last_updated)

        if original[config.VERSION] == updates.get(config.VERSION, original[config.VERSION]):
            resolve_document_version(document=updates, resource=ARCHIVE, method='PATCH', latest_doc=original)

        if updates.get(EMBARGO, original.get(EMBARGO)) \
                and updates.get('ednote', original.get('ednote', '')).find('Embargo') == -1:
            updates['ednote'] = '{} {}'.format(original.get('ednote', ''), 'Embargoed.').strip()

    def _update_archive(self, original, updates, versioned_doc=None, should_insert_into_versions=True):
        """
        Updates the articles into archive collection and inserts the latest into archive_versions.
        Also clears autosaved versions if any.
        :param: versioned_doc: doc which can be inserted into archive_versions
        :param: should_insert_into_versions if True inserts the latest document into versions collection
        """

        self.backend.update(self.datasource, original[config.ID_FIELD], updates, original)

        if should_insert_into_versions:
            if versioned_doc is None:
                insert_into_versions(id_=original[config.ID_FIELD])
            else:
                insert_into_versions(doc=versioned_doc)

        get_component(ItemAutosave).clear(original[config.ID_FIELD])

    def set_state(self, original, updates):
        """
        Set the state of the document based on the action (publish, correction, kill)
        :param dict original: original document
        :param dict updates: updates related to document
        """
        updates['publish_schedule'] = None
        updates[ITEM_STATE] = self.published_state

    def process_takes(self, updates_of_take_to_be_published, package, original_of_take_to_be_published=None):
        """
        Primary rule for publishing a Take in Takes Package is: all previous takes must be published before a take
        can be published.

        Also, generates the body_html of the takes package and makes sure the metadata for the package is the same
        as the metadata of the take to be published.

        :param dict updates_of_take_to_be_published: updates for the take to be published
        :param dict package: Takes package to publish
        :param dict original_of_take_to_be_published: original of the take to be published
        :return: Takes Package Updates
        """

        takes = self.takes_package_service.get_published_takes(package)
        body_html = updates_of_take_to_be_published.get('body_html',
                                                        original_of_take_to_be_published.get('body_html', ''))
        package_updates = {}

        groups = package.get(GROUPS, [])
        if groups:
            take_refs = [ref for group in groups if group['id'] == 'main' for ref in group.get('refs', [])]
            sequence_num_of_take_to_be_published = 0

            take_article_id = updates_of_take_to_be_published.get(
                config.ID_FIELD, original_of_take_to_be_published[config.ID_FIELD])

            for r in take_refs:
                if r[GUID_FIELD] == take_article_id:
                    sequence_num_of_take_to_be_published = r[SEQUENCE]
                    break

            if takes and self.published_state != 'killed':
                body_html_list = [take.get('body_html', '') for take in takes]
                if self.published_state == 'published':
                    body_html_list.append(body_html)
                else:
                    body_html_list[sequence_num_of_take_to_be_published - 1] = body_html

                package_updates['body_html'] = '<br>'.join(body_html_list)
            else:
                package_updates['body_html'] = body_html

            metadata_tobe_copied = self.takes_package_service.fields_for_creating_take.copy()
            metadata_tobe_copied.extend(['publish_schedule', 'byline'])
            updated_take = original_of_take_to_be_published.copy()
            updated_take.update(updates_of_take_to_be_published)
            metadata_from = updated_take
            if self.published_state == 'corrected' and len(takes) > 1:
                # get the last take's metadata only if there is more than one take
                metadata_from = takes[-1]

            for metadata in metadata_tobe_copied:
                if metadata in metadata_from:
                    package_updates[metadata] = metadata_from.get(metadata)

            package_updates[GROUPS] = groups
            self.package_service.update_field_in_package(package_updates,
                                                         original_of_take_to_be_published[config.ID_FIELD],
                                                         config.VERSION,
                                                         updates_of_take_to_be_published[config.VERSION])

        return package_updates

    def publish_package(self, package, updates, target_subscribers):
        """
        Publishes a given non-take package to the given subscribers.
        For each subscriber, updates the package definition with the wanted items for that subscriber
        and removes unwanted items that are not supposed to go to that subscriber.
        Text stories are replaced by the digital versions.
        :param package: Package to be published
        :param updates: Updates to the package
        :param target_subscribers: List of subscriber and items-per-subscriber
        """
        self._process_publish_updates(package, updates)
        all_items = self.package_service.get_residrefs(package)
        for items in target_subscribers.values():
            updated = deepcopy(package)
            updates_copy = deepcopy(updates)
            updated.update(updates_copy)
            subscriber = items['subscriber']
            wanted_items = [item for item in items['items'] if items['items'].get(item, None)]
            unwanted_items = [item for item in all_items if item not in wanted_items]
            for i in unwanted_items:
                still_items_left = self.package_service.remove_ref_from_inmem_package(updated, i)
                if not still_items_left and self.publish_type != 'correct':
                    # if nothing left in the package to be published and
                    # if not correcting then don't send the package
                    return
            for key in wanted_items:
                self.package_service.replace_ref_in_package(updated, key, items['items'][key])
            self.queue_transmission(updated, [subscriber])

    def _process_publish_updates(self, doc, updates):
        """ Common updates for published items """
        desk = None
        if doc.get('task', {}).get('desk'):
            desk = get_resource_service('desks').find_one(req=None, _id=doc['task']['desk'])
        if not doc.get('ingest_provider'):
            updates['source'] = desk['source'] if desk and desk.get('source', '') \
                else DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES
        updates['pubstatus'] = PUB_STATUS.CANCELED if self.publish_type == 'kill' else PUB_STATUS.USABLE

    def publish(self, doc, updates, target_media_type=None):
        """
        Queue the content for publishing.
        1. Sets the Metadata Properties - source and pubstatus
        2. Get the subscribers.
        3. Update the headline of wire stories with the sequence
        4. Queue the content for subscribers
        5. Queue the content for previously published subscribers if any.
        6. Sends a notification if no formatter is found for any of the formats configured in Subscriber.
        7. If nothing is queued then log an error.
        :param dict doc: document to publish
        :param dict updates: updates for the document
        :param str target_media_type: dictates if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queued is an Individual Article.
        :return bool: True if content is queued, False otherwise
        :raises PublishQueueError.item_not_queued_error:
                If nothing is queued.
        """

        queued = True
        no_formatters = []
        updated = doc.copy()

        # Step 1
        if updates:
            self._process_publish_updates(doc, updates)
            updated.update(updates)

        # Step 2
        subscribers, subscribers_yet_to_receive = self.get_subscribers(doc, target_media_type)

        # Step 3
        if target_media_type == SUBSCRIBER_TYPES.WIRE:
            self._update_headline_sequence(updated)

        # Step 4
        no_formatters, queued = self.queue_transmission(updated, subscribers)

        # Step 5
        if subscribers_yet_to_receive:
            formatters_not_found, queued_new_subscribers = self.queue_transmission(updated, subscribers_yet_to_receive)
            no_formatters.extend(formatters_not_found)
            queued = queued or queued_new_subscribers

        # Step 6
        user = get_user()
        if len(no_formatters) > 0:
            push_notification('item:publish:wrong:format',
                              item=str(doc[config.ID_FIELD]), unique_name=doc['unique_name'],
                              desk=str(doc.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')),
                              formats=no_formatters)

        # Step 7
        if not target_media_type and not queued:
            logger.error('Nothing is saved to publish queue for story: {} for action: {}'.
                         format(doc[config.ID_FIELD], self.publish_type))

        return queued

    def sending_to_digital_subscribers(self, doc):
        """
        Returns False if the item has an embargo that is in the future.
        Returns True if there is a digital subscriber either in the previously sent or in the yet-to-be-sent
        subscribers.

        :param doc: document
        :return bool: True if there's at least one digital subscriber, False otherwise
        """

        if doc.get(EMBARGO) and doc.get(EMBARGO) > utcnow():
            return False

        subscribers, subscribers_yet_to_receive = self.get_subscribers(doc, SUBSCRIBER_TYPES.DIGITAL)
        subscribers = list(self.digital(subscribers))
        subscribers_yet_to_receive = list(self.digital(subscribers_yet_to_receive))
        return len(subscribers) > 0 or len(subscribers_yet_to_receive) > 0

    def get_subscribers(self, doc, target_media_type):
        """
        Get subscribers for doc based on target_media_type.
        Override this method in the ArchivePublishService, ArchiveCorrectService and ArchiveKillService
        :param doc: Document to publish/correct/kill
        :param target_media_type: dictates if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queued is an Individual Article.
        :return: (list, list) List of filtered subscriber,
                List of subscribers that have not received item previously (empty list in this case).
        """
        raise NotImplementedError()

    def _get_subscribers_for_previously_sent_items(self, lookup):
        """
        Returns list of subscribers that have previously received the item.
        :param dict lookup: elastic query to filter the publish queue
        :return: list of subscribers
        """
        req = ParsedRequest()
        subscribers = []
        queued_items = get_resource_service('publish_queue').get(req=req, lookup=lookup)
        if queued_items.count():
            subscriber_ids = {queued_item['subscriber_id'] for queued_item in queued_items}
            query = {'$and': [{config.ID_FIELD: {'$in': list(subscriber_ids)}}]}
            subscribers = list(get_resource_service('subscribers').get(req=None, lookup=query))
        return subscribers

    def filter_subscribers(self, doc, subscribers, target_media_type):
        """
        Filter subscribers to whom the current document is going to be delivered.
        :param doc: Document to publish/kill/correct
        :param subscribers: List of Subscribers that might potentially get this document
        :param target_media_type: dictates if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queued is an Individual Article.
        :return: List of filtered subscribers.
        """
        filtered_subscribers = []
        req = ParsedRequest()
        req.args = {'is_global': True}
        service = get_resource_service('content_filters')
        global_filters = list(service.get(req=req, lookup=None))

        for subscriber in subscribers:
            if target_media_type and subscriber.get('subscriber_type', '') != SUBSCRIBER_TYPES.ALL:
                can_send_takes_packages = subscriber['subscriber_type'] == SUBSCRIBER_TYPES.DIGITAL
                if target_media_type == SUBSCRIBER_TYPES.WIRE and can_send_takes_packages or \
                        target_media_type == SUBSCRIBER_TYPES.DIGITAL and not can_send_takes_packages:
                    continue

            if doc.get('targeted_for'):
                found_match = [t for t in doc['targeted_for'] if t['name'] == subscriber.get('subscriber_type', '')]

                if len(found_match) == 0 and subscriber.get('geo_restrictions'):
                    found_match = [t for t in doc['targeted_for'] if t['name'] == subscriber['geo_restrictions']]
                    if len(found_match) == 0 or found_match[0]['allow'] is False:
                        continue
                elif len(found_match) > 0 and found_match[0]['allow'] is False:
                    continue

            if not self.conforms_global_filter(subscriber, global_filters, doc):
                continue

            if not self.conforms_content_filter(subscriber, doc):
                continue

            filtered_subscribers.append(subscriber)

        return filtered_subscribers

    def queue_transmission(self, doc, subscribers):
        """
        Formats and then queues the article for transmission to the passed subscribers.
        ::Important Note:: Format types across subscribers can repeat, but we can't generate the formatted item just
        once for all the subscribers configured with that format, as each formatted item must have a published
        sequence number generated per subscriber.
        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :return: (list, bool) tuple of a list of missing formatters and a boolean flag that is True if queued, False otherwise
        """

        try:
            queued = False
            no_formatters = []
            for subscriber in subscribers:
                try:
                    if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                            subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                        # wire subscribers can get only text and preformatted stories
                        continue

                    for destination in subscriber['destinations']:
                        # get the formatter configured for this destination's format
                        formatter = get_formatter(destination['format'], doc)

                        if not formatter:  # if formatter not found then record it
                            no_formatters.append(destination['format'])
                            continue

                        formatted_docs = formatter.format(doc, subscriber)

                        for pub_seq_num, formatted_doc in formatted_docs:
                            publish_queue_item = dict()
                            publish_queue_item['item_id'] = doc['_id']
                            publish_queue_item['item_version'] = doc[config.VERSION]
                            publish_queue_item['formatted_item'] = formatted_doc
                            publish_queue_item['subscriber_id'] = subscriber['_id']
                            publish_queue_item['destination'] = destination
                            publish_queue_item['published_seq_num'] = pub_seq_num
                            publish_queue_item['publish_schedule'] = doc.get('publish_schedule', None)
                            publish_queue_item['unique_name'] = doc.get('unique_name', None)
                            publish_queue_item['content_type'] = doc.get('type', None)
                            publish_queue_item['headline'] = doc.get('headline', None)

                            self.set_state(doc, publish_queue_item)
                            if publish_queue_item.get(ITEM_STATE):
                                publish_queue_item['publishing_action'] = publish_queue_item.get(ITEM_STATE)
                                del publish_queue_item[ITEM_STATE]
                            else:
                                publish_queue_item['publishing_action'] = self.published_state

                            get_resource_service('publish_queue').post([publish_queue_item])
                            queued = True
                except Exception:
                    logger.exception("Failed to queue item for id {} with headline {} for subscriber {}."
                                     .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

            return no_formatters, queued
        except Exception:
            raise

    def update_published_collection(self, published_item_id):
        """
        Updates the published collection with the published item.
        Sets last_published_version to False for previous versions of the published items.
        :param str published_item_id: _id of the document.
        """
        published_item = super().find_one(req=None, _id=published_item_id)
        published_item = copy(published_item)
        get_resource_service('published').update_published_items(published_item_id, LAST_PUBLISHED_VERSION, False)
        get_resource_service('published').post([published_item])

    def conforms_content_filter(self, subscriber, doc):
        """
        Checks if the document matches the subscriber filter
        :param subscriber: Subscriber to get the filter
        :param doc: Document to test the filter against
        :return:
        True if there's no filter
        True if matches and permitting
        False if matches and blocking
        False if doesn't match and permitting
        True if doesn't match and blocking
        """
        content_filter = subscriber.get('content_filter')

        if content_filter is None or 'filter_id' not in content_filter or content_filter['filter_id'] is None:
            return True

        service = get_resource_service('content_filters')
        filter = service.find_one(req=None, _id=content_filter['filter_id'])
        does_match = service.does_match(filter, doc)

        if does_match:
            return content_filter['filter_type'] == 'permitting'
        else:
            return content_filter['filter_type'] == 'blocking'

    def conforms_global_filter(self, subscriber, global_filters, doc):
        """
        Checks if the subscriber has an override rule for each of the
        global filters and, if not, checks if the document matches the global filter
        :param subscriber: Subscriber to check for overridden global filters
        :param global_filters: List of all global filters
        :param doc: Document to test the global filter against
        :return: False if any global filter that is not overridden matches
        the document, True otherwise
        """
        service = get_resource_service('content_filters')
        gfs = subscriber.get('global_filters', {})
        for global_filter in global_filters:
            if gfs.get(str(global_filter['_id']), True):
                # Global filter applies to this subscriber
                if service.does_match(global_filter, doc):
                    # All global filters behave like blocking filters
                    return False
        return True

    def _update_headline_sequence(self, doc):
        """ Updates the headline of the text story if there's any sequence value in it """
        if doc.get(SEQUENCE):
            doc['headline'] = '{}={}'.format(doc['headline'], doc.get(SEQUENCE))

    def _validate_package_contents(self, package, takes_package, validation_errors=None):
        """
        If the item passed is a package, this function ensures that the unpublished content validates, that none of
        the content is locked by a session other than the publishing one, and that no killed or spiked content is
        allowed.

        :param package:
        :param takes_package:
        :param validation_errors: validation errors are appended if there are any.
        """
        if validation_errors is None:
            validation_errors = []

        # Ensure it is the sort of thing we need to validate
        if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and not takes_package and self.publish_type == ITEM_PUBLISH:
            items = self.package_service.get_residrefs(package)

            # make sure package is not scheduled or spiked
            if package[ITEM_STATE] in (CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                validation_errors.append('Package cannot be {}'.format(package[ITEM_STATE]))

            if package.get(EMBARGO):
                validation_errors.append('Package cannot have Embargo')

            if items:
                for guid in items:
                    doc = super().find_one(req=None, _id=guid)

                    if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        digital = self.takes_package_service.get_take_package(doc) or {}
                        self._validate_package_contents(doc, digital, validation_errors)

                    # make sure no items are killed or spiked or scheduled
                    if doc[ITEM_STATE] in (CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                        validation_errors.append('Package cannot contain {} item'.format(doc[ITEM_STATE]))

                    if doc.get(EMBARGO):
                        validation_errors.append('Package cannot have Items with Embargo')

                    # don't validate items that have already been published
                    if doc[ITEM_STATE] not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
                        validate_item = {'act': self.publish_type, 'type': doc[ITEM_TYPE], 'validate': doc}
                        errors = get_resource_service('validate').post([validate_item], headline=True)
                        if errors[0]:
                            validation_errors.extend(errors[0])

                    # check the locks on the items
                    if doc.get('lock_session') and package.get('lock_session') != doc['lock_session']:
                        validation_errors.extend(['{}: packaged item cannot be locked'.format(doc['headline'])])
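
A hedged, standalone sketch of the error-accumulator pattern _validate_package_contents uses: callers pass in a list and the function appends to it while recursing into nested packages (the item shapes and states here are simplified assumptions, not the superdesk schema):

def validate_contents(items, validation_errors=None):
    # default to a fresh list to avoid the shared mutable-default pitfall
    if validation_errors is None:
        validation_errors = []
    for item in items:
        if item.get("state") in ("killed", "spiked"):
            validation_errors.append("Package cannot contain {} item".format(item["state"]))
        # recurse into nested packages, reusing the same accumulator
        validate_contents(item.get("items", []), validation_errors)
    return validation_errors

errors = validate_contents([
    {"state": "killed"},
    {"state": "published", "items": [{"state": "spiked"}]},
])
assert errors == ["Package cannot contain killed item", "Package cannot contain spiked item"]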
Example #13
def test_remove_two_refs_from_package2(self):
    PackageService().remove_ref_from_inmem_package(self.package1, "789")
    PackageService().remove_ref_from_inmem_package(self.package1, "123")
    self.assertEqual(len(self.package1.get("groups", [])), 2)
    root_group = self.package1.get("groups", [])[0]
    self.assertEqual(len(root_group.get("refs", [])), 1)
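
For context, a sketch of the in-memory package shape these remove-ref tests assume: a root group whose refs point at other groups by idRef, and content groups whose refs point at items by residRef (the field names mirror common Superdesk package conventions but are assumptions here):

package1 = {
    "groups": [
        {"id": "root", "refs": [{"idRef": "main"}, {"idRef": "sidebar"}]},
        {"id": "main", "refs": [{"residRef": "123"}, {"residRef": "456"}]},
        {"id": "sidebar", "refs": [{"residRef": "789"}]},
    ]
}
# Removing "789" empties the "sidebar" group, which drops that group and its
# root ref; removing "123" then leaves "main" with a single ref, matching the
# assertions above.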
Example #14
class RemoveExpiredFromPublishedCollection(SuperdeskTestCase):
    def setUp(self):
        super().setUp()
        self._init_data()

        self.app.data.insert('users', self.users)
        self.app.data.insert('desks', self.desks)
        self.app.data.insert('vocabularies', self.vocabularies)
        self.app.data.insert('subscribers', self.subscribers)
        self.app.data.insert(ARCHIVE, self.articles)

        self.filename = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), "validators.json")
        self.json_data = [
            {
                "_id": "kill_text",
                "act": "kill",
                "type": "text",
                "schema": {
                    "headline": {
                        "type": "string"
                    }
                }
            },
            {
                "_id": "publish_text",
                "act": "publish",
                "type": "text",
                "schema": {}
            },
            {
                "_id": "correct_text",
                "act": "correct",
                "type": "text",
                "schema": {}
            },
            {
                "_id": "publish_composite",
                "act": "publish",
                "type": "composite",
                "schema": {}
            },
        ]

        with open(self.filename, "w+") as file:
            json.dump(self.json_data, file)
        init_app(self.app)
        ValidatorsPopulateCommand().run(self.filename)

        self.package_service = PackageService()

    def tearDown(self):
        super().tearDown()
        if self.filename and os.path.exists(self.filename):
            os.remove(self.filename)

    def test_can_remove_from_production_succeeds_when_published_once(self):
        """
        Tests if can_remove_from_production() returns True if the item is published only once.
        """

        doc = self.articles[0].copy()

        updates = {
            'targeted_for': [{
                'name': 'New South Wales',
                'allow': True
            }]
        }
        get_resource_service(ARCHIVE).patch(id=doc[config.ID_FIELD],
                                            updates=updates)

        published_version_number = doc[config.VERSION] + 1
        get_resource_service(ARCHIVE_PUBLISH).patch(
            id=doc[config.ID_FIELD],
            updates={
                ITEM_STATE: CONTENT_STATE.PUBLISHED,
                config.VERSION: published_version_number
            })

        self._move_to_archived_and_assert_can_remove_from_production(
            doc[config.ID_FIELD], self.assertTrue)

    def test_can_remove_from_production_fails_when_published_and_then_killed(
            self):
        """
        Tests if can_remove_from_production() returns False if the item is published more than once.
        """

        doc = self.articles[0].copy()

        updates = {
            'targeted_for': [{
                'name': 'New South Wales',
                'allow': True
            }]
        }
        get_resource_service(ARCHIVE).patch(id=doc[config.ID_FIELD],
                                            updates=updates)

        published_version_number = doc[config.VERSION] + 1
        get_resource_service(ARCHIVE_PUBLISH).patch(
            id=doc[config.ID_FIELD],
            updates={
                ITEM_STATE: CONTENT_STATE.PUBLISHED,
                config.VERSION: published_version_number
            })

        published_item = self._move_to_archived_and_assert_can_remove_from_production(
            doc[config.ID_FIELD], self.assertTrue)

        published_version_number += 1
        get_resource_service(ARCHIVE_KILL).patch(id=doc['_id'],
                                                 updates={
                                                     ITEM_STATE:
                                                     CONTENT_STATE.KILLED,
                                                     config.VERSION:
                                                     published_version_number
                                                 })
        self.assertFalse(
            get_resource_service(PUBLISHED).can_remove_from_production(
                published_item))

    def test_can_remove_from_production_second_rule(self):
        """
        Tests if can_remove_from_production() returns False when the expired published item is part of a package.
        """

        doc = self.articles[0].copy()

        get_resource_service(ARCHIVE_PUBLISH).patch(
            id=doc[config.ID_FIELD],
            updates={
                ITEM_STATE: CONTENT_STATE.PUBLISHED,
                config.VERSION: doc[config.VERSION] + 1
            })

        item_in_production = get_resource_service(ARCHIVE).find_one(
            req=None, _id=doc[config.ID_FIELD])
        self.assertIsNotNone(
            TakesPackageService().get_take_package_id(item_in_production))

        self._move_to_archived_and_assert_can_remove_from_production(
            doc[config.ID_FIELD], self.assertFalse)

    def test_can_remove_from_production_third_rule(self):
        """
        Tests if can_remove_from_production() returns False when the expired published item is a package.
        """

        published_articles = [
            self.articles[1].copy(), self.articles[2].copy(),
            self.articles[3].copy(), self.articles[4].copy()
        ]

        for published_article in published_articles:
            published_article[ITEM_STATE] = CONTENT_STATE.PUBLISHED

        published_service = get_resource_service(PUBLISHED)
        published_service.post(published_articles)

        published_package = self._move_to_archived_and_assert_can_remove_from_production(
            self.articles[4][config.ID_FIELD], self.assertFalse)

        self._move_to_archived_and_assert_can_remove_from_production(
            self.articles[3][config.ID_FIELD], self.assertFalse,
            published_package)

        self._move_to_archived_and_assert_can_remove_from_production(
            self.articles[2][config.ID_FIELD], self.assertFalse,
            published_package)
        self._move_to_archived_and_assert_can_remove_from_production(
            self.articles[1][config.ID_FIELD], self.assertTrue,
            published_package)

    def test_cannot_remove_scheduled_content(self):
        published_service = get_resource_service(PUBLISHED)
        original = self.articles[0].copy()

        original[ITEM_STATE] = CONTENT_STATE.SCHEDULED
        original['publish_schedule'] = utcnow() + timedelta(days=2)

        published_service.post([original])
        published_items = published_service.get_other_published_items(
            original['item_id'])
        self.assertEqual(1, published_items.count())

        RemoveExpiredPublishContent().run()
        published_items = published_service.get_other_published_items(
            original['item_id'])
        self.assertEqual(1, published_items.count())

    def test_remove_published_expired_content(self):
        original = self.articles[0].copy()
        original[ITEM_STATE] = CONTENT_STATE.PUBLISHED
        self._create_and_insert_into_versions(original, True)

        published_service = get_resource_service(PUBLISHED)
        archive_publish = get_resource_service(ARCHIVE_PUBLISH)

        subscribers, subscribers_yet_to_receive = archive_publish.get_subscribers(
            original, SUBSCRIBER_TYPES.WIRE)
        archive_publish.queue_transmission(original, subscribers)
        published_service.post([original])

        published_items = published_service.get(req=None, lookup=None)
        self.assertEqual(1, published_items.count())

        published_service.update_published_items(
            original['item_id'], 'expiry',
            utcnow() + timedelta(minutes=-60))
        RemoveExpiredPublishContent().run()
        published_items = published_service.get_other_published_items(
            str(original['item_id']))
        self.assertEqual(0, published_items.count())

        archived_item = get_resource_service('archived').find_one(
            req=None, _id=str(original[config.ID_FIELD]))
        self.assertEqual(archived_item['item_id'],
                         self.articles[0][config.ID_FIELD])
        self.assertFalse(archived_item['allow_post_publish_actions'])
        self.assertFalse(archived_item['can_be_removed'])

    def test_remove_published_and_killed_content_separately(self):
        doc = self.articles[0]
        original = doc.copy()

        updates = {
            'targeted_for': [{
                'name': 'New South Wales',
                'allow': True
            }]
        }
        get_resource_service(ARCHIVE).patch(id=original[config.ID_FIELD],
                                            updates=updates)

        original.update(updates)
        self._create_and_insert_into_versions(original, False)

        published_version_number = original[config.VERSION] + 1
        get_resource_service(ARCHIVE_PUBLISH).patch(
            id=doc[config.ID_FIELD],
            updates={
                ITEM_STATE: CONTENT_STATE.PUBLISHED,
                config.VERSION: published_version_number
            })

        published_service = get_resource_service(PUBLISHED)
        published_items = published_service.get(req=None, lookup=None)
        self.assertEqual(1, published_items.count())

        article_in_production = get_resource_service(ARCHIVE).find_one(
            req=None, _id=original[config.ID_FIELD])
        self.assertIsNotNone(article_in_production)
        self.assertEqual(article_in_production[ITEM_STATE],
                         CONTENT_STATE.PUBLISHED)
        self.assertEqual(article_in_production[config.VERSION],
                         published_version_number)
        insert_into_versions(doc=article_in_production)

        # Setting the expiry date of the published article to 1 hr back from now
        published_service.update_published_items(
            original[config.ID_FIELD], 'expiry',
            utcnow() + timedelta(minutes=-60))

        # Killing the published article and inserting into archive_versions as unittests use service directly
        published_version_number += 1
        get_resource_service(ARCHIVE_KILL).patch(id=doc[config.ID_FIELD],
                                                 updates={
                                                     ITEM_STATE:
                                                     CONTENT_STATE.KILLED,
                                                     config.VERSION:
                                                     published_version_number
                                                 })

        # Executing the Expiry Job for the Published Article and asserting the collections
        RemoveExpiredPublishContent().run()

        published_items = published_service.get(req=None, lookup=None)
        self.assertEqual(1, published_items.count())

        article_in_production = get_resource_service(ARCHIVE).find_one(
            req=None, _id=original[config.ID_FIELD])
        self.assertIsNotNone(article_in_production)
        self.assertEqual(article_in_production[ITEM_STATE],
                         CONTENT_STATE.KILLED)
        self.assertEqual(article_in_production[config.VERSION],
                         published_version_number)
        insert_into_versions(doc=article_in_production)

        # Setting the expiry date of the killed article to 1 hr back from now and running the job again
        published_service.update_published_items(
            original[config.ID_FIELD], 'expiry',
            utcnow() + timedelta(minutes=-60))
        RemoveExpiredPublishContent().run()

        published_items = published_service.get_other_published_items(
            str(original[config.ID_FIELD]))
        self.assertEqual(0, published_items.count())

        article_in_production = get_resource_service(ARCHIVE).find_one(
            req=None, _id=original[config.ID_FIELD])
        self.assertIsNone(article_in_production)

    def test_remove_takes_package(self):
        """
        Tests the behavior of remove_expired() when just the takes package expires.
        """
        def expire(published_takes_pkg):
            published_service.update(
                published_takes_pkg[config.ID_FIELD],
                {'expiry': utcnow() + timedelta(minutes=-60)},
                published_takes_pkg)

            RemoveExpiredPublishContent().run()

            if published_takes_pkg[ITEM_STATE] == CONTENT_STATE.PUBLISHED:
                self.assertEqual(published_takes_pkg[ITEM_OPERATION],
                                 'publish')
            elif published_takes_pkg[ITEM_STATE] == CONTENT_STATE.KILLED:
                self.assertEqual(published_takes_pkg[ITEM_OPERATION], 'kill')

        doc = self.articles[0].copy()
        self._create_and_insert_into_versions(doc, False)

        published_version_number = doc[config.VERSION] + 1
        get_resource_service(ARCHIVE_PUBLISH).patch(
            id=doc[config.ID_FIELD],
            updates={
                ITEM_STATE: CONTENT_STATE.PUBLISHED,
                config.VERSION: published_version_number
            })
        insert_into_versions(id_=doc[config.ID_FIELD])

        published_version_number += 1
        get_resource_service(ARCHIVE_KILL).patch(
            id=doc[config.ID_FIELD],
            updates={
                ITEM_STATE: CONTENT_STATE.KILLED,
                config.VERSION: published_version_number
            })
        insert_into_versions(id_=doc[config.ID_FIELD])

        published_service = get_resource_service(PUBLISHED)
        items_in_published_repo = list(
            published_service.get_from_mongo(req=None, lookup=None))
        self.assertEqual(len(items_in_published_repo), 4)

        # Expiring the Takes Package whose state is Published
        published_takes_pkg = [
            g for g in items_in_published_repo
            if is_takes_package(g) and g[ITEM_STATE] == CONTENT_STATE.PUBLISHED
        ]
        expire(published_takes_pkg[0])

        # Expiring the Takes Package whose state is Killed
        published_takes_pkg = [
            g for g in items_in_published_repo
            if is_takes_package(g) and g[ITEM_STATE] == CONTENT_STATE.KILLED
        ]
        expire(published_takes_pkg[0])

    def test_remove_when_package_and_items_in_package_expire(self):
        """
        Tests if items in the package are copied to the legal archive when the package in the published collection
        expires. In this test both the items in the package and the package itself expire. Since the job processes
        expired items in the order they were created, the items in this test are created first, then the package.
        """

        package = self.articles[3].copy()

        items_in_published_repo = self._publish_package_and_assert_published_collection(
            package)
        published_service = get_resource_service(PUBLISHED)

        # Validate if version is available for each item in the package after publishing
        for item in items_in_published_repo:
            if item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                items_in_package = self.package_service.get_item_refs(package)
                for item_in_pkg in items_in_package:
                    if config.VERSION not in item_in_pkg:
                        self.fail(
                            'version is not found for item in the package. Item Id: %s'
                            % item_in_pkg['guid'])

            # Expiring the published items
            published_service.update_published_items(
                item['item_id'], 'expiry',
                utcnow() + timedelta(minutes=-60))

        RemoveExpiredPublishContent().run()
        self.assertEqual(
            published_service.get(req=None, lookup=None).count(), 0)

    def test_remove_when_only_package_expires(self):
        """
        Tests if items in the package are copied to the legal archive when only the package in the published
        collection expires. In this test only the package expires. Since the job processes expired items in the
        order they were created, the items in this test are created first, then the package.
        """

        package = self.articles[3].copy()

        self._publish_package_and_assert_published_collection(package)
        published_service = get_resource_service(PUBLISHED)

        # Expiring the package
        published_service.update_published_items(
            package[config.ID_FIELD], 'expiry',
            utcnow() + timedelta(minutes=-60))

        RemoveExpiredPublishContent().run()
        items_in_published_repo = published_service.get(req=None, lookup=None)
        self.assertEqual(items_in_published_repo.count(), 2)

        for item in items_in_published_repo:
            self.assertTrue(item['allow_post_publish_actions'])

    def _publish_package_and_assert_published_collection(self, package):
        # Note: the package's groups must contain only text items
        item_refs = self.package_service.get_residrefs(package)

        # Inserting docs into archive_versions for all items in the package and for the package
        for item_ref in item_refs:
            item = None
            for article in self.articles:
                if article[config.ID_FIELD] == item_ref:
                    item = article
                    break

            item = item.copy()
            updates = {'targeted_for': [{'name': 'Wire', 'allow': True}]}
            get_resource_service(ARCHIVE).patch(id=item[config.ID_FIELD],
                                                updates=updates)

            self._create_and_insert_into_versions(item, False)

        self._create_and_insert_into_versions(package, False)

        updates = {
            ITEM_STATE: CONTENT_STATE.PUBLISHED,
            config.VERSION: package[config.VERSION] + 1,
            GROUPS: package.get(GROUPS)
        }
        get_resource_service(ARCHIVE_PUBLISH).patch(
            id=package[config.ID_FIELD], updates=updates)

        items_in_published_repo = get_resource_service(
            PUBLISHED).get_from_mongo(req=None, lookup=None)
        self.assertEqual(items_in_published_repo.count(), 3)

        return items_in_published_repo

    def _init_data(self):
        self.users = [{'_id': '1', 'username': '******'}]
        self.desks = [{
            '_id': ObjectId('123456789ABCDEF123456789'),
            'name': 'desk1'
        }]
        self.vocabularies = [{
            "_id": "rightsinfo",
            "items": [{
                "is_active": True,
                "name": "AAP",
                "copyrightHolder": "Australian Associated Press",
                "copyrightNotice": "AAP content is owned by or licensed to AAP",
                "usageTerms": "The direct recipient must comply with the limitations specified in the AAP Information"
            }, {
                "is_active": True,
                "name": "default",
                "copyrightHolder": "Australian Associated Press",
                "copyrightNotice": "AAP content is owned by or licensed to AAP.",
                "usageTerms": "The direct recipient must comply with the limitations specified in the AAP Information."
            }]
        }]

        self.subscribers = [{
            "_id": "1",
            "name": "sub1",
            "is_active": True,
            "subscriber_type": SUBSCRIBER_TYPES.WIRE,
            "media_type": "media",
            "sequence_num_settings": {"max": 10, "min": 1},
            "email": "*****@*****.**",
            "destinations": [{
                "name": "dest1",
                "format": "nitf",
                "delivery_type": "ftp",
                "config": {"address": "127.0.0.1", "username": "******"}
            }]
        }, {
            "_id": "2",
            "name": "sub2",
            "is_active": True,
            "subscriber_type": SUBSCRIBER_TYPES.DIGITAL,
            "media_type": "media",
            "sequence_num_settings": {"max": 10, "min": 1},
            "email": "*****@*****.**",
            "destinations": [{
                "name": "dest1",
                "format": "newsmlg2",
                "delivery_type": "ftp",
                "config": {"address": "127.0.0.1", "username": "******"}
            }]
        }]

        self.articles = [{
            'guid': '1',
            '_id': '1',
            'last_version': 3,
            config.VERSION: 4,
            'body_html': "shouldn't be referenced by any package",
            'urgency': 4,
            'anpa_category': [{'qcode': 'A', 'name': 'Sport'}],
            'headline': 'no package reference',
            'pubstatus': 'usable',
            'firstcreated': utcnow(),
            'byline': 'By Alan Karben',
            'dateline': {'located': {'city': 'Sydney'}},
            'keywords': ['Student', 'Crime', 'Police', 'Missing'],
            'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                        {'qcode': '04001002', 'name': 'Weather'}],
            'expiry': utcnow() + timedelta(minutes=20),
            'task': {'user': '******', 'desk': '123456789ABCDEF123456789'},
            ITEM_STATE: CONTENT_STATE.PROGRESS,
            ITEM_TYPE: CONTENT_TYPE.TEXT,
            'unique_name': '#1'
        }, {
            'guid': '2',
            '_id': '2',
            'last_version': 3,
            config.VERSION: 4,
            'body_html': 'some body',
            'urgency': 4,
            'headline': 'some headline',
            'abstract': 'Abstract Sample',
            'anpa_category': [{'qcode': 'A', 'name': 'Sport'}],
            'pubstatus': 'done',
            'firstcreated': utcnow(),
            'byline': 'By Alan Karben',
            'dateline': {'located': {'city': 'Sydney'}},
            'slugline': 'taking takes',
            'keywords': ['Student', 'Crime', 'Police', 'Missing'],
            'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                        {'qcode': '04001002', 'name': 'Weather'}],
            'task': {'user': '******', 'desk': '123456789ABCDEF123456789'},
            ITEM_STATE: CONTENT_STATE.PROGRESS,
            'expiry': utcnow() + timedelta(minutes=20),
            ITEM_TYPE: CONTENT_TYPE.TEXT,
            'unique_name': '#2'
        }, {
            'guid': '3',
            '_id': '3',
            'last_version': 3,
            config.VERSION: 4,
            'body_html': 'some body',
            'urgency': 4,
            'headline': 'some headline',
            'abstract': 'Abstract Sample',
            'anpa_category': [{'qcode': 'A', 'name': 'Sport'}],
            'pubstatus': 'done',
            'firstcreated': utcnow(),
            'byline': 'By Alan Karben',
            'dateline': {'located': {'city': 'Sydney'}},
            'slugline': 'taking takes',
            'keywords': ['Student', 'Crime', 'Police', 'Missing'],
            'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                        {'qcode': '04001002', 'name': 'Weather'}],
            'task': {'user': '******', 'desk': '123456789ABCDEF123456789'},
            ITEM_STATE: CONTENT_STATE.PROGRESS,
            'expiry': utcnow() + timedelta(minutes=20),
            ITEM_TYPE: CONTENT_TYPE.TEXT,
            'unique_name': '#3'
        }, {
            'guid': '4',
            '_id': '4',
            'headline': 'simple package with 2 items',
            'last_version': 2,
            config.VERSION: 3,
            ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
            'groups': [{
                'id': 'root',
                'refs': [{'idRef': 'main'}],
                'role': 'grpRole:NEP'
            }, {
                'id': 'main',
                'role': 'grpRole:main',
                'refs': [{
                    'location': ARCHIVE,
                    'guid': '2',
                    ITEM_TYPE: CONTENT_TYPE.TEXT,
                    RESIDREF: '2'
                }, {
                    'location': ARCHIVE,
                    'guid': '3',
                    ITEM_TYPE: CONTENT_TYPE.TEXT,
                    RESIDREF: '3'
                }]
            }],
            'firstcreated': utcnow(),
            'expiry': utcnow() + timedelta(minutes=20),
            'unique_name': '#4',
            ITEM_STATE: CONTENT_STATE.PROGRESS
        }, {
            'guid': '5',
            '_id': '5',
            'headline': 'package and item is also a package',
            config.VERSION: 3,
            ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
            'groups': [{
                'id': 'root',
                'refs': [{'idRef': 'main'}],
                'role': 'grpRole:NEP'
            }, {
                'id': 'main',
                'role': 'grpRole:main',
                'refs': [{
                    'location': ARCHIVE,
                    ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
                    RESIDREF: '4'
                }]
            }],
            'firstcreated': utcnow(),
            'expiry': utcnow() + timedelta(minutes=20),
            'unique_name': '#5',
            ITEM_STATE: CONTENT_STATE.PROGRESS
        }]

    def _create_and_insert_into_versions(self, item,
                                         insert_last_version_as_published):
        version = item[config.VERSION]
        archive_versions = []

        while version != 0:
            versioned_item = item.copy()
            versioned_item['_id_document'] = versioned_item['_id']
            versioned_item[config.VERSION] = version
            del versioned_item['_id']

            if insert_last_version_as_published and item[
                    config.VERSION] == version:
                versioned_item[ITEM_STATE] = CONTENT_STATE.PUBLISHED

            archive_versions.append(versioned_item)
            version -= 1

        self.app.data.insert('archive_versions', archive_versions)
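
    # A note on the fan-out above (values as used in this test setup): for an item with
    # config.VERSION == 3 the loop inserts three versioned copies (versions 3, 2, 1),
    # each carrying '_id_document' in place of '_id'.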

    def _move_to_archived_and_assert_can_remove_from_production(
            self, item_id, assert_function, item_to_assert=None):
        published_service = get_resource_service(PUBLISHED)
        published_item = list(
            published_service.get_from_mongo(req=None,
                                             lookup={'item_id': item_id}))
        self.assertEqual(len(published_item), 1)

        # Moving to archived explicitly
        published_item = published_item[0]
        published_service.patch(id=published_item[config.ID_FIELD],
                                updates={'allow_post_publish_actions': False})

        assert_function(
            published_service.can_remove_from_production(
                item_to_assert if item_to_assert else published_item))

        return published_item
Example 15
class EnqueueService:
    """
    Creates the corresponding entries in the publish queue for items marked for publishing
    """

    publish_type = 'publish'
    published_state = 'published'

    non_digital = partial(
        filter,
        lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE)
    digital = partial(
        filter, lambda s: (s.get('subscriber_type', '') in
                           {SUBSCRIBER_TYPES.DIGITAL, SUBSCRIBER_TYPES.ALL}))
    takes_package_service = TakesPackageService()
    package_service = PackageService()

    def _enqueue_item(self, item):
        if item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and item.get(
                PACKAGE_TYPE):
            return self.publish(doc=item,
                                target_media_type=SUBSCRIBER_TYPES.DIGITAL)
        elif item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            return self._publish_package_items(item)
        elif item[ITEM_TYPE] not in [
                CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED
        ]:
            return self.publish(item, SUBSCRIBER_TYPES.DIGITAL)
        else:
            return self.publish(
                item,
                SUBSCRIBER_TYPES.WIRE if item.get('is_take_item') else None)

    def _publish_package_items(self, package):
        """
        Publishes all items of a package recursively then publishes the package itself
        :param package: package to publish
        :param updates: payload
        """
        items = self.package_service.get_residrefs(package)
        subscriber_items = {}
        queued = False
        removed_items = []
        if self.publish_type in ['correct', 'kill']:
            removed_items, added_items = self._get_changed_items(
                items, package)
            # we raise an error if a correction is done on an empty package; kill is fine.
            if len(removed_items) == len(items) and len(
                    added_items) == 0 and self.publish_type == 'correct':
                raise SuperdeskApiError.badRequestError(
                    "Corrected package cannot be empty!")
            items.extend(added_items)

        if items:
            archive_service = get_resource_service('archive')
            for guid in items:
                package_item = archive_service.find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        "Package item with id: {} has not been published.".
                        format(guid))

                subscribers, subscriber_codes = self._get_subscribers_for_package_item(
                    package_item)
                digital_item_id = BasePublishService(
                ).get_digital_id_for_package_item(package_item)
                self._extend_subscriber_items(subscriber_items, subscribers,
                                              package_item, digital_item_id,
                                              subscriber_codes)

            for removed_id in removed_items:
                package_item = archive_service.find_one(req=None,
                                                        _id=removed_id)
                subscribers, subscriber_codes = self._get_subscribers_for_package_item(
                    package_item)
                digital_item_id = None
                self._extend_subscriber_items(subscriber_items, subscribers,
                                              package_item, digital_item_id,
                                              subscriber_codes)

            queued = self.publish_package(package,
                                          target_subscribers=subscriber_items)

        return queued

    def _get_changed_items(self, existing_items, package):
        """
        Returns the added and removed items from existing_items
        :param existing_items: Existing list
        :param updates: Changes
        :return: list of removed items and list of added items
        """
        published_service = get_resource_service('published')
        req = ParsedRequest()
        query = {
            'query': {
                'filtered': {
                    'filter': {
                        'and': [{
                            'term': {
                                QUEUE_STATE: PUBLISH_STATE.QUEUED
                            }
                        }, {
                            'term': {
                                'item_id': package['item_id']
                            }
                        }]
                    }
                }
            },
            'sort': [{
                'publish_sequence_no': 'desc'
            }]
        }
        req.args = {'source': json.dumps(query)}
        req.max_results = 1000
        previously_published_packages = published_service.get(req=req,
                                                              lookup=None)
        previously_published_package = previously_published_packages[0]

        if 'groups' in previously_published_package:
            old_items = self.package_service.get_residrefs(
                previously_published_package)
            added_items = list(set(existing_items) - set(old_items))
            removed_items = list(set(old_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []
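
    # A minimal sketch (hypothetical ids) of the set arithmetic used above:
    #
    #     old_items = {'123', '456', '789'}       # refs in the previously published package
    #     existing_items = {'123', '555', '456'}  # refs in the package being published
    #     added = set(existing_items) - set(old_items)    # {'555'}
    #     removed = set(old_items) - set(existing_items)  # {'789'}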

    def enqueue_item(self, item):
        """
        Creates the corresponding entries in the publish queue for the given item
        :return bool: True if item is queued else false.
        """
        try:
            return self._enqueue_item(item)
        except SuperdeskApiError as e:
            raise e
        except KeyError as e:
            raise SuperdeskApiError.badRequestError(
                message="Key is missing on article to be published: {}".format(
                    str(e)))
        except Exception as e:
            logger.exception(
                "Something bad happened while publishing %s",
                item.get(config.ID_FIELD))
            raise SuperdeskApiError.internalError(
                message="Failed to publish the item: {}".format(str(e)))

    def get_subscribers(self, doc, target_media_type):
        """
        Get subscribers for doc based on target_media_type.
        Override this method in the ArchivePublishService, ArchiveCorrectService and ArchiveKillService
        :param doc: Document to publish/correct/kill
        :param target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queues is an Individual Article.
        :return: (list, list) List of filtered subscriber,
                List of subscribers that have not received item previously (empty list in this case).
        """
        raise NotImplementedError()

    def publish(self, doc, target_media_type=None):
        """
        Queue the content for publishing.
        1. Get the subscribers.
        2. Update the headline of wire stories with the sequence
        3. Queue the content for subscribers
        4. Queue the content for previously published subscribers if any.
        5. Sends notification if no formatter has found for any of the formats configured in Subscriber.
        6. If not queued and not formatters then raise exception.
        :param dict doc: document to publish
        :param str target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queues is an Individual Article.
        :return bool: if content is queued then True else False
        :raises PublishQueueError.item_not_queued_error:
                If the nothing is queued.
        """
        # Step 1
        subscribers, subscribers_yet_to_receive, subscriber_codes = self.get_subscribers(
            doc, target_media_type)

        # Step 2
        if target_media_type == SUBSCRIBER_TYPES.WIRE:
            self._update_headline_sequence(doc)

        # Step 3
        no_formatters, queued = self.queue_transmission(
            deepcopy(doc), subscribers, subscriber_codes)

        # Step 4
        if subscribers_yet_to_receive:
            formatters_not_found, queued_new_subscribers = \
                self.queue_transmission(deepcopy(doc), subscribers_yet_to_receive, subscriber_codes)
            no_formatters.extend(formatters_not_found)
            queued = queued or queued_new_subscribers

        # Step 5
        self._push_formatter_notification(doc, no_formatters)

        # Step 6
        if not target_media_type and not queued:
            logger.exception(
                'Nothing is saved to publish queue for story: {} for action: {}'
                .format(doc[config.ID_FIELD], self.publish_type))

        return queued

    def _push_formatter_notification(self, doc, no_formatters=None):
        # default to None rather than a shared mutable default list
        if no_formatters:
            user = get_user()
            push_notification('item:publish:wrong:format',
                              item=str(doc[config.ID_FIELD]),
                              unique_name=doc['unique_name'],
                              desk=str(doc.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')),
                              formats=no_formatters)

    def _get_subscriber_codes(self, subscribers):
        subscriber_codes = {}
        all_products = list(
            get_resource_service('products').get(req=None, lookup=None))

        for subscriber in subscribers:
            codes = self._get_codes(subscriber)
            products = [
                p for p in all_products
                if p[config.ID_FIELD] in subscriber.get('products', [])
            ]

            for product in products:
                codes.extend(self._get_codes(product))
                subscriber_codes[subscriber[config.ID_FIELD]] = list(
                    set(codes))

        return subscriber_codes
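
    # Sketch of the returned mapping (hypothetical ids): each subscriber's own codes are
    # merged with the codes of its products and de-duplicated, e.g.
    #
    #     {'sub-1': ['code1', 'code2'], 'sub-2': ['code3']}
    #
    # Note that a subscriber with no matching products gets no entry at all.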

    def resend(self, doc, subscribers):
        subscriber_codes = self._get_subscriber_codes(subscribers)
        wire_subscribers = list(self.non_digital(subscribers))
        digital_subscribers = list(self.digital(subscribers))

        if len(wire_subscribers) > 0:
            doc['item_id'] = doc[config.ID_FIELD]
            self._resend_to_subscribers(doc, wire_subscribers,
                                        subscriber_codes)

        if len(digital_subscribers) > 0:
            package = self.takes_package_service.get_take_package(doc)
            package['item_id'] = package[config.ID_FIELD]
            self._resend_to_subscribers(package, digital_subscribers,
                                        subscriber_codes)

    def _resend_to_subscribers(self, doc, subscribers, subscriber_codes):
        formatter_messages, queued = self.queue_transmission(
            doc, subscribers, subscriber_codes)
        self._push_formatter_notification(doc, formatter_messages)
        if not queued:
            logger.exception(
                'Nothing is saved to publish queue for story: {} for action: {}'
                .format(doc[config.ID_FIELD], 'resend'))

    def publish_package(self, package, target_subscribers):
        """
        Publishes a given non-take package to given subscribers.
        For each subscriber updates the package definition with the wanted_items for that subscriber
        and removes unwanted_items that doesn't supposed to go that subscriber.
        Text stories are replaced by the digital versions.
        :param package: Package to be published
        :param target_subscribers: List of subscriber and items-per-subscriber
        """
        all_items = self.package_service.get_residrefs(package)
        no_formatters, queued = [], False
        for items in target_subscribers.values():
            updated = deepcopy(package)
            subscriber = items['subscriber']
            codes = items['codes']
            wanted_items = [
                item for item in items['items']
                if items['items'].get(item, None)
            ]
            unwanted_items = [
                item for item in all_items if item not in wanted_items
            ]
            for i in unwanted_items:
                still_items_left = self.package_service.remove_ref_from_inmem_package(
                    updated, i)
                if not still_items_left and self.publish_type != 'correct':
                    # if nothing left in the package to be published and
                    # if not correcting then don't send the package
                    return
            for key in wanted_items:
                self.package_service.replace_ref_in_package(
                    updated, key, items['items'][key])

            formatters, temp_queued = self.queue_transmission(
                updated, [subscriber], {subscriber[config.ID_FIELD]: codes})

            no_formatters.extend(formatters)
            if temp_queued:
                queued = temp_queued

        return queued

    def queue_transmission(self, doc, subscribers, subscriber_codes=None):
        """
        Formats and then queues the article for transmission to the passed subscribers.
        ::Important Note:: Format Type across Subscribers can repeat. But the formatted item can't be generated once
        for all subscribers based on the configured format_types, as each formatted item must carry a publish
        sequence number generated per Subscriber.
        :param dict doc: document to queue for transmission
        :param list subscribers: list of subscriber dicts
        :param dict subscriber_codes: optional map of product codes per subscriber
        :return: (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        if subscriber_codes is None:  # avoid a shared mutable default argument
            subscriber_codes = {}

        try:
            queued = False
            no_formatters = []
            for subscriber in subscribers:
                try:
                    if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                            subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                        # wire subscribers can get only text and preformatted stories
                        continue

                    for destination in subscriber['destinations']:
                        embed_package_items = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                            PACKAGE_TYPE not in doc and destination['config'].get('packaged', False)
                        if embed_package_items:
                            doc = self._embed_package_items(doc)
                        # look up the formatter for this destination's format
                        formatter = get_formatter(destination['format'], doc)

                        if not formatter:  # if formatter not found then record it
                            no_formatters.append(destination['format'])
                            continue

                        formatted_docs = formatter.format(
                            doc, subscriber,
                            subscriber_codes.get(subscriber[config.ID_FIELD]))

                        for idx, publish_data in enumerate(formatted_docs):
                            if not isinstance(publish_data, dict):
                                pub_seq_num, formatted_doc = publish_data
                                formatted_docs[idx] = {
                                    'published_seq_num': pub_seq_num,
                                    'formatted_item': formatted_doc
                                }
                            else:
                                assert 'published_seq_num' in publish_data and 'formatted_item' in publish_data,\
                                    "missing keys in publish_data"

                        for publish_queue_item in formatted_docs:
                            publish_queue_item['item_id'] = doc['item_id']
                            publish_queue_item['item_version'] = doc[config.VERSION]
                            publish_queue_item['subscriber_id'] = subscriber[config.ID_FIELD]
                            publish_queue_item['codes'] = subscriber_codes.get(subscriber[config.ID_FIELD])
                            publish_queue_item['destination'] = destination
                            # publish_schedule indicates that the queue item was created via a scheduled item
                            publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                            publish_queue_item['unique_name'] = doc.get('unique_name', None)
                            publish_queue_item['content_type'] = doc.get('type', None)
                            publish_queue_item['headline'] = doc.get('headline', None)
                            publish_queue_item['publishing_action'] = self.published_state
                            publish_queue_item['ingest_provider'] = \
                                ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                            if doc.get(PUBLISHED_IN_PACKAGE):
                                publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]
                            try:
                                encoded_item = publish_queue_item.pop(
                                    'encoded_item')
                            except KeyError:
                                pass
                            else:
                                binary = io.BytesIO(encoded_item)
                                publish_queue_item[
                                    'encoded_item_id'] = app.storage.put(
                                        binary)
                            publish_queue_item.pop(ITEM_STATE, None)
                            get_resource_service('publish_queue').post(
                                [publish_queue_item])
                            queued = True
                except Exception:
                    logger.exception(
                        "Failed to queue item for id {} with headline {} for subscriber {}."
                        .format(doc.get(config.ID_FIELD), doc.get('headline'),
                                subscriber.get('name')))

            return no_formatters, queued
        except:
            raise

    def _embed_package_items(self, package):
        """ Embeds all package items in the package document
        """
        for group in package.get(GROUPS, []):
            if group[GROUP_ID] == ROOT_GROUP:
                continue
            for ref in group[REFS]:
                if RESIDREF not in ref:
                    continue
                package_item = get_resource_service('published').find_one(
                    req=None,
                    item_id=ref[RESIDREF],
                    _current_version=ref[config.VERSION])
                if not package_item:
                    msg = 'Cannot find package %s published item %s' % (
                        package['item_id'], ref[RESIDREF])
                    raise SuperdeskPublishError(500, msg)
                package_item[config.ID_FIELD] = package_item['item_id']
                ref['package_item'] = package_item
        return package

    def _update_headline_sequence(self, doc):
        """ Updates the headline of the text story if there's any sequence value in it """
        if doc.get(SEQUENCE):
            doc['headline'] = '{}={}'.format(doc['headline'],
                                             doc.get(SEQUENCE))
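
    # E.g. (assumed values) a take with headline 'Fire update' and SEQUENCE 2 is
    # transmitted with the headline 'Fire update=2'.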

    def _get_subscribers_for_package_item(self, package_item):
        """
        Finds the list of subscribers for a given item in a package
        :param package_item: item in a package
        :return list: List of subscribers
        :return string: Digital item id if there's one otherwise None
        """
        if package_item[ITEM_TYPE] not in [
                CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED
        ]:
            query = {
                '$and': [{
                    'item_id': package_item[config.ID_FIELD]
                }, {
                    'publishing_action': package_item[ITEM_STATE]
                }]
            }
        else:
            package_item_takes_package = self.takes_package_service.get_take_package(
                package_item)
            if not package_item_takes_package:
                # this item has not been published to digital subscribers so
                # the list of subscribers is empty
                return [], {}

            query = {
                '$and': [{
                    'item_id': package_item_takes_package[config.ID_FIELD]
                }, {
                    'publishing_action':
                    package_item_takes_package[ITEM_STATE]
                }]
            }

        return self._get_subscribers_for_previously_sent_items(query)

    def _get_subscribers_for_previously_sent_items(self, lookup):
        """
        Returns list of subscribers that have previously received the item.
        :param dict lookup: elastic query to filter the publish queue
        :return: list of subscribers and list of product codes per subscriber
        """
        req = ParsedRequest()
        subscribers = []
        subscriber_codes = {}
        queued_items = list(
            get_resource_service('publish_queue').get(req=req, lookup=lookup))
        if len(queued_items) > 0:
            subscriber_ids = {
                queued_item['subscriber_id']
                for queued_item in queued_items
            }
            subscriber_codes = {
                q['subscriber_id']: q.get('codes', [])
                for q in queued_items
            }
            query = {
                '$and': [{
                    config.ID_FIELD: {
                        '$in': list(subscriber_ids)
                    }
                }]
            }
            subscribers = list(
                get_resource_service('subscribers').get(req=None,
                                                        lookup=query))
        return subscribers, subscriber_codes
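
    # Sketch of a lookup this expects (hypothetical id), mirroring the callers above:
    #
    #     {'$and': [{'item_id': 'some-item-id'}, {'publishing_action': 'published'}]}
    #
    # from which the distinct subscriber ids and their stored codes are collected.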

    def filter_subscribers(self, doc, subscribers, target_media_type):
        """
        Filter subscribers to whom the current document is going to be delivered.
        :param doc: Document to publish/kill/correct
        :param subscribers: List of Subscribers that might potentially get this document
        :param target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queues is an Individual Article.
        :return: List of of filtered subscribers and list of product codes per subscriber.
        """
        filtered_subscribers = []
        subscriber_codes = {}
        req = ParsedRequest()
        req.args = {'is_global': True}
        filter_service = get_resource_service('content_filters')
        existing_products = {
            p[config.ID_FIELD]: p
            for p in list(
                get_resource_service('products').get(req=req, lookup=None))
        }
        global_filters = list(filter_service.get(req=req, lookup=None))

        for subscriber in subscribers:
            if target_media_type and subscriber.get(
                    'subscriber_type', '') != SUBSCRIBER_TYPES.ALL:
                can_send_takes_packages = subscriber[
                    'subscriber_type'] == SUBSCRIBER_TYPES.DIGITAL
                if target_media_type == SUBSCRIBER_TYPES.WIRE and can_send_takes_packages or \
                        target_media_type == SUBSCRIBER_TYPES.DIGITAL and not can_send_takes_packages:
                    continue

            conforms, skip_filters = self.conforms_subscriber_targets(
                subscriber, doc)
            if not conforms:
                continue

            if not self.conforms_global_filter(subscriber, global_filters,
                                               doc):
                continue

            product_codes = self._get_codes(subscriber)
            subscriber_added = False
            for product_id in subscriber.get('products', []):
                # check if the product filter conforms with the story
                product = existing_products.get(product_id)

                if not product:
                    continue

                if not self.conforms_product_targets(product, doc):
                    continue

                if self.conforms_content_filter(product, doc):
                    # gather the codes of products
                    product_codes.extend(self._get_codes(product))
                    if not subscriber_added:
                        filtered_subscribers.append(subscriber)
                        subscriber_added = True

            if skip_filters and not subscriber_added:
                filtered_subscribers.append(subscriber)
                subscriber_added = True

            # unify the list of codes by removing duplicates
            if subscriber_added:
                subscriber_codes[subscriber[config.ID_FIELD]] = list(
                    set(product_codes))

        return filtered_subscribers, subscriber_codes

    def conforms_product_targets(self, product, article):
        """
        Checks if the given article has any target information and if it does
        it checks if the product satisfies any of the target information
        :param product: Product to test
        :param article: article
        :return:
            bool: True if the article conforms the targets for the given product
        """
        geo_restrictions = product.get('geo_restrictions')

        # If the article isn't geo-targeted, the product conforms only if it has no geo restrictions
        if not BasePublishService().is_targeted(article, 'target_regions'):
            return geo_restrictions is None

        if geo_restrictions:
            for region in article.get('target_regions', []):
                if region['qcode'] == geo_restrictions and region['allow']:
                    return True
                if region['qcode'] != geo_restrictions and not region['allow']:
                    return True
        return False
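
    # Sketch of the rule above (hypothetical qcodes) for a product with
    # geo_restrictions == 'NSW':
    #
    #     {'target_regions': [{'qcode': 'NSW', 'allow': True}]}   -> True  (allowed match)
    #     {'target_regions': [{'qcode': 'VIC', 'allow': False}]}  -> True  (blocked elsewhere)
    #     {'target_regions': [{'qcode': 'VIC', 'allow': True}]}   -> False (allowed elsewhere only)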

    def conforms_subscriber_targets(self, subscriber, article):
        """
        Checks if the given article has any target information and if it does
        it checks if the subscriber satisfies any of the target information
        :param subscriber: Subscriber to test
        :param article: article
        :return:
            bool: True/False if the article conforms the targets
            bool: True if the given subscriber is specifically targeted, False otherwise
        """
        # If not targeted at all then return True
        if not BasePublishService().is_targeted(article, 'target_subscribers') and \
                not BasePublishService().is_targeted(article, 'target_types'):
            return True, False

        subscriber_type = subscriber.get('subscriber_type')

        for t in article.get('target_subscribers', []):
            if str(t.get('_id')) == str(subscriber['_id']):
                return True, True

        if subscriber_type:
            for t in article.get('target_types', []):
                if t['qcode'] == subscriber_type and t['allow']:
                    return True, False
                if t['qcode'] != subscriber_type and not t['allow']:
                    return True, False

        # If there's a region target then continue with the subscriber to check products
        if BasePublishService().is_targeted(article, 'target_regions'):
            return True, False

        # Nothing matches so this subscriber doesn't conform
        return False, False
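
    # Sketch (hypothetical values) for a subscriber whose subscriber_type is 'wire':
    #
    #     {'target_types': [{'qcode': 'wire', 'allow': True}]}      -> (True, False)
    #     {'target_types': [{'qcode': 'digital', 'allow': False}]}  -> (True, False)
    #     {'target_subscribers': [{'_id': <this subscriber's id>}]} -> (True, True)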

    def conforms_content_filter(self, product, doc):
        """
        Checks if the document matches the subscriber filter
        :param product: Product where the filter is used
        :param doc: Document to test the filter against
        :return:
        True if there's no filter
        True if matches and permitting
        False if matches and blocking
        False if doesn't match and permitting
        True if doesn't match and blocking
        """
        content_filter = product.get('content_filter')

        if content_filter is None or 'filter_id' not in content_filter or content_filter[
                'filter_id'] is None:
            return True

        service = get_resource_service('content_filters')
        filter_doc = service.find_one(req=None, _id=content_filter['filter_id'])
        does_match = service.does_match(filter_doc, doc)

        if does_match:
            return content_filter['filter_type'] == 'permitting'
        else:
            return content_filter['filter_type'] == 'blocking'
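
    # Truth table for the filter logic above:
    #
    #     matches + permitting  -> True       matches + blocking  -> False
    #     no match + permitting -> False      no match + blocking -> True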

    def conforms_global_filter(self, subscriber, global_filters, doc):
        """
        Checks if subscriber has a override rule against each of the
        global filter and if not checks if document matches the global filter
        :param subscriber: Subscriber to get if the global filter is overriden
        :param global_filters: List of all global filters
        :param doc: Document to test the global filter against
        :return: True if at least one global filter is not overriden
        and it matches the document
        False if global filter matches the document or all of them overriden
        """
        service = get_resource_service('content_filters')
        gfs = subscriber.get('global_filters', {})
        for global_filter in global_filters:
            if gfs.get(str(global_filter[config.ID_FIELD]), True):
                # Global filter applies to this subscriber
                if service.does_match(global_filter, doc):
                    # all global filters behave like blocking filters
                    return False
        return True
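
    # Sketch (hypothetical id): a subscriber overrides a global filter via
    #
    #     {'global_filters': {'<filter-id>': False}}
    #
    # in which case that filter is skipped; any non-overridden global filter that
    # matches the document blocks it.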

    def _extend_subscriber_items(self, subscriber_items, subscribers, item,
                                 digital_item_id, subscriber_codes):
        """
        Extends the subscriber_items with the given list of subscribers for the item
        :param subscriber_items: The existing list of subscribers
        :param subscribers: New subscribers that item has been published to - to be added
        :param item: item that has been published
        :param digital_item_id: digital_item_id
        """
        item_id = item[config.ID_FIELD]
        for subscriber in subscribers:
            sid = subscriber[config.ID_FIELD]
            item_list = subscriber_items.get(sid, {}).get('items', {})
            item_list[item_id] = digital_item_id
            subscriber_items[sid] = {
                'subscriber': subscriber,
                'items': item_list,
                'codes': subscriber_codes.get(sid, [])
            }
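
    # Sketch of the structure built here (hypothetical ids):
    #
    #     subscriber_items == {
    #         'sub-1': {'subscriber': {...}, 'items': {'item-1': 'digital-1'}, 'codes': ['c1']}
    #     }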

    def _get_codes(self, item):
        if item.get('codes'):
            return [c.strip() for c in item.get('codes').split(',') if c]
        else:
            return []
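
    # Assumed behaviour of _get_codes: comma-separated codes are split, blanks dropped
    # and whitespace stripped, e.g.
    #
    #     EnqueueService()._get_codes({'codes': 'a, b,,c '})  # -> ['a', 'b', 'c']
    #     EnqueueService()._get_codes({})                     # -> []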
Example 16
    def test_added_removed_in_a_package(self):
        package = {"groups": [{"id": "root", "refs": [{"idRef": "main"}], "role": "grpRole:NEP"},
                              {"id": "main", "refs": [
                                  {
                                      "renditions": {},
                                      "slugline": "Boat",
                                      "guid": "123",
                                      "headline": "item-1 headline",
                                      "location": "archive",
                                      "type": "text",
                                      "itemClass": "icls:text",
                                      "residRef": "123"
                                  },
                                  {
                                      "renditions": {},
                                      "slugline": "Boat",
                                      "guid": "456",
                                      "headline": "item-2 headline",
                                      "location": "archive",
                                      "type": "text",
                                      "itemClass": "icls:text",
                                      "residRef": "456"
                                  },
                                  {
                                      "renditions": {},
                                      "slugline": "Boat",
                                      "guid": "789",
                                      "headline": "item-3 headline",
                                      "location": "archive",
                                      "type": "text",
                                      "itemClass": "icls:text",
                                      "residRef": "789"
                                  }], "role": "grpRole:main"}],
                   "task": {
                       "user": "******",
                       "status": "todo",
                       "stage": "#desks.incoming_stage#",
                       "desk": "#desks._id#"},
                   "guid": "compositeitem",
                   "headline": "test package",
                   "state": "submitted",
                   "type": "composite"}

        updates = {"groups": [{"id": "root", "refs": [{"idRef": "main"}], "role": "grpRole:NEP"},
                              {"id": "main", "refs": [
                                  {
                                      "renditions": {},
                                      "slugline": "Boat",
                                      "guid": "123",
                                      "headline": "item-1 headline",
                                      "location": "archive",
                                      "type": "text",
                                      "itemClass": "icls:text",
                                      "residRef": "123"
                                  },
                                  {
                                      "renditions": {},
                                      "slugline": "Boat",
                                      "guid": "555",
                                      "headline": "item-2 headline",
                                      "location": "archive",
                                      "type": "text",
                                      "itemClass": "icls:text",
                                      "residRef": "555"
                                  },
                                  {
                                      "renditions": {},
                                      "slugline": "Boat",
                                      "guid": "456",
                                      "headline": "item-2 headline",
                                      "location": "archive",
                                      "type": "text",
                                      "itemClass": "icls:text",
                                      "residRef": "456"
                                  }], "role": "grpRole:main"}],
                   "task": {
                       "user": "******",
                       "status": "todo",
                       "stage": "#desks.incoming_stage#",
                       "desk": "#desks._id#"},
                   "guid": "compositeitem",
                   "headline": "test package",
                   "state": "submitted",
                   "type": "composite"}

        items = PackageService().get_residrefs(package)
        removed_items, added_items = ArchivePublishService()._get_changed_items(items, updates)
        self.assertEqual(len(removed_items), 1)
        self.assertEqual(len(added_items), 1)
Example 17
class BasePublishService(BaseService):
    """Base service for different "publish" services."""

    publish_type = 'publish'
    published_state = 'published'

    non_digital = partial(
        filter,
        lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE)
    digital = partial(
        filter, lambda s: (s.get('subscriber_type', '') in
                           {SUBSCRIBER_TYPES.DIGITAL, SUBSCRIBER_TYPES.ALL}))
    package_service = PackageService()

    def on_update(self, updates, original):
        self._refresh_associated_items(original)
        self._validate(original, updates)
        self._set_updates(original, updates,
                          updates.get(config.LAST_UPDATED, utcnow()))
        convert_task_attributes_to_objectId(updates)  # normalise task attributes (e.g. user, desk) to ObjectId
        self._process_publish_updates(original, updates)
        self._mark_media_item_as_used(updates, original)

    def on_updated(self, updates, original):
        original = get_resource_service(ARCHIVE).find_one(
            req=None, _id=original[config.ID_FIELD])
        updates.update(original)

        if updates[ITEM_OPERATION] != ITEM_KILL and \
                original.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            get_resource_service(
                'archive_broadcast').on_broadcast_master_updated(
                    updates[ITEM_OPERATION], original)

        get_resource_service('archive_broadcast').reset_broadcast_status(
            updates, original)
        push_content_notification([updates])
        self._import_into_legal_archive(updates)
        CropService().update_media_references(updates, original, True)
        superdesk.item_published.send(self, item=original)

    def update(self, id, updates, original):
        """
        Handles the workflow for each of Publish, Correct and Kill.
        """
        try:
            user = get_user()
            auto_publish = updates.get('auto_publish', False)

            if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._publish_package_items(original, updates)
                self._update_archive(original,
                                     updates,
                                     should_insert_into_versions=auto_publish)
            else:
                self._refresh_associated_items(original)
                updated = deepcopy(original)
                updated.update(deepcopy(updates))

                if updates.get(ASSOCIATIONS):
                    self._refresh_associated_items(
                        updated)  # updates got lost with update

                self._update_archive(original,
                                     updates,
                                     should_insert_into_versions=auto_publish)
                self.update_published_collection(
                    published_item_id=original[config.ID_FIELD],
                    updated=updated)

            from apps.publish.enqueue import enqueue_published
            enqueue_published.apply_async()

            push_notification('item:publish',
                              item=str(id),
                              unique_name=original['unique_name'],
                              desk=str(
                                  original.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')))
        except SuperdeskApiError:
            raise
        except KeyError as e:
            logger.exception(e)
            raise SuperdeskApiError.badRequestError(
                message="Key is missing on article to be published: {}".format(
                    str(e)))
        except Exception as e:
            raise SuperdeskApiError.internalError(
                message="Failed to publish the item: {}".format(str(id)),
                exception=e)

    def is_targeted(self, article, target=None):
        """Checks if article is targeted.

        Returns True if the given article has been targeted by region or
        subscriber type or specific subscribers.

        :param article: Article to check
        :param target: Optional specific target to check if exists
        :return:
        """
        if target:
            return len(article.get(target, [])) > 0
        else:
            return len(
                article.get('target_regions', []) +
                article.get('target_types', []) +
                article.get('target_subscribers', [])) > 0
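        # e.g. an article carrying {'target_types': [{'name': 'wire'}]} with
        # empty target_regions and target_subscribers is targeted, because the
        # concatenated target lists are non-empty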

    def _validate(self, original, updates):
        self.raise_if_invalid_state_transition(original)

        updated = original.copy()
        updated.update(updates)

        self.raise_if_not_marked_for_publication(updated)

        if self.publish_type == 'publish':
            update_schedule_settings(updated, PUBLISH_SCHEDULE,
                                     updated.get(PUBLISH_SCHEDULE))
            validate_schedule(
                updated.get(SCHEDULE_SETTINGS,
                            {}).get('utc_{}'.format(PUBLISH_SCHEDULE)))

        if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and updates.get(
                EMBARGO):
            update_schedule_settings(updated, EMBARGO, updated.get(EMBARGO))
            get_resource_service(ARCHIVE).validate_embargo(updated)

        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            if updates.get(EMBARGO) and not original.get(EMBARGO):
                raise SuperdeskApiError.badRequestError(
                    "Embargo can't be set after publishing")

            if updates.get('dateline'):
                raise SuperdeskApiError.badRequestError(
                    "Dateline can't be modified after publishing")

        if self.publish_type == ITEM_PUBLISH and updated.get('rewritten_by'):
            rewritten_by = get_resource_service(ARCHIVE).find_one(
                req=None, _id=updated.get('rewritten_by'))
            if rewritten_by and rewritten_by.get(ITEM_STATE) in PUBLISH_STATES:
                raise SuperdeskApiError.badRequestError(
                    "Cannot publish the story after Update is published.!")

        publish_type = 'auto_publish' if updates.get(
            'auto_publish') else self.publish_type
        validate_item = {
            'act': publish_type,
            'type': original['type'],
            'validate': updated
        }
        validation_errors = get_resource_service('validate').post(
            [validate_item])
        if validation_errors[0]:
            raise ValidationError(validation_errors)

        validation_errors = []
        self._validate_associated_items(original, validation_errors)

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self._validate_package(original, updates, validation_errors)

        if len(validation_errors) > 0:
            raise ValidationError(validation_errors)

    def _validate_package(self, package, updates, validation_errors):
        # make sure package is not scheduled or spiked
        if package[ITEM_STATE] in (CONTENT_STATE.SPIKED,
                                   CONTENT_STATE.SCHEDULED):
            validation_errors.append('Package cannot be {}'.format(
                package[ITEM_STATE]))

        if package.get(EMBARGO):
            validation_errors.append('Package cannot have Embargo')

        items = self.package_service.get_residrefs(package)
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(
                items, updates)
            # we raise an error if a correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(
                    added_items) == 0 and self.publish_type == ITEM_CORRECT:
                validation_errors.append("Corrected package cannot be empty!")

    def raise_if_not_marked_for_publication(self, original):
        if original.get('flags', {}).get('marked_for_not_publication', False):
            raise SuperdeskApiError.badRequestError(
                'Cannot publish an item which is marked as Not for Publication'
            )

    def raise_if_invalid_state_transition(self, original):
        if not is_workflow_state_transition_valid(self.publish_type,
                                                  original[ITEM_STATE]):
            error_message = "Can't {} as item state is {}" if original[ITEM_TYPE] == CONTENT_TYPE.TEXT else \
                "Can't {} as either package state or one of the items state is {}"
            raise InvalidStateTransitionError(
                error_message.format(self.publish_type, original[ITEM_STATE]))

    def _process_publish_updates(self, original, updates):
        """Common updates for published items."""
        desk = None
        if original.get('task', {}).get('desk'):
            desk = get_resource_service('desks').find_one(
                req=None, _id=original['task']['desk'])
        if not original.get('ingest_provider'):
            updates['source'] = desk['source'] if desk and desk.get('source', '') \
                else app.settings['DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES']
        updates[
            'pubstatus'] = PUB_STATUS.CANCELED if self.publish_type == 'kill' else PUB_STATUS.USABLE
        self._set_item_expiry(updates, original)

    def _set_item_expiry(self, updates, original):
        """Set the expiry for the item.

        :param dict updates: doc on which publishing action is performed
        """
        desk_id = original.get('task', {}).get('desk')
        stage_id = original.get('task', {}).get('stage')
        offset = None

        if EMBARGO in updates or PUBLISH_SCHEDULE in updates:
            offset = get_utc_schedule(updates,
                                      PUBLISH_SCHEDULE) or get_utc_schedule(
                                          updates, EMBARGO)
        elif EMBARGO in original or PUBLISH_SCHEDULE in original:
            offset = get_utc_schedule(original,
                                      PUBLISH_SCHEDULE) or get_utc_schedule(
                                          original, EMBARGO)

        if app.settings.get('PUBLISHED_CONTENT_EXPIRY_MINUTES'):
            updates['expiry'] = get_expiry_date(
                app.settings['PUBLISHED_CONTENT_EXPIRY_MINUTES'],
                offset=offset)
        else:
            updates['expiry'] = get_expiry(desk_id, stage_id, offset=offset)

    def _publish_package_items(self, package, updates):
        """Publishes all items of a package recursively then publishes the package itself.

        :param package: package to publish
        :param updates: payload
        """
        items = self.package_service.get_residrefs(package)

        if len(items) == 0 and self.publish_type == ITEM_PUBLISH:
            raise SuperdeskApiError.badRequestError(
                "Empty package cannot be published!")

        removed_items = []
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(
                items, updates)
            # we raise an error if a correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(
                    added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError(
                    "Corrected package cannot be empty!")
            items.extend(added_items)

        if not updates.get('groups') and package.get(
                'groups'):  # this saves some typing in tests
            updates['groups'] = package.get('groups')

        if items:
            archive_publish = get_resource_service('archive_publish')
            for guid in items:
                package_item = super().find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        "Package item with id: {} does not exist.".format(
                            guid))

                if package_item[
                        ITEM_STATE] not in PUBLISH_STATES:  # if the item is not published then publish it
                    if package_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        # if the item is a package do recursion to publish
                        sub_updates = {
                            i: updates[i]
                            for i in ['state', 'operation'] if i in updates
                        }
                        sub_updates['groups'] = list(package_item['groups'])
                        self._publish_package_items(package_item, sub_updates)
                        self._update_archive(original=package_item,
                                             updates=sub_updates,
                                             should_insert_into_versions=False)
                    else:
                        # publish the item
                        package_item[PUBLISHED_IN_PACKAGE] = package[
                            config.ID_FIELD]
                        archive_publish.patch(id=package_item.pop(
                            config.ID_FIELD),
                                              updates=package_item)

                    insert_into_versions(id_=guid)

                elif guid in removed_items:
                    # remove the package information from the package item.
                    linked_in_packages = [
                        linked
                        for linked in package_item.get(LINKED_IN_PACKAGES, [])
                        if linked.get(PACKAGE) != package.get(config.ID_FIELD)
                    ]
                    super().system_update(
                        guid, {LINKED_IN_PACKAGES: linked_in_packages},
                        package_item)

                package_item = super().find_one(req=None, _id=guid)

                self.package_service.update_field_in_package(
                    updates, package_item[config.ID_FIELD], config.VERSION,
                    package_item[config.VERSION])

                if package_item.get(ASSOCIATIONS):
                    self.package_service.update_field_in_package(
                        updates, package_item[config.ID_FIELD], ASSOCIATIONS,
                        package_item[ASSOCIATIONS])

        updated = deepcopy(package)
        updated.update(updates)
        self.update_published_collection(
            published_item_id=package[config.ID_FIELD], updated=updated)

    def update_published_collection(self, published_item_id, updated=None):
        """Updates the published collection with the published item.

        Set the last_published_version to false for previous versions of the published items.

        :param: str published_item_id: _id of the document.
        """
        published_item = super().find_one(req=None, _id=published_item_id)
        published_item = copy(published_item)
        if updated:
            published_item.update(updated)
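        # mark all earlier published revisions as no longer being the last
        # published version before inserting the new entry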
        get_resource_service(PUBLISHED).update_published_items(
            published_item_id, LAST_PUBLISHED_VERSION, False)
        return get_resource_service(PUBLISHED).post([published_item])

    def set_state(self, original, updates):
        """Set the state of the document based on the action (publish, correction, kill)

        :param dict original: original document
        :param dict updates: updates related to document
        """
        updates[PUBLISH_SCHEDULE] = None
        updates[SCHEDULE_SETTINGS] = {}
        updates[ITEM_STATE] = self.published_state

    def _set_updates(self,
                     original,
                     updates,
                     last_updated,
                     preserve_state=False):
        """Sets config.VERSION, config.LAST_UPDATED, ITEM_STATE in updates document.

        If item is being published and embargo is available then append Editorial Note with 'Embargoed'.

        :param dict original: original document
        :param dict updates: updates related to the original document
        :param datetime last_updated: datetime of the updates.
        """
        if not preserve_state:
            self.set_state(original, updates)
        updates.setdefault(config.LAST_UPDATED, last_updated)
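        # resolve a new document version only when the payload itself does not
        # already change config.VERSION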

        if original[config.VERSION] == updates.get(config.VERSION,
                                                   original[config.VERSION]):
            resolve_document_version(document=updates,
                                     resource=ARCHIVE,
                                     method='PATCH',
                                     latest_doc=original)

        user = get_user()
        if user and user.get(config.ID_FIELD):
            updates['version_creator'] = user[config.ID_FIELD]

    def _update_archive(self,
                        original,
                        updates,
                        versioned_doc=None,
                        should_insert_into_versions=True):
        """Updates the articles into archive collection and inserts the latest into archive_versions.

        Also clears autosaved versions if any.

        :param: versioned_doc: doc which can be inserted into archive_versions
        :param: should_insert_into_versions if True inserts the latest document into versions collection
        """

        self.backend.update(self.datasource, original[config.ID_FIELD],
                            updates, original)
        app.on_archive_item_updated(updates, original, updates[ITEM_OPERATION])

        if should_insert_into_versions:
            if versioned_doc is None:
                insert_into_versions(id_=original[config.ID_FIELD])
            else:
                insert_into_versions(doc=versioned_doc)

        get_component(ItemAutosave).clear(original[config.ID_FIELD])

    def _get_changed_items(self, existing_items, updates):
        """Returns the added and removed items from existing_items.

        :param existing_items: Existing list
        :param updates: Changes
        :return: list of removed items and list of added items
        """
        if 'groups' in updates:
            new_items = self.package_service.get_residrefs(updates)
            removed_items = list(set(existing_items) - set(new_items))
            added_items = list(set(new_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []
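        # e.g. existing_items=['123', '456'] with an update whose residrefs are
        # ['456', '789'] yields removed_items=['123'] and added_items=['789']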

    def _validate_associated_items(self, original_item, validation_errors=None):
        """Validates associated items.

        This function ensures that the unpublished content validates, that none of
        the content is locked by a session other than the publishing session, and
        that no killed or spiked content is allowed.

        :param original_item: item whose associated items are validated
        :param validation_errors: validation errors are appended if there are any.
        """
        if validation_errors is None:
            validation_errors = []
        items = [
            value
            for value in (original_item.get(ASSOCIATIONS) or {}).values()
        ]
        if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                self.publish_type == ITEM_PUBLISH:
            items.extend(self.package_service.get_residrefs(original_item))

        for item in items:
            if isinstance(item, dict):
                doc = item
            elif item:
                doc = super().find_one(req=None, _id=item)
            else:
                continue

            if not doc:
                continue

            if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._validate_associated_items(doc, validation_errors)

            # make sure no items are killed or spiked or scheduled
            doc_item_state = doc.get(ITEM_STATE, CONTENT_STATE.PUBLISHED)
            if doc_item_state in (CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED,
                                  CONTENT_STATE.SCHEDULED):
                validation_errors.append(
                    'Item cannot contain associated {} item'.format(
                        doc[ITEM_STATE]))

            if doc.get(EMBARGO):
                validation_errors.append(
                    'Item cannot have associated items with Embargo')

            # don't validate items that have already been published
            if doc_item_state not in [
                    CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED
            ]:
                validate_item = {
                    'act': self.publish_type,
                    'type': doc[ITEM_TYPE],
                    'validate': doc
                }
                if isinstance(item, dict):
                    validate_item['embedded'] = True
                errors = get_resource_service('validate').post([validate_item],
                                                               headline=True)
                if errors[0]:
                    pre_errors = [
                        'Associated item %s %s' %
                        (doc.get('slugline', ''), error) for error in errors[0]
                    ]
                    validation_errors.extend(pre_errors)

            # check the locks on the items
            if doc.get(
                    'lock_session', None
            ) and original_item.get('lock_session') != doc['lock_session']:
                validation_errors.extend([
                    '{}: packaged item cannot be locked'.format(
                        doc['headline'])
                ])

    def _import_into_legal_archive(self, doc):
        """Import into legal archive async

        :param {dict} doc: document to be imported
        """

        if doc.get(ITEM_STATE) != CONTENT_STATE.SCHEDULED:
            kwargs = {'item_id': doc.get(config.ID_FIELD)}

            # countdown=3 is for elasticsearch to be refreshed with archive and published changes
            import_into_legal_archive.apply_async(
                countdown=3, kwargs=kwargs)  # @UndefinedVariable

    def _apply_kill_template(self, item):
        # apply the kill template
        updates = render_content_template_by_name(item, 'kill')
        return updates

    def apply_kill_override(self, item, updates):
        """Applies kill override.

        Kill requires content to be generated based on the item getting killed (and not the
        item that is being actioned on).

        :param dict item: item to kill
        :param dict updates: updates that needs to be modified based on the template
        :return:
        """
        try:
            desk_name = get_resource_service('desks').get_desk_name(
                item.get('task', {}).get('desk'))
            city = get_dateline_city(item.get('dateline'))
            kill_header = json.loads(render_template(
                'article_killed_override.json',
                slugline=item.get('slugline', ''),
                headline=item.get('headline', ''),
                desk_name=desk_name,
                city=city,
                versioncreated=item.get('versioncreated',
                                        item.get(config.LAST_UPDATED)),
                body_html=updates.get('body_html', ''),
                update_headline=updates.get('headline', '')),
                                     strict=False)
            for key, value in kill_header.items():
                kill_header[key] = html.unescape(value)

            updates.update(kill_header)
        except Exception:
            logger.exception(
                'Failed to apply kill header template to item {}.'.format(
                    item))

    def _refresh_associated_items(self, original):
        """Refresh associated items before publishing

        Any updates made to the basic metadata after the item was associated are
        carried over and used when validating those items.
        """
        associations = original.get(ASSOCIATIONS) or {}
        for _, item in associations.items():
            if isinstance(item, dict) and item.get(config.ID_FIELD):
                keys = DEFAULT_SCHEMA.keys()
                if app.settings.get('COPY_METADATA_FROM_PARENT') and item.get(
                        ITEM_TYPE) in MEDIA_TYPES:
                    updates = original
                    keys = FIELDS_TO_COPY_FOR_ASSOCIATED_ITEM
                else:
                    updates = super().find_one(
                        req=None, _id=item[config.ID_FIELD]) or {}

                update_item_data(item, updates, keys)

    def _mark_media_item_as_used(self, updates, original):
        if ASSOCIATIONS not in updates or not updates.get(ASSOCIATIONS):
            return

        for item_name, item_obj in updates.get(ASSOCIATIONS).items():
            if not (item_obj and config.ID_FIELD in item_obj):
                continue

            item_id = item_obj[config.ID_FIELD]
            media_item = {}
            if app.settings.get('COPY_METADATA_FROM_PARENT') and item_obj.get(
                    ITEM_TYPE) in MEDIA_TYPES:
                stored_item = (original.get(ASSOCIATIONS)
                               or {}).get(item_name) or item_obj
            else:
                media_item = stored_item = self.find_one(req=None, _id=item_id)
                if not stored_item:
                    continue

            # If the media item is not marked as 'used', mark it as used
            if original.get(ITEM_TYPE) == CONTENT_TYPE.TEXT and \
                    (item_obj is not stored_item or not stored_item.get('used')):
                archive_service = get_resource_service('archive')
                if media_item is not stored_item:
                    media_item = archive_service.find_one(req=None,
                                                          _id=item_id)

                if media_item and not media_item.get('used'):
                    archive_service.system_update(media_item['_id'],
                                                  {'used': True}, media_item)

                stored_item['used'] = True
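
A minimal standalone sketch of the expiry selection done in _set_item_expiry
above: when an embargo or publish schedule is present, its UTC datetime becomes
the offset from which the configured expiry window is counted. The helper below
is a simplified stand-in for superdesk's get_expiry_date, and all names and
values are illustrative.

from datetime import datetime, timedelta


def get_expiry_date(minutes, offset=None):
    # simplified stand-in: count the expiry window from the offset if given,
    # otherwise from "now"
    base = offset or datetime.utcnow()
    return base + timedelta(minutes=minutes)


embargo = datetime(2030, 1, 1, 12, 0)
expiry = get_expiry_date(3 * 24 * 60, offset=embargo)  # three days after embargo
assert expiry == datetime(2030, 1, 4, 12, 0)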
Example n. 18
class BasePublishService(BaseService):
    """
    Base service class for "publish" endpoint
    """

    publish_type = 'publish'
    published_state = 'published'

    non_digital = partial(filter, lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE)
    digital = partial(filter, lambda s: (s.get('subscriber_type', '') in {SUBSCRIBER_TYPES.DIGITAL,
                                                                          SUBSCRIBER_TYPES.ALL}))
    takes_package_service = TakesPackageService()
    package_service = PackageService()

    def on_update(self, updates, original):
        self._validate(original, updates)
        self._set_updates(original, updates, updates.get(config.LAST_UPDATED, utcnow()))
        convert_task_attributes_to_objectId(updates)  # store task ids (user/desk/stage) as ObjectId
        self._process_publish_updates(original, updates)

    def on_updated(self, updates, original):
        original = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        updates.update(original)

        if updates[ITEM_OPERATION] != ITEM_KILL and \
                original.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            get_resource_service('archive_broadcast').on_broadcast_master_updated(updates[ITEM_OPERATION], original)

        get_resource_service('archive_broadcast').reset_broadcast_status(updates, original)
        push_content_notification([updates])
        self._import_into_legal_archive(updates)

    def update(self, id, updates, original):
        """
        Handles workflow of each Publish, Corrected and Killed.
        """
        try:
            user = get_user()
            auto_publish = updates.pop('auto_publish', False)

            if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._publish_package_items(original, updates)
                self._update_archive(original, updates, should_insert_into_versions=auto_publish)
            else:
                self._publish_associations(original, id)
                updated = deepcopy(original)
                updated.update(updates)

                if self.published_state != CONTENT_STATE.KILLED:
                    self._process_takes_package(original, updated, updates)

                self._update_archive(original, updated, should_insert_into_versions=auto_publish)
                self.update_published_collection(published_item_id=original[config.ID_FIELD], updated=updated)

            from apps.publish.enqueue import enqueue_published
            enqueue_published.apply_async()

            push_notification('item:publish', item=str(id),
                              unique_name=original['unique_name'],
                              desk=str(original.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')))
        except SuperdeskApiError:
            raise
        except KeyError as e:
            raise SuperdeskApiError.badRequestError(
                message="Key is missing on article to be published: {}".format(str(e)))
        except Exception as e:
            logger.exception("Something bad happened while publishing %s".format(id))
            raise SuperdeskApiError.internalError(message="Failed to publish the item: {}".format(str(e)))

    def _process_takes_package(self, original, updated, updates):
        # if targeted_for is set then we don't send to digital clients.
        targeted_for = updates.get('targeted_for', original.get('targeted_for'))
        if original[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED} \
                and not (targeted_for or is_genre(original, BROADCAST_GENRE)):
            # check if item is in a digital package
            last_updated = updates.get(config.LAST_UPDATED, utcnow())
            package = self.takes_package_service.get_take_package(original)
            if not package:
                # If the type of the item is text or preformatted then the item
                # needs to be sent to digital subscribers, so package the item
                # as a take.
                package_id = self.takes_package_service.package_story_as_a_take(updated, {}, None)
                package = get_resource_service(ARCHIVE).find_one(req=None, _id=package_id)
            package_id = package[config.ID_FIELD]
            package_updates = self.process_takes(updates_of_take_to_be_published=updates,
                                                 original_of_take_to_be_published=original,
                                                 package=package)
            # If the original package is corrected then the next take shouldn't change it
            # back to 'published'
            preserve_state = package.get(ITEM_STATE, '') == CONTENT_STATE.CORRECTED and \
                updates.get(ITEM_OPERATION, ITEM_PUBLISH) == ITEM_PUBLISH
            self._set_updates(package, package_updates, last_updated, preserve_state)
            package_updates.setdefault(ITEM_OPERATION, updates.get(ITEM_OPERATION, ITEM_PUBLISH))
            self._update_archive(package, package_updates)
            package.update(package_updates)
            self.update_published_collection(published_item_id=package_id)
            self._import_into_legal_archive(package)

    def _validate(self, original, updates):
        self.raise_if_not_marked_for_publication(original)
        self.raise_if_invalid_state_transition(original)

        updated = original.copy()
        updated.update(updates)

        takes_package = self.takes_package_service.get_take_package(original)

        if self.publish_type == 'publish':
            # validate if take can be published
            if takes_package and not self.takes_package_service.can_publish_take(
                    takes_package, updates.get(SEQUENCE, original.get(SEQUENCE, 1))):
                raise PublishQueueError.previous_take_not_published_error(
                    Exception("Previous takes are not published."))

            validate_schedule(updated.get(PUBLISH_SCHEDULE), takes_package.get(SEQUENCE, 1) if takes_package else 1)
            update_schedule_settings(updated, PUBLISH_SCHEDULE, updated.get(PUBLISH_SCHEDULE))

            if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and updates.get(EMBARGO):
                get_resource_service(ARCHIVE).validate_embargo(updated)

        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            if updates.get(EMBARGO):
                raise SuperdeskApiError.badRequestError("Embargo can't be set after publishing")

            if updates.get('dateline'):
                raise SuperdeskApiError.badRequestError("Dateline can't be modified after publishing")

        if self.publish_type == ITEM_PUBLISH and updated.get('rewritten_by'):
            # if update is published then user cannot publish the takes
            rewritten_by = get_resource_service(ARCHIVE).find_one(req=None, _id=updated.get('rewritten_by'))
            if rewritten_by and rewritten_by.get(ITEM_STATE) in PUBLISH_STATES:
                raise SuperdeskApiError.badRequestError("Cannot publish the story after Update is published.!")

        validate_item = {'act': self.publish_type, 'type': original['type'], 'validate': updated}
        validation_errors = get_resource_service('validate').post([validate_item])
        if validation_errors[0]:
            raise ValidationError(validation_errors)

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            package_validation_errors = []
            self._validate_package_contents(original, takes_package, package_validation_errors)
            if len(package_validation_errors) > 0:
                raise ValidationError(package_validation_errors)

            self._validate_package(original, updates)

    def _validate_package(self, package, updates):
        items = self.package_service.get_residrefs(package)
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if a correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!")

    def raise_if_not_marked_for_publication(self, original):
        if original.get('flags', {}).get('marked_for_not_publication', False):
            raise SuperdeskApiError.badRequestError('Cannot publish an item which is marked as Not for Publication')

    def raise_if_invalid_state_transition(self, original):
        if not is_workflow_state_transition_valid(self.publish_type, original[ITEM_STATE]):
            error_message = "Can't {} as item state is {}" if original[ITEM_TYPE] == CONTENT_TYPE.TEXT else \
                "Can't {} as either package state or one of the items state is {}"
            raise InvalidStateTransitionError(error_message.format(self.publish_type, original[ITEM_STATE]))

    def get_digital_id_for_package_item(self, package_item):
        """
        Finds the digital item id for a given item in a package
        :param package_item: item in a package
        :return string: Digital item id if there's one otherwise id of package_item
        """
        if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            return package_item[config.ID_FIELD]
        else:
            package_item_takes_package_id = self.takes_package_service.get_take_package_id(package_item)
            if not package_item_takes_package_id:
                return package_item[config.ID_FIELD]
            return package_item_takes_package_id
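        # e.g. a text item that belongs to takes package 'pkg-1' resolves to
        # 'pkg-1', while a picture item always resolves to its own _id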

    def _process_publish_updates(self, original, updates):
        """ Common updates for published items """
        desk = None
        if original.get('task', {}).get('desk'):
            desk = get_resource_service('desks').find_one(req=None, _id=original['task']['desk'])
        if not original.get('ingest_provider'):
            updates['source'] = desk['source'] if desk and desk.get('source', '') \
                else app.settings['DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES']
        updates['pubstatus'] = PUB_STATUS.CANCELED if self.publish_type == 'kill' else PUB_STATUS.USABLE
        self._set_item_expiry(updates, original)

    def _set_item_expiry(self, updates, original):
        """
        Set the expiry for the item
        :param dict updates: doc on which publishing action is performed
        """
        desk_id = original.get('task', {}).get('desk')
        stage_id = original.get('task', {}).get('stage')
        offset = None

        if EMBARGO in updates or PUBLISH_SCHEDULE in updates:
            offset = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(updates, EMBARGO)
        elif EMBARGO in original or PUBLISH_SCHEDULE in original:
            offset = get_utc_schedule(original, PUBLISH_SCHEDULE) or get_utc_schedule(original, EMBARGO)

        updates['expiry'] = get_expiry(desk_id, stage_id, offset=offset)

    def _is_take_item(self, item):
        """ Returns True if the item was a take
        """
        return item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and \
            (not (item.get('targeted_for') or is_genre(item, BROADCAST_GENRE)))

    def process_takes(self, updates_of_take_to_be_published, package, original_of_take_to_be_published=None):
        """
        Primary rule for publishing a Take in Takes Package is: all previous takes must be published before a take
        can be published.

        Also generates the body_html of the takes package and makes sure the metadata for the package is the same as
        the metadata of the take to be published.

        :param dict updates_of_take_to_be_published: updates for the take to be published
        :param dict package: Takes package to publish
        :param dict original_of_take_to_be_published: original of the take to be published
        :return: Takes Package Updates
        """

        takes = self.takes_package_service.get_published_takes(package)
        body_html = updates_of_take_to_be_published.get('body_html',
                                                        original_of_take_to_be_published.get('body_html', ''))
        package_updates = {}

        groups = package.get(GROUPS, [])
        if groups:
            take_refs = [ref for group in groups if group['id'] == 'main' for ref in group.get('refs', [])]
            sequence_num_of_take_to_be_published = 0
            take_article_id = updates_of_take_to_be_published.get(
                config.ID_FIELD, original_of_take_to_be_published[config.ID_FIELD])

            for r in take_refs:
                if r[GUID_FIELD] == take_article_id:
                    sequence_num_of_take_to_be_published = r[SEQUENCE]
                    r['is_published'] = True
                    break

            if takes and self.published_state != 'killed':
                body_html_list = [take.get('body_html', '') for take in takes]
                if self.published_state == 'published':
                    body_html_list.append(body_html)
                else:
                    body_html_list[sequence_num_of_take_to_be_published - 1] = body_html

                package_updates['body_html'] = '<br>'.join(body_html_list)
            else:
                package_updates['body_html'] = body_html

            metadata_tobe_copied = self.takes_package_service.fields_for_creating_take.copy()
            metadata_tobe_copied.extend([PUBLISH_SCHEDULE, SCHEDULE_SETTINGS, 'byline'])
            updated_take = original_of_take_to_be_published.copy()
            updated_take.update(updates_of_take_to_be_published)
            metadata_from = updated_take
            # this rule has changed to use the last published metadata
            # per ticket SD-3885
            # if self.published_state == 'corrected' and len(takes) > 1:
            #     # get the last take metadata only if there are more than one takes
            #     metadata_from = takes[-1]

            for metadata in metadata_tobe_copied:
                if metadata in metadata_from:
                    package_updates[metadata] = metadata_from.get(metadata)

            if self.published_state == 'killed':
                # if the take is killed then update the other refs in the group
                # to reflect the correct version, headline and slugline
                archive_service = get_resource_service(ARCHIVE)
                for ref in take_refs:
                    if ref.get(RESIDREF) != take_article_id:
                        archive_item = archive_service.find_one(req=None, _id=ref.get(RESIDREF))
                        ref['headline'] = archive_item.get('headline')
                        ref['slugline'] = archive_item.get('slugline')
                        ref[config.VERSION] = archive_item.get(config.VERSION)

            take_ref = next((ref for ref in take_refs if ref.get(RESIDREF) == take_article_id), None)
            if take_ref:
                # for published take update the version, headline and slugline
                take_ref['headline'] = updated_take.get('headline')
                take_ref['slugline'] = updated_take.get('slugline')
                take_ref[config.VERSION] = updated_take.get(config.VERSION)

            package_updates[GROUPS] = groups

        return package_updates

    def _publish_package_items(self, package, updates):
        """
        Publishes all items of a package recursively then publishes the package itself
        :param package: package to publish
        :param updates: payload
        """
        items = self.package_service.get_residrefs(package)

        if len(items) == 0 and self.publish_type == ITEM_PUBLISH:
            raise SuperdeskApiError.badRequestError("Empty package cannot be published!")

        removed_items = []
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if a correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!")
            items.extend(added_items)

        if items:
            archive_publish = get_resource_service('archive_publish')
            for guid in items:
                package_item = super().find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        "Package item with id: {} does not exist.".format(guid))

                if package_item[ITEM_STATE] not in PUBLISH_STATES:  # if the item is not published then publish it
                    if package_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        # if the item is a package do recursion to publish
                        sub_updates = {i: updates[i] for i in ['state', 'operation'] if i in updates}
                        sub_updates['groups'] = list(package_item['groups'])
                        self._publish_package_items(package_item, sub_updates)
                        self._update_archive(original=package_item, updates=sub_updates,
                                             should_insert_into_versions=False)
                    else:
                        # publish the item
                        package_item[PUBLISHED_IN_PACKAGE] = package[config.ID_FIELD]
                        archive_publish.patch(id=package_item.pop(config.ID_FIELD), updates=package_item)

                    insert_into_versions(id_=guid)

                elif guid in removed_items:
                    # remove the package information from the package item.
                    linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES, [])
                                          if linked.get(PACKAGE) != package.get(config.ID_FIELD)]
                    super().system_update(guid, {LINKED_IN_PACKAGES: linked_in_packages}, package_item)

                package_item = super().find_one(req=None, _id=guid)
                self.package_service.update_field_in_package(updates, package_item[config.ID_FIELD],
                                                             config.VERSION, package_item[config.VERSION])

        updated = deepcopy(package)
        updated.update(updates)
        self.update_published_collection(published_item_id=package[config.ID_FIELD], updated=updated)

    def update_published_collection(self, published_item_id, updated=None):
        """
        Updates the published collection with the published item.
        Set the last_published_version to false for previous versions of the published items.
        :param: str published_item_id: _id of the document.
        """
        published_item = super().find_one(req=None, _id=published_item_id)
        published_item = copy(published_item)
        if updated:
            published_item.update(updated)
        published_item['is_take_item'] = self.takes_package_service.get_take_package_id(published_item) is not None
        if not published_item.get('digital_item_id'):
            published_item['digital_item_id'] = self.get_digital_id_for_package_item(published_item)
        get_resource_service(PUBLISHED).update_published_items(published_item_id, LAST_PUBLISHED_VERSION, False)
        return get_resource_service(PUBLISHED).post([published_item])

    def set_state(self, original, updates):
        """
        Set the state of the document based on the action (publish, correction, kill)
        :param dict original: original document
        :param dict updates: updates related to document
        """
        updates[PUBLISH_SCHEDULE] = None
        updates[SCHEDULE_SETTINGS] = {}
        updates[ITEM_STATE] = self.published_state

    def _set_updates(self, original, updates, last_updated, preserve_state=False):
        """
        Sets config.VERSION, config.LAST_UPDATED, ITEM_STATE in updates document.
        If item is being published and embargo is available then append Editorial Note with 'Embargoed'.

        :param dict original: original document
        :param dict updates: updates related to the original document
        :param datetime last_updated: datetime of the updates.
        """
        if not preserve_state:
            self.set_state(original, updates)
        updates.setdefault(config.LAST_UPDATED, last_updated)

        if original[config.VERSION] == updates.get(config.VERSION, original[config.VERSION]):
            resolve_document_version(document=updates, resource=ARCHIVE, method='PATCH', latest_doc=original)

        if updates.get(EMBARGO, original.get(EMBARGO)) \
                and updates.get('ednote', original.get('ednote', '')).find('Embargo') == -1:
            updates['ednote'] = '{} {}'.format(original.get('ednote', ''), 'Embargoed.').strip()

        user = get_user()
        if user and user.get(config.ID_FIELD):
            updates['version_creator'] = user[config.ID_FIELD]

    def _update_archive(self, original, updates, versioned_doc=None, should_insert_into_versions=True):
        """
        Updates the articles into archive collection and inserts the latest into archive_versions.
        Also clears autosaved versions if any.
        :param: versioned_doc: doc which can be inserted into archive_versions
        :param: should_insert_into_versions if True inserts the latest document into versions collection
        """

        self.backend.update(self.datasource, original[config.ID_FIELD], updates, original)

        if should_insert_into_versions:
            if versioned_doc is None:
                insert_into_versions(id_=original[config.ID_FIELD])
            else:
                insert_into_versions(doc=versioned_doc)

        get_component(ItemAutosave).clear(original[config.ID_FIELD])

    def _get_changed_items(self, existing_items, updates):
        """
        Returns the added and removed items from existing_items
        :param existing_items: Existing list
        :param updates: Changes
        :return: list of removed items and list of added items
        """
        if 'groups' in updates:
            new_items = self.package_service.get_residrefs(updates)
            removed_items = list(set(existing_items) - set(new_items))
            added_items = list(set(new_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []

    def _validate_package_contents(self, package, takes_package, validation_errors=None):
        """
        If the item passed is a package this function ensures that the unpublished content validates, that none of
        the content is locked by a session other than the publishing session, and that no killed or spiked content
        is allowed.

        :param package: package to validate
        :param takes_package: takes package of the item, if any
        :param validation_errors: validation errors are appended if there are any.
        """
        if validation_errors is None:
            validation_errors = []
        # Ensure it is the sort of thing we need to validate
        if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and not takes_package and self.publish_type == ITEM_PUBLISH:
            items = self.package_service.get_residrefs(package)

            # make sure package is not scheduled or spiked
            if package[ITEM_STATE] in (CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                validation_errors.append('Package cannot be {}'.format(package[ITEM_STATE]))

            if package.get(EMBARGO):
                validation_errors.append('Package cannot have Embargo')

            if items:
                for guid in items:
                    doc = super().find_one(req=None, _id=guid)

                    if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        digital = self.takes_package_service.get_take_package(doc) or {}
                        self._validate_package_contents(doc, digital, validation_errors)

                    # make sure no items are killed or spiked or scheduled
                    if doc[ITEM_STATE] in (CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                        validation_errors.append('Package cannot contain {} item'.format(doc[ITEM_STATE]))

                    if doc.get(EMBARGO):
                        validation_errors.append('Package cannot have Items with Embargo')

                    # don't validate items that have already been published
                    if doc[ITEM_STATE] not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
                        validate_item = {'act': self.publish_type, 'type': doc[ITEM_TYPE], 'validate': doc}
                        errors = get_resource_service('validate').post([validate_item], headline=True)
                        if errors[0]:
                            validation_errors.extend(errors[0])

                    # check the locks on the items
                    if doc.get('lock_session', None) and package.get('lock_session') != doc['lock_session']:
                        validation_errors.extend(['{}: packaged item cannot be locked'.format(doc['headline'])])

    def _import_into_legal_archive(self, doc):
        """
        Import into legal archive async
        :param {dict} doc: document to be imported
        """

        if doc.get(ITEM_STATE) != CONTENT_STATE.SCHEDULED:
            kwargs = {
                'item_id': doc.get(config.ID_FIELD)
            }

            # countdown=3 is for elasticsearch to be refreshed with archive and published changes
            import_into_legal_archive.apply_async(countdown=3, kwargs=kwargs)  # @UndefinedVariable

    def _publish_associations(self, parent, guid):
        """Publish parent item associations."""
        associations = parent.get('associations', {})
        for rel, item in associations.copy().items():
            if not item or item.get('pubstatus', 'usable') != 'usable':
                # drop empty associations and those that are no longer usable
                associations.pop(rel)
                continue
            self._publish_renditions(item, rel, guid)

    def _publish_renditions(self, item, rel, guid):
        """Publish item renditions."""
        images = []
        renditions = item.get('renditions', {})
        original = renditions.get('original')
        crop_service = CropService()
        for rendition_name, rendition in renditions.items():
            crop = get_crop(rendition)
            rend_spec = crop_service.get_crop_by_name(rendition_name)
            if crop and rend_spec:
                file_name = '%s/%s/%s' % (guid, rel, rendition_name)
                rendition['media'] = app.media.media_id(file_name, original.get('mimetype'))
                rendition['href'] = app.media.url_for_media(rendition['media'], original.get('mimetype'))
                rendition['width'] = rend_spec.get('width')
                rendition['height'] = rend_spec.get('height')
                rendition['ratio'] = rend_spec.get('ratio')
                rendition['mimetype'] = original.get('mimetype')
                images.append({
                    'rendition': rendition_name,
                    'file_name': file_name,
                    'media': rendition['media'],
                    'spec': rend_spec,
                    'crop': crop,
                })
        publish_images.delay(images=images, original=original, item=item)
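
A hedged, self-contained illustration of how process_takes above assembles the
takes package body: the bodies of the already-published takes are kept in
sequence, the take being published is appended (on publish) or substituted into
its slot (on correct), and everything is joined with '<br>'. All values below
are illustrative.

published_take_bodies = ['<p>Take one</p>', '<p>Take two</p>']
new_take_body = '<p>Take three</p>'

# publish: the new take is appended after the published bodies
body_html_list = published_take_bodies + [new_take_body]
assert '<br>'.join(body_html_list) == (
    '<p>Take one</p><br><p>Take two</p><br><p>Take three</p>')

# correct: the take replaces its own slot, here sequence number 2
body_html_list = list(published_take_bodies)
body_html_list[2 - 1] = '<p>Take two, corrected</p>'
assert '<br>'.join(body_html_list) == (
    '<p>Take one</p><br><p>Take two, corrected</p>')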
Example n. 19
class PublishedPackageItemsService(BaseService):
    package_service = PackageService()

    def create(self, docs, **kwargs):
        ids = []
        for doc in docs:
            original = get_resource_service(ARCHIVE).find_one(
                req=None, _id=doc['package_id'])
            if not original or original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE:
                raise SuperdeskApiError.badRequestError(
                    'Invalid package identifier')
            if original[ITEM_STATE] not in PUBLISH_STATES:
                raise SuperdeskApiError.badRequestError(
                    'Package was not published')

            items = {}
            for new_item in doc['new_items']:
                item = get_resource_service(ARCHIVE).find_one(
                    req=None, _id=new_item['item_id'])
                if not item:
                    raise SuperdeskApiError.badRequestError(
                        'Invalid item identifier %s' % new_item['item_id'])
                try:
                    self.package_service.check_for_circular_reference(
                        original, new_item['item_id'])
                except ValidationError:
                    raise SuperdeskApiError.badRequestError(
                        'Circular reference in item %s' % new_item['item_id'])
                items[item[config.ID_FIELD]] = item

            updates = {
                key: original[key]
                for key in [config.ID_FIELD, PACKAGE_TYPE, GROUPS]
                if key in original
            }
            create_root_group([updates])
            items_refs = []
            for new_item in doc['new_items']:
                items_refs.append(
                    self._set_item_assoc(updates, new_item,
                                         items[new_item['item_id']]))
            get_resource_service(ARCHIVE).system_update(
                original[config.ID_FIELD], updates, original)
            for item_ref in items_refs:
                self.package_service.update_link(updates, item_ref)

            items_published = [
                new_item[ITEM_STATE] in PUBLISH_STATES
                for new_item in items.values()
            ]
            if any(items_published):
                get_resource_service('archive_correct').patch(
                    id=doc['package_id'], updates=updates)

            ids.append(original[config.ID_FIELD])
        return ids

    def _set_item_assoc(self, package, new_item, item_doc):
        group = self._get_group(package, new_item['group'])
        for assoc in group[REFS]:
            if assoc.get(RESIDREF) == new_item['item_id']:
                return assoc
        item_ref = get_item_ref(item_doc)
        group[REFS].append(item_ref)
        return item_ref

    def _get_group(self, package, group):
        for package_group in package[GROUPS]:
            if group == package_group[GROUP_ID]:
                return package_group
        self._add_group_in_root(group, package[GROUPS])
        package[GROUPS].append({GROUP_ID: group, REFS: []})
        return package[GROUPS][-1]

    def _add_group_in_root(self, group, groups):
        root_refs = []
        for group_meta in groups:
            if group_meta.get(GROUP_ID) == ROOT_GROUP:
                root_refs = [ref[ID_REF] for ref in group_meta[REFS]]
                if group not in root_refs:
                    group_meta[REFS].append({ID_REF: group})
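
A minimal sketch of the group bookkeeping performed by _get_group and
_add_group_in_root above: a group that is not yet part of the package is
appended, and a reference to it is added to the root group. The literal keys
('id', 'refs', 'idRef', 'root') are assumptions standing in for the GROUP_ID,
REFS, ID_REF and ROOT_GROUP constants used by the service.

package = {'groups': [{'id': 'root', 'refs': [{'idRef': 'main'}]},
                      {'id': 'main', 'refs': []}]}

new_group = 'sidebar'
root = next(g for g in package['groups'] if g['id'] == 'root')
if new_group not in [ref['idRef'] for ref in root['refs']]:
    root['refs'].append({'idRef': new_group})
package['groups'].append({'id': new_group, 'refs': []})

assert {'idRef': 'sidebar'} in root['refs']
assert package['groups'][-1] == {'id': 'sidebar', 'refs': []}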
Example n. 20
class BasePublishService(BaseService):
    """Base service for different "publish" services."""

    publish_type = "publish"
    published_state = "published"
    item_operation = ITEM_PUBLISH

    non_digital = partial(filter, lambda s: s.get("subscriber_type", "") == SUBSCRIBER_TYPES.WIRE)
    digital = partial(
        filter, lambda s: (s.get("subscriber_type", "") in {SUBSCRIBER_TYPES.DIGITAL, SUBSCRIBER_TYPES.ALL})
    )
    package_service = PackageService()

    def on_update(self, updates, original):
        self._refresh_associated_items(original)
        self._validate(original, updates)
        self._set_updates(
            original,
            updates,
            updates.get(config.LAST_UPDATED, utcnow()),
            preserve_state=original.get("state") in (CONTENT_STATE.SCHEDULED,) and "pubstatus" not in updates,
        )
        convert_task_attributes_to_objectId(updates)  # store task ids (user/desk/stage) as ObjectId
        transtype_metadata(updates, original)
        self._process_publish_updates(original, updates)
        self._mark_media_item_as_used(updates, original)
        update_refs(updates, original)

    def on_updated(self, updates, original):
        original = super().find_one(req=None, _id=original[config.ID_FIELD])
        updates.update(original)

        if updates[ITEM_OPERATION] not in {ITEM_KILL, ITEM_TAKEDOWN} and original.get(ITEM_TYPE) in [
            CONTENT_TYPE.TEXT,
            CONTENT_TYPE.PREFORMATTED,
        ]:
            get_resource_service("archive_broadcast").on_broadcast_master_updated(updates[ITEM_OPERATION], original)

        get_resource_service("archive_broadcast").reset_broadcast_status(updates, original)
        push_content_notification([updates])
        self._import_into_legal_archive(updates)
        CropService().update_media_references(updates, original, True)
        signals.item_published.send(self, item=original)
        packages = self.package_service.get_packages(original[config.ID_FIELD])
        if packages and packages.count() > 0:
            archive_correct = get_resource_service("archive_correct")
            processed_packages = []
            for package in packages:
                original_updates = {"operation": updates["operation"], ITEM_STATE: updates[ITEM_STATE]}
                if (
                    package[ITEM_STATE] in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]
                    and package.get(PACKAGE_TYPE, "") == ""
                    and str(package[config.ID_FIELD]) not in processed_packages
                ):
                    original_updates["groups"] = package["groups"]

                    if updates.get("headline"):
                        self.package_service.update_field_in_package(
                            original_updates, original[config.ID_FIELD], "headline", updates.get("headline")
                        )

                    if updates.get("slugline"):
                        self.package_service.update_field_in_package(
                            original_updates, original[config.ID_FIELD], "slugline", updates.get("slugline")
                        )

                    archive_correct.patch(id=package[config.ID_FIELD], updates=original_updates)
                    insert_into_versions(id_=package[config.ID_FIELD])
                    processed_packages.append(package[config.ID_FIELD])
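        # each containing package is corrected at most once per publish, with any
        # headline/slugline change propagated into that package's refs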

    def update(self, id, updates, original):
        """
        Handles workflow of each Publish, Corrected, Killed and TakeDown.
        """
        try:
            user = get_user()
            auto_publish = updates.get("auto_publish", False)

            # unlock the item
            set_unlock_updates(updates)

            if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._publish_package_items(original, updates)
                self._update_archive(original, updates, should_insert_into_versions=auto_publish)
            else:
                self._publish_associated_items(original, updates)
                updated = deepcopy(original)
                updated.update(deepcopy(updates))

                if updates.get(ASSOCIATIONS):
                    self._refresh_associated_items(updated, skip_related=True)  # association updates got lost during the update

                if updated.get(ASSOCIATIONS):
                    self._fix_related_references(updated, updates)

                signals.item_publish.send(self, item=updated)
                self._update_archive(original, updates, should_insert_into_versions=auto_publish)
                self.update_published_collection(published_item_id=original[config.ID_FIELD], updated=updated)

            from apps.publish.enqueue import enqueue_published

            enqueue_published.apply_async()

            push_notification(
                "item:publish",
                item=str(id),
                unique_name=original["unique_name"],
                desk=str(original.get("task", {}).get("desk", "")),
                user=str(user.get(config.ID_FIELD, "")),
            )

            if updates.get("previous_marked_user") and not updates.get("marked_for_user"):
                # send notification so that marked for me list can be updated
                get_resource_service("archive").handle_mark_user_notifications(updates, original, False)

        except SuperdeskApiError:
            raise
        except KeyError as e:
            logger.exception(e)
            raise SuperdeskApiError.badRequestError(
                message=_("Key is missing on article to be published: {exception}").format(exception=str(e))
            )
        except Exception as e:
            logger.exception(e)
            raise SuperdeskApiError.internalError(
                message=_("Failed to publish the item: {id}").format(id=str(id)), exception=e
            )
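
    # Minimal usage sketch (hypothetical doc; this mirrors what the tests below do):
    # publishing goes through the resource layer, which lands in update() above and
    # then enqueues transmission:
    #
    #     get_resource_service("archive_publish").patch(
    #         id=doc[config.ID_FIELD],
    #         updates={ITEM_STATE: CONTENT_STATE.PUBLISHED, config.VERSION: doc[config.VERSION] + 1},
    #     )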

    def is_targeted(self, article, target=None):
        """Checks if article is targeted.

        Returns True if the given article has been targeted by region or
        subscriber type or specific subscribers.

        :param article: Article to check
        :param target: Optional specific target to check if exists
        :return:
        """
        if target:
            return len(article.get(target, [])) > 0
        else:
            return (
                len(
                    article.get("target_regions", [])
                    + article.get("target_types", [])
                    + article.get("target_subscribers", [])
                )
                > 0
            )
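
    # Sketch of is_targeted() on hypothetical articles:
    #
    #     self.is_targeted({"target_regions": [{"name": "NSW"}]})                # True
    #     self.is_targeted({"headline": "plain story"})                          # False
    #     self.is_targeted({"target_types": ["wire"]}, target="target_regions")  # False, checks only that field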

    def _validate(self, original, updates):
        self.raise_if_invalid_state_transition(original)
        self._raise_if_unpublished_related_items(original)

        updated = original.copy()
        updated.update(updates)

        self.raise_if_not_marked_for_publication(updated)

        if self.publish_type == "publish":
            # The publish schedule has not been cleared
            if (
                updates.get(PUBLISH_SCHEDULE)
                or updated.get(SCHEDULE_SETTINGS, {}).get("utc_{}".format(PUBLISH_SCHEDULE))
                or not original.get(PUBLISH_SCHEDULE)
            ):
                update_schedule_settings(updated, PUBLISH_SCHEDULE, updated.get(PUBLISH_SCHEDULE))
                validate_schedule(updated.get(SCHEDULE_SETTINGS, {}).get("utc_{}".format(PUBLISH_SCHEDULE)))

        if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and updates.get(EMBARGO):
            update_schedule_settings(updated, EMBARGO, updated.get(EMBARGO))
            get_resource_service(ARCHIVE).validate_embargo(updated)

        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            if updates.get(EMBARGO) and not original.get(EMBARGO):
                raise SuperdeskApiError.badRequestError(_("Embargo can't be set after publishing"))

        if self.publish_type == ITEM_KILL:
            if updates.get("dateline"):
                raise SuperdeskApiError.badRequestError(_("Dateline can't be modified on kill or take down"))

        if self.publish_type == ITEM_PUBLISH and updated.get("rewritten_by"):
            rewritten_by = get_resource_service(ARCHIVE).find_one(req=None, _id=updated.get("rewritten_by"))
            if rewritten_by and rewritten_by.get(ITEM_STATE) in PUBLISH_STATES:
                raise SuperdeskApiError.badRequestError(_("Cannot publish the story after Update is published."))

        if self.publish_type == ITEM_PUBLISH and updated.get("rewrite_of"):
            rewrite_of = get_resource_service(ARCHIVE).find_one(req=None, _id=updated.get("rewrite_of"))
            if rewrite_of and rewrite_of.get(ITEM_STATE) not in PUBLISH_STATES:
                raise SuperdeskApiError.badRequestError(_("Can't publish update until original story is published."))

        publish_type = "auto_publish" if updates.get("auto_publish") else self.publish_type
        validate_item = {"act": publish_type, "type": original["type"], "validate": updated}
        validation_errors = get_resource_service("validate").post([validate_item], fields=True)
        for errors, fields in validation_errors:
            if errors:
                raise SuperdeskValidationError(errors, fields)

        validation_errors = []
        self._validate_associated_items(original, updates, validation_errors)

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self._validate_package(original, updates, validation_errors)

        if len(validation_errors) > 0:
            raise ValidationError(validation_errors)

    def _raise_if_unpublished_related_items(self, original):
        if not request:
            return

        if (
            config.PUBLISH_ASSOCIATED_ITEMS
            or not original.get(ASSOCIATIONS)
            or self.publish_type not in [ITEM_PUBLISH, ITEM_CORRECT]
        ):
            return

        archive_service = get_resource_service("archive")
        publishing_warnings_confirmed = strtobool(request.args.get("publishing_warnings_confirmed") or "False")

        if not publishing_warnings_confirmed:
            for key, associated_item in original.get(ASSOCIATIONS).items():
                if associated_item and is_related_content(key):
                    item = archive_service.find_one(req=None, _id=associated_item.get("_id"))
                    item = item if item else associated_item

                    if item.get("state") not in PUBLISH_STATES:
                        error_msg = json.dumps(
                            {
                                "warnings": [
                                    _(
                                        "There are unpublished related items that won't be sent out as "
                                        "related items. Do you want to publish the article anyway?"
                                    )
                                ]
                            }
                        )
                        raise ValidationError(error_msg)

    def _validate_package(self, package, updates, validation_errors):
        # make sure package is not scheduled or spiked
        if package[ITEM_STATE] in (CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
            validation_errors.append(_("Package cannot be {state}").format(state=package[ITEM_STATE]))

        if package.get(EMBARGO):
            validation_errors.append(_("Package cannot have Embargo"))

        items = self.package_service.get_residrefs(package)
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                validation_errors.append(_("Corrected package cannot be empty!"))

    def raise_if_not_marked_for_publication(self, original):
        if original.get("flags", {}).get("marked_for_not_publication", False):
            raise SuperdeskApiError.badRequestError(_("Cannot publish an item which is marked as Not for Publication"))

    def raise_if_invalid_state_transition(self, original):
        if not is_workflow_state_transition_valid(self.publish_type, original[ITEM_STATE]):
            error_message = (
                _("Can't {operation} as item state is {state}")
                if original[ITEM_TYPE] == CONTENT_TYPE.TEXT
                else _("Can't {operation} as either package state or one of the items state is {state}")
            )
            raise InvalidStateTransitionError(
                error_message.format(operation=self.publish_type, state=original[ITEM_STATE])
            )

    def _process_publish_updates(self, original, updates):
        """Common updates for published items."""
        desk = None
        if original.get("task", {}).get("desk"):
            desk = get_resource_service("desks").find_one(req=None, _id=original["task"]["desk"])
        if not original.get("ingest_provider"):
            updates["source"] = (
                desk["source"]
                if desk and desk.get("source", "")
                else app.settings["DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES"]
            )
        updates["pubstatus"] = PUB_STATUS.CANCELED if self.publish_type == ITEM_KILL else PUB_STATUS.USABLE
        self._set_item_expiry(updates, original)

    def _set_item_expiry(self, updates, original):
        """Set the expiry for the item.

        :param dict updates: doc on which publishing action is performed
        :param dict original: original doc, used for the desk/stage and any existing schedule
        """
        desk_id = original.get("task", {}).get("desk")
        stage_id = original.get("task", {}).get("stage")

        offset = None
        if EMBARGO in updates or PUBLISH_SCHEDULE in updates:
            offset = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(updates, EMBARGO)
        elif EMBARGO in original or PUBLISH_SCHEDULE in original:
            offset = get_utc_schedule(original, PUBLISH_SCHEDULE) or get_utc_schedule(original, EMBARGO)

        if app.settings.get("PUBLISHED_CONTENT_EXPIRY_MINUTES"):
            updates["expiry"] = get_expiry_date(app.settings["PUBLISHED_CONTENT_EXPIRY_MINUTES"], offset=offset)
        else:
            updates["expiry"] = get_expiry(desk_id, stage_id, offset=offset)

    def _publish_package_items(self, package, updates):
        """Publishes all items of a package recursively then publishes the package itself.

        :param package: package to publish
        :param updates: payload
        """
        items = self.package_service.get_residrefs(package)

        if len(items) == 0 and self.publish_type == ITEM_PUBLISH:
            raise SuperdeskApiError.badRequestError(_("Empty package cannot be published!"))

        added_items = []
        removed_items = []
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError(_("Corrected package cannot be empty!"))
            items.extend(added_items)

        if not updates.get("groups") and package.get("groups"):  # this saves some typing in tests
            updates["groups"] = package.get("groups")

        if items:
            archive_publish = get_resource_service("archive_publish")
            for guid in items:
                package_item = super().find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        _("Package item with id: {guid} does not exist.").format(guid=guid)
                    )

                if package_item[ITEM_STATE] not in PUBLISH_STATES:  # if the item is not published then publish it
                    if package_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        # if the item is a package do recursion to publish
                        sub_updates = {i: updates[i] for i in ["state", "operation"] if i in updates}
                        sub_updates["groups"] = list(package_item["groups"])
                        self._publish_package_items(package_item, sub_updates)
                        self._update_archive(
                            original=package_item, updates=sub_updates, should_insert_into_versions=False
                        )
                    else:
                        # publish the item
                        package_item[PUBLISHED_IN_PACKAGE] = package[config.ID_FIELD]
                        archive_publish.patch(id=package_item.pop(config.ID_FIELD), updates=package_item)

                    insert_into_versions(id_=guid)

                elif guid in added_items:
                    linked_in_packages = package_item.get(LINKED_IN_PACKAGES, [])
                    if package[config.ID_FIELD] not in (lp.get(PACKAGE) for lp in linked_in_packages):
                        linked_in_packages.append({PACKAGE: package[config.ID_FIELD]})
                        super().system_update(
                            guid,
                            {LINKED_IN_PACKAGES: linked_in_packages, PUBLISHED_IN_PACKAGE: package[config.ID_FIELD]},
                            package_item,
                        )

                elif guid in removed_items:
                    # remove the package information from the package item.
                    linked_in_packages = [
                        linked
                        for linked in package_item.get(LINKED_IN_PACKAGES, [])
                        if linked.get(PACKAGE) != package.get(config.ID_FIELD)
                    ]
                    super().system_update(guid, {LINKED_IN_PACKAGES: linked_in_packages}, package_item)

                package_item = super().find_one(req=None, _id=guid)

                self.package_service.update_field_in_package(
                    updates, package_item[config.ID_FIELD], config.VERSION, package_item[config.VERSION]
                )

                if package_item.get(ASSOCIATIONS):
                    self.package_service.update_field_in_package(
                        updates, package_item[config.ID_FIELD], ASSOCIATIONS, package_item[ASSOCIATIONS]
                    )

        updated = deepcopy(package)
        updated.update(updates)
        self.update_published_collection(published_item_id=package[config.ID_FIELD], updated=updated)
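
    # Sketch (hypothetical package): publishing a package with residrefs
    # ["t1", "p2"] publishes text item "t1" via archive_publish, recurses into
    # the nested package "p2", and records each item's version back into the
    # package's groups via update_field_in_package().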

    def update_published_collection(self, published_item_id, updated=None):
        """Updates the published collection with the published item.

        Set the last_published_version to false for previous versions of the published items.

        :param str published_item_id: _id of the document.
        :param dict updated: optional updates to merge over the stored published item
        """
        published_item = super().find_one(req=None, _id=published_item_id)
        published_item = copy(published_item)
        if updated:
            published_item.update(updated)
        get_resource_service(PUBLISHED).update_published_items(published_item_id, LAST_PUBLISHED_VERSION, False)
        return get_resource_service(PUBLISHED).post([published_item])
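
    # Sketch (hypothetical id "abc"): republishing first flags every earlier row
    # for "abc" in the published collection with LAST_PUBLISHED_VERSION == False,
    # then posts the merged doc as the new latest row.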

    def set_state(self, original, updates):
        """Set the state of the document based on the action (publish, correction, kill, recalled)

        :param dict original: original document
        :param dict updates: updates related to document
        """
        updates[PUBLISH_SCHEDULE] = None
        updates[SCHEDULE_SETTINGS] = {}
        updates[ITEM_STATE] = self.published_state

    def _set_updates(self, original, updates, last_updated, preserve_state=False):
        """Sets config.VERSION, config.LAST_UPDATED, ITEM_STATE in updates document.

        If item is being published and embargo is available then append Editorial Note with 'Embargoed'.

        :param dict original: original document
        :param dict updates: updates related to the original document
        :param datetime last_updated: datetime of the updates.
        :param bool preserve_state: when True, leave the item state untouched
        """
        if not preserve_state:
            self.set_state(original, updates)
        updates.setdefault(config.LAST_UPDATED, last_updated)

        if original[config.VERSION] == updates.get(config.VERSION, original[config.VERSION]):
            resolve_document_version(document=updates, resource=ARCHIVE, method="PATCH", latest_doc=original)

        user = get_user()
        if user and user.get(config.ID_FIELD):
            updates["version_creator"] = user[config.ID_FIELD]

    def _update_archive(self, original, updates, versioned_doc=None, should_insert_into_versions=True):
        """Updates the articles into archive collection and inserts the latest into archive_versions.

        Also clears autosaved versions if any.

        :param versioned_doc: doc which can be inserted into archive_versions
        :param should_insert_into_versions: if True, inserts the latest document into the versions collection
        """
        self.backend.update(self.datasource, original[config.ID_FIELD], updates, original)
        app.on_archive_item_updated(updates, original, updates[ITEM_OPERATION])

        if should_insert_into_versions:
            if versioned_doc is None:
                insert_into_versions(id_=original[config.ID_FIELD])
            else:
                insert_into_versions(doc=versioned_doc)

        get_component(ItemAutosave).clear(original[config.ID_FIELD])

    def _get_changed_items(self, existing_items, updates):
        """Returns the added and removed items from existing_items.

        :param existing_items: Existing list
        :param updates: Changes
        :return: list of removed items and list of added items
        """
        if "groups" in updates:
            new_items = self.package_service.get_residrefs(updates)
            removed_items = list(set(existing_items) - set(new_items))
            added_items = list(set(new_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []
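
    # For example (hypothetical residrefs): with existing_items == ["a", "b", "c"]
    # and updates["groups"] resolving to ["b", "c", "d"], this returns
    # removed_items == ["a"] and added_items == ["d"].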

    def _validate_associated_items(self, original_item, updates=None, validation_errors=None):
        """Validates associated items.

        This function ensures that unpublished content validates, that none of the
        content is locked, and that no killed, recalled or spiked content is allowed.

        :param original_item: item whose associations (and, for packages, residrefs) are checked
        :param updates: changes being applied, merged over the original associations
        :param validation_errors: validation errors are appended to this list, if there are any.
        """

        if validation_errors is None:
            validation_errors = []

        if updates is None:
            updates = {}

        # merge associations
        associations = deepcopy(original_item.get(ASSOCIATIONS, {}))
        associations.update(updates.get(ASSOCIATIONS, {}))

        items = list(associations.values())
        if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and self.publish_type == ITEM_PUBLISH:
            items.extend(self.package_service.get_residrefs(original_item))

        main_publish_schedule = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(
            original_item, PUBLISH_SCHEDULE
        )

        for item in items:
            orig = None
            if isinstance(item, dict) and item.get(config.ID_FIELD):
                doc = item
                orig = super().find_one(req=None, _id=item[config.ID_FIELD])
                if not app.settings.get("COPY_METADATA_FROM_PARENT") and orig:
                    doc = orig
                try:
                    doc.update({"lock_user": orig["lock_user"]})
                except (TypeError, KeyError):
                    pass
            elif item:
                doc = super().find_one(req=None, _id=item)
            else:
                continue

            if not doc:
                continue

            if not orig:
                orig = doc.copy()

            if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._validate_associated_items(doc, validation_errors=validation_errors)

            # make sure no items are killed or recalled or spiked
            # using the latest version of the item from archive
            doc_item_state = orig.get(ITEM_STATE, CONTENT_STATE.PUBLISHED)
            if (
                doc_item_state
                in {
                    CONTENT_STATE.KILLED,
                    CONTENT_STATE.RECALLED,
                    CONTENT_STATE.SPIKED,
                }
                or (doc_item_state == CONTENT_STATE.SCHEDULED and main_publish_schedule is None)
            ):
                validation_errors.append(_("Item cannot contain associated {state} item.").format(state=doc_item_state))
            elif doc_item_state == CONTENT_STATE.SCHEDULED:
                item_schedule = get_utc_schedule(orig, PUBLISH_SCHEDULE)
                if main_publish_schedule < item_schedule:
                    validation_errors.append(_("Associated item is scheduled later than current item."))

            if doc.get(EMBARGO):
                validation_errors.append(_("Item cannot have associated items with Embargo"))

            # don't validate items that have already been published
            if doc_item_state not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
                validate_item = {"act": self.publish_type, "type": doc[ITEM_TYPE], "validate": doc}
                if isinstance(item, dict):
                    validate_item["embedded"] = True
                errors = get_resource_service("validate").post([validate_item], headline=True, fields=True)[0]
                if errors[0]:
                    pre_errors = [
                        _("Associated item {name} {error}").format(name=doc.get("slugline", ""), error=error)
                        for error in errors[0]
                    ]
                    validation_errors.extend(pre_errors)

            if config.PUBLISH_ASSOCIATED_ITEMS:
                # check the locks on the items
                if doc.get("lock_user"):
                    if original_item["lock_user"] != doc["lock_user"]:
                        validation_errors.extend(
                            [
                                "{}: {}".format(
                                    doc.get("headline", doc["_id"]), _("packaged item is locked by another user")
                                )
                            ]
                        )
                    elif original_item["lock_user"] == doc["lock_user"]:
                        validation_errors.extend(
                            [
                                "{}: {}".format(
                                    doc.get("headline", doc["_id"]),
                                    _("packaged item is locked by you. Unlock it and try again"),
                                )
                            ]
                        )
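
    # e.g. (hypothetical): an association whose archived state is "killed" appends
    # "Item cannot contain associated killed item." to validation_errors, while an
    # association not yet published is first run through the "validate" service.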

    def _import_into_legal_archive(self, doc):
        """Import into legal archive async

        :param {dict} doc: document to be imported
        """

        if doc.get(ITEM_STATE) != CONTENT_STATE.SCHEDULED:
            kwargs = {"item_id": doc.get(config.ID_FIELD)}
            # countdown=3 is for elasticsearch to be refreshed with archive and published changes
            import_into_legal_archive.apply_async(countdown=3, kwargs=kwargs)  # @UndefinedVariable

    def _refresh_associated_items(self, original, skip_related=False):
        """Refreshes associated items with the latest version. Any further updates made to basic metadata done after
        item was associated will be carried on and used when validating those items.
        """
        associations = original.get(ASSOCIATIONS) or {}
        for name, item in associations.items():
            if isinstance(item, dict) and item.get(config.ID_FIELD) and (not skip_related or len(item.keys()) > 2):
                keys = [key for key in DEFAULT_SCHEMA.keys() if key not in PRESERVED_FIELDS]

                if app.settings.get("COPY_METADATA_FROM_PARENT") and item.get(ITEM_TYPE) in MEDIA_TYPES:
                    updates = original
                    keys = FIELDS_TO_COPY_FOR_ASSOCIATED_ITEM
                else:
                    updates = super().find_one(req=None, _id=item[config.ID_FIELD]) or {}

                try:
                    is_db_item_bigger_ver = updates["_current_version"] > item["_current_version"]
                except KeyError:
                    update_item_data(item, updates, keys)
                else:
                    # if copying from parent then don't keep the existing values,
                    # otherwise keep them only when the db item is not newer (is_db_item_bigger_ver)
                    keep_existing = not app.settings.get("COPY_METADATA_FROM_PARENT") and not is_db_item_bigger_ver
                    update_item_data(item, updates, keys, keep_existing=keep_existing)

    def _fix_related_references(self, updated, updates):
        for key, item in updated[ASSOCIATIONS].items():
            if item and item.get("_fetchable", True) and is_related_content(key):
                updated[ASSOCIATIONS][key] = {
                    "_id": item["_id"],
                    "type": item["type"],
                    "order": item.get("order", 1),
                }
                updates.setdefault("associations", {})[key] = updated[ASSOCIATIONS][key]
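
    # Sketch (hypothetical association): a fetchable related item such as
    #     {"_id": "x1", "type": "text", "order": 2, "headline": "...", ...}
    # is reduced to the reference stub
    #     {"_id": "x1", "type": "text", "order": 2}
    # in both `updated` and `updates`, so only the reference is kept.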

    def _publish_associated_items(self, original, updates=None):
        """If there any updates to associated item and if setting:PUBLISH_ASSOCIATED_ITEMS is true
        then publish the associated item
        """

        if updates is None:
            updates = {}

        if not publish_services.get(self.publish_type):
            # publish type not supported
            return

        publish_service = get_resource_service(publish_services.get(self.publish_type))

        if not updates.get(ASSOCIATIONS) and not original.get(ASSOCIATIONS):
            # there's nothing to update
            return

        associations = original.get(ASSOCIATIONS) or {}

        if updates and updates.get(ASSOCIATIONS):
            associations.update(updates[ASSOCIATIONS])

        archive_service = get_resource_service("archive")

        for associations_key, associated_item in associations.items():
            if associated_item is None:
                continue
            if isinstance(associated_item, dict) and associated_item.get(config.ID_FIELD):
                if not config.PUBLISH_ASSOCIATED_ITEMS or not publish_service:
                    if original.get(ASSOCIATIONS, {}).get(associations_key):
                        # Not allowed to publish
                        original[ASSOCIATIONS][associations_key]["state"] = self.published_state
                        original[ASSOCIATIONS][associations_key]["operation"] = self.publish_type
                    continue

                # if item is not fetchable, only mark it as published
                if not associated_item.get("_fetchable", True):
                    associated_item["state"] = self.published_state
                    associated_item["operation"] = self.publish_type
                    updates[ASSOCIATIONS] = updates.get(ASSOCIATIONS, {})
                    updates[ASSOCIATIONS][associations_key] = associated_item
                    continue

                if associated_item.get("state") == CONTENT_STATE.UNPUBLISHED:
                    # get the original associated item from archive
                    orig_associated_item = archive_service.find_one(req=None, _id=associated_item[config.ID_FIELD])

                    orig_associated_item["state"] = updates.get("state", self.published_state)
                    orig_associated_item["operation"] = self.publish_type

                    # if main item is scheduled we must also schedule associations
                    self._inherit_publish_schedule(original, updates, orig_associated_item)

                    get_resource_service("archive_publish").patch(
                        id=orig_associated_item.pop(config.ID_FIELD), updates=orig_associated_item
                    )
                    continue

                if associated_item.get("state") not in PUBLISH_STATES:
                    # This associated item has not been published before
                    remove_unwanted(associated_item)

                    # get the original associated item from archive
                    orig_associated_item = archive_service.find_one(req=None, _id=associated_item[config.ID_FIELD])

                    # check if the original associated item exists in archive
                    if not orig_associated_item:
                        raise SuperdeskApiError.badRequestError(
                            _('Associated item "{}" does not exist in the system').format(associations_key)
                        )

                    if orig_associated_item.get("state") in PUBLISH_STATES:
                        # item was published already
                        original[ASSOCIATIONS][associations_key].update(
                            {
                                "state": orig_associated_item["state"],
                                "operation": orig_associated_item.get("operation", self.publish_type),
                            }
                        )
                        continue

                    # if the original associated item stage is present, it should be updated in the association item.
                    if orig_associated_item.get("task", {}).get("stage") and associated_item.get("task"):
                        associated_item["task"].update({"stage": orig_associated_item.get("task", {}).get("stage")})

                    # update _updated, otherwise it's stored as string.
                    # fixes SDESK-5043
                    associated_item["_updated"] = utcnow()

                    # if main item is scheduled we must also schedule associations
                    self._inherit_publish_schedule(original, updates, associated_item)

                    get_resource_service("archive_publish").patch(
                        id=associated_item.pop(config.ID_FIELD), updates=associated_item
                    )
                    associated_item["state"] = updates.get("state", self.published_state)
                    associated_item["operation"] = self.publish_type
                    updates[ASSOCIATIONS] = updates.get(ASSOCIATIONS, {})
                    updates[ASSOCIATIONS][associations_key] = associated_item
                elif associated_item.get("state") != self.published_state:
                    # Check if there are updates to associated item
                    association_updates = updates.get(ASSOCIATIONS, {}).get(associations_key)

                    # if main item is scheduled we must also schedule associations
                    self._inherit_publish_schedule(original, updates, associated_item)

                    if not association_updates:
                        # there is no update for this item
                        associated_item.get("task", {}).pop("stage", None)
                        remove_unwanted(associated_item)
                        publish_service.patch(id=associated_item.pop(config.ID_FIELD), updates=associated_item)
                        continue

                    if association_updates.get("state") not in PUBLISH_STATES:
                        # There's an update to the published associated item
                        remove_unwanted(association_updates)
                        publish_service.patch(id=association_updates.pop(config.ID_FIELD), updates=association_updates)

            # once the associated item has been published, insert its latest version into archive_versions
            insert_into_versions(doc=associated_item)
        self._refresh_associated_items(original)

    def _mark_media_item_as_used(self, updates, original):
        if ASSOCIATIONS not in updates or not updates.get(ASSOCIATIONS):
            return

        for item_name, item_obj in updates.get(ASSOCIATIONS).items():
            if not item_obj or config.ID_FIELD not in item_obj:
                continue
            item_id = item_obj[config.ID_FIELD]
            media_item = self.find_one(req=None, _id=item_id)
            if app.settings.get("COPY_METADATA_FROM_PARENT") and item_obj.get(ITEM_TYPE) in MEDIA_TYPES:
                stored_item = (original.get(ASSOCIATIONS) or {}).get(item_name) or item_obj
            else:
                stored_item = media_item
                if not stored_item:
                    continue
            track_usage(media_item, stored_item, item_obj, item_name, original)
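        # Sketch (hypothetical media association "pic1"): track_usage() records that
        # the media item is used by this article, comparing the archived media item,
        # the stored association and the incoming association.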

    def _inherit_publish_schedule(self, original, updates, associated_item):
        if self.publish_type == "publish" and (updates.get(PUBLISH_SCHEDULE) or original.get(PUBLISH_SCHEDULE)):
            schedule_settings = updates.get(SCHEDULE_SETTINGS, original.get(SCHEDULE_SETTINGS, {}))
            publish_schedule = updates.get(PUBLISH_SCHEDULE, original.get(PUBLISH_SCHEDULE))
            if publish_schedule and not associated_item.get(PUBLISH_SCHEDULE):
                associated_item[PUBLISH_SCHEDULE] = publish_schedule
                associated_item[SCHEDULE_SETTINGS] = schedule_settings
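
    # Sketch (hypothetical times): publishing a story scheduled for 18:00 UTC
    # copies its publish_schedule and schedule_settings onto an associated item
    # that has no schedule of its own, so both are transmitted together.
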
class RemoveExpiredFromPublishedCollection(SuperdeskTestCase):
    def setUp(self):
        super().setUp()
        self._init_data()

        self.app.data.insert('users', self.users)
        self.app.data.insert('desks', self.desks)
        self.app.data.insert('vocabularies', self.vocabularies)
        self.app.data.insert('subscribers', self.subscribers)
        self.app.data.insert(ARCHIVE, self.articles)

        self.filename = os.path.join(os.path.abspath(os.path.dirname(__file__)), "validators.json")
        self.json_data = [
            {"_id": "kill_text", "act": "kill", "type": "text", "schema": {"headline": {"type": "string"}}},
            {"_id": "publish_text", "act": "publish", "type": "text", "schema": {}},
            {"_id": "correct_text", "act": "correct", "type": "text", "schema": {}},
            {"_id": "publish_composite", "act": "publish", "type": "composite", "schema": {}},
        ]

        with open(self.filename, "w+") as file:
            json.dump(self.json_data, file)
        init_app(self.app)
        ValidatorsPopulateCommand().run(self.filename)

        self.package_service = PackageService()

    def tearDown(self):
        super().tearDown()
        if self.filename and os.path.exists(self.filename):
            os.remove(self.filename)

    def test_can_remove_from_production_succeeds_when_published_once(self):
        """
        Tests that can_remove_from_production() returns True if the item is published only once.
        """

        doc = self.articles[0].copy()

        updates = {'targeted_for': [{'name': 'New South Wales', 'allow': True}]}
        get_resource_service(ARCHIVE).patch(id=doc[config.ID_FIELD], updates=updates)

        published_version_number = doc[config.VERSION] + 1
        get_resource_service(ARCHIVE_PUBLISH).patch(id=doc[config.ID_FIELD],
                                                    updates={ITEM_STATE: CONTENT_STATE.PUBLISHED,
                                                             config.VERSION: published_version_number})

        self._move_to_archived_and_assert_can_remove_from_production(doc[config.ID_FIELD], self.assertTrue)

    def test_can_remove_from_production_fails_when_published_and_then_killed(self):
        """
        Tests that can_remove_from_production() returns False if the item is published more than once (here: published and then killed).
        """

        doc = self.articles[0].copy()

        updates = {'targeted_for': [{'name': 'New South Wales', 'allow': True}]}
        get_resource_service(ARCHIVE).patch(id=doc[config.ID_FIELD], updates=updates)

        published_version_number = doc[config.VERSION] + 1
        get_resource_service(ARCHIVE_PUBLISH).patch(id=doc[config.ID_FIELD],
                                                    updates={ITEM_STATE: CONTENT_STATE.PUBLISHED,
                                                             config.VERSION: published_version_number})

        published_item = self._move_to_archived_and_assert_can_remove_from_production(doc[config.ID_FIELD],
                                                                                      self.assertTrue)

        published_version_number += 1
        get_resource_service(ARCHIVE_KILL).patch(id=doc['_id'],
                                                 updates={ITEM_STATE: CONTENT_STATE.KILLED,
                                                          config.VERSION: published_version_number})
        self.assertFalse(get_resource_service(PUBLISHED).can_remove_from_production(published_item))

    def test_can_remove_from_production_second_rule(self):
        """
        Tests that can_remove_from_production() returns False when the expired published item is part of a package.
        """

        doc = self.articles[0].copy()

        get_resource_service(ARCHIVE_PUBLISH).patch(id=doc[config.ID_FIELD],
                                                    updates={ITEM_STATE: CONTENT_STATE.PUBLISHED,
                                                             config.VERSION: doc[config.VERSION] + 1})

        item_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=doc[config.ID_FIELD])
        self.assertIsNotNone(TakesPackageService().get_take_package_id(item_in_production))

        self._move_to_archived_and_assert_can_remove_from_production(doc[config.ID_FIELD], self.assertFalse)

    def test_can_remove_from_production_third_rule(self):
        """
        Tests that can_remove_from_production() returns False when the expired published item is a package.
        """

        published_articles = [self.articles[1].copy(), self.articles[2].copy(), self.articles[3].copy(),
                              self.articles[4].copy()]

        for published_article in published_articles:
            published_article[ITEM_STATE] = CONTENT_STATE.PUBLISHED

        published_service = get_resource_service(PUBLISHED)
        published_service.post(published_articles)

        published_package = self._move_to_archived_and_assert_can_remove_from_production(
            self.articles[4][config.ID_FIELD], self.assertFalse)

        self._move_to_archived_and_assert_can_remove_from_production(self.articles[3][config.ID_FIELD],
                                                                     self.assertFalse, published_package)

        self._move_to_archived_and_assert_can_remove_from_production(self.articles[2][config.ID_FIELD],
                                                                     self.assertFalse, published_package)
        self._move_to_archived_and_assert_can_remove_from_production(self.articles[1][config.ID_FIELD],
                                                                     self.assertTrue, published_package)

    def test_cannot_remove_scheduled_content(self):
        published_service = get_resource_service(PUBLISHED)
        original = self.articles[0].copy()

        original[ITEM_STATE] = CONTENT_STATE.SCHEDULED
        original['publish_schedule'] = utcnow() + timedelta(days=2)

        published_service.post([original])
        published_items = published_service.get_other_published_items(original['item_id'])
        self.assertEqual(1, published_items.count())

        RemoveExpiredPublishContent().run()
        published_items = published_service.get_other_published_items(original['item_id'])
        self.assertEqual(1, published_items.count())

    def test_remove_published_expired_content(self):
        original = self.articles[0].copy()
        original[ITEM_STATE] = CONTENT_STATE.PUBLISHED
        self._create_and_insert_into_versions(original, True)

        published_service = get_resource_service(PUBLISHED)
        archive_publish = get_resource_service(ARCHIVE_PUBLISH)

        subscribers, subscribers_yet_to_receive = archive_publish.get_subscribers(original, SUBSCRIBER_TYPES.WIRE)
        archive_publish.queue_transmission(original, subscribers)
        published_service.post([original])

        published_items = published_service.get(req=None, lookup=None)
        self.assertEqual(1, published_items.count())

        published_service.update_published_items(original['item_id'], 'expiry', utcnow() + timedelta(minutes=-60))
        RemoveExpiredPublishContent().run()
        published_items = published_service.get_other_published_items(str(original['item_id']))
        self.assertEqual(0, published_items.count())

        archived_item = get_resource_service('archived').find_one(req=None, _id=str(original[config.ID_FIELD]))
        self.assertEqual(archived_item['item_id'], self.articles[0][config.ID_FIELD])
        self.assertFalse(archived_item['allow_post_publish_actions'])
        self.assertFalse(archived_item['can_be_removed'])

    def test_remove_published_and_killed_content_separately(self):
        doc = self.articles[0]
        original = doc.copy()

        updates = {'targeted_for': [{'name': 'New South Wales', 'allow': True}]}
        get_resource_service(ARCHIVE).patch(id=original[config.ID_FIELD], updates=updates)

        original.update(updates)
        self._create_and_insert_into_versions(original, False)

        published_version_number = original[config.VERSION] + 1
        get_resource_service(ARCHIVE_PUBLISH).patch(id=doc[config.ID_FIELD],
                                                    updates={ITEM_STATE: CONTENT_STATE.PUBLISHED,
                                                             config.VERSION: published_version_number})

        published_service = get_resource_service(PUBLISHED)
        published_items = published_service.get(req=None, lookup=None)
        self.assertEqual(1, published_items.count())

        article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        self.assertIsNotNone(article_in_production)
        self.assertEqual(article_in_production[ITEM_STATE], CONTENT_STATE.PUBLISHED)
        self.assertEqual(article_in_production[config.VERSION], published_version_number)
        insert_into_versions(doc=article_in_production)

        # Setting the expiry date of the published article to 1 hr back from now
        published_service.update_published_items(
            original[config.ID_FIELD], 'expiry', utcnow() + timedelta(minutes=-60))

        # Killing the published article and inserting into archive_versions as unittests use service directly
        published_version_number += 1
        get_resource_service(ARCHIVE_KILL).patch(id=doc[config.ID_FIELD],
                                                 updates={ITEM_STATE: CONTENT_STATE.KILLED,
                                                          config.VERSION: published_version_number})

        # Executing the Expiry Job for the Published Article and asserting the collections
        RemoveExpiredPublishContent().run()

        published_items = published_service.get(req=None, lookup=None)
        self.assertEqual(1, published_items.count())

        article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        self.assertIsNotNone(article_in_production)
        self.assertEqual(article_in_production[ITEM_STATE], CONTENT_STATE.KILLED)
        self.assertEqual(article_in_production[config.VERSION], published_version_number)
        insert_into_versions(doc=article_in_production)

        # Setting the expiry date of the killed article to 1 hr back from now and running the job again
        published_service.update_published_items(
            original[config.ID_FIELD], 'expiry', utcnow() + timedelta(minutes=-60))
        RemoveExpiredPublishContent().run()

        published_items = published_service.get_other_published_items(str(original[config.ID_FIELD]))
        self.assertEqual(0, published_items.count())

        article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        self.assertIsNone(article_in_production)

    def test_remove_takes_package(self):
        """
        Tests the behavior of remove_expired() when just the takes package expires.
        """

        def expire(published_takes_pkg):
            published_service.update(published_takes_pkg[config.ID_FIELD],
                                     {'expiry': utcnow() + timedelta(minutes=-60)}, published_takes_pkg)

            RemoveExpiredPublishContent().run()

            if published_takes_pkg[ITEM_STATE] == CONTENT_STATE.PUBLISHED:
                self.assertEqual(published_takes_pkg[ITEM_OPERATION], 'publish')
            elif published_takes_pkg[ITEM_STATE] == CONTENT_STATE.KILLED:
                self.assertEqual(published_takes_pkg[ITEM_OPERATION], 'kill')

        doc = self.articles[0].copy()
        self._create_and_insert_into_versions(doc, False)

        published_version_number = doc[config.VERSION] + 1
        get_resource_service(ARCHIVE_PUBLISH).patch(id=doc[config.ID_FIELD],
                                                    updates={ITEM_STATE: CONTENT_STATE.PUBLISHED,
                                                             config.VERSION: published_version_number})
        insert_into_versions(id_=doc[config.ID_FIELD])

        published_version_number += 1
        get_resource_service(ARCHIVE_KILL).patch(id=doc[config.ID_FIELD],
                                                 updates={ITEM_STATE: CONTENT_STATE.KILLED,
                                                          config.VERSION: published_version_number})
        insert_into_versions(id_=doc[config.ID_FIELD])

        published_service = get_resource_service(PUBLISHED)
        items_in_published_repo = list(published_service.get_from_mongo(req=None, lookup=None))
        self.assertEqual(len(items_in_published_repo), 4)

        # Expiring the Takes Package whose state is Published
        published_takes_pkg = [g for g in items_in_published_repo if is_takes_package(g) and
                               g[ITEM_STATE] == CONTENT_STATE.PUBLISHED]
        expire(published_takes_pkg[0])

        # Expiring the Takes Package whose state is Killed
        published_takes_pkg = [g for g in items_in_published_repo if is_takes_package(g) and
                               g[ITEM_STATE] == CONTENT_STATE.KILLED]
        expire(published_takes_pkg[0])

    def test_remove_when_package_and_items_in_package_expire(self):
        """
        Tests if items in the package are copied to the legal archive when the package in the published collection expires.
        In this test both the items in the package and the package itself expire. Since the job sorts the expired items in the
        order they are created, the creation order of items in this test is: items in the package and then the package.
        """

        package = self.articles[3].copy()

        items_in_published_repo = self._publish_package_and_assert_published_collection(package)
        published_service = get_resource_service(PUBLISHED)

        # Validate if version is available for each item in the package after publishing
        for item in items_in_published_repo:
            if item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                items_in_package = self.package_service.get_item_refs(package)
                for item_in_pkg in items_in_package:
                    if config.VERSION not in item_in_pkg:
                        self.fail('version is not found for item in the package. Item Id: %s' % item_in_pkg['guid'])

            # Expiring the published items
            published_service.update_published_items(item['item_id'], 'expiry', utcnow() + timedelta(minutes=-60))

        RemoveExpiredPublishContent().run()
        self.assertEqual(published_service.get(req=None, lookup=None).count(), 0)

    def test_remove_when_only_package_expires(self):
        """
        Tests if items in the package are copied to the legal archive when only the package in the published collection expires.
        In this test only the package expires. Since the job sorts the expired items in the order they are created,
        the creation order of items in this test is: items in the package and then the package.
        """

        package = self.articles[3].copy()

        self._publish_package_and_assert_published_collection(package)
        published_service = get_resource_service(PUBLISHED)

        # Expiring the package
        published_service.update_published_items(package[config.ID_FIELD], 'expiry',
                                                 utcnow() + timedelta(minutes=-60))

        RemoveExpiredPublishContent().run()
        items_in_published_repo = published_service.get(req=None, lookup=None)
        self.assertEqual(items_in_published_repo.count(), 2)

        for item in items_in_published_repo:
            self.assertTrue(item['allow_post_publish_actions'])

    def _publish_package_and_assert_published_collection(self, package):
        # note: the package's groups must contain only text items
        item_refs = self.package_service.get_residrefs(package)

        # Inserting docs into archive_versions for all items in the package and for the package
        for item_ref in item_refs:
            item = None
            for article in self.articles:
                if article[config.ID_FIELD] == item_ref:
                    item = article
                    break

            item = item.copy()
            updates = {'targeted_for': [{'name': 'Wire', 'allow': True}]}
            get_resource_service(ARCHIVE).patch(id=item[config.ID_FIELD], updates=updates)

            self._create_and_insert_into_versions(item, False)

        self._create_and_insert_into_versions(package, False)

        updates = {ITEM_STATE: CONTENT_STATE.PUBLISHED, config.VERSION: package[config.VERSION] + 1,
                   GROUPS: package.get(GROUPS)}
        get_resource_service(ARCHIVE_PUBLISH).patch(id=package[config.ID_FIELD], updates=updates)

        items_in_published_repo = get_resource_service(PUBLISHED).get_from_mongo(req=None, lookup=None)
        self.assertEqual(items_in_published_repo.count(), 3)

        return items_in_published_repo

    def _init_data(self):
        self.users = [{'_id': '1', 'username': '******'}]
        self.desks = [{'_id': ObjectId('123456789ABCDEF123456789'), 'name': 'desk1'}]
        self.vocabularies = [
            {"_id": "rightsinfo", "items": [
                {"is_active": True, "name": "AAP", "copyrightHolder": "Australian Associated Press",
                 "copyrightNotice": "AAP content is owned by or licensed to AAP",
                 "usageTerms": "The direct recipient must comply with the limitations specified in the AAP Information"
                 },
                {"is_active": True, "name": "default", "copyrightHolder": "Australian Associated Press",
                 "copyrightNotice": "AAP content is owned by or licensed to AAP.",
                 "usageTerms": "The direct recipient must comply with the limitations specified in the AAP Information."
                 }]
             }
        ]

        self.subscribers = [{"_id": "1", "name": "sub1", "is_active": True, "subscriber_type": SUBSCRIBER_TYPES.WIRE,
                             "media_type": "media", "sequence_num_settings": {"max": 10, "min": 1},
                             "email": "*****@*****.**",
                             "destinations": [{"name": "dest1", "format": "nitf", "delivery_type": "ftp",
                                               "config": {"address": "127.0.0.1", "username": "******"}}]
                             },
                            {"_id": "2", "name": "sub2", "is_active": True, "subscriber_type": SUBSCRIBER_TYPES.DIGITAL,
                             "media_type": "media", "sequence_num_settings": {"max": 10, "min": 1},
                             "email": "*****@*****.**",
                             "destinations": [{"name": "dest1", "format": "newsmlg2", "delivery_type": "ftp",
                                               "config": {"address": "127.0.0.1", "username": "******"}}]
                             }]

        self.articles = [{'guid': '1',
                          '_id': '1',
                          'last_version': 3,
                          config.VERSION: 4,
                          'body_html': "shouldn't be referenced by any package",
                          'urgency': 4,
                          'anpa_category': [{'qcode': 'A', 'name': 'Sport'}],
                          'headline': 'no package reference',
                          'pubstatus': 'usable',
                          'firstcreated': utcnow(),
                          'byline': 'By Alan Karben',
                          'dateline': {'located': {'city': 'Sydney'}},
                          'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                          'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                                      {'qcode': '04001002', 'name': 'Weather'}],
                          'expiry': utcnow() + timedelta(minutes=20),
                          'task': {'user': '******', 'desk': '123456789ABCDEF123456789'},
                          ITEM_STATE: CONTENT_STATE.PROGRESS,
                          ITEM_TYPE: CONTENT_TYPE.TEXT,
                          'unique_name': '#1'},
                         {'guid': '2',
                          '_id': '2',
                          'last_version': 3,
                          config.VERSION: 4,
                          'body_html': 'some body',
                          'urgency': 4,
                          'headline': 'some headline',
                          'abstract': 'Abstract Sample',
                          'anpa_category': [{'qcode': 'A', 'name': 'Sport'}],
                          'pubstatus': 'done',
                          'firstcreated': utcnow(),
                          'byline': 'By Alan Karben',
                          'dateline': {'located': {'city': 'Sydney'}},
                          'slugline': 'taking takes',
                          'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                          'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                                      {'qcode': '04001002', 'name': 'Weather'}],
                          'task': {'user': '******', 'desk': '123456789ABCDEF123456789'},
                          ITEM_STATE: CONTENT_STATE.PROGRESS,
                          'expiry': utcnow() + timedelta(minutes=20),
                          ITEM_TYPE: CONTENT_TYPE.TEXT,
                          'unique_name': '#2'},
                         {'guid': '3',
                          '_id': '3',
                          'last_version': 3,
                          config.VERSION: 4,
                          'body_html': 'some body',
                          'urgency': 4,
                          'headline': 'some headline',
                          'abstract': 'Abstract Sample',
                          'anpa_category': [{'qcode': 'A', 'name': 'Sport'}],
                          'pubstatus': 'done',
                          'firstcreated': utcnow(),
                          'byline': 'By Alan Karben',
                          'dateline': {'located': {'city': 'Sydney'}},
                          'slugline': 'taking takes',
                          'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                          'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                                      {'qcode': '04001002', 'name': 'Weather'}],
                          'task': {'user': '******', 'desk': '123456789ABCDEF123456789'},
                          ITEM_STATE: CONTENT_STATE.PROGRESS,
                          'expiry': utcnow() + timedelta(minutes=20),
                          ITEM_TYPE: CONTENT_TYPE.TEXT,
                          'unique_name': '#3'},
                         {'guid': '4',
                          '_id': '4',
                          'headline': 'simple package with 2 items',
                          'last_version': 2,
                          config.VERSION: 3,
                          ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
                          'groups': [{'id': 'root', 'refs': [{'idRef': 'main'}], 'role': 'grpRole:NEP'},
                                     {'id': 'main', 'role': 'grpRole:main',
                                      'refs': [
                                          {'location': ARCHIVE, 'guid': '2', ITEM_TYPE: CONTENT_TYPE.TEXT, RESIDREF: '2'
                                           },
                                          {'location': ARCHIVE, 'guid': '3', ITEM_TYPE: CONTENT_TYPE.TEXT, RESIDREF: '3'
                                           }]
                                      }],
                          'firstcreated': utcnow(),
                          'expiry': utcnow() + timedelta(minutes=20),
                          'unique_name': '#4',
                          ITEM_STATE: CONTENT_STATE.PROGRESS},
                         {'guid': '5',
                          '_id': '5',
                          'headline': 'package and item is also a package',
                          config.VERSION: 3,
                          ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
                          'groups': [{'id': 'root', 'refs': [{'idRef': 'main'}], 'role': 'grpRole:NEP'},
                                     {'id': 'main', 'role': 'grpRole:main',
                                      'refs': [{'location': ARCHIVE, ITEM_TYPE: CONTENT_TYPE.COMPOSITE, RESIDREF: '4'}]
                                      }],
                          'firstcreated': utcnow(),
                          'expiry': utcnow() + timedelta(minutes=20),
                          'unique_name': '#5',
                          ITEM_STATE: CONTENT_STATE.PROGRESS}
                         ]

    def _create_and_insert_into_versions(self, item, insert_last_version_as_published):
        version = item[config.VERSION]
        archive_versions = []

        while version != 0:
            versioned_item = item.copy()
            versioned_item['_id_document'] = versioned_item['_id']
            versioned_item[config.VERSION] = version
            del versioned_item['_id']

            if insert_last_version_as_published and item[config.VERSION] == version:
                versioned_item[ITEM_STATE] = CONTENT_STATE.PUBLISHED

            archive_versions.append(versioned_item)
            version -= 1

        self.app.data.insert('archive_versions', archive_versions)
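
        # e.g. (hypothetical): for an item with config.VERSION == 2 this inserts two
        # docs into archive_versions (versions 2 and 1), each keeping a back-reference
        # to the original via _id_document; the latest may be marked as published.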

    def _move_to_archived_and_assert_can_remove_from_production(self, item_id, assert_function, item_to_assert=None):
        published_service = get_resource_service(PUBLISHED)
        published_item = list(published_service.get_from_mongo(req=None, lookup={'item_id': item_id}))
        self.assertEqual(len(published_item), 1)

        # Moving to archived explicitly
        published_item = published_item[0]
        published_service.patch(id=published_item[config.ID_FIELD], updates={'allow_post_publish_actions': False})

        assert_function(published_service.can_remove_from_production(
            item_to_assert if item_to_assert else published_item))

        return published_item