Example #1
    def enhance_with_archive_items(self, items):
        if items:
            ids = list(set([item.get('item_id') for item in items if item.get('item_id')]))
            archive_items = []
            archive_lookup = {}
            if ids:
                query = {'$and': [{config.ID_FIELD: {'$in': ids}}]}
                archive_req = ParsedRequest()
                archive_req.max_results = len(ids)
                # can't access published items from elastic due to the filter on the archive resource, hence going to mongo
                archive_items = list(superdesk.get_resource_service(ARCHIVE)
                                     .get_from_mongo(req=archive_req, lookup=query))

                takes_service = TakesPackageService()
                takes_service.enhance_items_with_takes_packages(archive_items)
                for item in archive_items:
                    handle_existing_data(item)
                    archive_lookup[item[config.ID_FIELD]] = item

            for item in items:
                archive_item = archive_lookup.get(item.get('item_id'), {config.VERSION: item.get(config.VERSION, 1)})

                updates = {
                    config.ID_FIELD: item.get('item_id'),
                    'item_id': item.get(config.ID_FIELD),
                    'lock_user': archive_item.get('lock_user', None),
                    'lock_time': archive_item.get('lock_time', None),
                    'lock_action': archive_item.get('lock_action', None),
                    'lock_session': archive_item.get('lock_session', None),
                    'archive_item': archive_item if archive_item else None
                }

                item.update(updates)
                handle_existing_data(item)
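
The merge above reduces to a dict lookup keyed on the archive _id, with a version-only stub as the fallback. A self-contained sketch of that pattern (plain '_id'/'_current_version' strings stand in for config.ID_FIELD/config.VERSION; the data is illustrative, not superdesk API):

archive_items = [{'_id': 'a1', 'lock_user': 'u1', '_current_version': 3}]
archive_lookup = {doc['_id']: doc for doc in archive_items}

published = [
    {'_id': 'p1', 'item_id': 'a1'},
    {'_id': 'p2', 'item_id': 'a2'},  # no archive copy for this one
]
for item in published:
    # fall back to a version-only stub when the archive copy is missing
    archive_item = archive_lookup.get(item['item_id'],
                                      {'_current_version': item.get('_current_version', 1)})
    item.update({
        '_id': item['item_id'],   # the two ids are swapped on purpose
        'item_id': item['_id'],
        'lock_user': archive_item.get('lock_user'),
        'archive_item': archive_item,
    })

assert published[0]['lock_user'] == 'u1'
assert published[1]['archive_item'] == {'_current_version': 1}
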
Example #2
    def on_update(self, updates, original):
        """Runs on archive update.

        Overridden to validate the updates to the article and take necessary actions depending on the updates. In brief,
        it does the following:
            1. Sets state, item operation, version created, version creator, sign off and word count.
            2. Resets Item Expiry
            3. If the request is to de-schedule, also checks and de-schedules the associated Takes Package.
            4. Creates Crops if article is a picture
        """
        user = get_user()
        self._validate_updates(original, updates, user)

        if PUBLISH_SCHEDULE in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
            # check if there is a takes package and deschedule the takes package.
            takes_service = TakesPackageService()
            package = takes_service.get_take_package(original)
            if package and package.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
                get_resource_service('published').delete_by_article_id(package.get(config.ID_FIELD))
                self.delete_by_article_ids([package.get(config.ID_FIELD)])
                updates[LINKED_IN_PACKAGES] = [package for package in original.get(LINKED_IN_PACKAGES, [])
                                               if package.get(PACKAGE_TYPE) != TAKES_PACKAGE]
            return

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        remove_unwanted(updates)
        self._add_system_updates(original, updates, user)
        self._add_desk_metadata(updates, original)
        self._handle_media_updates(updates, original, user)
Example #3
    def validate_embargo(self, item):
        """
        Validates the embargo of the item. The following are checked:
            1. Item can't be a package or a take or a re-write of another story
            2. Publish Schedule and Embargo are mutually exclusive
            3. Always a future date, except in the case of Corrected and Killed.
        :raises: SuperdeskApiError.badRequestError() if the validation fails
        """

        if item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE:
            if EMBARGO in item:
                embargo = item.get(SCHEDULE_SETTINGS, {}).get('utc_{}'.format(EMBARGO))
                if embargo:
                    if item.get(PUBLISH_SCHEDULE) or item[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
                        raise SuperdeskApiError.badRequestError("An item can't have both Publish Schedule and Embargo")

                    if (item[ITEM_STATE] not in {CONTENT_STATE.KILLED, CONTENT_STATE.SCHEDULED}) \
                            and embargo <= utcnow():
                        raise SuperdeskApiError.badRequestError("Embargo cannot be earlier than now")

                    package = TakesPackageService().get_take_package(item)
                    if package and package.get(SEQUENCE, 1) > 1:
                        raise SuperdeskApiError.badRequestError("Takes doesn't support Embargo")

                    if item.get('rewrite_of'):
                        raise SuperdeskApiError.badRequestError("Rewrites doesn't support Embargo")

                    if not isinstance(embargo, datetime.date) or not embargo.time():
                        raise SuperdeskApiError.badRequestError("Invalid Embargo")

        elif is_normal_package(item):
            if item.get(EMBARGO):
                raise SuperdeskApiError.badRequestError("A Package doesn't support Embargo")

            self.packageService.check_if_any_item_in_package_has_embargo(item)
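
The future-date rule above is a single comparison on the UTC-normalised embargo. A dependency-free sketch of just that check (utcnow() here is a local stand-in for the superdesk helper; the state strings are illustrative):

from datetime import datetime, timedelta, timezone

def utcnow():
    return datetime.now(timezone.utc)

def check_embargo_in_future(state, embargo):
    # killed and scheduled items are allowed to keep a past embargo
    if state not in {'killed', 'scheduled'} and embargo <= utcnow():
        raise ValueError('Embargo cannot be earlier than now')

check_embargo_in_future('in_progress', utcnow() + timedelta(hours=1))  # passes
check_embargo_in_future('killed', utcnow() - timedelta(hours=1))       # also passes
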
Example #4
    def on_update(self, updates, original):
        """
        Overridden to validate the updates to the article and take necessary actions depending on the updates. In brief,
        it does the following:
            1. Sets state, item operation, version created, version creator, sign off and word count.
            2. Resets Item Expiry
            3. If the request is to de-schedule, also checks and de-schedules the associated Takes Package.
            4. Creates Crops if article is a picture
        """
        user = get_user()
        self._validate_updates(original, updates, user)

        if 'publish_schedule' in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
            self.deschedule_item(updates, original)  # this is a deschedule action

            # check if there is a takes package and deschedule the takes package.
            package = TakesPackageService().get_take_package(original)
            if package and package.get('state') == 'scheduled':
                package_updates = {'publish_schedule': None, 'groups': package.get('groups')}
                self.patch(package.get(config.ID_FIELD), package_updates)

            return

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        remove_unwanted(updates)
        self._add_system_updates(original, updates, user)

        if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
            CropService().create_multiple_crops(updates, original)
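Example #5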
    def enhance_with_archive_items(self, items):
        if items:
            ids = list(set([item.get('item_id') for item in items if item.get('item_id')]))
            archive_items = []
            if ids:
                query = {'$and': [{config.ID_FIELD: {'$in': ids}}]}
                archive_req = ParsedRequest()
                archive_req.max_results = len(ids)
                # can't access published items from elastic due to the filter on the archive resource, hence going to mongo
                archive_items = list(superdesk.get_resource_service(ARCHIVE)
                                     .get_from_mongo(req=archive_req, lookup=query))

                takes_service = TakesPackageService()
                for item in archive_items:
                    handle_existing_data(item)
                    takes_service.enhance_with_package_info(item)

            for item in items:
                archive_item = [i for i in archive_items if i.get(config.ID_FIELD) == item.get('item_id')]
                archive_item = archive_item[0] if len(archive_item) > 0 else \
                    {config.VERSION: item.get(config.VERSION, 1)}

                updates = {
                    config.ID_FIELD: item.get('item_id'),
                    'item_id': item.get(config.ID_FIELD),
                    'lock_user': archive_item.get('lock_user', None),
                    'lock_time': archive_item.get('lock_time', None),
                    'lock_session': archive_item.get('lock_session', None),
                    'archive_item': archive_item if archive_item else None
                }

                item.update(updates)
                handle_existing_data(item)
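Example #6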
    def enhance_with_archive_items(self, items):
        if items:
            ids = list(set([item.get("item_id") for item in items if item.get("item_id")]))
            archive_items = []
            archive_lookup = {}
            if ids:
                query = {"$and": [{config.ID_FIELD: {"$in": ids}}]}
                archive_req = ParsedRequest()
                archive_req.max_results = len(ids)
                # can't access published items from elastic due to the filter on the archive resource, hence going to mongo
                archive_items = list(
                    superdesk.get_resource_service(ARCHIVE).get_from_mongo(req=archive_req, lookup=query)
                )

                takes_service = TakesPackageService()
                takes_service.enhance_items_with_takes_packages(archive_items)
                for item in archive_items:
                    handle_existing_data(item)
                    archive_lookup[item[config.ID_FIELD]] = item

            for item in items:
                archive_item = archive_lookup.get(item.get("item_id"), {config.VERSION: item.get(config.VERSION, 1)})

                updates = {
                    config.ID_FIELD: item.get("item_id"),
                    "item_id": item.get(config.ID_FIELD),
                    "lock_user": archive_item.get("lock_user", None),
                    "lock_time": archive_item.get("lock_time", None),
                    "lock_session": archive_item.get("lock_session", None),
                    "archive_item": archive_item if archive_item else None,
                }

                item.update(updates)
                handle_existing_data(item)
Example #7
    def on_update(self, updates, original):
        """
        Overridden to validate the updates to the article and take necessary actions depending on the updates. In brief,
        it does the following:
            1. Sets state, item operation, version created, version creator, sign off and word count.
            2. Resets Item Expiry
            3. If the request is to de-schedule, also checks and de-schedules the associated Takes Package.
            4. Creates Crops if article is a picture
        """
        user = get_user()
        self._validate_updates(original, updates, user)

        if PUBLISH_SCHEDULE in updates and original[
                ITEM_STATE] == CONTENT_STATE.SCHEDULED:
            self.deschedule_item(updates,
                                 original)  # this is a deschedule action

            # check if there is a takes package and deschedule the takes package.
            takes_service = TakesPackageService()
            package = takes_service.get_take_package(original)
            if package and package.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
                get_resource_service('published').delete_by_article_id(
                    package.get(config.ID_FIELD))
                self.delete_by_article_ids([package.get(config.ID_FIELD)])
                updates[LINKED_IN_PACKAGES] = [
                    package
                    for package in original.get(LINKED_IN_PACKAGES, [])
                    if package.get(PACKAGE_TYPE) != TAKES_PACKAGE
                ]
            return

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        remove_unwanted(updates)
        self._add_system_updates(original, updates, user)

        self._add_desk_metadata(updates, original)

        if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
            CropService().create_multiple_crops(updates, original)

        updates_feature_image = updates.get('associations',
                                            {}).get('featureimage')
        if updates_feature_image and 'poi' in updates_feature_image:
            original_feature_image = original.get('associations',
                                                  {}).get('featureimage', {})
            if original_feature_image and original_feature_image.get(
                    'poi', {}) == updates_feature_image['poi']:
                return
            _id = updates_feature_image[config.ID_FIELD] if config.ID_FIELD in updates_feature_image \
                else original_feature_image[config.ID_FIELD]
            image_item = self.find_one(req=None, _id=_id)
            if image_item:
                image_item['poi'] = updates_feature_image['poi']
                image_item = self.patch(_id, image_item)
                updates['associations']['featureimage'][
                    'renditions'] = image_item['renditions']
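Example #8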
    def _update_rewrite(self, original):
        """ Removes the reference from the rewritten story in published collection """
        rewrite_service = ArchiveRewriteService()
        if original.get('rewrite_of') and original.get('event_id'):
            rewrite_service._clear_rewritten_flag(original.get('event_id'),
                                                  original[config.ID_FIELD],
                                                  'rewritten_by')

        # write the rewritten_by to the take before it is spiked
        archive_service = get_resource_service(ARCHIVE)
        published_service = get_resource_service('published')
        takes_service = TakesPackageService()
        takes_package = takes_service.get_take_package(original)
        if takes_package and takes_package.get(
                SEQUENCE, 0) > 1 and original.get('rewritten_by'):
            # get the rewritten by
            rewritten_by = archive_service.find_one(
                req=None, _id=original.get('rewritten_by'))

            # get the take
            take_id = takes_service.get_take_by_take_no(
                original,
                take_no=takes_package.get(SEQUENCE) - 1,
                package=takes_package)
            take = archive_service.find_one(req=None, _id=take_id)

            # update the take and takes package with rewritten_by
            if take.get('rewritten_by') != rewritten_by[config.ID_FIELD]:
                if take.get(ITEM_STATE) in PUBLISH_STATES:
                    published_service.update_published_items(
                        take_id, 'rewritten_by', rewritten_by[config.ID_FIELD])

                archive_service.system_update(
                    take[config.ID_FIELD],
                    {'rewritten_by': rewritten_by[config.ID_FIELD]}, take)

            if takes_package.get('rewritten_by') != rewritten_by[
                    config.ID_FIELD]:
                if takes_package.get(ITEM_STATE) in PUBLISH_STATES:
                    published_service.update_published_items(
                        takes_package.get(config.ID_FIELD), 'rewritten_by',
                        rewritten_by[config.ID_FIELD])

                archive_service.system_update(
                    takes_package[config.ID_FIELD],
                    {'rewritten_by': rewritten_by[config.ID_FIELD]},
                    takes_package)

            if rewritten_by.get('rewrite_of') != takes_package.get(
                    config.ID_FIELD):
                archive_service.system_update(
                    rewritten_by[config.ID_FIELD],
                    {'rewrite_of': takes_package.get(config.ID_FIELD)},
                    rewritten_by)
Example #9
    def on_update(self, updates, original):
        """Runs on archive update.

        Overridden to validate the updates to the article and take necessary actions depending on the updates. In brief,
        it does the following:
            1. Sets state, item operation, version created, version creator, sign off and word count.
            2. Resets Item Expiry
            3. If the request is to de-schedule, also checks and de-schedules the associated Takes Package.
            4. Creates Crops if article is a picture
        """
        user = get_user()
        self._validate_updates(original, updates, user)

        if PUBLISH_SCHEDULE in updates and original[
                ITEM_STATE] == CONTENT_STATE.SCHEDULED:
            # check if there is a takes package and deschedule the takes package.
            takes_service = TakesPackageService()
            package = takes_service.get_take_package(original)
            if package and package.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
                get_resource_service('published').delete_by_article_id(
                    package.get(config.ID_FIELD))
                self.delete_by_article_ids([package.get(config.ID_FIELD)])
                updates[LINKED_IN_PACKAGES] = [
                    package
                    for package in original.get(LINKED_IN_PACKAGES, [])
                    if package.get(PACKAGE_TYPE) != TAKES_PACKAGE
                ]
            return

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        remove_unwanted(updates)
        self._add_system_updates(original, updates, user)

        self._add_desk_metadata(updates, original)

        if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
            CropService().create_multiple_crops(updates, original)

        # iterate over associations. Validate and process them if they are stored in the database
        if 'associations' in updates:
            for item_name, item_obj in updates.get('associations').items():
                if item_obj and config.ID_FIELD in item_obj:
                    _id = item_obj[config.ID_FIELD]
                    stored_item = self.find_one(req=None, _id=_id)
                    if stored_item:
                        self._validate_updates(stored_item, item_obj, user)
                        if stored_item[
                                ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
                            CropService().create_multiple_crops(
                                item_obj, stored_item)
                        stored_item.update(item_obj)
                        updates['associations'][item_name] = stored_item
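Example #10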
    def set_usn(self, odbc_item, article):
        """
        Set the usn (unique story number) in the odbc item
        :param odbc_item:
        :param article:
        :return:
        """
        takes_package_service = TakesPackageService()
        pkg = takes_package_service.get_take_package(article)
        if pkg is not None:
            odbc_item['usn'] = pkg.get('unique_id', None)  # @usn
        else:
            odbc_item['usn'] = article.get('unique_id', None)  # @usn
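
The method simply prefers the takes package's unique_id over the article's own. A dependency-free equivalent (the takes_package keyword argument stands in for the TakesPackageService lookup, which is assumed here):

def set_usn(odbc_item, article, takes_package=None):
    # prefer the unique story number of the takes package, else the article's
    source = takes_package if takes_package is not None else article
    odbc_item['usn'] = source.get('unique_id')

odbc_item = {}
set_usn(odbc_item, {'unique_id': 42})
assert odbc_item['usn'] == 42
set_usn(odbc_item, {'unique_id': 42}, takes_package={'unique_id': 7})
assert odbc_item['usn'] == 7
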
Example #12
    def on_update(self, updates, original):
        """
        Overridden to validate the updates to the article and take necessary actions depending on the updates. In brief,
        it does the following:
            1. Sets state, item operation, version created, version creator, sign off and word count.
            2. Resets Item Expiry
            3. If the request is to de-schedule, also checks and de-schedules the associated Takes Package.
            4. Creates Crops if article is a picture
        """
        user = get_user()
        self._validate_updates(original, updates, user)

        if PUBLISH_SCHEDULE in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
            self.deschedule_item(updates, original)  # this is a deschedule action

            # check if there is a takes package and deschedule the takes package.
            takes_service = TakesPackageService()
            package = takes_service.get_take_package(original)
            if package and package.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
                get_resource_service('published').delete_by_article_id(package.get(config.ID_FIELD))
                self.delete_by_article_ids([package.get(config.ID_FIELD)])
                updates[LINKED_IN_PACKAGES] = [package for package in original.get(LINKED_IN_PACKAGES, [])
                                               if package.get(PACKAGE_TYPE) != TAKES_PACKAGE]
            return

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        remove_unwanted(updates)
        self._add_system_updates(original, updates, user)

        self._add_desk_metadata(updates, original)

        if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
            CropService().create_multiple_crops(updates, original)

        updates_feature_image = updates.get('associations', {}).get('featureimage')
        if updates_feature_image and 'poi' in updates_feature_image:
            original_feature_image = original.get('associations', {}).get('featureimage', {})
            if original_feature_image and original_feature_image.get('poi', {}) == updates_feature_image['poi']:
                return
            _id = updates_feature_image[config.ID_FIELD] if config.ID_FIELD in updates_feature_image \
                else original_feature_image[config.ID_FIELD]
            image_item = self.find_one(req=None, _id=_id)
            if image_item:
                image_item['poi'] = updates_feature_image['poi']
                image_item = self.patch(_id, image_item)
                updates['associations']['featureimage']['renditions'] = image_item['renditions']
Example #13
    def _update_rewrite(self, original):
        """Removes the reference from the rewritten story in published collection."""
        rewrite_service = ArchiveRewriteService()
        if original.get('rewrite_of') and original.get('event_id'):
            rewrite_service._clear_rewritten_flag(original.get('event_id'),
                                                  original[config.ID_FIELD], 'rewritten_by')

        # write the rewritten_by to the take before it is spiked
        archive_service = get_resource_service(ARCHIVE)
        published_service = get_resource_service('published')
        takes_service = TakesPackageService()
        takes_package = takes_service.get_take_package(original)

        if takes_package and takes_package.get(SEQUENCE, 0) > 1 and original.get('rewritten_by'):
            # get the rewritten by
            rewritten_by = archive_service.find_one(req=None, _id=original.get('rewritten_by'))
            # get the take
            take_id = takes_service.get_take_by_take_no(original,
                                                        take_no=takes_package.get(SEQUENCE) - 1,
                                                        package=takes_package)
            take = archive_service.find_one(req=None, _id=take_id)

            # update the take and takes package with rewritten_by
            if take.get('rewritten_by') != rewritten_by[config.ID_FIELD]:
                if take.get(ITEM_STATE) in PUBLISH_STATES:
                    published_service.update_published_items(take_id, 'rewritten_by', rewritten_by[config.ID_FIELD])

                archive_service.system_update(take[config.ID_FIELD],
                                              {'rewritten_by': rewritten_by[config.ID_FIELD]}, take)

            if takes_package.get('rewritten_by') != rewritten_by[config.ID_FIELD]:
                if takes_package.get(ITEM_STATE) in PUBLISH_STATES:
                    published_service.update_published_items(takes_package.get(config.ID_FIELD),
                                                             'rewritten_by', rewritten_by[config.ID_FIELD])

                archive_service.system_update(takes_package[config.ID_FIELD],
                                              {'rewritten_by': rewritten_by[config.ID_FIELD]}, takes_package)

            if rewritten_by.get('rewrite_of') != takes_package.get(config.ID_FIELD):
                archive_service.system_update(rewritten_by[config.ID_FIELD],
                                              {'rewrite_of': takes_package.get(config.ID_FIELD)},
                                              rewritten_by)
        elif original.get('rewritten_by') or (takes_package and takes_package.get('rewritten_by')):
            # we are spiking the story from which the rewrite was triggered.
            # in this case both rewrite_of and rewritten_by are published.
            rewrite_id = original.get('rewritten_by') or takes_package.get('rewritten_by')
            rewritten_by = archive_service.find_one(req=None, _id=rewrite_id)
            archive_service.system_update(rewrite_id, {'rewrite_of': None, 'rewrite_sequence': 0}, rewritten_by)
Example #14
    def validate_embargo(self, item):
        """
        Validates the embargo of the item. The following are checked:
            1. Item can't be a package or a take or a re-write of another story
            2. Publish Schedule and Embargo are mutually exclusive
            3. Always a future date, except in the case of Corrected and Killed.
        :raises: SuperdeskApiError.badRequestError() if the validation fails
        """

        if item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE:
            embargo = item.get(EMBARGO)
            if embargo:
                if item.get('publish_schedule') or item[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
                    raise SuperdeskApiError.badRequestError("An item can't have both Publish Schedule and Embargo")

                package = TakesPackageService().get_take_package(item)
                if package:
                    raise SuperdeskApiError.badRequestError("Takes doesn't support Embargo")

                if item.get('rewrite_of'):
                    raise SuperdeskApiError.badRequestError("Rewrites doesn't support Embargo")

                if not isinstance(embargo, datetime.date) or not embargo.time():
                    raise SuperdeskApiError.badRequestError("Invalid Embargo")

                if item[ITEM_STATE] not in PUBLISH_STATES and embargo <= utcnow():
                    raise SuperdeskApiError.badRequestError("Embargo cannot be earlier than now")
        elif is_normal_package(item):
            if item.get(EMBARGO):
                raise SuperdeskApiError.badRequestError("A Package doesn't support Embargo")

            self.packageService.check_if_any_item_in_package_has_embargo(item)
Example #15
    def on_update(self, updates, original):
        """Runs on archive update.

        Overridden to validate the updates to the article and take necessary actions depending on the updates. In brief,
        it does the following:
            1. Sets state, item operation, version created, version creator, sign off and word count.
            2. Resets Item Expiry
            3. If the request is to de-schedule, also checks and de-schedules the associated Takes Package.
            4. Creates Crops if article is a picture
        """
        user = get_user()
        self._validate_updates(original, updates, user)

        if PUBLISH_SCHEDULE in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
            # check if there is a takes package and deschedule the takes package.
            takes_service = TakesPackageService()
            package = takes_service.get_take_package(original)
            if package and package.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
                get_resource_service('published').delete_by_article_id(package.get(config.ID_FIELD))
                self.delete_by_article_ids([package.get(config.ID_FIELD)])
                updates[LINKED_IN_PACKAGES] = [package for package in original.get(LINKED_IN_PACKAGES, [])
                                               if package.get(PACKAGE_TYPE) != TAKES_PACKAGE]
            return

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        remove_unwanted(updates)
        self._add_system_updates(original, updates, user)

        self._add_desk_metadata(updates, original)

        if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
            CropService().create_multiple_crops(updates, original)

        # iterate over associations. Validate and process them if they are stored in the database
        if 'associations' in updates:
            for item_name, item_obj in updates.get('associations').items():
                if item_obj and config.ID_FIELD in item_obj:
                    _id = item_obj[config.ID_FIELD]
                    stored_item = self.find_one(req=None, _id=_id)
                    if stored_item:
                        self._validate_updates(stored_item, item_obj, user)
                        if stored_item[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
                            CropService().create_multiple_crops(item_obj, stored_item)
                        stored_item.update(item_obj)
                        updates['associations'][item_name] = stored_item
Example #16
    def _validate_unlink(self, target):
        """Validates that the links for takes or updates can be removed.

        :param target: article whose links will be removed
        :raises: SuperdeskApiError
        """
        if target[ITEM_TYPE] != CONTENT_TYPE.TEXT:
            raise SuperdeskApiError.badRequestError("Only text stories can be unlinked!")

        # if the story is in published states then it cannot be unlinked
        if target[ITEM_STATE] in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED]:
            raise SuperdeskApiError.badRequestError("Published stories cannot be unlinked!")

        # if the story is not the last take then it cannot be unlinked
        if TakesPackageService().get_take_package(target) and \
                not TakesPackageService().is_last_takes_package_item(target):
            raise SuperdeskApiError.badRequestError("Only the last take can be unlinked!")
Example #17
class ArchiveLinkService(Service):
    packageService = TakesPackageService()

    def create(self, docs, **kwargs):
        target_id = request.view_args['target_id']
        doc = docs[0]
        link_id = doc.get('link_id')
        desk_id = doc.get('desk')
        service = get_resource_service(ARCHIVE)
        target = service.find_one(req=None, _id=target_id)
        self._validate_link(target, target_id)
        link = {}

        if desk_id:
            link = {'task': {'desk': desk_id}}
            user = get_user()
            lookup = {'_id': desk_id, 'members.user': user['_id']}
            desk = get_resource_service('desks').find_one(req=None, **lookup)
            if not desk:
                raise SuperdeskApiError.forbiddenError(
                    "No privileges to create new take on requested desk.")

            link['task']['stage'] = desk['working_stage']

        if link_id:
            link = service.find_one(req=None, _id=link_id)

        linked_item = self.packageService.link_as_next_take(target, link)
        insert_into_versions(id_=linked_item[config.ID_FIELD])
        doc.update(linked_item)
        build_custom_hateoas(CUSTOM_HATEOAS, doc)
        return [linked_item['_id']]

    def _validate_link(self, target, target_id):
        """Validates the article to be linked.

        :param target: article to be linked
        :param target_id: id of the article to be linked
        :raises: SuperdeskApiError
        """
        if not target:
            raise SuperdeskApiError.notFoundError(
                message='Cannot find the target item with id {}.'.format(
                    target_id))

        if target.get(EMBARGO):
            raise SuperdeskApiError.badRequestError(
                "Takes can't be created for an Item having Embargo")

        if is_genre(target, BROADCAST_GENRE):
            raise SuperdeskApiError.badRequestError(
                "Cannot add new take to the story with genre as broadcast.")

        if get_resource_service('published').is_rewritten_before(
                target['_id']):
            raise SuperdeskApiError.badRequestError(
                message='Article has been rewritten before!')
Example #18
    def enhance_with_archive_items(self, items):
        if items:
            ids = list(set([item.get("item_id") for item in items if item.get("item_id")]))
            archive_items = []
            if ids:
                query = {"$and": [{config.ID_FIELD: {"$in": ids}}]}
                archive_req = ParsedRequest()
                archive_req.max_results = len(ids)
                # can't access published items from elastic due to the filter on the archive resource, hence going to mongo
                archive_items = list(
                    superdesk.get_resource_service(ARCHIVE).get_from_mongo(req=archive_req, lookup=query)
                )

                takes_service = TakesPackageService()
                for item in archive_items:
                    handle_existing_data(item)
                    takes_service.enhance_with_package_info(item)

            for item in items:
                try:
                    archive_item = [i for i in archive_items if i.get(config.ID_FIELD) == item.get("item_id")][0]
                except IndexError:
                    logger.exception(
                        (
                            "Data inconsistency found for the published item {}. "
                            "Cannot find item {} in the archive collection."
                        ).format(item.get(config.ID_FIELD), item.get("item_id"))
                    )
                    archive_item = {}

                updates = {
                    config.ID_FIELD: item.get("item_id"),
                    "item_id": item.get(config.ID_FIELD),
                    "lock_user": archive_item.get("lock_user", None),
                    "lock_time": archive_item.get("lock_time", None),
                    "lock_session": archive_item.get("lock_session", None),
                    "archive_item": archive_item if archive_item else None,
                }

                item.update(updates)
                handle_existing_data(item)
Example #19
    def delete(self, lookup):
        target_id = request.view_args['target_id']
        archive_service = get_resource_service(ARCHIVE)
        target = archive_service.find_one(req=None, _id=target_id)
        self._validate_unlink(target)
        updates = {}

        takes_package = TakesPackageService().get_take_package(target)

        if takes_package and TakesPackageService().is_last_takes_package_item(target):
            # remove the take link
            PackageService().remove_refs_in_package(takes_package, target_id)

        if target.get('rewrite_of'):
            # remove the rewrite info
            ArchiveSpikeService().update_rewrite(target)

        if not takes_package and not target.get('rewrite_of'):
            # there is nothing to do
            raise SuperdeskApiError.badRequestError("Only takes and updates can be unlinked!")

        if target.get('rewrite_of'):
            updates['rewrite_of'] = None

        if target.get('anpa_take_key'):
            updates['anpa_take_key'] = None

        if target.get('rewrite_sequence'):
            updates['rewrite_sequence'] = None

        if target.get('sequence'):
            updates['sequence'] = None

        updates['event_id'] = generate_guid(type=GUID_TAG)

        archive_service.system_update(target_id, updates, target)
        user = get_user(required=True)
        push_notification('item:unlink', item=target_id, user=str(user.get(config.ID_FIELD)))
        app.on_archive_item_updated(updates, target, ITEM_UNLINK)
Example #20
    def enhance_with_archive_items(self, items):
        if items:
            ids = list(set([item.get('item_id') for item in items if item.get('item_id')]))
            archive_items = []
            if ids:
                query = {'$and': [{'_id': {'$in': ids}}]}
                archive_req = ParsedRequest()
                archive_req.max_results = len(ids)
                # can't access published items from elastic due to the filter on the archive resource, hence going to mongo
                archive_items = list(superdesk.get_resource_service(ARCHIVE)
                                     .get_from_mongo(req=archive_req, lookup=query))

                takes_service = TakesPackageService()
                for item in archive_items:
                    handle_existing_data(item)
                    takes_service.enhance_with_package_info(item)

            for item in items:
                try:
                    archive_item = [i for i in archive_items if i.get('_id') == item.get('item_id')][0]
                except IndexError:
                    logger.exception(('Data inconsistency found for the published item {}. '
                                      'Cannot find item {} in the archive collection.')
                                     .format(item.get('_id'), item.get('item_id')))
                    archive_item = {}

                updates = {
                    '_id': item.get('item_id'),
                    'item_id': item.get('_id'),
                    'lock_user': archive_item.get('lock_user', None),
                    'lock_time': archive_item.get('lock_time', None),
                    'lock_session': archive_item.get('lock_session', None),
                    'archive_item': archive_item if archive_item else None
                }

                item.update(updates)
                handle_existing_data(item)
Example #21
class BasePublishService(BaseService):
    """
    Base service class for "publish" endpoint
    """

    publish_type = 'publish'
    published_state = 'published'

    non_digital = partial(filter, lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE)
    digital = partial(filter, lambda s: (s.get('subscriber_type', '') in {SUBSCRIBER_TYPES.DIGITAL,
                                                                          SUBSCRIBER_TYPES.ALL}))
    takes_package_service = TakesPackageService()
    package_service = PackageService()

    def on_update(self, updates, original):
        self._validate(original, updates)
        self._set_updates(original, updates, updates.get(config.LAST_UPDATED, utcnow()))
        convert_task_attributes_to_objectId(updates)
        self._process_publish_updates(original, updates)

    def on_updated(self, updates, original):
        original = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        updates.update(original)

        if updates[ITEM_OPERATION] != ITEM_KILL and \
                original.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            get_resource_service('archive_broadcast').on_broadcast_master_updated(updates[ITEM_OPERATION], original)

        get_resource_service('archive_broadcast').reset_broadcast_status(updates, original)
        push_content_notification([updates])
        self._import_into_legal_archive(updates)

    def update(self, id, updates, original):
        """
        Handles workflow of each Publish, Corrected and Killed.
        """
        try:
            user = get_user()
            auto_publish = updates.pop('auto_publish', False)

            if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._publish_package_items(original, updates)
                self._update_archive(original, updates, should_insert_into_versions=auto_publish)
            else:
                self._publish_associations(original, id)
                updated = deepcopy(original)
                updated.update(updates)

                if self.published_state != CONTENT_STATE.KILLED:
                    self._process_takes_package(original, updated, updates)

                self._update_archive(original, updated, should_insert_into_versions=auto_publish)
                self.update_published_collection(published_item_id=original[config.ID_FIELD], updated=updated)

            from apps.publish.enqueue import enqueue_published
            enqueue_published.apply_async()

            push_notification('item:publish', item=str(id),
                              unique_name=original['unique_name'],
                              desk=str(original.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')))
        except SuperdeskApiError as e:
            raise e
        except KeyError as e:
            raise SuperdeskApiError.badRequestError(
                message="Key is missing on article to be published: {}".format(str(e)))
        except Exception as e:
            logger.exception("Something bad happened while publishing %s".format(id))
            raise SuperdeskApiError.internalError(message="Failed to publish the item: {}".format(str(e)))

    def _process_takes_package(self, original, updated, updates):
        # if targeted_for is set then we don't send to digital clients.
        targeted_for = updates.get('targeted_for', original.get('targeted_for'))
        if original[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED} \
                and not (targeted_for or is_genre(original, BROADCAST_GENRE)):
            # check if item is in a digital package
            last_updated = updates.get(config.LAST_UPDATED, utcnow())
            package = self.takes_package_service.get_take_package(original)
            if not package:
                '''
                If the type of the item is text or preformatted then the item needs to be sent to
                digital subscribers, so package the item as a take.
                '''
                package_id = self.takes_package_service.package_story_as_a_take(updated, {}, None)
                package = get_resource_service(ARCHIVE).find_one(req=None, _id=package_id)
            package_id = package[config.ID_FIELD]
            package_updates = self.process_takes(updates_of_take_to_be_published=updates,
                                                 original_of_take_to_be_published=original,
                                                 package=package)
            # If the original package is corrected then the next take shouldn't change it
            # back to 'published'
            preserve_state = package.get(ITEM_STATE, '') == CONTENT_STATE.CORRECTED and \
                updates.get(ITEM_OPERATION, ITEM_PUBLISH) == ITEM_PUBLISH
            self._set_updates(package, package_updates, last_updated, preserve_state)
            package_updates.setdefault(ITEM_OPERATION, updates.get(ITEM_OPERATION, ITEM_PUBLISH))
            self._update_archive(package, package_updates)
            package.update(package_updates)
            self.update_published_collection(published_item_id=package_id)
            self._import_into_legal_archive(package)

    def _validate(self, original, updates):
        self.raise_if_not_marked_for_publication(original)
        self.raise_if_invalid_state_transition(original)

        updated = original.copy()
        updated.update(updates)

        takes_package = self.takes_package_service.get_take_package(original)

        if self.publish_type == 'publish':
            # validate if take can be published
            if takes_package and not self.takes_package_service.can_publish_take(
                    takes_package, updates.get(SEQUENCE, original.get(SEQUENCE, 1))):
                raise PublishQueueError.previous_take_not_published_error(
                    Exception("Previous takes are not published."))

            validate_schedule(updated.get(PUBLISH_SCHEDULE), takes_package.get(SEQUENCE, 1) if takes_package else 1)
            update_schedule_settings(updated, PUBLISH_SCHEDULE, updated.get(PUBLISH_SCHEDULE))

            if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and updates.get(EMBARGO):
                get_resource_service(ARCHIVE).validate_embargo(updated)

        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            if updates.get(EMBARGO):
                raise SuperdeskApiError.badRequestError("Embargo can't be set after publishing")

            if updates.get('dateline'):
                raise SuperdeskApiError.badRequestError("Dateline can't be modified after publishing")

        if self.publish_type == ITEM_PUBLISH and updated.get('rewritten_by'):
            # if the update is published then the user cannot publish the take
            rewritten_by = get_resource_service(ARCHIVE).find_one(req=None, _id=updated.get('rewritten_by'))
            if rewritten_by and rewritten_by.get(ITEM_STATE) in PUBLISH_STATES:
                raise SuperdeskApiError.badRequestError("Cannot publish the story after Update is published.!")

        validate_item = {'act': self.publish_type, 'type': original['type'], 'validate': updated}
        validation_errors = get_resource_service('validate').post([validate_item])
        if validation_errors[0]:
            raise ValidationError(validation_errors)

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            package_validation_errors = []
            self._validate_package_contents(original, takes_package, package_validation_errors)
            if len(package_validation_errors) > 0:
                raise ValidationError(package_validation_errors)

            self._validate_package(original, updates)

    def _validate_package(self, package, updates):
        items = self.package_service.get_residrefs(package)
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if a correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!")

    def raise_if_not_marked_for_publication(self, original):
        if original.get('flags', {}).get('marked_for_not_publication', False):
            raise SuperdeskApiError.badRequestError('Cannot publish an item which is marked as Not for Publication')

    def raise_if_invalid_state_transition(self, original):
        if not is_workflow_state_transition_valid(self.publish_type, original[ITEM_STATE]):
            error_message = "Can't {} as item state is {}" if original[ITEM_TYPE] == CONTENT_TYPE.TEXT else \
                "Can't {} as either package state or one of the items state is {}"
            raise InvalidStateTransitionError(error_message.format(self.publish_type, original[ITEM_STATE]))

    def get_digital_id_for_package_item(self, package_item):
        """
        Finds the digital item id for a given item in a package
        :param package_item: item in a package
        :return string: Digital item id if there's one, otherwise the id of package_item
        """
        if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            return package_item[config.ID_FIELD]
        else:
            package_item_takes_package_id = self.takes_package_service.get_take_package_id(package_item)
            if not package_item_takes_package_id:
                return package_item[config.ID_FIELD]
            return package_item_takes_package_id

    def _process_publish_updates(self, original, updates):
        """ Common updates for published items """
        desk = None
        if original.get('task', {}).get('desk'):
            desk = get_resource_service('desks').find_one(req=None, _id=original['task']['desk'])
        if not original.get('ingest_provider'):
            updates['source'] = desk['source'] if desk and desk.get('source', '') \
                else app.settings['DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES']
        updates['pubstatus'] = PUB_STATUS.CANCELED if self.publish_type == 'kill' else PUB_STATUS.USABLE
        self._set_item_expiry(updates, original)

    def _set_item_expiry(self, updates, original):
        """
        Set the expiry for the item
        :param dict updates: doc on which publishing action is performed
        """
        desk_id = original.get('task', {}).get('desk')
        stage_id = original.get('task', {}).get('stage')

        offset = None  # avoid an unbound name when neither embargo nor schedule is set
        if EMBARGO in updates or PUBLISH_SCHEDULE in updates:
            offset = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(updates, EMBARGO)
        elif EMBARGO in original or PUBLISH_SCHEDULE in original:
            offset = get_utc_schedule(original, PUBLISH_SCHEDULE) or get_utc_schedule(original, EMBARGO)

        updates['expiry'] = get_expiry(desk_id, stage_id, offset=offset)

    def _is_take_item(self, item):
        """ Returns True if the item was a take
        """
        return item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and \
            (not (item.get('targeted_for') or is_genre(item, BROADCAST_GENRE)))

    def process_takes(self, updates_of_take_to_be_published, package, original_of_take_to_be_published=None):
        """
        Primary rule for publishing a Take in Takes Package is: all previous takes must be published before a take
        can be published.

        Also, generates body_html of the takes package and makes sure the metadata for the package is the same as the
        metadata of the take to be published.

        :param dict updates_of_take_to_be_published: updates for the take to be published
        :param dict package: Takes package to publish
        :param dict original_of_take_to_be_published: original of the take to be published
        :return: Takes Package Updates
        """

        takes = self.takes_package_service.get_published_takes(package)
        body_html = updates_of_take_to_be_published.get('body_html',
                                                        original_of_take_to_be_published.get('body_html', ''))
        package_updates = {}

        groups = package.get(GROUPS, [])
        if groups:
            take_refs = [ref for group in groups if group['id'] == 'main' for ref in group.get('refs')]
            sequence_num_of_take_to_be_published = 0
            take_article_id = updates_of_take_to_be_published.get(
                config.ID_FIELD, original_of_take_to_be_published[config.ID_FIELD])

            for r in take_refs:
                if r[GUID_FIELD] == take_article_id:
                    sequence_num_of_take_to_be_published = r[SEQUENCE]
                    r['is_published'] = True
                    break

            if takes and self.published_state != 'killed':
                body_html_list = [take.get('body_html', '') for take in takes]
                if self.published_state == 'published':
                    body_html_list.append(body_html)
                else:
                    body_html_list[sequence_num_of_take_to_be_published - 1] = body_html

                package_updates['body_html'] = '<br>'.join(body_html_list)
            else:
                package_updates['body_html'] = body_html

            metadata_tobe_copied = self.takes_package_service.fields_for_creating_take.copy()
            metadata_tobe_copied.extend([PUBLISH_SCHEDULE, SCHEDULE_SETTINGS, 'byline'])
            updated_take = original_of_take_to_be_published.copy()
            updated_take.update(updates_of_take_to_be_published)
            metadata_from = updated_take
            # this rule has changed to use the last published metadata
            # per ticket SD-3885
            # if self.published_state == 'corrected' and len(takes) > 1:
            #     # get the last take metadata only if there are more than one takes
            #     metadata_from = takes[-1]

            for metadata in metadata_tobe_copied:
                if metadata in metadata_from:
                    package_updates[metadata] = metadata_from.get(metadata)

            if self.published_state == 'killed':
                # when the package is killed, refresh the other take refs
                # to reflect the correct version, headline and slugline
                archive_service = get_resource_service(ARCHIVE)
                for ref in take_refs:
                    if ref.get(RESIDREF) != take_article_id:
                        archive_item = archive_service.find_one(req=None, _id=ref.get(RESIDREF))
                        ref['headline'] = archive_item.get('headline')
                        ref['slugline'] = archive_item.get('slugline')
                        ref[config.VERSION] = archive_item.get(config.VERSION)

            take_ref = next((ref for ref in take_refs if ref.get(RESIDREF) == take_article_id), None)
            if take_ref:
                # for published take update the version, headline and slugline
                take_ref['headline'] = updated_take.get('headline')
                take_ref['slugline'] = updated_take.get('slugline')
                take_ref[config.VERSION] = updated_take.get(config.VERSION)

            package_updates[GROUPS] = groups

        return package_updates

    def _publish_package_items(self, package, updates):
        """
        Publishes all items of a package recursively then publishes the package itself
        :param package: package to publish
        :param updates: payload
        """
        items = self.package_service.get_residrefs(package)

        if len(items) == 0 and self.publish_type == ITEM_PUBLISH:
            raise SuperdeskApiError.badRequestError("Empty package cannot be published!")

        removed_items = []
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if a correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!")
            items.extend(added_items)

        if items:
            archive_publish = get_resource_service('archive_publish')
            for guid in items:
                package_item = super().find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        "Package item with id: {} does not exist.".format(guid))

                if package_item[ITEM_STATE] not in PUBLISH_STATES:  # if the item is not published then publish it
                    if package_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        # if the item is a package do recursion to publish
                        sub_updates = {i: updates[i] for i in ['state', 'operation'] if i in updates}
                        sub_updates['groups'] = list(package_item['groups'])
                        self._publish_package_items(package_item, sub_updates)
                        self._update_archive(original=package_item, updates=sub_updates,
                                             should_insert_into_versions=False)
                    else:
                        # publish the item
                        package_item[PUBLISHED_IN_PACKAGE] = package[config.ID_FIELD]
                        archive_publish.patch(id=package_item.pop(config.ID_FIELD), updates=package_item)

                    insert_into_versions(id_=guid)

                elif guid in removed_items:
                    # remove the package information from the package item.
                    linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES)
                                          if linked.get(PACKAGE) != package.get(config.ID_FIELD)]
                    super().system_update(guid, {LINKED_IN_PACKAGES: linked_in_packages}, package_item)

                package_item = super().find_one(req=None, _id=guid)
                self.package_service.update_field_in_package(updates, package_item[config.ID_FIELD],
                                                             config.VERSION, package_item[config.VERSION])

        updated = deepcopy(package)
        updated.update(updates)
        self.update_published_collection(published_item_id=package[config.ID_FIELD], updated=updated)

    def update_published_collection(self, published_item_id, updated=None):
        """
        Updates the published collection with the published item.
        Sets last_published_version to False for the previous versions of the published item.
        :param str published_item_id: _id of the document.
        """
        published_item = super().find_one(req=None, _id=published_item_id)
        published_item = copy(published_item)
        if updated:
            published_item.update(updated)
        published_item['is_take_item'] = self.takes_package_service.get_take_package_id(published_item) is not None
        if not published_item.get('digital_item_id'):
            published_item['digital_item_id'] = self.get_digital_id_for_package_item(published_item)
        get_resource_service(PUBLISHED).update_published_items(published_item_id, LAST_PUBLISHED_VERSION, False)
        return get_resource_service(PUBLISHED).post([published_item])
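
    # A minimal sketch (hypothetical documents, not from the source) of the
    # effect described above: posting the new entry demotes the previous
    # last-published version, so exactly one entry keeps the flag.
    _before = [{'item_id': 'x', '_current_version': 2, 'last_published_version': True}]
    _after = [
        {'item_id': 'x', '_current_version': 2, 'last_published_version': False},
        {'item_id': 'x', '_current_version': 3, 'last_published_version': True},
    ]
    assert sum(1 for _d in _after if _d['last_published_version']) == 1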

    def set_state(self, original, updates):
        """
        Set the state of the document based on the action (publish, correction, kill)
        :param dict original: original document
        :param dict updates: updates related to document
        """
        updates[PUBLISH_SCHEDULE] = None
        updates[SCHEDULE_SETTINGS] = {}
        updates[ITEM_STATE] = self.published_state

    def _set_updates(self, original, updates, last_updated, preserve_state=False):
        """
        Sets config.VERSION, config.LAST_UPDATED, ITEM_STATE in updates document.
        If the item is being published and an embargo is set then 'Embargoed.' is appended to the Editorial Note.

        :param dict original: original document
        :param dict updates: updates related to the original document
        :param datetime last_updated: datetime of the updates
        :param bool preserve_state: when True, the item state is left unchanged
        """
        if not preserve_state:
            self.set_state(original, updates)
        updates.setdefault(config.LAST_UPDATED, last_updated)

        if original[config.VERSION] == updates.get(config.VERSION, original[config.VERSION]):
            resolve_document_version(document=updates, resource=ARCHIVE, method='PATCH', latest_doc=original)

        if updates.get(EMBARGO, original.get(EMBARGO)) \
                and updates.get('ednote', original.get('ednote', '')).find('Embargo') == -1:
            updates['ednote'] = '{} {}'.format(original.get('ednote', ''), 'Embargoed.').strip()

        user = get_user()
        if user and user.get(config.ID_FIELD):
            updates['version_creator'] = user[config.ID_FIELD]

    def _update_archive(self, original, updates, versioned_doc=None, should_insert_into_versions=True):
        """
        Updates the article in the archive collection and inserts the latest version into archive_versions.
        Also clears autosaved versions if any.
        :param versioned_doc: doc which can be inserted into archive_versions
        :param bool should_insert_into_versions: if True, inserts the latest document into the versions collection
        """

        self.backend.update(self.datasource, original[config.ID_FIELD], updates, original)

        if should_insert_into_versions:
            if versioned_doc is None:
                insert_into_versions(id_=original[config.ID_FIELD])
            else:
                insert_into_versions(doc=versioned_doc)

        get_component(ItemAutosave).clear(original[config.ID_FIELD])

    def _get_changed_items(self, existing_items, updates):
        """
        Returns the added and removed items from existing_items
        :param existing_items: Existing list
        :param updates: Changes
        :return: list of removed items and list of added items
        """
        if 'groups' in updates:
            new_items = self.package_service.get_residrefs(updates)
            removed_items = list(set(existing_items) - set(new_items))
            added_items = list(set(new_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []
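
    # A minimal illustration (made-up GUIDs) of the set arithmetic above:
    # items present before but absent from the update are "removed", and
    # items only present in the update are "added".
    _existing = ['guid-1', 'guid-2', 'guid-3']
    _new = ['guid-2', 'guid-3', 'guid-4']
    assert sorted(set(_existing) - set(_new)) == ['guid-1']  # removed
    assert sorted(set(_new) - set(_existing)) == ['guid-4']  # added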

    def _validate_package_contents(self, package, takes_package, validation_errors=None):
        """
        If the item passed is a package, ensures that its unpublished content validates, that none of the content
        is locked by a session other than the publishing one, and that no killed or spiked content is included

        :param package: package to validate
        :param takes_package: takes package associated with the item, if any
        :param validation_errors: list to which validation errors are appended, if there are any.
        """
        if validation_errors is None:
            # use a fresh list instead of a shared mutable default argument
            validation_errors = []
        # Ensure it is the sort of thing we need to validate
        if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and not takes_package and self.publish_type == ITEM_PUBLISH:
            items = self.package_service.get_residrefs(package)

            # make sure package is not scheduled or spiked
            if package[ITEM_STATE] in (CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                validation_errors.append('Package cannot be {}'.format(package[ITEM_STATE]))

            if package.get(EMBARGO):
                validation_errors.append('Package cannot have Embargo')

            if items:
                for guid in items:
                    doc = super().find_one(req=None, _id=guid)

                    if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        digital = self.takes_package_service.get_take_package(doc) or {}
                        self._validate_package_contents(doc, digital, validation_errors)

                    # make sure no items are killed or spiked or scheduled
                    if doc[ITEM_STATE] in (CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                        validation_errors.append('Package cannot contain {} item'.format(doc[ITEM_STATE]))

                    if doc.get(EMBARGO):
                        validation_errors.append('Package cannot have Items with Embargo')

                    # don't validate items that have already been published
                    if doc[ITEM_STATE] not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
                        validate_item = {'act': self.publish_type, 'type': doc[ITEM_TYPE], 'validate': doc}
                        errors = get_resource_service('validate').post([validate_item], headline=True)
                        if errors[0]:
                            validation_errors.extend(errors[0])

                    # check the locks on the items
                    if doc.get('lock_session', None) and package['lock_session'] != doc['lock_session']:
                        validation_errors.extend(['{}: packaged item cannot be locked'.format(doc['headline'])])

    def _import_into_legal_archive(self, doc):
        """
        Import into legal archive async
        :param {dict} doc: document to be imported
        """

        if doc.get(ITEM_STATE) != CONTENT_STATE.SCHEDULED:
            kwargs = {
                'item_id': doc.get(config.ID_FIELD)
            }

            # countdown=3 gives elasticsearch time to refresh with the archive and published changes
            import_into_legal_archive.apply_async(countdown=3, kwargs=kwargs)  # @UndefinedVariable

    def _publish_associations(self, parent, guid):
        """Publish parent item associations."""
        associations = parent.get('associations', {})
        for rel, item in associations.copy().items():
            if item.get('pubstatus', 'usable') != 'usable':
                associations.pop(rel)
                continue
            self._publish_renditions(item, rel, guid)

    def _publish_renditions(self, item, rel, guid):
        """Publish item renditions."""
        images = []
        renditions = item.get('renditions', {})
        original = renditions.get('original')
        crop_service = CropService()
        for rendition_name, rendition in renditions.items():
            crop = get_crop(rendition)
            rend_spec = crop_service.get_crop_by_name(rendition_name)
            if crop and rend_spec:
                file_name = '%s/%s/%s' % (guid, rel, rendition_name)
                rendition['media'] = app.media.media_id(file_name, original.get('mimetype'))
                rendition['href'] = app.media.url_for_media(rendition['media'], original.get('mimetype'))
                rendition['width'] = rend_spec.get('width')
                rendition['height'] = rend_spec.get('height')
                rendition['ratio'] = rend_spec.get('ratio')
                rendition['mimetype'] = original.get('mimetype')
                images.append({
                    'rendition': rendition_name,
                    'file_name': file_name,
                    'media': rendition['media'],
                    'spec': rend_spec,
                    'crop': crop,
                })
        publish_images.delay(images=images, original=original, item=item)

class EnqueueService:
    """
    Creates the corresponding entries in the publish queue for items marked for publishing
    """

    publish_type = 'publish'
    published_state = 'published'

    non_digital = partial(
        filter,
        lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE)
    digital = partial(
        filter, lambda s: (s.get('subscriber_type', '') in
                           {SUBSCRIBER_TYPES.DIGITAL, SUBSCRIBER_TYPES.ALL}))
    takes_package_service = TakesPackageService()
    package_service = PackageService()

    def _enqueue_item(self, item):
        if item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and item.get(
                PACKAGE_TYPE):
            return self.publish(doc=item,
                                target_media_type=SUBSCRIBER_TYPES.DIGITAL)
        elif item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            return self._publish_package_items(item)
        elif item[ITEM_TYPE] not in [
                CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED
        ]:
            return self.publish(item, SUBSCRIBER_TYPES.DIGITAL)
        else:
            return self.publish(
                item,
                SUBSCRIBER_TYPES.WIRE if item.get('is_take_item') else None)

    def _publish_package_items(self, package):
        """
        Publishes all items of a package recursively then publishes the package itself
        :param package: package to publish
        :param updates: payload
        """
        items = self.package_service.get_residrefs(package)
        subscriber_items = {}
        queued = False
        removed_items = []
        if self.publish_type in ['correct', 'kill']:
            removed_items, added_items = self._get_changed_items(
                items, package)
            # we raise an error if a correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(
                    added_items) == 0 and self.publish_type == 'correct':
                raise SuperdeskApiError.badRequestError(
                    "Corrected package cannot be empty!")
            items.extend(added_items)

        if items:
            archive_service = get_resource_service('archive')
            for guid in items:
                package_item = archive_service.find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        "Package item with id: {} has not been published.".
                        format(guid))

                subscribers, subscriber_codes = self._get_subscribers_for_package_item(
                    package_item)
                digital_item_id = BasePublishService(
                ).get_digital_id_for_package_item(package_item)
                self._extend_subscriber_items(subscriber_items, subscribers,
                                              package_item, digital_item_id,
                                              subscriber_codes)

            for removed_id in removed_items:
                package_item = archive_service.find_one(req=None,
                                                        _id=removed_id)
                subscribers, subscriber_codes = self._get_subscribers_for_package_item(
                    package_item)
                digital_item_id = None
                self._extend_subscriber_items(subscriber_items, subscribers,
                                              package_item, digital_item_id,
                                              subscriber_codes)

            queued = self.publish_package(package,
                                          target_subscribers=subscriber_items)

        return queued

    def _get_changed_items(self, existing_items, package):
        """
        Returns the added and removed items from existing_items
        :param existing_items: Existing list
        :param updates: Changes
        :return: list of removed items and list of added items
        """
        published_service = get_resource_service('published')
        req = ParsedRequest()
        query = {
            'query': {
                'filtered': {
                    'filter': {
                        'and': [{
                            'term': {
                                QUEUE_STATE: PUBLISH_STATE.QUEUED
                            }
                        }, {
                            'term': {
                                'item_id': package['item_id']
                            }
                        }]
                    }
                }
            },
            'sort': [{
                'publish_sequence_no': 'desc'
            }]
        }
        req.args = {'source': json.dumps(query)}
        req.max_results = 1000
        previously_published_packages = published_service.get(req=req,
                                                              lookup=None)
        previously_published_package = previously_published_packages[0]

        if 'groups' in previously_published_package:
            old_items = self.package_service.get_residrefs(
                previously_published_package)
            added_items = list(set(existing_items) - set(old_items))
            removed_items = list(set(old_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []

    def enqueue_item(self, item):
        """
        Creates the corresponding entries in the publish queue for the given item
        :return bool: True if the item is queued, else False.
        """
        try:
            return self._enqueue_item(item)
        except SuperdeskApiError as e:
            raise e
        except KeyError as e:
            raise SuperdeskApiError.badRequestError(
                message="Key is missing on article to be published: {}".format(
                    str(e)))
        except Exception as e:
            logger.exception(
                "Something bad happened while publishing %s",
                item.get(config.ID_FIELD))
            raise SuperdeskApiError.internalError(
                message="Failed to publish the item: {}".format(str(e)))

    def get_subscribers(self, doc, target_media_type):
        """
        Get subscribers for doc based on target_media_type.
        Override this method in the ArchivePublishService, ArchiveCorrectService and ArchiveKillService
        :param doc: Document to publish/correct/kill
        :param target_media_type: dictates if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queued is an Individual Article.
        :return: (list, list, dict) list of filtered subscribers,
                list of subscribers that have not received the item previously (empty list in this case),
                and a dict of product codes per subscriber.
        """
        raise NotImplementedError()
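
    # A hypothetical override sketch (not from the original source) showing the
    # contract assumed by publish() below: return the filtered subscribers, the
    # subscribers yet to receive the item, and the per-subscriber product codes.
    #
    #     class ExamplePublishService(EnqueueService):
    #         def get_subscribers(self, doc, target_media_type):
    #             subscribers = list(get_resource_service('subscribers')
    #                                .get(req=None, lookup={'is_active': True}))
    #             subscribers, codes = self.filter_subscribers(
    #                 doc, subscribers, target_media_type)
    #             return subscribers, [], codes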

    def publish(self, doc, target_media_type=None):
        """
        Queue the content for publishing.
        1. Get the subscribers.
        2. Update the headline of wire stories with the sequence
        3. Queue the content for subscribers
        4. Queue the content for previously published subscribers if any.
        5. Sends a notification if no formatter is found for any of the formats configured in the Subscriber.
        6. If nothing is queued then logs an error.
        :param dict doc: document to publish
        :param str target_media_type: dictates if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queued is an Individual Article.
        :return bool: True if the content is queued, else False
        :raises PublishQueueError.item_not_queued_error:
                if nothing is queued.
        """
        # Step 1
        subscribers, subscribers_yet_to_receive, subscriber_codes = self.get_subscribers(
            doc, target_media_type)

        # Step 2
        if target_media_type == SUBSCRIBER_TYPES.WIRE:
            self._update_headline_sequence(doc)

        # Step 3
        no_formatters, queued = self.queue_transmission(
            deepcopy(doc), subscribers, subscriber_codes)

        # Step 4
        if subscribers_yet_to_receive:
            formatters_not_found, queued_new_subscribers = \
                self.queue_transmission(deepcopy(doc), subscribers_yet_to_receive, subscriber_codes)
            no_formatters.extend(formatters_not_found)
            queued = queued or queued_new_subscribers

        # Step 5
        self._push_formatter_notification(doc, no_formatters)

        # Step 6
        if not target_media_type and not queued:
            logger.exception(
                'Nothing is saved to publish queue for story: {} for action: {}'
                .format(doc[config.ID_FIELD], self.publish_type))

        return queued

    def _push_formatter_notification(self, doc, no_formatters=[]):
        if len(no_formatters) > 0:
            user = get_user()
            push_notification('item:publish:wrong:format',
                              item=str(doc[config.ID_FIELD]),
                              unique_name=doc['unique_name'],
                              desk=str(doc.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')),
                              formats=no_formatters)

    def _get_subscriber_codes(self, subscribers):
        subscriber_codes = {}
        all_products = list(
            get_resource_service('products').get(req=None, lookup=None))

        for subscriber in subscribers:
            codes = self._get_codes(subscriber)
            products = [
                p for p in all_products
                if p[config.ID_FIELD] in subscriber.get('products', [])
            ]

            for product in products:
                codes.extend(self._get_codes(product))
                subscriber_codes[subscriber[config.ID_FIELD]] = list(
                    set(codes))

        return subscriber_codes

    def resend(self, doc, subscribers):
        subscriber_codes = self._get_subscriber_codes(subscribers)
        wire_subscribers = list(self.non_digital(subscribers))
        digital_subscribers = list(self.digital(subscribers))

        if len(wire_subscribers) > 0:
            doc['item_id'] = doc[config.ID_FIELD]
            self._resend_to_subscribers(doc, wire_subscribers,
                                        subscriber_codes)

        if len(digital_subscribers) > 0:
            package = self.takes_package_service.get_take_package(doc)
            package['item_id'] = package[config.ID_FIELD]
            self._resend_to_subscribers(package, digital_subscribers,
                                        subscriber_codes)

    def _resend_to_subscribers(self, doc, subscribers, subscriber_codes):
        formatter_messages, queued = self.queue_transmission(
            doc, subscribers, subscriber_codes)
        self._push_formatter_notification(doc, formatter_messages)
        if not queued:
            logger.exception(
                'Nothing is saved to publish queue for story: {} for action: {}'
                .format(doc[config.ID_FIELD], 'resend'))

    def publish_package(self, package, target_subscribers):
        """
        Publishes a given non-take package to the given subscribers.
        For each subscriber, updates the package definition with the wanted_items for that subscriber
        and removes the unwanted_items that are not supposed to go to that subscriber.
        Text stories are replaced by the digital versions.
        :param package: Package to be published
        :param target_subscribers: List of subscriber and items-per-subscriber
        """
        all_items = self.package_service.get_residrefs(package)
        no_formatters, queued = [], False
        for items in target_subscribers.values():
            updated = deepcopy(package)
            subscriber = items['subscriber']
            codes = items['codes']
            wanted_items = [
                item for item in items['items']
                if items['items'].get(item, None)
            ]
            unwanted_items = [
                item for item in all_items if item not in wanted_items
            ]
            for i in unwanted_items:
                still_items_left = self.package_service.remove_ref_from_inmem_package(
                    updated, i)
                if not still_items_left and self.publish_type != 'correct':
                    # if nothing left in the package to be published and
                    # if not correcting then don't send the package
                    return
            for key in wanted_items:
                self.package_service.replace_ref_in_package(
                    updated, key, items['items'][key])

            formatters, temp_queued = self.queue_transmission(
                updated, [subscriber], {subscriber[config.ID_FIELD]: codes})

            no_formatters.extend(formatters)
            if temp_queued:
                queued = temp_queued

        return queued

    def queue_transmission(self, doc, subscribers, subscriber_codes={}):
        """
        Formats and then queues the article for transmission to the passed subscribers.
        ::Important Note:: format types can repeat across subscribers, but the formatted item cannot be generated
        once per format type for all subscribers, as each formatted item must carry a publish sequence number
        generated per subscriber.
        :param dict doc: document to queue for transmission
        :param list subscribers: list of subscriber dicts
        :param dict subscriber_codes: product codes per subscriber
        :return: (list, bool) tuple of the list of missing formatters and a boolean flag, True if queued else False
        """

        try:
            queued = False
            no_formatters = []
            for subscriber in subscribers:
                try:
                    if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                            subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                        # wire subscribers can get only text and preformatted stories
                        continue

                    for destination in subscriber['destinations']:
                        embed_package_items = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                            PACKAGE_TYPE not in doc and destination['config'].get('packaged', False)
                        if embed_package_items:
                            doc = self._embed_package_items(doc)
                        # Step 2(a)
                        formatter = get_formatter(destination['format'], doc)

                        if not formatter:  # if formatter not found then record it
                            no_formatters.append(destination['format'])
                            continue

                        formatted_docs = formatter.format(
                            doc, subscriber,
                            subscriber_codes.get(subscriber[config.ID_FIELD]))

                        for idx, publish_data in enumerate(formatted_docs):
                            if not isinstance(publish_data, dict):
                                pub_seq_num, formatted_doc = publish_data
                                formatted_docs[idx] = {
                                    'published_seq_num': pub_seq_num,
                                    'formatted_item': formatted_doc
                                }
                            else:
                                assert 'published_seq_num' in publish_data and 'formatted_item' in publish_data,\
                                    "missing keys in publish_data"

                        for publish_queue_item in formatted_docs:
                            publish_queue_item['item_id'] = doc['item_id']
                            publish_queue_item['item_version'] = doc[
                                config.VERSION]
                            publish_queue_item['subscriber_id'] = subscriber[
                                config.ID_FIELD]
                            publish_queue_item['codes'] = subscriber_codes.get(
                                subscriber[config.ID_FIELD])
                            publish_queue_item['destination'] = destination
                            # publish_schedule just indicates that the queue item was created via a scheduled item
                            publish_queue_item[
                                PUBLISH_SCHEDULE] = get_utc_schedule(
                                    doc, PUBLISH_SCHEDULE) or None
                            publish_queue_item['unique_name'] = doc.get(
                                'unique_name', None)
                            publish_queue_item['content_type'] = doc.get(
                                'type', None)
                            publish_queue_item['headline'] = doc.get(
                                'headline', None)
                            publish_queue_item[
                                'publishing_action'] = self.published_state
                            publish_queue_item['ingest_provider'] = \
                                ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                            if doc.get(PUBLISHED_IN_PACKAGE):
                                publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[
                                    PUBLISHED_IN_PACKAGE]
                            try:
                                encoded_item = publish_queue_item.pop(
                                    'encoded_item')
                            except KeyError:
                                pass
                            else:
                                binary = io.BytesIO(encoded_item)
                                publish_queue_item[
                                    'encoded_item_id'] = app.storage.put(
                                        binary)
                            publish_queue_item.pop(ITEM_STATE, None)
                            get_resource_service('publish_queue').post(
                                [publish_queue_item])
                            queued = True
                except Exception:
                    logger.exception(
                        "Failed to queue item for id {} with headline {} for subscriber {}."
                        .format(doc.get(config.ID_FIELD), doc.get('headline'),
                                subscriber.get('name')))

            return no_formatters, queued
        except Exception:
            raise
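
    # A minimal sketch of the tuple-vs-dict normalization in queue_transmission
    # above, with made-up formatter output: either shape ends up as
    # {'published_seq_num': ..., 'formatted_item': ...}.
    _formatted_docs = [(7, '<xml/>'),
                       {'published_seq_num': 8, 'formatted_item': '<xml/>'}]
    for _idx, _publish_data in enumerate(_formatted_docs):
        if not isinstance(_publish_data, dict):
            _seq, _formatted = _publish_data
            _formatted_docs[_idx] = {'published_seq_num': _seq, 'formatted_item': _formatted}
    assert all('published_seq_num' in _d and 'formatted_item' in _d for _d in _formatted_docs)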

    def _embed_package_items(self, package):
        """ Embeds all package items in the package document
        """
        for group in package.get(GROUPS, []):
            if group[GROUP_ID] == ROOT_GROUP:
                continue
            for ref in group[REFS]:
                if RESIDREF not in ref:
                    continue
                package_item = get_resource_service('published').find_one(
                    req=None,
                    item_id=ref[RESIDREF],
                    _current_version=ref[config.VERSION])
                if not package_item:
                    msg = 'Cannot find package %s published item %s' % (
                        package['item_id'], ref['residRef'])
                    raise SuperdeskPublishError(500, msg)
                package_item[config.ID_FIELD] = package_item['item_id']
                ref['package_item'] = package_item
        return package

    def _update_headline_sequence(self, doc):
        """ Updates the headline of the text story if there's any sequence value in it """
        if doc.get(SEQUENCE):
            doc['headline'] = '{}={}'.format(doc['headline'],
                                             doc.get(SEQUENCE))
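
    # A minimal sketch of the headline sequencing above, assuming SEQUENCE
    # resolves to the 'sequence' field (an assumption) and made-up data:
    _doc = {'headline': 'Storm update', 'sequence': 3}
    if _doc.get('sequence'):
        _doc['headline'] = '{}={}'.format(_doc['headline'], _doc.get('sequence'))
    assert _doc['headline'] == 'Storm update=3'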

    def _get_subscribers_for_package_item(self, package_item):
        """
        Finds the list of subscribers for a given item in a package
        :param package_item: item in a package
        :return list: List of subscribers
        :return string: Digital item id if there's one otherwise None
        """
        if package_item[ITEM_TYPE] not in [
                CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED
        ]:
            query = {
                '$and': [{
                    'item_id': package_item[config.ID_FIELD]
                }, {
                    'publishing_action': package_item[ITEM_STATE]
                }]
            }
        else:
            package_item_takes_package = self.takes_package_service.get_take_package(
                package_item)
            if not package_item_takes_package:
                # this item has not been published to digital subscribers so
                # the list of subscribers is empty
                return [], {}

            query = {
                '$and': [{
                    'item_id': package_item_takes_package[config.ID_FIELD]
                }, {
                    'publishing_action':
                    package_item_takes_package[ITEM_STATE]
                }]
            }

        return self._get_subscribers_for_previously_sent_items(query)

    def _get_subscribers_for_previously_sent_items(self, lookup):
        """
        Returns list of subscribers that have previously received the item.
        :param dict lookup: elastic query to filter the publish queue
        :return: list of subscribers and list of product codes per subscriber
        """
        req = ParsedRequest()
        subscribers = []
        subscriber_codes = {}
        queued_items = list(
            get_resource_service('publish_queue').get(req=req, lookup=lookup))
        if len(queued_items) > 0:
            subscriber_ids = {
                queued_item['subscriber_id']
                for queued_item in queued_items
            }
            subscriber_codes = {
                q['subscriber_id']: q.get('codes', [])
                for q in queued_items
            }
            query = {
                '$and': [{
                    config.ID_FIELD: {
                        '$in': list(subscriber_ids)
                    }
                }]
            }
            subscribers = list(
                get_resource_service('subscribers').get(req=None,
                                                        lookup=query))
        return subscribers, subscriber_codes

    def filter_subscribers(self, doc, subscribers, target_media_type):
        """
        Filter subscribers to whom the current document is going to be delivered.
        :param doc: Document to publish/kill/correct
        :param subscribers: List of Subscribers that might potentially get this document
        :param target_media_type: dictates if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queued is an Individual Article.
        :return: list of filtered subscribers and dict of product codes per subscriber.
        """
        filtered_subscribers = []
        subscriber_codes = {}
        req = ParsedRequest()
        req.args = {'is_global': True}
        filter_service = get_resource_service('content_filters')
        existing_products = {
            p[config.ID_FIELD]: p
            for p in list(
                get_resource_service('products').get(req=req, lookup=None))
        }
        global_filters = list(filter_service.get(req=req, lookup=None))

        for subscriber in subscribers:
            if target_media_type and subscriber.get(
                    'subscriber_type', '') != SUBSCRIBER_TYPES.ALL:
                can_send_takes_packages = subscriber[
                    'subscriber_type'] == SUBSCRIBER_TYPES.DIGITAL
                if target_media_type == SUBSCRIBER_TYPES.WIRE and can_send_takes_packages or \
                        target_media_type == SUBSCRIBER_TYPES.DIGITAL and not can_send_takes_packages:
                    continue

            conforms, skip_filters = self.conforms_subscriber_targets(
                subscriber, doc)
            if not conforms:
                continue

            if not self.conforms_global_filter(subscriber, global_filters,
                                               doc):
                continue

            product_codes = self._get_codes(subscriber)
            subscriber_added = False
            for product_id in subscriber.get('products', []):
                # check if the product filter conforms with the story
                product = existing_products.get(product_id)

                if not product:
                    continue

                if not self.conforms_product_targets(product, doc):
                    continue

                if self.conforms_content_filter(product, doc):
                    # gather the codes of products
                    product_codes.extend(self._get_codes(product))
                    if not subscriber_added:
                        filtered_subscribers.append(subscriber)
                        subscriber_added = True

            if skip_filters and not subscriber_added:
                filtered_subscribers.append(subscriber)
                subscriber_added = True

            # unify the list of codes by removing duplicates
            if subscriber_added:
                subscriber_codes[subscriber[config.ID_FIELD]] = list(
                    set(product_codes))

        return filtered_subscribers, subscriber_codes

    def conforms_product_targets(self, product, article):
        """
        Checks if the given article has any target information and if it does,
        checks whether the product satisfies any of the target information
        :param product: Product to test
        :param article: article
        :return:
            bool: True if the article conforms to the targets for the given product
        """
        geo_restrictions = product.get('geo_restrictions')

        # If not targeted at all then return True
        if not BasePublishService().is_targeted(article, 'target_regions'):
            return geo_restrictions is None

        if geo_restrictions:
            for region in article.get('target_regions', []):
                if region['qcode'] == geo_restrictions and region['allow']:
                    return True
                if region['qcode'] != geo_restrictions and not region['allow']:
                    return True
        return False
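
    # A minimal illustration (made-up qcodes) of the allow/deny rules above:
    # an article that allows NSW conforms to a product restricted to NSW,
    # while a product restricted to QLD does not conform.
    _regions = [{'qcode': 'NSW', 'allow': True}]
    assert any((_r['qcode'] == 'NSW' and _r['allow']) or
               (_r['qcode'] != 'NSW' and not _r['allow']) for _r in _regions)
    assert not any((_r['qcode'] == 'QLD' and _r['allow']) or
                   (_r['qcode'] != 'QLD' and not _r['allow']) for _r in _regions)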

    def conforms_subscriber_targets(self, subscriber, article):
        """
        Checks if the given article has any target information and if it does,
        checks whether the subscriber satisfies any of the target information
        :param subscriber: Subscriber to test
        :param article: article
        :return:
            bool: True/False if the article conforms to the targets
            bool: True if the given subscriber is specifically targeted, False otherwise
        """
        # If not targeted at all then return True
        if not BasePublishService().is_targeted(article, 'target_subscribers') and \
                not BasePublishService().is_targeted(article, 'target_types'):
            return True, False

        subscriber_type = subscriber.get('subscriber_type')

        for t in article.get('target_subscribers', []):
            if str(t.get('_id')) == str(subscriber['_id']):
                return True, True

        if subscriber_type:
            for t in article.get('target_types', []):
                if t['qcode'] == subscriber_type and t['allow']:
                    return True, False
                if t['qcode'] != subscriber_type and not t['allow']:
                    return True, False

        # If there's a region target then continue with the subscriber to check products
        if BasePublishService().is_targeted(article, 'target_regions'):
            return True, False

        # Nothing matches so this subscriber doesn't conform
        return False, False
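
    # A minimal illustration of the (conforms, skip_filters) tuple above with a
    # made-up target list: an explicitly targeted subscriber is delivered to and
    # skips the product filters; an unlisted one falls through to other checks.
    _targets = [{'_id': 'sub-1'}]
    assert any(str(_t.get('_id')) == 'sub-1' for _t in _targets)      # -> (True, True)
    assert not any(str(_t.get('_id')) == 'sub-2' for _t in _targets)  # no direct match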

    def conforms_content_filter(self, product, doc):
        """
        Checks if the document matches the subscriber filter
        :param product: Product where the filter is used
        :param doc: Document to test the filter against
        :return:
            True if there's no filter
            True if matches and permitting
            False if matches and blocking
            False if doesn't match and permitting
            True if doesn't match and blocking
        """
        content_filter = product.get('content_filter')

        if content_filter is None or 'filter_id' not in content_filter or content_filter[
                'filter_id'] is None:
            return True

        service = get_resource_service('content_filters')
        filter_doc = service.find_one(req=None, _id=content_filter['filter_id'])
        does_match = service.does_match(filter_doc, doc)

        if does_match:
            return content_filter['filter_type'] == 'permitting'
        else:
            return content_filter['filter_type'] == 'blocking'
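
    # The four outcomes above made concrete, with a tiny stand-in for the
    # matching step (hypothetical data; the real check is delegated to the
    # content_filters service):
    _cases = [('permitting', True, True), ('blocking', True, False),
              ('permitting', False, False), ('blocking', False, True)]
    for _ftype, _matches, _expected in _cases:
        _result = (_ftype == 'permitting') if _matches else (_ftype == 'blocking')
        assert _result is _expected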

    def conforms_global_filter(self, subscriber, global_filters, doc):
        """
        Checks if subscriber has a override rule against each of the
        global filter and if not checks if document matches the global filter
        :param subscriber: Subscriber to get if the global filter is overriden
        :param global_filters: List of all global filters
        :param doc: Document to test the global filter against
        :return: True if at least one global filter is not overriden
        and it matches the document
        False if global filter matches the document or all of them overriden
        """
        service = get_resource_service('content_filters')
        gfs = subscriber.get('global_filters', {})
        for global_filter in global_filters:
            if gfs.get(str(global_filter[config.ID_FIELD]), True):
                # Global filter applies to this subscriber
                if service.does_match(global_filter, doc):
                    # All global filters behave like blocking filters
                    return False
        return True

    def _extend_subscriber_items(self, subscriber_items, subscribers, item,
                                 digital_item_id, subscriber_codes):
        """
        Extends the subscriber_items with the given list of subscribers for the item
        :param subscriber_items: The existing dict of items per subscriber
        :param subscribers: New subscribers that the item has been published to - to be added
        :param item: item that has been published
        :param digital_item_id: digital_item_id
        :param subscriber_codes: product codes per subscriber
        """
        item_id = item[config.ID_FIELD]
        for subscriber in subscribers:
            sid = subscriber[config.ID_FIELD]
            item_list = subscriber_items.get(sid, {}).get('items', {})
            item_list[item_id] = digital_item_id
            subscriber_items[sid] = {
                'subscriber': subscriber,
                'items': item_list,
                'codes': subscriber_codes.get(sid, [])
            }

    def _get_codes(self, item):
        if item.get('codes'):
            return [c.strip() for c in item.get('codes').split(',') if c]
        else:
            return []
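
    # A tiny illustration of the comma-separated codes parsing above
    # (made-up data): empty fragments are dropped and whitespace stripped.
    _item = {'codes': 'abc, def,, ghi'}
    assert [c.strip() for c in _item['codes'].split(',') if c] == ['abc', 'def', 'ghi']
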
    def _validate_take(self, original):
        takes_service = TakesPackageService()
        if not takes_service.is_last_takes_package_item(original):
            raise SuperdeskApiError.badRequestError(
                message="Only last take of the package can be spiked.")
Beispiel #24
0
    def _validate_updates(self, original, updates, user):
        """
        Validates updates to the article against the conditions below; an exception is raised if any of them fails:
            1.  Is the article locked by a user other than the one requesting the update?
            2.  Is the state of the article Killed?
            3.  Is the user trying to update a package with Public Service Announcements?
            4.  Is the user authorized to update the unique name of the article?
            5.  Is the user trying to update the genre of a broadcast article?
            6.  Is the article being scheduled while it is in a package?
            7.  Is the article being scheduled with an invalid schedule timestamp?
            8.  Does the article have valid crops if its type is a picture?
            9.  Is the article a valid package if its type is a package?
            10. Does the article have a valid Embargo?
            11. Make sure that there are no duplicate anpa_category codes in the article.
            12. Make sure there are no duplicate subjects in the update.

        :raises:
            SuperdeskApiError.forbiddenError()
                - if the state of the article is killed, the user is not authorized to update the unique name,
                  or the article is locked by another user
            SuperdeskApiError.badRequestError()
                - if Public Service Announcements are being added to a package, the genre is being updated for a
                  broadcast, the schedule is invalid, or the updates contain duplicate anpa_category or subject codes
        """

        lock_user = original.get('lock_user', None)
        force_unlock = updates.get('force_unlock', False)
        str_user_id = str(user.get(config.ID_FIELD)) if user else None

        if lock_user and str(lock_user) != str_user_id and not force_unlock:
            raise SuperdeskApiError.forbiddenError('The item was locked by another user')

        if original.get(ITEM_STATE) == CONTENT_STATE.KILLED:
            raise SuperdeskApiError.forbiddenError("Item isn't in a valid state to be updated.")

        if updates.get('body_footer') and is_normal_package(original):
            raise SuperdeskApiError.badRequestError("Package doesn't support Public Service Announcements")

        if 'unique_name' in updates and not is_admin(user) \
                and (user['active_privileges'].get('metadata_uniquename', 0) == 0):
            raise SuperdeskApiError.forbiddenError("Unauthorized to modify Unique Name")

        # if broadcast then update to genre is not allowed.
        if original.get('broadcast') and updates.get('genre') and \
                any(genre.get('value', '').lower() != BROADCAST_GENRE.lower() for genre in updates.get('genre')):
            raise SuperdeskApiError.badRequestError('Cannot change the genre for broadcast content.')

        if updates.get('publish_schedule') and original[ITEM_STATE] != CONTENT_STATE.SCHEDULED \
                and datetime.datetime.fromtimestamp(0).date() != updates['publish_schedule'].date():
            if is_item_in_package(original):
                raise SuperdeskApiError.badRequestError(
                    'This item is in a package and it needs to be removed before the item can be scheduled!')

            package = TakesPackageService().get_take_package(original) or {}
            validate_schedule(updates['publish_schedule'], package.get(SEQUENCE, 1))

        if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:
            CropService().validate_multiple_crops(updates, original)
        elif original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_update(updates, original)

        # Do the validation after Circular Reference check passes in Package Service
        updated = original.copy()
        updated.update(updates)
        self.validate_embargo(updated)

        # Ensure that there are no duplicate categories in the update
        category_qcodes = [q['qcode'] for q in updates.get('anpa_category', []) or []]
        if category_qcodes and len(category_qcodes) != len(set(category_qcodes)):
            raise SuperdeskApiError.badRequestError("Duplicate category codes are not allowed")

        # Ensure that there are no duplicate subjects in the update
        subject_qcodes = [q['qcode'] for q in updates.get('subject', []) or []]
        if subject_qcodes and len(subject_qcodes) != len(set(subject_qcodes)):
            raise SuperdeskApiError.badRequestError("Duplicate subjects are not allowed")
Beispiel #25
0
class BasePublishService(BaseService):
    """
    Base service class for "publish" endpoint
    """

    publish_type = 'publish'
    published_state = 'published'

    non_digital = partial(filter, lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE)
    digital = partial(filter, lambda s: (s.get('subscriber_type', '') in {SUBSCRIBER_TYPES.DIGITAL,
                                                                          SUBSCRIBER_TYPES.ALL}))
    takes_package_service = TakesPackageService()
    package_service = PackageService()

    def raise_if_not_marked_for_publication(self, original):
        if original.get('flags', {}).get('marked_for_not_publication', False):
            raise SuperdeskApiError.badRequestError('Cannot publish an item which is marked as Not for Publication')

    def raise_if_invalid_state_transition(self, original):
        if not is_workflow_state_transition_valid(self.publish_type, original[ITEM_STATE]):
            error_message = "Can't {} as item state is {}" if original[ITEM_TYPE] == CONTENT_TYPE.TEXT else \
                "Can't {} as either package state or one of the items state is {}"
            raise InvalidStateTransitionError(error_message.format(self.publish_type, original[ITEM_STATE]))

    def on_update(self, updates, original):
        self.raise_if_not_marked_for_publication(original)
        self.raise_if_invalid_state_transition(original)

        updated = original.copy()
        updated.update(updates)

        takes_package = self.takes_package_service.get_take_package(original)

        if self.publish_type == 'publish':
            # validate if take can be published
            if takes_package and not self.takes_package_service.can_publish_take(
                    takes_package, updates.get(SEQUENCE, original.get(SEQUENCE, 1))):
                raise PublishQueueError.previous_take_not_published_error(
                    Exception("Previous takes are not published."))

            validate_schedule(updated.get('publish_schedule'), takes_package.get(SEQUENCE, 1) if takes_package else 1)

            if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and updates.get(EMBARGO):
                get_resource_service(ARCHIVE).validate_embargo(updated)

        if self.publish_type in ['correct', 'kill']:
            if updates.get(EMBARGO):
                raise SuperdeskApiError.badRequestError("Embargo can't be set after publishing")

            if updates.get('dateline'):
                raise SuperdeskApiError.badRequestError("Dateline can't be modified after publishing")

        validate_item = {'act': self.publish_type, 'type': original['type'], 'validate': updated}
        validation_errors = get_resource_service('validate').post([validate_item])
        if validation_errors[0]:
            raise ValidationError(validation_errors)

        # validate the package if it is one
        package_validation_errors = []
        self._validate_package_contents(original, takes_package, package_validation_errors)
        if len(package_validation_errors) > 0:
            raise ValidationError(package_validation_errors)

        self._set_updates(original, updates, updates.get(config.LAST_UPDATED, utcnow()))
        updates[ITEM_OPERATION] = ITEM_PUBLISH
        convert_task_attributes_to_objectId(updates)

    def on_updated(self, updates, original):
        self.update_published_collection(published_item_id=original[config.ID_FIELD])
        original = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        updates.update(original)
        user = get_user()

        if updates[ITEM_OPERATION] != ITEM_KILL and \
                original.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            get_resource_service('archive_broadcast').on_broadcast_master_updated(updates[ITEM_OPERATION], original)

        get_resource_service('archive_broadcast').reset_broadcast_status(updates, original)
        push_notification('item:updated', item=str(original[config.ID_FIELD]), user=str(user.get(config.ID_FIELD)))
        self._import_into_legal_archive(updates)

    def update(self, id, updates, original):
        """
        Handles workflow of each Publish, Corrected and Killed.
        """
        try:
            user = get_user()
            last_updated = updates.get(config.LAST_UPDATED, utcnow())
            auto_publish = updates.pop('auto_publish', False)

            if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._publish_package_items(original, updates)

            queued_digital = False
            package = None

            if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE:
                # if targeted_for is set then we don't send to digital subscribers.
                if not (updates.get('targeted_for', original.get('targeted_for')) or
                        is_genre(original, BROADCAST_GENRE)):
                    # check if item is in a digital package
                    package = self.takes_package_service.get_take_package(original)

                    if package:
                        queued_digital = self._publish_takes_package(package, updates, original, last_updated)
                    else:
                        '''
                        If the type of the item is text or preformatted
                        then the item needs to be sent to digital subscribers.
                        So, package the item as a take.
                        '''
                        updated = copy(original)
                        updated.update(updates)

                        if original[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED} and \
                                self.sending_to_digital_subscribers(updated):
                            # create a takes package
                            package_id = self.takes_package_service.package_story_as_a_take(updated, {}, None)
                            updates[LINKED_IN_PACKAGES] = updated[LINKED_IN_PACKAGES]
                            package = get_resource_service(ARCHIVE).find_one(req=None, _id=package_id)
                            queued_digital = self._publish_takes_package(package, updates, original, last_updated)

                # queue only text items
                media_type = None
                updated = deepcopy(original)
                updated.update(updates)
                if package:
                    media_type = SUBSCRIBER_TYPES.WIRE

                queued_wire = self.publish(doc=original, updates=updates, target_media_type=media_type)

                queued = queued_digital or queued_wire
                if not queued:
                    logger.exception('Nothing is saved to publish queue for story: {} for action: {}'.
                                     format(original[config.ID_FIELD], self.publish_type))

            self._update_archive(original=original, updates=updates, should_insert_into_versions=auto_publish)
            push_notification('item:publish', item=str(id), unique_name=original['unique_name'],
                              desk=str(original.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')))
        except SuperdeskApiError as e:
            raise e
        except KeyError as e:
            raise SuperdeskApiError.badRequestError(
                message="Key is missing on article to be published: {}".format(str(e)))
        except Exception as e:
            logger.exception("Something bad happened while publishing %s", id)
            raise SuperdeskApiError.internalError(message="Failed to publish the item: {}".format(str(e)))

    def _publish_takes_package(self, package, updates, original, last_updated):
        """
        Process the takes to form digital master file content and publish.
        :param dict package: Takes package
        :param dict updates: updates for the take
        :param dict original: original take
        :param datetime.datetime last_updated: datetime for the updates
        :return bool: True if the takes package was queued, else False
        """

        package_updates = self.process_takes(updates_of_take_to_be_published=updates,
                                             original_of_take_to_be_published=original,
                                             package=package)

        self._set_updates(package, package_updates, last_updated)
        package_updates.setdefault(ITEM_OPERATION, updates.get(ITEM_OPERATION, ITEM_PUBLISH))
        self._update_archive(package, package_updates)
        '''
        When the embargo has lapsed and the article should go to Digital Subscribers, the BasePublishService creates
        a Takes Package whose state is draft. In that case we can't initiate post-publish actions on the Takes
        Package, as it hasn't been published yet and the post-publish service's get_subscribers() would return an
        empty list. Logically, post-publish actions don't make sense without publishing the package either.
        That's why we check the Takes Package state and invoke the appropriate publish service.
        '''
        if package[ITEM_STATE] in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
            package.update(package_updates)
            queued_digital = self.publish(doc=package, updates=None, target_media_type=SUBSCRIBER_TYPES.DIGITAL)
        else:
            package.update(package_updates)
            queued_digital = get_resource_service('archive_publish').publish(doc=package,
                                                                             updates=None,
                                                                             target_media_type=SUBSCRIBER_TYPES.DIGITAL)

        self.update_published_collection(published_item_id=package[config.ID_FIELD])
        self._import_into_legal_archive(package)
        return queued_digital

    def _import_into_legal_archive(self, doc):
        """
        Import into legal archive async
        :param {dict} doc: document to be imported
        """

        if doc.get(ITEM_STATE) != CONTENT_STATE.SCHEDULED:
            kwargs = {
                'doc': doc
            }
            import_into_legal_archive.apply_async(kwargs=kwargs)

    def _publish_package_items(self, package, updates):
        """
        Publishes all items of a package recursively then publishes the package itself
        :param package: package to publish
        :param updates: payload
        """
        items = self.package_service.get_residrefs(package)

        if len(items) == 0 and self.publish_type == ITEM_PUBLISH:
            raise SuperdeskApiError.badRequestError("Empty package cannot be published!")

        removed_items = []
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if a correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!")
            items.extend(added_items)

        subscriber_items = {}

        if items:
            archive_publish = get_resource_service('archive_publish')
            for guid in items:
                package_item = super().find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        "Package item with id: {} does not exist.".format(guid))

                if package_item[ITEM_STATE] not in PUBLISH_STATES:  # if the item is not published then publish it
                    if package_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        # if the item is a package do recursion to publish
                        sub_updates = {i: updates[i] for i in ['state', 'operation'] if i in updates}
                        sub_updates['groups'] = list(package_item['groups'])
                        self._publish_package_items(package_item, sub_updates)
                        self._update_archive(original=package_item, updates=sub_updates,
                                             should_insert_into_versions=False)
                        self.update_published_collection(published_item_id=package_item[config.ID_FIELD])
                    else:
                        # publish the item
                        archive_publish.patch(id=package_item.pop(config.ID_FIELD), updates=package_item)

                    insert_into_versions(id_=guid)

                elif guid in removed_items:
                    # remove the package information from the package item.
                    linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES, [])
                                          if linked.get(PACKAGE) != package.get(config.ID_FIELD)]
                    super().system_update(guid, {LINKED_IN_PACKAGES: linked_in_packages}, package_item)

                package_item = super().find_one(req=None, _id=guid)
                subscribers = self._get_subscribers_for_package_item(package_item)
                self.package_service.update_field_in_package(updates, package_item[config.ID_FIELD],
                                                             config.VERSION, package_item[config.VERSION])

                if package_item[config.ID_FIELD] in removed_items:
                    digital_item_id = None
                else:
                    digital_item_id = self._get_digital_id_for_package_item(package_item)

                self._extend_subscriber_items(subscriber_items, subscribers, package_item, digital_item_id)

            self.publish_package(package, updates, target_subscribers=subscriber_items)

    def _extend_subscriber_items(self, subscriber_items, subscribers, item, digital_item_id):
        """
        Extends the subscriber_items with the given list of subscribers for the item
        :param subscriber_items: The existing list of subscribers
        :param subscribers: New subscribers that item has been published to - to be added
        :param item: item that has been published
        :param digital_item_id: digital_item_id
        """
        item_id = item[config.ID_FIELD]
        for subscriber in subscribers:
            sid = subscriber[config.ID_FIELD]
            item_list = subscriber_items.get(sid, {}).get('items', {})
            item_list[item_id] = digital_item_id
            subscriber_items[sid] = {'subscriber': subscriber, 'items': item_list}

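    # Editor's sketch (not part of the original source): the mapping
    # _extend_subscriber_items builds up, assuming plain string ids. After
    # publishing item 'story1' (digital id 'pkg1') to subscriber 's1':
    #
    #   subscriber_items == {'s1': {'subscriber': {'_id': 's1'},
    #                               'items': {'story1': 'pkg1'}}}
    #
    # A later call for the same subscriber merges into the same 'items' dict.
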
    def _get_changed_items(self, existing_items, updates):
        """
        Returns the added and removed items from existing_items
        :param existing_items: Existing list
        :param updates: Changes
        :return: list of removed items and list of added items
        """
        if 'groups' in updates:
            new_items = self.package_service.get_residrefs(updates)
            removed_items = list(set(existing_items) - set(new_items))
            added_items = list(set(new_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []

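    # Editor's sketch (not part of the original source): the delta in
    # _get_changed_items falls out of plain set arithmetic, assuming
    # residrefs are id strings:
    #
    #   existing = ['t1', 't2', 't3']
    #   new = ['t2', 't3', 't4']                    # from updates['groups']
    #   removed = list(set(existing) - set(new))    # ['t1']
    #   added = list(set(new) - set(existing))      # ['t4']
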
    def _get_digital_id_for_package_item(self, package_item):
        """
        Finds the digital item id for a given item in a package
        :param package_item: item in a package
        :return string: Digital item id if there's one otherwise id of package_item
        """
        if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            return package_item[config.ID_FIELD]
        else:
            package_item_takes_package_id = self.takes_package_service.get_take_package_id(package_item)
            if not package_item_takes_package_id:
                return package_item[config.ID_FIELD]
            return package_item_takes_package_id

    def _get_subscribers_for_package_item(self, package_item):
        """
        Finds the list of subscribers for a given item in a package
        :param package_item: item in a package
        :return list: List of subscribers
        :return string: Digital item id if there's one otherwise None
        """
        if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            query = {'$and': [{'item_id': package_item[config.ID_FIELD]},
                              {'publishing_action': package_item[ITEM_STATE]}]}
        else:
            package_item_takes_package = self.takes_package_service.get_take_package(package_item)
            if not package_item_takes_package:
                # this item has not been published to digital subscribers so
                # the list of subscribers are empty
                return []

            query = {'$and': [{'item_id': package_item_takes_package[config.ID_FIELD]},
                              {'publishing_action': package_item_takes_package[ITEM_STATE]}]}

        return self._get_subscribers_for_previously_sent_items(query)

    def _set_updates(self, original, updates, last_updated):
        """
        Sets config.VERSION, config.LAST_UPDATED, ITEM_STATE in updates document.
        If the item is being published and an embargo is set then 'Embargoed.' is appended to the Editorial Note.

        :param dict original: original document
        :param dict updates: updates related to the original document
        :param datetime last_updated: datetime of the updates.
        """

        self.set_state(original, updates)
        updates.setdefault(config.LAST_UPDATED, last_updated)

        if original[config.VERSION] == updates.get(config.VERSION, original[config.VERSION]):
            resolve_document_version(document=updates, resource=ARCHIVE, method='PATCH', latest_doc=original)

        if updates.get(EMBARGO, original.get(EMBARGO)) \
                and updates.get('ednote', original.get('ednote', '')).find('Embargo') == -1:
            updates['ednote'] = '{} {}'.format(original.get('ednote', ''), 'Embargoed.').strip()

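    # Editor's sketch (not part of the original source): the ednote guard in
    # _set_updates appends the marker only once, assuming an embargoed item:
    #
    #   original = {'ednote': 'Check spelling'}        # no 'Embargo' substring
    #   -> updates['ednote'] == 'Check spelling Embargoed.'
    #
    # A second pass finds 'Embargo' in the note and leaves it unchanged.
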
    def _update_archive(self, original, updates, versioned_doc=None, should_insert_into_versions=True):
        """
        Updates the article in the archive collection and inserts the latest version into archive_versions.
        Also clears autosaved versions if any.
        :param versioned_doc: doc which can be inserted into archive_versions
        :param should_insert_into_versions: if True inserts the latest document into the versions collection
        """

        self.backend.update(self.datasource, original[config.ID_FIELD], updates, original)

        if should_insert_into_versions:
            if versioned_doc is None:
                insert_into_versions(id_=original[config.ID_FIELD])
            else:
                insert_into_versions(doc=versioned_doc)

        get_component(ItemAutosave).clear(original[config.ID_FIELD])

    def set_state(self, original, updates):
        """
        Set the state of the document based on the action (publish, correction, kill)
        :param dict original: original document
        :param dict updates: updates related to document
        """
        updates['publish_schedule'] = None
        updates[ITEM_STATE] = self.published_state

    def process_takes(self, updates_of_take_to_be_published, package, original_of_take_to_be_published=None):
        """
        Primary rule for publishing a Take in Takes Package is: all previous takes must be published before a take
        can be published.

        Also, generates the body_html of the takes package and makes sure the metadata for the package is the same
        as the metadata of the take to be published.

        :param dict updates_of_take_to_be_published: updates for the take to be published
        :param dict package: Takes package to publish
        :param dict original_of_take_to_be_published: original of the take to be published
        :return: Takes Package Updates
        """

        takes = self.takes_package_service.get_published_takes(package)
        body_html = updates_of_take_to_be_published.get('body_html',
                                                        original_of_take_to_be_published.get('body_html', ''))
        package_updates = {}

        groups = package.get(GROUPS, [])
        if groups:
            take_refs = [ref for group in groups if group['id'] == 'main' for ref in group.get('refs', [])]
            sequence_num_of_take_to_be_published = 0

            take_article_id = updates_of_take_to_be_published.get(
                config.ID_FIELD, original_of_take_to_be_published[config.ID_FIELD])

            for r in take_refs:
                if r[GUID_FIELD] == take_article_id:
                    sequence_num_of_take_to_be_published = r[SEQUENCE]
                    break

            if takes and self.published_state != 'killed':
                body_html_list = [take.get('body_html', '') for take in takes]
                if self.published_state == 'published':
                    body_html_list.append(body_html)
                else:
                    body_html_list[sequence_num_of_take_to_be_published - 1] = body_html

                package_updates['body_html'] = '<br>'.join(body_html_list)
            else:
                package_updates['body_html'] = body_html

            metadata_tobe_copied = self.takes_package_service.fields_for_creating_take.copy()
            metadata_tobe_copied.extend(['publish_schedule', 'byline'])
            updated_take = original_of_take_to_be_published.copy()
            updated_take.update(updates_of_take_to_be_published)
            metadata_from = updated_take
            if self.published_state == 'corrected' and len(takes) > 1:
                # get the last take metadata only if there are more than one takes
                metadata_from = takes[-1]

            for metadata in metadata_tobe_copied:
                if metadata in metadata_from:
                    package_updates[metadata] = metadata_from.get(metadata)

            package_updates[GROUPS] = groups
            self.package_service.update_field_in_package(package_updates,
                                                         original_of_take_to_be_published[config.ID_FIELD],
                                                         config.VERSION,
                                                         updates_of_take_to_be_published[config.VERSION])

        return package_updates

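    # Editor's sketch (not part of the original source): how process_takes
    # assembles the package body when take 3 is published and takes 1-2 are
    # already published:
    #
    #   body_html_list = ['<p>take one</p>', '<p>take two</p>']  # published takes
    #   published_state == 'published' appends the new take's body, giving
    #   '<p>take one</p><br><p>take two</p><br><p>take three</p>'
    #
    # A correction instead overwrites the slot at sequence - 1.
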
    def publish_package(self, package, updates, target_subscribers):
        """
        Publishes a given non-take package to the given subscribers.
        For each subscriber, updates the package definition with the wanted_items for that subscriber
        and removes the unwanted_items that aren't supposed to go to that subscriber.
        Text stories are replaced by their digital versions.
        :param package: Package to be published
        :param updates: Updates to the package
        :param target_subscribers: List of subscriber and items-per-subscriber
        """
        self._process_publish_updates(package, updates)
        all_items = self.package_service.get_residrefs(package)
        for items in target_subscribers.values():
            updated = deepcopy(package)
            updates_copy = deepcopy(updates)
            updated.update(updates_copy)
            subscriber = items['subscriber']
            wanted_items = [item for item in items['items'] if items['items'].get(item, None)]
            unwanted_items = [item for item in all_items if item not in wanted_items]
            for i in unwanted_items:
                still_items_left = self.package_service.remove_ref_from_inmem_package(updated, i)
                if not still_items_left and self.publish_type != 'correct':
                    # if nothing left in the package to be published and
                    # if not correcting then don't send the package
                    return
            for key in wanted_items:
                self.package_service.replace_ref_in_package(updated, key, items['items'][key])
            self.queue_transmission(updated, [subscriber])

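    # Editor's sketch (not part of the original source): the per-subscriber
    # trimming in publish_package, assuming string ids:
    #
    #   all_items = ['a', 'b', 'c']
    #   items['items'] = {'a': 'a-digital', 'b': None}   # 'b' has no digital id
    #   wanted_items = ['a']           # only ids mapped to a truthy value
    #   unwanted_items = ['b', 'c']    # stripped from this subscriber's copy
    #
    # 'a' is then replaced by its digital counterpart 'a-digital' before the
    # package is queued for that subscriber.
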
    def _process_publish_updates(self, doc, updates):
        """ Common updates for published items """
        desk = None
        if doc.get('task', {}).get('desk'):
            desk = get_resource_service('desks').find_one(req=None, _id=doc['task']['desk'])
        if not doc.get('ingest_provider'):
            updates['source'] = desk['source'] if desk and desk.get('source', '') \
                else DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES
        updates['pubstatus'] = PUB_STATUS.CANCELED if self.publish_type == 'kill' else PUB_STATUS.USABLE

    def publish(self, doc, updates, target_media_type=None):
        """
        Queue the content for publishing.
        1. Sets the Metadata Properties - source and pubstatus
        2. Get the subscribers.
        3. Update the headline of wire stories with the sequence
        4. Queue the content for subscribers
        5. Queue the content for previously published subscribers if any.
        6. Sends notification if no formatter has found for any of the formats configured in Subscriber.
        7. If not queued and not formatters then raise exception.
        :param dict doc: document to publish
        :param dict updates: updates for the document
        :param str target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
                Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire
                then the doc being queues is an Individual Article.
        :param dict target_subscribers: list of subscribers that document needs to get sent
        :return bool: if content is queued then True else False
        :raises PublishQueueError.item_not_queued_error:
                If the nothing is queued.
        """

        queued = True
        no_formatters = []
        updated = doc.copy()

        # Step 1
        if updates:
            self._process_publish_updates(doc, updates)
            updated.update(updates)

        # Step 2
        subscribers, subscribers_yet_to_receive = self.get_subscribers(doc, target_media_type)

        # Step 3
        if target_media_type == SUBSCRIBER_TYPES.WIRE:
            self._update_headline_sequence(updated)

        # Step 4
        no_formatters, queued = self.queue_transmission(updated, subscribers)

        # Step 5
        if subscribers_yet_to_receive:
            formatters_not_found, queued_new_subscribers = self.queue_transmission(updated, subscribers_yet_to_receive)
            no_formatters.extend(formatters_not_found)
            queued = queued or queued_new_subscribers

        # Step 6
        user = get_user()
        if len(no_formatters) > 0:
            push_notification('item:publish:wrong:format',
                              item=str(doc[config.ID_FIELD]), unique_name=doc['unique_name'],
                              desk=str(doc.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')),
                              formats=no_formatters)

        # Step 7
        if not target_media_type and not queued:
            logger.exception('Nothing is saved to publish queue for story: {} for action: {}'.
                             format(doc[config.ID_FIELD], self.publish_type))

        return queued

    def sending_to_digital_subscribers(self, doc):
        """
        Returns False if item has embargo and is in future.
        Returns True if there is a digital subscriber either in the previously sent or in yet to be sent subscribers

        :param doc: document
        :return bool: True if there's at least one
        """

        if doc.get(EMBARGO) and doc.get(EMBARGO) > utcnow():
            return False

        subscribers, subscribers_yet_to_receive = self.get_subscribers(doc, SUBSCRIBER_TYPES.DIGITAL)
        subscribers = list(self.digital(subscribers))
        subscribers_yet_to_receive = list(self.digital(subscribers_yet_to_receive))
        return len(subscribers) > 0 or len(subscribers_yet_to_receive) > 0

    def get_subscribers(self, doc, target_media_type):
        """
        Get subscribers for doc based on target_media_type.
        Override this method in the ArchivePublishService, ArchiveCorrectService and ArchiveKillService
        :param doc: Document to publish/correct/kill
        :param target_media_type: dictates if the doc being queued is a Takes Package or an individual article.
                Valid values are Wire and Digital. If Digital, the doc being queued is a Takes Package; if Wire,
                the doc being queued is an individual article.
        :return: (list, list) List of filtered subscriber,
                List of subscribers that have not received item previously (empty list in this case).
        """
        raise NotImplementedError()

    def _get_subscribers_for_previously_sent_items(self, lookup):
        """
        Returns list of subscribers that have previously received the item.
        :param dict lookup: elastic query to filter the publish queue
        :return: list of subscribers
        """
        req = ParsedRequest()
        subscribers = []
        queued_items = get_resource_service('publish_queue').get(req=req, lookup=lookup)
        if queued_items.count():
            subscriber_ids = {queued_item['subscriber_id'] for queued_item in queued_items}
            query = {'$and': [{config.ID_FIELD: {'$in': list(subscriber_ids)}}]}
            subscribers = list(get_resource_service('subscribers').get(req=None, lookup=query))
        return subscribers

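    # Editor's sketch (not part of the original source): the lookup shape this
    # method expects, as built by _get_subscribers_for_package_item:
    #
    #   lookup = {'$and': [{'item_id': 'some-item-id'},
    #                      {'publishing_action': 'published'}]}
    #
    # The queue rows yield distinct subscriber_ids, which are then resolved
    # against the subscribers resource in a single $in query.
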
    def filter_subscribers(self, doc, subscribers, target_media_type):
        """
        Filter subscribers to whom the current document is going to be delivered.
        :param doc: Document to publish/kill/correct
        :param subscribers: List of Subscribers that might potentially get this document
        :param target_media_type: dictates if the doc being queued is a Takes Package or an individual article.
                Valid values are Wire and Digital. If Digital, the doc being queued is a Takes Package; if Wire,
                the doc being queued is an individual article.
        :return: list of filtered subscribers.
        """
        filtered_subscribers = []
        req = ParsedRequest()
        req.args = {'is_global': True}
        service = get_resource_service('content_filters')
        global_filters = list(service.get(req=req, lookup=None))

        for subscriber in subscribers:
            if target_media_type and subscriber.get('subscriber_type', '') != SUBSCRIBER_TYPES.ALL:
                can_send_takes_packages = subscriber['subscriber_type'] == SUBSCRIBER_TYPES.DIGITAL
                if (target_media_type == SUBSCRIBER_TYPES.WIRE and can_send_takes_packages) or \
                        (target_media_type == SUBSCRIBER_TYPES.DIGITAL and not can_send_takes_packages):
                    continue

            if doc.get('targeted_for'):
                found_match = [t for t in doc['targeted_for'] if t['name'] == subscriber.get('subscriber_type', '')]

                if len(found_match) == 0 and subscriber.get('geo_restrictions'):
                    found_match = [t for t in doc['targeted_for'] if t['name'] == subscriber['geo_restrictions']]
                    if len(found_match) == 0 or found_match[0]['allow'] is False:
                        continue
                elif len(found_match) > 0 and found_match[0]['allow'] is False:
                    continue

            if not self.conforms_global_filter(subscriber, global_filters, doc):
                continue

            if not self.conforms_content_filter(subscriber, doc):
                continue

            filtered_subscribers.append(subscriber)

        return filtered_subscribers

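    # Editor's sketch (not part of the original source): the media-type gate at
    # the top of filter_subscribers, for subscribers that are not of type ALL:
    #
    #   target_media_type  subscriber_type  delivered?
    #   WIRE               DIGITAL          no   (can_send_takes_packages)
    #   WIRE               WIRE             yes
    #   DIGITAL            WIRE             no
    #   DIGITAL            DIGITAL          yes
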
    def queue_transmission(self, doc, subscribers):
        """
        Formats and then queues the article for transmission to the passed subscribers.
        ::Important Note:: A format type can repeat across subscribers, but the formatted item can't be generated
        once per format type, because each formatted item must carry a published sequence number generated per
        subscriber.
        :param dict doc: document to queue for transmission
        :param list subscribers: list of subscriber dicts.
        :return: (list, bool) tuple of the list of missing formatters and a boolean flag, True if queued else False
        """

        queued = False
        no_formatters = []
        for subscriber in subscribers:
            try:
                if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                        subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can only get text and preformatted stories
                    continue

                for destination in subscriber['destinations']:
                    formatter = get_formatter(destination['format'], doc)

                    if not formatter:  # if the formatter is not found then record it
                        no_formatters.append(destination['format'])
                        continue

                    formatted_docs = formatter.format(doc, subscriber)

                    for pub_seq_num, formatted_doc in formatted_docs:
                        publish_queue_item = dict()
                        publish_queue_item['item_id'] = doc['_id']
                        publish_queue_item['item_version'] = doc[config.VERSION]
                        publish_queue_item['formatted_item'] = formatted_doc
                        publish_queue_item['subscriber_id'] = subscriber['_id']
                        publish_queue_item['destination'] = destination
                        publish_queue_item['published_seq_num'] = pub_seq_num
                        publish_queue_item['publish_schedule'] = doc.get('publish_schedule', None)
                        publish_queue_item['unique_name'] = doc.get('unique_name', None)
                        publish_queue_item['content_type'] = doc.get('type', None)
                        publish_queue_item['headline'] = doc.get('headline', None)

                        self.set_state(doc, publish_queue_item)
                        if publish_queue_item.get(ITEM_STATE):
                            publish_queue_item['publishing_action'] = publish_queue_item.get(ITEM_STATE)
                            del publish_queue_item[ITEM_STATE]
                        else:
                            publish_queue_item['publishing_action'] = self.published_state

                        get_resource_service('publish_queue').post([publish_queue_item])
                        queued = True
            except Exception:
                logger.exception("Failed to queue item for id {} with headline {} for subscriber {}."
                                 .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

        return no_formatters, queued

    def update_published_collection(self, published_item_id):
        """
        Updates the published collection with the published item.
        Set the last_published_version to false for previous versions of the published items.
        :param: str published_item_id: _id of the document.
        """
        published_item = super().find_one(req=None, _id=published_item_id)
        published_item = copy(published_item)
        get_resource_service('published').update_published_items(published_item_id, LAST_PUBLISHED_VERSION, False)
        get_resource_service('published').post([published_item])

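    # Editor's sketch (not part of the original source): the effect on the
    # published collection, assuming an item that was already published once
    # and that LAST_PUBLISHED_VERSION is the 'last_published_version' field:
    #
    #   before publishing v2: [{_current_version: 1, last_published_version: True}]
    #   after publishing v2:  [{_current_version: 1, last_published_version: False},
    #                          {_current_version: 2, last_published_version: True}]
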
    def conforms_content_filter(self, subscriber, doc):
        """
        Checks if the document matches the subscriber filter
        :param subscriber: Subscriber to get the filter
        :param doc: Document to test the filter against
        :return:
        True if there's no filter
        True if matches and permitting
        False if matches and blocking
        False if doesn't match and permitting
        True if doesn't match and blocking
        """
        content_filter = subscriber.get('content_filter')

        if content_filter is None or 'filter_id' not in content_filter or content_filter['filter_id'] is None:
            return True

        service = get_resource_service('content_filters')
        filter = service.find_one(req=None, _id=content_filter['filter_id'])
        does_match = service.does_match(filter, doc)

        if does_match:
            return content_filter['filter_type'] == 'permitting'
        else:
            return content_filter['filter_type'] == 'blocking'

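    # Editor's sketch (not part of the original source): the four outcomes of
    # conforms_content_filter as a table:
    #
    #   does_match  filter_type   delivered?
    #   True        permitting    True
    #   True        blocking      False
    #   False       permitting    False
    #   False       blocking      True
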
    def conforms_global_filter(self, subscriber, global_filters, doc):
        """
        Checks if subscriber has a override rule against each of the
        global filter and if not checks if document matches the global filter
        :param subscriber: Subscriber to get if the global filter is overriden
        :param global_filters: List of all global filters
        :param doc: Document to test the global filter against
        :return: True if at least one global filter is not overriden
        and it matches the document
        False if global filter matches the document or all of them overriden
        """
        service = get_resource_service('content_filters')
        gfs = subscriber.get('global_filters', {})
        for global_filter in global_filters:
            if gfs.get(str(global_filter['_id']), True):
                # Global filter applies to this subscriber
                if service.does_match(global_filter, doc):
                    # all global filters behave like blocking filters
                    return False
        return True

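    # Editor's sketch (not part of the original source): a subscriber opts out
    # of one global filter by keying that filter's id (as a string) to False:
    #
    #   subscriber['global_filters'] = {'<filter_id>': False}
    #
    # That filter is skipped for this subscriber; any other global filter that
    # matches the document still blocks delivery.
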
    def _update_headline_sequence(self, doc):
        """ Updates the headline of the text story if there's any sequence value in it """
        if doc.get(SEQUENCE):
            doc['headline'] = '{}={}'.format(doc['headline'], doc.get(SEQUENCE))

    def _validate_package_contents(self, package, takes_package, validation_errors=None):
        """
        If the item passed is a package this function will ensure that the unpublished content validates and none of
        the content is locked by a session other than the publishing session; it also disallows any killed or
        spiked content.

        :param package:
        :param takes_package:
        :param validation_errors: validation errors are appended if there are any.
        """
        if validation_errors is None:  # avoid sharing a mutable default argument across calls
            validation_errors = []
        # Ensure it is the sort of thing we need to validate
        if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and not takes_package and self.publish_type == ITEM_PUBLISH:
            items = self.package_service.get_residrefs(package)

            # make sure package is not scheduled or spiked
            if package[ITEM_STATE] in (CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                validation_errors.append('Package cannot be {}'.format(package[ITEM_STATE]))

            if package.get(EMBARGO):
                validation_errors.append('Package cannot have Embargo')

            if items:
                for guid in items:
                    doc = super().find_one(req=None, _id=guid)

                    if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        digital = self.takes_package_service.get_take_package(doc) or {}
                        self._validate_package_contents(doc, digital, validation_errors)

                    # make sure no items are killed or spiked or scheduled
                    if doc[ITEM_STATE] in (CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                        validation_errors.append('Package cannot contain {} item'.format(doc[ITEM_STATE]))

                    if doc.get(EMBARGO):
                        validation_errors.append('Package cannot have Items with Embargo')

                    # don't validate items that already have published
                    if doc[ITEM_STATE] not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
                        validate_item = {'act': self.publish_type, 'type': doc[ITEM_TYPE], 'validate': doc}
                        errors = get_resource_service('validate').post([validate_item], headline=True)
                        if errors[0]:
                            validation_errors.extend(errors[0])

                    # check the locks on the items
                    if doc.get('lock_session', None) and package.get('lock_session') != doc['lock_session']:
                        validation_errors.extend(['{}: packaged item cannot be locked'.format(doc['headline'])])

    def format(self, article, subscriber, codes=None):
        try:
            docs = []
            formatted_article = deepcopy(article)
            for category in self._get_category_list(formatted_article.get('anpa_category')):
                mapped_source = self._get_mapped_source(formatted_article)
                formatted_article[config.ID_FIELD] = formatted_article.get('item_id',
                                                                           formatted_article.get(config.ID_FIELD))
                is_last_take = TakesPackageService().is_last_takes_package_item(formatted_article)
                is_first_part = formatted_article.get('sequence', 1) == 1
                pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
                anpa = []

                if codes:
                    anpa.append(b'\x05')
                    anpa.append(' '.join(codes).encode('ascii'))
                    anpa.append(b'\x0D\x0A')

                # start of message header (syn syn soh)
                anpa.append(b'\x16\x16\x01')
                anpa.append(get_service_level(category, formatted_article).encode('ascii'))

                # story number
                anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

                # field separator
                anpa.append(b'\x0A')  # LF
                anpa.append(map_priority(formatted_article.get('priority')).encode('ascii'))
                anpa.append(b'\x20')

                anpa.append(category['qcode'].lower().encode('ascii'))

                anpa.append(b'\x13')
                # format identifier
                if formatted_article.get(FORMAT, FORMATS.HTML) == FORMATS.PRESERVED:
                    anpa.append(b'\x12')
                else:
                    anpa.append(b'\x11')
                anpa.append(b'\x20')

                # keyword
                keyword = 'bc-{}'.format(self.append_legal(article=formatted_article, truncate=True)).replace(' ', '-')
                keyword = keyword[:24] if len(keyword) > 24 else keyword
                anpa.append(keyword.encode('ascii'))
                anpa.append(b'\x20')

                # version field
                anpa.append(b'\x20')

                # reference field
                anpa.append(b'\x20')

                # filing date
                anpa.append('{}-{}'.format(formatted_article['_updated'].strftime('%m'),
                                           formatted_article['_updated'].strftime('%d')).encode('ascii'))
                anpa.append(b'\x20')

                # add the word count
                anpa.append(str(formatted_article.get('word_count', '0000')).zfill(4).encode('ascii'))
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x02')  # STX

                self._process_headline(anpa, formatted_article, category['qcode'].encode('ascii'))

                keyword = SluglineMapper().map(article=formatted_article, category=category['qcode'].upper(),
                                               truncate=True).encode('ascii', 'ignore')
                anpa.append(keyword)
                take_key = (formatted_article.get('anpa_take_key', '') or '').encode('ascii', 'ignore')
                anpa.append((b'\x20' + take_key) if len(take_key) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                if formatted_article.get(EMBARGO):
                    embargo = '{}{}\r\n'.format('Embargo Content. Timestamp: ',
                                                get_utc_schedule(formatted_article, EMBARGO).isoformat())
                    anpa.append(embargo.encode('ascii', 'replace'))

                if formatted_article.get('ednote', '') != '':
                    ednote = '{}\r\n'.format(to_ascii(formatted_article.get('ednote')))
                    anpa.append(ednote.encode('ascii', 'replace'))

                if formatted_article.get(BYLINE):
                    anpa.append(BeautifulSoup(formatted_article.get(BYLINE), 'html.parser')
                                .text.encode('ascii', 'ignore'))
                    anpa.append(b'\x0D\x0A')

                if formatted_article.get(FORMAT) == FORMATS.PRESERVED:
                    soup = BeautifulSoup(self.append_body_footer(formatted_article), "html.parser")
                    anpa.append(soup.get_text().encode('ascii', 'replace'))
                else:
                    body = to_ascii(formatted_article.get('body_html', ''))
                    # we need to inject the dateline
                    if is_first_part and formatted_article.get('dateline', {}).get('text') \
                            and not article.get('auto_publish', False):
                        soup = BeautifulSoup(body, "html.parser")
                        ptag = soup.find('p')
                        if ptag is not None:
                            # encode/decode round-trip strips non-ASCII; NavigableString expects str, not bytes
                            dateline_text = '{} '.format(formatted_article.get('dateline').get('text'))
                            ptag.insert(0, NavigableString(dateline_text.encode('ascii', 'ignore').decode('ascii')))
                            body = str(soup)
                    anpa.append(self.get_text_content(body))
                    if formatted_article.get('body_footer'):
                        anpa.append(self.get_text_content(to_ascii(formatted_article.get('body_footer', ''))))

                anpa.append(b'\x0D\x0A')
                if not is_last_take:
                    anpa.append('MORE'.encode('ascii'))
                else:
                    anpa.append(mapped_source.encode('ascii'))
                sign_off = (formatted_article.get('sign_off', '') or '').encode('ascii')
                anpa.append((b'\x20' + sign_off) if len(sign_off) > 0 else b'')
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x03')  # ETX

                # time and date
                anpa.append(datetime.datetime.now().strftime('%d-%m-%y %H-%M-%S').encode('ascii'))

                anpa.append(b'\x04')  # EOT
                anpa.append(b'\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A')

                docs.append({'published_seq_num': pub_seq_num, 'encoded_item': b''.join(anpa),
                             'formatted_item': b''.join(anpa).decode('ascii')})

            return docs
        except Exception as ex:
            raise FormatterError.AnpaFormatterError(ex, subscriber)
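
    # Editor's sketch (not part of the original source): the control-character
    # skeleton of the ANPA output assembled above, assuming ASCII throughout:
    #
    #   \x16\x16\x01   SYN SYN SOH - start of message header
    #   \x02           STX         - start of text (headline, byline, body)
    #   \x03           ETX         - end of text
    #   \x04           EOT         - end of transmission
    #
    # Between SOH and STX sit the header fields: service level, story number,
    # priority, category, format identifier, keyword, filing date, word count.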
Example #27
class ArchiveService(BaseService):
    packageService = PackageService()
    takesService = TakesPackageService()
    mediaService = ArchiveMediaService()

    def on_fetched(self, docs):
        """
        Overriding this to handle existing data in Mongo & Elastic
        """
        self.__enhance_items(docs[config.ITEMS])

    def on_fetched_item(self, doc):
        self.__enhance_items([doc])

    def __enhance_items(self, items):
        for item in items:
            handle_existing_data(item)
            self.takesService.enhance_with_package_info(item)

    def on_create(self, docs):
        on_create_item(docs)

        for doc in docs:
            doc['version_creator'] = doc['original_creator']
            remove_unwanted(doc)
            update_word_count(doc)
            set_item_expiry({}, doc)

            if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self.packageService.on_create([doc])

            # Do the validation after Circular Reference check passes in Package Service
            self.validate_embargo(doc)

            if doc.get('media'):
                self.mediaService.on_create([doc])

            # let client create version 0 docs
            if doc.get('version') == 0:
                doc[config.VERSION] = doc['version']

            if not doc.get('ingest_provider'):
                doc['source'] = DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES

    def on_created(self, docs):
        packages = [
            doc for doc in docs if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE
        ]
        if packages:
            self.packageService.on_created(packages)

        for doc in docs:
            subject = get_subject(doc)
            if subject:
                msg = 'added new {{ type }} item about "{{ subject }}"'
            else:
                msg = 'added new {{ type }} item with empty header/title'
            add_activity(ACTIVITY_CREATE,
                         msg,
                         self.datasource,
                         item=doc,
                         type=doc[ITEM_TYPE],
                         subject=subject)
        push_content_notification(docs)

    def on_update(self, updates, original):
        updates[ITEM_OPERATION] = ITEM_UPDATE
        is_update_allowed(original)
        user = get_user()

        if 'publish_schedule' in updates and original['state'] == 'scheduled':
            # this is a deschedule action
            self.deschedule_item(updates, original)
            # check if there is a takes package and deschedule the takes package.
            package = TakesPackageService().get_take_package(original)
            if package and package.get('state') == 'scheduled':
                package_updates = {
                    'publish_schedule': None,
                    'groups': package.get('groups')
                }
                self.patch(package.get(config.ID_FIELD), package_updates)
            return

        if updates.get('publish_schedule'):

            if datetime.datetime.fromtimestamp(0).date() == updates.get(
                    'publish_schedule').date():
                # publish_schedule field will be cleared
                updates['publish_schedule'] = None
            else:
                # validate the schedule
                if is_item_in_package(original):
                    raise SuperdeskApiError.\
                        badRequestError(message='This item is in a package. ' +
                                                'It needs to be removed before the item can be scheduled!')
                package = TakesPackageService().get_take_package(
                    original) or {}
                validate_schedule(updates.get('publish_schedule'),
                                  package.get(SEQUENCE, 1))

        if 'unique_name' in updates and not is_admin(user) \
                and (user['active_privileges'].get('metadata_uniquename', 0) == 0):
            raise SuperdeskApiError.forbiddenError(
                "Unauthorized to modify Unique Name")

        remove_unwanted(updates)

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        lock_user = original.get('lock_user', None)
        force_unlock = updates.get('force_unlock', False)

        updates.setdefault('original_creator',
                           original.get('original_creator'))

        str_user_id = str(user.get('_id')) if user else None
        if lock_user and str(lock_user) != str_user_id and not force_unlock:
            raise SuperdeskApiError.forbiddenError(
                'The item was locked by another user')

        updates['versioncreated'] = utcnow()
        set_item_expiry(updates, original)
        updates['version_creator'] = str_user_id
        set_sign_off(updates, original=original)
        update_word_count(updates)

        if force_unlock:
            del updates['force_unlock']

        # create crops
        crop_service = ArchiveCropService()
        crop_service.validate_multiple_crops(updates, original)
        crop_service.create_multiple_crops(updates, original)

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_update(updates, original)

        update_version(updates, original)

        # Do the validation after Circular Reference check passes in Package Service
        updated = original.copy()
        updated.update(updates)
        self.validate_embargo(updated)

    def on_updated(self, updates, original):
        get_component(ItemAutosave).clear(original['_id'])

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_updated(updates, original)

        ArchiveCropService().delete_replaced_crop_files(updates, original)

        updated = copy(original)
        updated.update(updates)

        if config.VERSION in updates:
            add_activity(
                ACTIVITY_UPDATE,
                'created new version {{ version }} for item {{ type }} about "{{ subject }}"',
                self.datasource,
                item=updated,
                version=updates[config.VERSION],
                subject=get_subject(updates, original),
                type=updated[ITEM_TYPE])

        push_content_notification([updated, original])

    def on_replace(self, document, original):
        document[ITEM_OPERATION] = ITEM_UPDATE
        remove_unwanted(document)
        user = get_user()
        lock_user = original.get('lock_user', None)
        force_unlock = document.get('force_unlock', False)
        user_id = str(user.get('_id'))
        if lock_user and str(lock_user) != user_id and not force_unlock:
            raise SuperdeskApiError.forbiddenError(
                'The item was locked by another user')
        document['versioncreated'] = utcnow()
        set_item_expiry(document, original)
        document['version_creator'] = user_id
        if force_unlock:
            del document['force_unlock']

    def on_replaced(self, document, original):
        get_component(ItemAutosave).clear(original['_id'])
        add_activity(ACTIVITY_UPDATE,
                     'replaced item {{ type }} about {{ subject }}',
                     self.datasource,
                     item=original,
                     type=original['type'],
                     subject=get_subject(original))
        push_content_notification([document, original])

    def on_deleted(self, doc):
        if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_deleted(doc)

        remove_media_files(doc)

        add_activity(ACTIVITY_DELETE,
                     'removed item {{ type }} about {{ subject }}',
                     self.datasource,
                     item=doc,
                     type=doc[ITEM_TYPE],
                     subject=get_subject(doc))
        push_content_notification([doc])

    def replace(self, id, document, original):
        return self.restore_version(id, document, original) or super().replace(
            id, document, original)

    def find_one(self, req, **lookup):
        item = super().find_one(req, **lookup)

        if item and str(item.get('task', {}).get('stage', '')) in \
                get_resource_service('users').get_invisible_stages_ids(get_user().get('_id')):
            raise SuperdeskApiError.forbiddenError(
                "User does not have permissions to read the item.")

        handle_existing_data(item)
        return item

    def restore_version(self, id, doc, original):
        item_id = id
        old_version = int(doc.get('old_version', 0))
        last_version = int(doc.get('last_version', 0))
        if not all([item_id, old_version, last_version]):
            return None

        old = get_resource_service('archive_versions').find_one(
            req=None, _id_document=item_id, _current_version=old_version)
        if old is None:
            raise SuperdeskApiError.notFoundError('Invalid version %s' %
                                                  old_version)

        curr = get_resource_service(SOURCE).find_one(req=None, _id=item_id)
        if curr is None:
            raise SuperdeskApiError.notFoundError('Invalid item id %s' %
                                                  item_id)

        if curr[config.VERSION] != last_version:
            raise SuperdeskApiError.preconditionFailedError(
                'Invalid last version %s' % last_version)

        old['_id'] = old['_id_document']
        old['_updated'] = old['versioncreated'] = utcnow()
        set_item_expiry(old, doc)
        del old['_id_document']
        old[ITEM_OPERATION] = ITEM_RESTORE

        resolve_document_version(old, SOURCE, 'PATCH', curr)
        remove_unwanted(old)
        set_sign_off(updates=old, original=curr)

        super().replace(id=item_id, document=old, original=curr)

        del doc['old_version']
        del doc['last_version']
        doc.update(old)
        return item_id

    def duplicate_content(self, original_doc):
        """
        Duplicates the 'original_doc' including its version history. Copy and Duplicate actions use this method.

        :return: guid of the duplicated article
        """

        if original_doc.get(ITEM_TYPE, '') == CONTENT_TYPE.COMPOSITE:
            for groups in original_doc.get('groups'):
                if groups.get('id') != 'root':
                    associations = groups.get('refs', [])
                    for assoc in associations:
                        if assoc.get(RESIDREF):
                            item, _item_id, _endpoint = self.packageService.get_associated_item(
                                assoc)
                            assoc[RESIDREF] = assoc[
                                'guid'] = self.duplicate_content(item)

        return self._duplicate_item(original_doc)

    def _duplicate_item(self, original_doc):
        """
        Duplicates the 'original_doc' including its version history. If the article being duplicated is contained
        in a desk then the article state is changed to Submitted.

        :return: guid of the duplicated article
        """

        new_doc = original_doc.copy()
        self._remove_after_copy(new_doc)

        new_doc[ITEM_OPERATION] = ITEM_DUPLICATE
        item_model = get_model(ItemModel)

        on_duplicate_item(new_doc)
        resolve_document_version(new_doc, SOURCE, 'PATCH', new_doc)
        if original_doc.get('task', {}).get(
                'desk') is not None and new_doc.get('state') != 'submitted':
            new_doc[ITEM_STATE] = CONTENT_STATE.SUBMITTED
        item_model.create([new_doc])
        self._duplicate_versions(original_doc['guid'], new_doc)

        return new_doc['guid']

    def _remove_after_copy(self, copied_item):
        """
        Removes the properties which don't make sense to have for an item after copy.
        """

        del copied_item[config.ID_FIELD]
        del copied_item['guid']
        copied_item.pop(LINKED_IN_PACKAGES, None)
        copied_item.pop(EMBARGO, None)
        copied_item.pop('publish_schedule', None)

    def _duplicate_versions(self, old_id, new_doc):
        """
        Duplicates the version history of the article identified by old_id. The identifiers of each version are
        changed to those of new_doc.

        :param old_id: identifier to fetch version history
        :param new_doc: identifiers from this doc will be used to create version history for the duplicated item.
        """
        resource_def = app.config['DOMAIN']['archive']
        version_id = versioned_id_field(resource_def)
        old_versions = get_resource_service('archive_versions').get(
            req=None, lookup={'guid': old_id})

        new_versions = []
        for old_version in old_versions:
            old_version[version_id] = new_doc[config.ID_FIELD]
            del old_version[config.ID_FIELD]

            old_version['guid'] = new_doc['guid']
            old_version['unique_name'] = new_doc['unique_name']
            old_version['unique_id'] = new_doc['unique_id']
            old_version['versioncreated'] = utcnow()
            if old_version[VERSION] == new_doc[VERSION]:
                old_version[ITEM_OPERATION] = new_doc[ITEM_OPERATION]
            new_versions.append(old_version)
        last_version = deepcopy(new_doc)
        last_version['_id_document'] = new_doc['_id']
        del last_version['_id']
        new_versions.append(last_version)
        if new_versions:
            get_resource_service('archive_versions').post(new_versions)

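    # Editor's sketch (not part of the original source): what is rewritten on
    # each copied version row, assuming version_id resolves to '_id_document':
    #
    #   old_version['_id_document'] = new_doc['_id']   # re-point the history
    #   del old_version['_id']                         # let the db assign one
    #   old_version['guid'] = new_doc['guid']          # adopt the new identity
    #
    # A final row copied from new_doc itself versions the latest state too.
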
    def deschedule_item(self, updates, doc):
        """
        Deschedule an item. This operation removes the item from the publish queue and the published collection.
        :param dict updates: updates for the document
        :param doc: original document.
        """
        updates['state'] = 'in_progress'
        updates['publish_schedule'] = None
        updates[ITEM_OPERATION] = ITEM_DESCHEDULE
        # delete entries from publish queue
        get_resource_service('publish_queue').delete_by_article_id(doc['_id'])
        # delete entry from published repo
        get_resource_service('published').delete_by_article_id(doc['_id'])

    def validate_schedule(self, schedule):
        if not isinstance(schedule, datetime.date):
            raise SuperdeskApiError.badRequestError(
                "Schedule date is not recognized")
        if not schedule.date() or schedule.date().year <= 1970:
            raise SuperdeskApiError.badRequestError(
                "Schedule date is not recognized")
        if not schedule.time():
            raise SuperdeskApiError.badRequestError(
                "Schedule time is not recognized")
        if schedule < utcnow():
            raise SuperdeskApiError.badRequestError(
                "Schedule cannot be earlier than now")

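    # Editor's sketch (not part of the original source): inputs and outcomes
    # for validate_schedule, assuming timezone-aware datetimes and
    # utcnow() == 2015-06-01T12:00+00:00:
    #
    #   'tomorrow'                              -> rejected, not a datetime
    #   datetime(1970, 1, 1, tzinfo=utc)        -> rejected, year <= 1970
    #   datetime(2015, 5, 31, 12, 0, tzinfo=utc) -> rejected, earlier than now
    #   datetime(2015, 6, 2, 9, 30, tzinfo=utc)  -> accepted
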
    def can_edit(self, item, user_id):
        """
        Determines if the user can edit the item or not.
        """
        # TODO: modify this function when read only permissions for stages are implemented
        # TODO: and Content state related checking.

        if not current_user_has_privilege('archive'):
            return False, 'User does not have sufficient permissions.'

        item_location = item.get('task')

        if item_location:
            if item_location.get('desk'):
                if not superdesk.get_resource_service('user_desks').is_member(
                        user_id, item_location.get('desk')):
                    return False, 'User is not a member of the desk.'
            elif item_location.get('user'):
                if not str(item_location.get('user')) == str(user_id):
                    return False, 'Item belongs to another user.'

        return True, ''

    def remove_expired(self, doc):
        """
        Removes the article from production if the state is spiked
        """

        assert doc[ITEM_STATE] == CONTENT_STATE.SPIKED, \
            "Article state is %s. Only Spiked Articles can be removed" % doc[ITEM_STATE]

        doc_id = str(doc[config.ID_FIELD])
        resource_def = app.config['DOMAIN']['archive_versions']
        get_resource_service('archive_versions').delete(
            lookup={versioned_id_field(resource_def): doc_id})
        super().delete_action({config.ID_FIELD: doc_id})

    def __is_req_for_save(self, doc):
        """
        Patch of /api/archive is used in multiple places. This method determines whether the patch was
        triggered by a user's save request or not.
        """

        if 'req_for_save' in doc:
            req_for_save = doc['req_for_save']
            del doc['req_for_save']

            return req_for_save == 'true'

        return True

    def validate_embargo(self, item):
        """
        Validates the embargo of the item. Following are checked:
            1. Item can't be a package or a take or a re-write of another story
            2. Publish Schedule and Embargo are mutually exclusive
            3. Always a future date except in case of Corrected and Killed.
        :raises: SuperdeskApiError.badRequestError() if the validation fails
        """

        if item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE:
            embargo = item.get(EMBARGO)
            if embargo:
                if item.get('publish_schedule') or item[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
                    raise SuperdeskApiError.badRequestError(
                        "An item can't have both Publish Schedule and Embargo")

                package = TakesPackageService().get_take_package(item)
                if package:
                    raise SuperdeskApiError.badRequestError(
                        "Takes don't support Embargo")

                if item.get('rewrite_of'):
                    raise SuperdeskApiError.badRequestError(
                        "Rewrites don't support Embargo")

                if not isinstance(embargo, datetime.date) or not embargo.time():
                    raise SuperdeskApiError.badRequestError("Invalid Embargo")

                if item[ITEM_STATE] not in PUBLISH_STATES and embargo <= utcnow():
                    raise SuperdeskApiError.badRequestError(
                        "Embargo cannot be earlier than now")
        elif item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and not self.takesService.is_takes_package(item):
            if item.get(EMBARGO):
                raise SuperdeskApiError.badRequestError(
                    "A Package doesn't support Embargo")

            self.packageService.check_if_any_item_in_package_has_embargo(item)
Example #28
    def update_rewrite(self, original):
        """Removes the reference from the rewritten story in published collection."""
        rewrite_service = ArchiveRewriteService()
        if original.get('rewrite_of') and original.get(ITEM_EVENT_ID):
            rewrite_service._clear_rewritten_flag(original.get(ITEM_EVENT_ID),
                                                  original[config.ID_FIELD],
                                                  'rewritten_by')

        # write the rewritten_by to the take before spiked
        archive_service = get_resource_service(ARCHIVE)
        published_service = get_resource_service('published')
        takes_service = TakesPackageService()
        takes_package = takes_service.get_take_package(original)

        if takes_package and takes_package.get(
                SEQUENCE, 0) > 1 and original.get('rewritten_by'):
            # get the rewritten by
            rewritten_by = archive_service.find_one(
                req=None, _id=original.get('rewritten_by'))
            # get the take
            take_id = takes_service.get_take_by_take_no(
                original,
                take_no=takes_package.get(SEQUENCE) - 1,
                package=takes_package)
            take = archive_service.find_one(req=None, _id=take_id)

            # update the take and takes package with rewritten_by
            if take and take.get('rewritten_by') != rewritten_by[config.ID_FIELD]:
                if take.get(ITEM_STATE) in PUBLISH_STATES:
                    published_service.update_published_items(
                        take_id, 'rewritten_by', rewritten_by[config.ID_FIELD])

                archive_service.system_update(
                    take[config.ID_FIELD],
                    {'rewritten_by': rewritten_by[config.ID_FIELD]}, take)

            if takes_package.get('rewritten_by') != rewritten_by[
                    config.ID_FIELD]:
                if takes_package.get(ITEM_STATE) in PUBLISH_STATES:
                    published_service.update_published_items(
                        takes_package.get(config.ID_FIELD), 'rewritten_by',
                        rewritten_by[config.ID_FIELD])

                archive_service.system_update(
                    takes_package[config.ID_FIELD],
                    {'rewritten_by': rewritten_by[config.ID_FIELD]},
                    takes_package)

            if rewritten_by.get('rewrite_of') != takes_package.get(
                    config.ID_FIELD):
                archive_service.system_update(
                    rewritten_by[config.ID_FIELD],
                    {'rewrite_of': takes_package.get(config.ID_FIELD)},
                    rewritten_by)
        elif original.get('rewritten_by') or (takes_package and takes_package.get('rewritten_by')):
            # the story from which the rewrite was triggered is being spiked.
            # in this case both rewrite_of and rewritten_by are published.
            rewrite_id = original.get('rewritten_by') or takes_package.get('rewritten_by')
            rewritten_by = archive_service.find_one(req=None, _id=rewrite_id)
            archive_service.system_update(rewrite_id, {'rewrite_of': None, 'rewrite_sequence': 0}, rewritten_by)
            app.on_archive_item_updated({'rewrite_of': None, 'rewrite_sequence': 0}, original, ITEM_UNLINK)
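
What update_rewrite maintains is pointer symmetry: rewrite_of on the update and rewritten_by on its source always refer to each other, so removing one side must either retarget the link at the previous take or clear it. A reduced sketch of that invariant over plain dicts (retarget_rewrite and the ids are hypothetical, not part of Superdesk):

def retarget_rewrite(archive, spiked_id, new_target_id=None):
    # move or drop the mutual rewrite pointers when spiked_id goes away
    rewrite_id = archive[spiked_id].get('rewritten_by')
    if not rewrite_id:
        return
    if new_target_id:  # an earlier take exists: point the update at it
        archive[rewrite_id]['rewrite_of'] = new_target_id
        archive[new_target_id]['rewritten_by'] = rewrite_id
    else:  # nothing left to link to: clear the update's back-reference
        archive[rewrite_id]['rewrite_of'] = None
        archive[rewrite_id]['rewrite_sequence'] = 0


archive = {'take1': {}, 'take2': {'rewritten_by': 'update1'}, 'update1': {'rewrite_of': 'take2'}}
retarget_rewrite(archive, 'take2', new_target_id='take1')
assert archive['update1']['rewrite_of'] == 'take1'
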
Beispiel #29
0
class ArchiveService(BaseService):
    packageService = PackageService()
    takesService = TakesPackageService()
    mediaService = ArchiveMediaService()

    def on_fetched(self, docs):
        """
        Overriding this to handle existing data in Mongo & Elastic
        """
        self.__enhance_items(docs[config.ITEMS])

    def on_fetched_item(self, doc):
        self.__enhance_items([doc])

    def __enhance_items(self, items):
        for item in items:
            handle_existing_data(item)
            self.takesService.enhance_with_package_info(item)

    def on_create(self, docs):
        on_create_item(docs)

        for doc in docs:
            if doc.get('body_footer') and is_normal_package(doc):
                raise SuperdeskApiError.badRequestError("Package doesn't support Public Service Announcements")

            doc['version_creator'] = doc['original_creator']
            remove_unwanted(doc)
            update_word_count(doc)
            set_item_expiry({}, doc)

            if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self.packageService.on_create([doc])

            # Do the validation after Circular Reference check passes in Package Service
            self.validate_embargo(doc)

            if doc.get('media'):
                self.mediaService.on_create([doc])

            # let client create version 0 docs
            if doc.get('version') == 0:
                doc[config.VERSION] = doc['version']

            if not doc.get('ingest_provider'):
                doc['source'] = DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES

            doc.setdefault('priority', DEFAULT_PRIORITY_VALUE_FOR_MANUAL_ARTICLES)
            doc.setdefault('urgency', DEFAULT_URGENCY_VALUE_FOR_MANUAL_ARTICLES)

            convert_task_attributes_to_objectId(doc)

    def on_created(self, docs):
        packages = [doc for doc in docs if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE]
        if packages:
            self.packageService.on_created(packages)

        for doc in docs:
            subject = get_subject(doc)
            if subject:
                msg = 'added new {{ type }} item about "{{ subject }}"'
            else:
                msg = 'added new {{ type }} item with empty header/title'
            add_activity(ACTIVITY_CREATE, msg,
                         self.datasource, item=doc, type=doc[ITEM_TYPE], subject=subject)
        push_content_notification(docs)

    def on_update(self, updates, original):
        """
        Overridden to validate the updates to the article and take necessary actions depending on the updates. In brief,
        it does the following:
            1. Sets state, item operation, version created, version creator, sign off and word count.
            2. Resets Item Expiry
            3. If the request is to de-schedule then checks and de-schedules the associated Takes Package also.
            4. Creates Crops if article is a picture
        """
        user = get_user()
        self._validate_updates(original, updates, user)

        if 'publish_schedule' in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
            self.deschedule_item(updates, original)  # this is a deschedule action

            # check if there is a takes package and deschedule the takes package.
            package = TakesPackageService().get_take_package(original)
            if package and package.get('state') == 'scheduled':
                package_updates = {'publish_schedule': None, 'groups': package.get('groups')}
                self.patch(package.get(config.ID_FIELD), package_updates)

            return

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        remove_unwanted(updates)
        self._add_system_updates(original, updates, user)

        if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
            CropService().create_multiple_crops(updates, original)

    def on_updated(self, updates, original):
        get_component(ItemAutosave).clear(original['_id'])

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_updated(updates, original)

        CropService().delete_replaced_crop_files(updates, original)

        updated = copy(original)
        updated.update(updates)

        if config.VERSION in updates:
            add_activity(ACTIVITY_UPDATE, 'created new version {{ version }} for item {{ type }} about "{{ subject }}"',
                         self.datasource, item=updated,
                         version=updates[config.VERSION], subject=get_subject(updates, original),
                         type=updated[ITEM_TYPE])

        push_content_notification([updated, original])
        get_resource_service('archive_broadcast').reset_broadcast_status(updates, original)

    def on_replace(self, document, original):
        document[ITEM_OPERATION] = ITEM_UPDATE
        remove_unwanted(document)
        user = get_user()
        lock_user = original.get('lock_user', None)
        force_unlock = document.get('force_unlock', False)
        user_id = str(user.get('_id'))
        if lock_user and str(lock_user) != user_id and not force_unlock:
            raise SuperdeskApiError.forbiddenError('The item was locked by another user')
        document['versioncreated'] = utcnow()
        set_item_expiry(document, original)
        document['version_creator'] = user_id
        if force_unlock:
            del document['force_unlock']

    def on_replaced(self, document, original):
        get_component(ItemAutosave).clear(original['_id'])
        add_activity(ACTIVITY_UPDATE, 'replaced item {{ type }} about {{ subject }}',
                     self.datasource, item=original,
                     type=original['type'], subject=get_subject(original))
        push_content_notification([document, original])

    def on_deleted(self, doc):
        if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_deleted(doc)

        remove_media_files(doc)

        add_activity(ACTIVITY_DELETE, 'removed item {{ type }} about {{ subject }}',
                     self.datasource, item=doc,
                     type=doc[ITEM_TYPE], subject=get_subject(doc))
        push_content_notification([doc])

    def replace(self, id, document, original):
        return self.restore_version(id, document, original) or super().replace(id, document, original)

    def find_one(self, req, **lookup):
        item = super().find_one(req, **lookup)

        if item and str(item.get('task', {}).get('stage', '')) in \
                get_resource_service('users').get_invisible_stages_ids(get_user().get('_id')):
            raise SuperdeskApiError.forbiddenError("User does not have permissions to read the item.")

        handle_existing_data(item)
        return item

    def restore_version(self, id, doc, original):
        item_id = id
        old_version = int(doc.get('old_version', 0))
        last_version = int(doc.get('last_version', 0))
        if (not all([item_id, old_version, last_version])):
            return None

        old = get_resource_service('archive_versions').find_one(req=None, _id_document=item_id,
                                                                _current_version=old_version)
        if old is None:
            raise SuperdeskApiError.notFoundError('Invalid version %s' % old_version)

        curr = get_resource_service(SOURCE).find_one(req=None, _id=item_id)
        if curr is None:
            raise SuperdeskApiError.notFoundError('Invalid item id %s' % item_id)

        if curr[config.VERSION] != last_version:
            raise SuperdeskApiError.preconditionFailedError('Invalid last version %s' % last_version)

        old['_id'] = old['_id_document']
        old['_updated'] = old['versioncreated'] = utcnow()
        set_item_expiry(old, doc)
        del old['_id_document']
        old[ITEM_OPERATION] = ITEM_RESTORE

        resolve_document_version(old, SOURCE, 'PATCH', curr)
        remove_unwanted(old)
        set_sign_off(updates=old, original=curr)

        super().replace(id=item_id, document=old, original=curr)

        del doc['old_version']
        del doc['last_version']
        doc.update(old)
        return item_id

    def duplicate_content(self, original_doc):
        """
        Duplicates the 'original_doc' including its version history. Copy and Duplicate actions use this method.

        :return: guid of the duplicated article
        """

        if original_doc.get(ITEM_TYPE, '') == CONTENT_TYPE.COMPOSITE:
            for groups in original_doc.get('groups'):
                if groups.get('id') != 'root':
                    associations = groups.get('refs', [])
                    for assoc in associations:
                        if assoc.get(RESIDREF):
                            item, _item_id, _endpoint = self.packageService.get_associated_item(assoc)
                            assoc[RESIDREF] = assoc['guid'] = self.duplicate_content(item)

        return self._duplicate_item(original_doc)

    def _duplicate_item(self, original_doc):
        """
        Duplicates the 'original_doc' including its version history. If the article being duplicated is contained
        in a desk then the article state is changed to Submitted.

        :return: guid of the duplicated article
        """

        new_doc = original_doc.copy()
        self._remove_after_copy(new_doc)
        on_duplicate_item(new_doc)
        resolve_document_version(new_doc, SOURCE, 'PATCH', new_doc)

        if original_doc.get('task', {}).get('desk') is not None and new_doc.get('state') != 'submitted':
            new_doc[ITEM_STATE] = CONTENT_STATE.SUBMITTED

        convert_task_attributes_to_objectId(new_doc)
        get_model(ItemModel).create([new_doc])
        self._duplicate_versions(original_doc['guid'], new_doc)

        return new_doc['guid']

    def _remove_after_copy(self, copied_item):
        """
        Removes the properties that don't make sense to keep on an item after copy.
        """

        del copied_item[config.ID_FIELD]
        del copied_item['guid']
        copied_item.pop(LINKED_IN_PACKAGES, None)
        copied_item.pop(EMBARGO, None)
        copied_item.pop('publish_schedule', None)
        copied_item.pop('lock_time', None)
        copied_item.pop('lock_session', None)
        copied_item.pop('lock_user', None)

        task = copied_item.get('task', {})
        task.pop(LAST_PRODUCTION_DESK, None)
        task.pop(LAST_AUTHORING_DESK, None)

    def _duplicate_versions(self, old_id, new_doc):
        """
        Duplicates the version history of the article identified by old_id. The identifiers in each version
        are changed to those of new_doc.

        :param old_id: identifier to fetch version history
        :param new_doc: identifiers from this doc will be used to create version history for the duplicated item.
        """
        resource_def = app.config['DOMAIN']['archive']
        version_id = versioned_id_field(resource_def)
        old_versions = get_resource_service('archive_versions').get(req=None, lookup={'guid': old_id})

        new_versions = []
        for old_version in old_versions:
            old_version[version_id] = new_doc[config.ID_FIELD]
            del old_version[config.ID_FIELD]

            old_version['guid'] = new_doc['guid']
            old_version['unique_name'] = new_doc['unique_name']
            old_version['unique_id'] = new_doc['unique_id']
            old_version['versioncreated'] = utcnow()
            if old_version[config.VERSION] == new_doc[config.VERSION]:
                old_version[ITEM_OPERATION] = new_doc[ITEM_OPERATION]
            new_versions.append(old_version)
        last_version = deepcopy(new_doc)
        last_version['_id_document'] = new_doc['_id']
        del last_version['_id']
        new_versions.append(last_version)
        if new_versions:
            get_resource_service('archive_versions').post(new_versions)

    def deschedule_item(self, updates, doc):
        """
        Deschedule an item. This operation removes the item from the publish queue and the published collection.

        :param dict updates: updates for the document
        :param doc: original document
        """
        updates['state'] = 'in_progress'
        updates['publish_schedule'] = None
        updates[ITEM_OPERATION] = ITEM_DESCHEDULE
        # delete entry from published repo
        get_resource_service('published').delete_by_article_id(doc['_id'])

    def validate_schedule(self, schedule):
        if not isinstance(schedule, datetime.date):
            raise SuperdeskApiError.badRequestError("Schedule date is not recognized")
        if not schedule.date() or schedule.date().year <= 1970:
            raise SuperdeskApiError.badRequestError("Schedule date is not recognized")
        if not schedule.time():
            raise SuperdeskApiError.badRequestError("Schedule time is not recognized")
        if schedule < utcnow():
            raise SuperdeskApiError.badRequestError("Schedule cannot be earlier than now")

    def can_edit(self, item, user_id):
        """
        Determines if the user can edit the item or not.
        """
        # TODO: modify this function when read only permissions for stages are implemented
        # TODO: and Content state related checking.

        if not current_user_has_privilege('archive'):
            return False, 'User does not have sufficient permissions.'

        item_location = item.get('task')

        if item_location:
            if item_location.get('desk'):
                if not superdesk.get_resource_service('user_desks').is_member(user_id, item_location.get('desk')):
                    return False, 'User is not a member of the desk.'
            elif item_location.get('user'):
                if not str(item_location.get('user')) == str(user_id):
                    return False, 'Item belongs to another user.'

        return True, ''

    def delete_by_article_ids(self, ids):
        """
        Remove the content.

        :param list ids: list of ids to be removed
        """
        version_field = versioned_id_field(app.config['DOMAIN']['archive_versions'])
        get_resource_service('archive_versions').delete(lookup={version_field: {'$in': ids}})
        super().delete_action({config.ID_FIELD: {'$in': ids}})

    def __is_req_for_save(self, doc):
        """
        Patch of /api/archive is being used in multiple places. This method determines whether the patch
        was triggered by a user or not.
        """

        if 'req_for_save' in doc:
            req_for_save = doc['req_for_save']
            del doc['req_for_save']

            return req_for_save == 'true'

        return True

    def validate_embargo(self, item):
        """
        Validates the embargo of the item. Following are checked:
            1. Item can't be a package or a take or a re-write of another story
            2. Publish Schedule and Embargo are mutually exclusive
            3. Always a future date except in case of Corrected and Killed.
        :raises: SuperdeskApiError.badRequestError() if the validation fails
        """

        if item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE:
            embargo = item.get(EMBARGO)
            if embargo:
                if item.get('publish_schedule') or item[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
                    raise SuperdeskApiError.badRequestError("An item can't have both Publish Schedule and Embargo")

                package = TakesPackageService().get_take_package(item)
                if package:
                    raise SuperdeskApiError.badRequestError("Takes doesn't support Embargo")

                if item.get('rewrite_of'):
                    raise SuperdeskApiError.badRequestError("Rewrites doesn't support Embargo")

                if not isinstance(embargo, datetime.date) or not embargo.time():
                    raise SuperdeskApiError.badRequestError("Invalid Embargo")

                if item[ITEM_STATE] not in PUBLISH_STATES and embargo <= utcnow():
                    raise SuperdeskApiError.badRequestError("Embargo cannot be earlier than now")
        elif is_normal_package(item):
            if item.get(EMBARGO):
                raise SuperdeskApiError.badRequestError("A Package doesn't support Embargo")

            self.packageService.check_if_any_item_in_package_has_embargo(item)

    def _validate_updates(self, original, updates, user):
        """
        Validates updates to the article for the below conditions; if any of them holds, an exception is raised:
            1.  Is the article locked by a user other than the one requesting the update?
            2.  Is the state of the article Killed?
            3.  Is the user trying to update the package with Public Service Announcements?
            4.  Is the user authorized to update the unique name of the article?
            5.  Is the user trying to update the genre of a broadcast article?
            6.  Is the article being scheduled while it is in a package?
            7.  Is the article being scheduled with an invalid schedule timestamp?
            8.  Does the article have valid crops if the article type is a picture?
            9.  Is the article a valid package if the article type is a package?
            10. Does the article have a valid Embargo?
            11. Make sure that there are no duplicate anpa_category codes in the article.
            12. Make sure there are no duplicate subjects in the update.

        :raises:
            SuperdeskApiError.forbiddenError()
                - if the state of the article is killed, the user is not authorized to update the unique name,
                  or the article is locked by another user
            SuperdeskApiError.badRequestError()
                - if Public Service Announcements are being added to a package, the genre of a broadcast is
                  being updated, the schedule is invalid, or the updates contain duplicate anpa_category or
                  subject codes
        """

        lock_user = original.get('lock_user', None)
        force_unlock = updates.get('force_unlock', False)
        str_user_id = str(user.get(config.ID_FIELD)) if user else None

        if lock_user and str(lock_user) != str_user_id and not force_unlock:
            raise SuperdeskApiError.forbiddenError('The item was locked by another user')

        if original.get(ITEM_STATE) == CONTENT_STATE.KILLED:
            raise SuperdeskApiError.forbiddenError("Item isn't in a valid state to be updated.")

        if updates.get('body_footer') and is_normal_package(original):
            raise SuperdeskApiError.badRequestError("Package doesn't support Public Service Announcements")

        if 'unique_name' in updates and not is_admin(user) \
                and (user['active_privileges'].get('metadata_uniquename', 0) == 0):
            raise SuperdeskApiError.forbiddenError("Unauthorized to modify Unique Name")

        # if broadcast then update to genre is not allowed.
        if original.get('broadcast') and updates.get('genre') and \
                any(genre.get('value', '').lower() != BROADCAST_GENRE.lower() for genre in updates.get('genre')):
            raise SuperdeskApiError.badRequestError('Cannot change the genre for broadcast content.')

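        # the client signals "clear the schedule" by sending the epoch date (1970-01-01)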
        if updates.get('publish_schedule') and original[ITEM_STATE] != CONTENT_STATE.SCHEDULED \
                and datetime.datetime.fromtimestamp(0).date() != updates['publish_schedule'].date():
            if is_item_in_package(original):
                raise SuperdeskApiError.badRequestError(
                    'This item is in a package and it needs to be removed before the item can be scheduled!')

            package = TakesPackageService().get_take_package(original) or {}
            validate_schedule(updates['publish_schedule'], package.get(SEQUENCE, 1))

        if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:
            CropService().validate_multiple_crops(updates, original)
        elif original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_update(updates, original)

        # Do the validation after Circular Reference check passes in Package Service
        updated = original.copy()
        updated.update(updates)
        self.validate_embargo(updated)

        # Ensure that there are no duplicate categories in the update
        category_qcodes = [q['qcode'] for q in updates.get('anpa_category', []) or []]
        if category_qcodes and len(category_qcodes) != len(set(category_qcodes)):
            raise SuperdeskApiError.badRequestError("Duplicate category codes are not allowed")

        # Ensure that there are no duplicate subjects in the update
        subject_qcodes = [q['qcode'] for q in updates.get('subject', []) or []]
        if subject_qcodes and len(subject_qcodes) != len(set(subject_qcodes)):
            raise SuperdeskApiError.badRequestError("Duplicate subjects are not allowed")

    def _add_system_updates(self, original, updates, user):
        """
        As the name suggests, this method adds properties which are derived based on updates sent in the request.
            1. Sets item operation, version created, version creator, sign off and word count.
            2. Resets Item Expiry
        """

        convert_task_attributes_to_objectId(updates)

        updates[ITEM_OPERATION] = ITEM_UPDATE
        updates.setdefault('original_creator', original.get('original_creator'))
        updates['versioncreated'] = utcnow()
        updates['version_creator'] = str(user.get(config.ID_FIELD)) if user else None

        update_word_count(updates)
        update_version(updates, original)

        set_item_expiry(updates, original)
        set_sign_off(updates, original=original)

        # Clear publish_schedule field
        if updates.get('publish_schedule') \
                and datetime.datetime.fromtimestamp(0).date() == updates.get('publish_schedule').date():
            updates['publish_schedule'] = None

        if updates.get('force_unlock', False):
            del updates['force_unlock']

    def get_expired_items(self, expiry_datetime):
        """
        Get the expired items where the content state is not scheduled.

        :param datetime expiry_datetime: expiry datetime
        :return pymongo.cursor: expired non-published items.
        """
        query = {
            '$and': [
                {'expiry': {'$lte': date_to_str(expiry_datetime)}},
                {'$or': [
                    {'task.desk': {'$ne': None}},
                    {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None}
                ]}
            ]
        }

        req = ParsedRequest()
        req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        req.sort = 'expiry,_created'
        return self.get_from_mongo(req=req, lookup=query)
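
One pattern recurs throughout this service (on_replace, _validate_updates): a modification is allowed when the item is unlocked, locked by the requesting user, or the request carries force_unlock. As a standalone predicate, for illustration only:

def may_modify(lock_user, user_id, force_unlock=False):
    # mirrors the lock checks used above; ids are compared as strings
    return not lock_user or str(lock_user) == str(user_id) or force_unlock


assert may_modify(None, 'u1')                     # unlocked
assert may_modify('u1', 'u1')                     # own lock
assert not may_modify('u1', 'u2')                 # someone else's lock
assert may_modify('u1', 'u2', force_unlock=True)  # explicit override
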
Beispiel #30
0
class ArchiveLinkService(Service):
    packageService = TakesPackageService()

    def create(self, docs, **kwargs):
        target_id = request.view_args['target_id']
        doc = docs[0]
        link_id = doc.get('link_id')
        desk_id = doc.get('desk')
        service = get_resource_service(ARCHIVE)
        target = service.find_one(req=None, _id=target_id)
        self._validate_link(target, target_id)
        link = {}

        if desk_id:
            link = {'task': {'desk': desk_id}}
            user = get_user()
            lookup = {'_id': desk_id, 'members.user': user['_id']}
            desk = get_resource_service('desks').find_one(req=None, **lookup)
            if not desk:
                raise SuperdeskApiError.forbiddenError("No privileges to create new take on requested desk.")

            link['task']['stage'] = desk['working_stage']

        if link_id:
            link = service.find_one(req=None, _id=link_id)

        linked_item = self.packageService.link_as_next_take(target, link)
        insert_into_versions(id_=linked_item[config.ID_FIELD])
        doc.update(linked_item)
        build_custom_hateoas(CUSTOM_HATEOAS, doc)
        return [linked_item['_id']]

    def _validate_link(self, target, target_id):
        """Validates the article to be linked.

        :param target: article to be linked
        :param target_id: id of the article to be linked
        :raises: SuperdeskApiError
        """
        if not target:
            raise SuperdeskApiError.notFoundError(message='Cannot find the target item with id {}.'.format(target_id))

        if target.get(EMBARGO):
            raise SuperdeskApiError.badRequestError("Takes can't be created for an Item having Embargo")

        if is_genre(target, BROADCAST_GENRE):
            raise SuperdeskApiError.badRequestError("Cannot add new take to the story with genre as broadcast.")

        if get_resource_service('published').is_rewritten_before(target['_id']):
            raise SuperdeskApiError.badRequestError(message='Article has been rewritten before!')

    def _validate_unlink(self, target):
        """Validates that the links for takes or updates can be removed.

        :param target: article whose links will be removed
        :raises: SuperdeskApiError
        """
        if target[ITEM_TYPE] != CONTENT_TYPE.TEXT:
            raise SuperdeskApiError.badRequestError("Only text stories can be unlinked!")

        # if the story is in published states then it cannot be unlinked
        if target[ITEM_STATE] in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED]:
            raise SuperdeskApiError.badRequestError("Published stories cannot be unlinked!")

        # if the story is not the last take then it cannot be unlinked
        if TakesPackageService().get_take_package(target) and \
                not TakesPackageService().is_last_takes_package_item(target):
            raise SuperdeskApiError.badRequestError("Only the last take can be unlinked!")

    def on_delete(self, doc):
        self._validate_unlink(doc)

    def delete(self, lookup):
        target_id = request.view_args['target_id']
        archive_service = get_resource_service(ARCHIVE)
        target = archive_service.find_one(req=None, _id=target_id)
        self._validate_unlink(target)
        updates = {}

        takes_package = TakesPackageService().get_take_package(target)

        if takes_package and TakesPackageService().is_last_takes_package_item(target):
            # remove the take link
            PackageService().remove_refs_in_package(takes_package, target_id)

        if target.get('rewrite_of'):
            # remove the rewrite info
            ArchiveSpikeService().update_rewrite(target)

        if not takes_package and not target.get('rewrite_of'):
            # there is nothing to do
            raise SuperdeskApiError.badRequestError("Only takes and updates can be unlinked!")

        if target.get('rewrite_of'):
            updates['rewrite_of'] = None

        if target.get('anpa_take_key'):
            updates['anpa_take_key'] = None

        if target.get('rewrite_sequence'):
            updates['rewrite_sequence'] = None

        if target.get('sequence'):
            updates['sequence'] = None

        updates['event_id'] = generate_guid(type=GUID_TAG)

        archive_service.system_update(target_id, updates, target)
        user = get_user(required=True)
        push_notification('item:unlink', item=target_id, user=str(user.get(config.ID_FIELD)))
        app.on_archive_item_updated(updates, target, ITEM_UNLINK)
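
The rules in _validate_unlink reduce to a small predicate: only the last take of a text story that has not reached a published state may be unlinked. A standalone restatement, with illustrative names and state strings:

PUBLISHED_STATES = {'published', 'corrected', 'killed'}


def can_unlink(item, is_last_take):
    # mirrors _validate_unlink above: text only, not published, last take only
    return (item.get('type') == 'text'
            and item.get('state') not in PUBLISHED_STATES
            and is_last_take)


assert can_unlink({'type': 'text', 'state': 'in_progress'}, is_last_take=True)
assert not can_unlink({'type': 'text', 'state': 'published'}, is_last_take=True)
assert not can_unlink({'type': 'composite', 'state': 'in_progress'}, is_last_take=True)
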
Beispiel #31
0
class ArchiveService(BaseService):
    packageService = PackageService()
    takesService = TakesPackageService()
    mediaService = ArchiveMediaService()
    cropService = CropService()

    def on_fetched(self, docs):
        """
        Overriding this to handle existing data in Mongo & Elastic
        """
        self.enhance_items(docs[config.ITEMS])

    def on_fetched_item(self, doc):
        self.enhance_items([doc])

    def enhance_items(self, items):
        for item in items:
            handle_existing_data(item)

        if not app.config.get('NO_TAKES', False):
            self.takesService.enhance_items_with_takes_packages(items)

    def on_create(self, docs):
        on_create_item(docs)

        for doc in docs:
            if doc.get('body_footer') and is_normal_package(doc):
                raise SuperdeskApiError.badRequestError("Package doesn't support Public Service Announcements")

            doc['version_creator'] = doc['original_creator']
            remove_unwanted(doc)
            update_word_count(doc)
            set_item_expiry({}, doc)

            if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self.packageService.on_create([doc])

            # Do the validation after Circular Reference check passes in Package Service
            update_schedule_settings(doc, EMBARGO, doc.get(EMBARGO))
            self.validate_embargo(doc)

            if doc.get('media'):
                self.mediaService.on_create([doc])

            # let client create version 0 docs
            if doc.get('version') == 0:
                doc[config.VERSION] = doc['version']

            self._add_desk_metadata(doc, {})

            convert_task_attributes_to_objectId(doc)

    def on_created(self, docs):
        packages = [doc for doc in docs if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE]
        if packages:
            self.packageService.on_created(packages)

        profiles = set()
        for doc in docs:
            subject = get_subject(doc)
            if subject:
                msg = 'added new {{ type }} item about "{{ subject }}"'
            else:
                msg = 'added new {{ type }} item with empty header/title'
            add_activity(ACTIVITY_CREATE, msg,
                         self.datasource, item=doc, type=doc[ITEM_TYPE], subject=subject)

            if doc.get('profile'):
                profiles.add(doc['profile'])

            self.cropService.update_media_references(doc, {})
            if doc[ITEM_OPERATION] == ITEM_FETCH:
                app.on_archive_item_updated({'task': doc.get('task')}, doc, ITEM_FETCH)
            else:
                app.on_archive_item_updated({'task': doc.get('task')}, doc, ITEM_CREATE)

        get_resource_service('content_types').set_used(profiles)
        push_content_notification(docs)

    def on_update(self, updates, original):
        """Runs on archive update.

        Overridden to validate the updates to the article and take necessary actions depending on the updates. In brief,
        it does the following:
            1. Sets state, item operation, version created, version creator, sign off and word count.
            2. Resets Item Expiry
            3. If the request is to de-schedule then checks and de-schedules the associated Takes Package also.
            4. Creates Crops if article is a picture
        """
        user = get_user()
        self._validate_updates(original, updates, user)

        if PUBLISH_SCHEDULE in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
            # check if there is a takes package and deschedule the takes package.
            takes_service = TakesPackageService()
            package = takes_service.get_take_package(original)
            if package and package.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
                get_resource_service('published').delete_by_article_id(package.get(config.ID_FIELD))
                self.delete_by_article_ids([package.get(config.ID_FIELD)])
                updates[LINKED_IN_PACKAGES] = [package for package in original.get(LINKED_IN_PACKAGES, [])
                                               if package.get(PACKAGE_TYPE) != TAKES_PACKAGE]
            return

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        remove_unwanted(updates)
        self._add_system_updates(original, updates, user)
        self._add_desk_metadata(updates, original)
        self._handle_media_updates(updates, original, user)

    def _handle_media_updates(self, updates, original, user):

        if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
            self.cropService.create_multiple_crops(updates, original)

        if ASSOCIATIONS not in updates or not updates.get(ASSOCIATIONS):
            return

        # iterate over associations. Validate and process them if they are stored in database
        for item_name, item_obj in updates.get(ASSOCIATIONS).items():
            if not (item_obj and config.ID_FIELD in item_obj):
                continue

            item_id = item_obj[config.ID_FIELD]
            media_item = {}
            if app.settings.get('COPY_METADATA_FROM_PARENT') and item_obj.get(ITEM_TYPE) in MEDIA_TYPES:
                stored_item = (original.get(ASSOCIATIONS) or {}).get(item_name) or item_obj
            else:
                media_item = stored_item = self.find_one(req=None, _id=item_id)
                if not stored_item:
                    continue

            self._validate_updates(stored_item, item_obj, user)
            if stored_item[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
                CropService().create_multiple_crops(item_obj, stored_item)

            # If the media item is not marked as 'used', mark it as used
            if original.get(ITEM_TYPE) == CONTENT_TYPE.TEXT and \
                    (item_obj is not stored_item or not stored_item.get('used')):
                if media_item is not stored_item:
                    media_item = self.find_one(req=None, _id=item_id)

                if media_item and not media_item.get('used'):
                    self.system_update(media_item['_id'], {'used': True}, media_item)

                stored_item['used'] = True

            stored_item.update(item_obj)
            updates[ASSOCIATIONS][item_name] = stored_item

    def on_updated(self, updates, original):
        get_component(ItemAutosave).clear(original['_id'])

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_updated(updates, original)

        updated = copy(original)
        updated.update(updates)

        if config.VERSION in updates:
            add_activity(ACTIVITY_UPDATE,
                         'created new version {{ version }} for item {{ type }} about "{{ subject }}"',
                         self.datasource, item=updated,
                         version=updates[config.VERSION], subject=get_subject(updates, original),
                         type=updated[ITEM_TYPE])

        push_content_notification([updated, original])
        get_resource_service('archive_broadcast').reset_broadcast_status(updates, original)

        if updates.get('profile'):
            get_resource_service('content_types').set_used([updates.get('profile')])

        self.cropService.update_media_references(updates, original)

    def on_replace(self, document, original):
        document[ITEM_OPERATION] = ITEM_UPDATE
        remove_unwanted(document)
        user = get_user()
        lock_user = original.get('lock_user', None)
        force_unlock = document.get('force_unlock', False)
        user_id = str(user.get('_id'))
        if lock_user and str(lock_user) != user_id and not force_unlock:
            raise SuperdeskApiError.forbiddenError('The item was locked by another user')
        document['versioncreated'] = utcnow()
        set_item_expiry(document, original)
        document['version_creator'] = user_id
        if force_unlock:
            del document['force_unlock']

    def on_replaced(self, document, original):
        get_component(ItemAutosave).clear(original['_id'])
        add_activity(ACTIVITY_UPDATE, 'replaced item {{ type }} about {{ subject }}',
                     self.datasource, item=original,
                     type=original['type'], subject=get_subject(original))
        push_content_notification([document, original])
        self.cropService.update_media_references(document, original)

    def on_deleted(self, doc):
        get_component(ItemAutosave).clear(doc['_id'])
        if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_deleted(doc)

        remove_media_files(doc)

        add_activity(ACTIVITY_DELETE, 'removed item {{ type }} about {{ subject }}',
                     self.datasource, item=doc,
                     type=doc[ITEM_TYPE], subject=get_subject(doc))
        push_expired_notification([doc.get(config.ID_FIELD)])
        app.on_archive_item_deleted(doc)

    def replace(self, id, document, original):
        return self.restore_version(id, document, original) or super().replace(id, document, original)

    def find_one(self, req, **lookup):
        item = super().find_one(req, **lookup)

        if item and str(item.get('task', {}).get('stage', '')) in \
                get_resource_service('users').get_invisible_stages_ids(get_user().get('_id')):
            raise SuperdeskApiError.forbiddenError("User does not have permissions to read the item.")

        handle_existing_data(item)
        return item

    def restore_version(self, id, doc, original):
        item_id = id
        old_version = int(doc.get('old_version', 0))
        last_version = int(doc.get('last_version', 0))
        if (not all([item_id, old_version, last_version])):
            return None

        old = get_resource_service('archive_versions').find_one(req=None, _id_document=item_id,
                                                                _current_version=old_version)
        if old is None:
            raise SuperdeskApiError.notFoundError('Invalid version %s' % old_version)

        curr = get_resource_service(SOURCE).find_one(req=None, _id=item_id)
        if curr is None:
            raise SuperdeskApiError.notFoundError('Invalid item id %s' % item_id)

        if curr[config.VERSION] != last_version:
            raise SuperdeskApiError.preconditionFailedError('Invalid last version %s' % last_version)

        old['_id'] = old['_id_document']
        old['_updated'] = old['versioncreated'] = utcnow()
        set_item_expiry(old, doc)
        old.pop('_id_document', None)
        old.pop(SIGN_OFF, None)
        old[ITEM_OPERATION] = ITEM_RESTORE

        resolve_document_version(old, SOURCE, 'PATCH', curr)
        remove_unwanted(old)
        set_sign_off(updates=old, original=curr)

        super().replace(id=item_id, document=old, original=curr)

        old.pop('old_version', None)
        old.pop('last_version', None)

        doc.update(old)
        return item_id

    def duplicate_content(self, original_doc, state=None):
        """
        Duplicates the 'original_doc' including its version history. Copy and Duplicate actions use this method.

        :return: guid of the duplicated article
        """

        if original_doc.get(ITEM_TYPE, '') == CONTENT_TYPE.COMPOSITE:
            for groups in original_doc.get('groups'):
                if groups.get('id') != 'root':
                    associations = groups.get('refs', [])
                    for assoc in associations:
                        if assoc.get(RESIDREF):
                            item, _item_id, _endpoint = self.packageService.get_associated_item(assoc)
                            assoc[RESIDREF] = assoc['guid'] = self.duplicate_content(item)

        return self._duplicate_item(original_doc, state)

    def _duplicate_item(self, original_doc, state=None):
        """Duplicates an item.

        Duplicates the 'original_doc' including its version history. If the article being duplicated is contained
        in a desk then the article state is changed to Submitted.

        :return: guid of the duplicated article
        """

        new_doc = original_doc.copy()
        self._remove_after_copy(new_doc)
        on_duplicate_item(new_doc, original_doc)
        resolve_document_version(new_doc, SOURCE, 'PATCH', new_doc)

        if original_doc.get('task', {}).get('desk') is not None and new_doc.get(ITEM_STATE) != CONTENT_STATE.SUBMITTED:
            new_doc[ITEM_STATE] = CONTENT_STATE.SUBMITTED

        if state:
            new_doc[ITEM_STATE] = state

        convert_task_attributes_to_objectId(new_doc)
        get_model(ItemModel).create([new_doc])
        self._duplicate_versions(original_doc['_id'], new_doc)
        self._duplicate_history(original_doc['_id'], new_doc)
        app.on_archive_item_updated({'duplicate_id': new_doc['guid']}, original_doc, ITEM_DUPLICATE)
        app.on_archive_item_updated({'duplicate_id': original_doc['_id']}, new_doc, ITEM_DUPLICATED_FROM)

        return new_doc['guid']

    def _remove_after_copy(self, copied_item):
        """Removes the properties which doesn't make sense to have for an item after copy.
        """
        # get the archive schema keys
        archive_schema_keys = list(app.config['DOMAIN'][SOURCE]['schema'].keys())
        archive_schema_keys.extend([config.ID_FIELD, config.LAST_UPDATED, config.DATE_CREATED,
                                    config.VERSION, config.ETAG])

        # Delete the keys that are not part of archive schema.
        keys_to_delete = [key for key in copied_item.keys() if key not in archive_schema_keys]
        keys_to_delete.extend([config.ID_FIELD, 'guid', LINKED_IN_PACKAGES, EMBARGO, PUBLISH_SCHEDULE,
                               SCHEDULE_SETTINGS, 'lock_time', 'lock_action', 'lock_session', 'lock_user', SIGN_OFF,
                               'rewritten_by', 'rewrite_of', 'rewrite_sequence', 'highlights', '_type', 'event_id'])

        for key in keys_to_delete:
            copied_item.pop(key, None)

        task = copied_item.get('task', {})
        task.pop(LAST_PRODUCTION_DESK, None)
        task.pop(LAST_AUTHORING_DESK, None)

    def _duplicate_versions(self, old_id, new_doc):
        """Duplicates versions for an item.

        Duplicates the versions of the article identified by old_id. The identifiers in each version
        are changed to those of new_doc.

        :param old_id: identifier to fetch versions
        :param new_doc: identifiers from this doc will be used to create versions for the duplicated item.
        """
        resource_def = app.config['DOMAIN']['archive']
        version_id = versioned_id_field(resource_def)
        old_versions = get_resource_service('archive_versions').get(req=None, lookup={version_id: old_id})

        new_versions = []
        for old_version in old_versions:
            old_version[version_id] = new_doc[config.ID_FIELD]
            del old_version[config.ID_FIELD]

            old_version['guid'] = new_doc['guid']
            old_version['unique_name'] = new_doc['unique_name']
            old_version['unique_id'] = new_doc['unique_id']
            old_version['versioncreated'] = utcnow()
            if old_version[config.VERSION] == new_doc[config.VERSION]:
                old_version[ITEM_OPERATION] = new_doc[ITEM_OPERATION]
            new_versions.append(old_version)

        last_version = deepcopy(new_doc)
        last_version['_id_document'] = new_doc['_id']
        del last_version['_id']
        new_versions.append(last_version)
        if new_versions:
            get_resource_service('archive_versions').post(new_versions)

    def _duplicate_history(self, old_id, new_doc):
        """Duplicates history for an item.

        Duplicates the history of the article identified by old_id. The identifiers in each history entry
        are changed to those of new_doc.

        :param old_id: identifier to fetch history
        :param new_doc: identifiers from this doc will be used to create version history for the duplicated item.
        """
        resource_def = app.config['DOMAIN']['archive']
        version_id = versioned_id_field(resource_def)
        old_history_items = get_resource_service('archive_history').get(req=None, lookup={'item_id': old_id})

        new_history_items = []
        for old_history_item in old_history_items:
            old_history_item[version_id] = new_doc[config.ID_FIELD]
            del old_history_item[config.ID_FIELD]
            old_history_item['item_id'] = new_doc['guid']
            new_history_items.append(old_history_item)

        if new_history_items:
            get_resource_service('archive_history').post(new_history_items)

    def update(self, id, updates, original):
        # this needs to be here as resolve_nested_documents (in eve) will add the schedule_settings
        if PUBLISH_SCHEDULE in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
            self.deschedule_item(updates, original)  # this is a deschedule action

        return super().update(id, updates, original)

    def deschedule_item(self, updates, original):
        """Deschedule an item.

        This operation removes the item from the publish queue and the published collection.

        :param dict updates: updates for the document
        :param original: original document
        """
        updates[ITEM_STATE] = CONTENT_STATE.PROGRESS
        updates[PUBLISH_SCHEDULE] = original[PUBLISH_SCHEDULE]
        updates[SCHEDULE_SETTINGS] = original[SCHEDULE_SETTINGS]
        updates[ITEM_OPERATION] = ITEM_DESCHEDULE
        # delete entry from published repo
        get_resource_service('published').delete_by_article_id(original['_id'])

    def can_edit(self, item, user_id):
        """
        Determines if the user can edit the item or not.
        """
        # TODO: modify this function when read only permissions for stages are implemented
        # TODO: and Content state related checking.

        if not current_user_has_privilege('archive'):
            return False, 'User does not have sufficient permissions.'

        item_location = item.get('task')

        if item_location:
            if item_location.get('desk'):
                if not superdesk.get_resource_service('user_desks').is_member(user_id, item_location.get('desk')):
                    return False, 'User is not a member of the desk.'
            elif item_location.get('user'):
                if not str(item_location.get('user')) == str(user_id):
                    return False, 'Item belongs to another user.'

        return True, ''

    def delete_by_article_ids(self, ids):
        """Remove the content

        :param list ids: list of ids to be removed
        """
        version_field = versioned_id_field(app.config['DOMAIN']['archive_versions'])
        get_resource_service('archive_versions').delete_action(lookup={version_field: {'$in': ids}})
        super().delete_action({config.ID_FIELD: {'$in': ids}})

    def __is_req_for_save(self, doc):
        """Checks if doc contains req_for_save key.

        Patch of /api/archive is being used in multiple places. This method determines whether the patch
        was triggered by a user or not.

        :param dictionary doc: doc to test
        """

        if 'req_for_save' in doc:
            req_for_save = doc['req_for_save']
            del doc['req_for_save']

            return req_for_save == 'true'

        return True

    def validate_embargo(self, item):
        """Validates the embargo of the item.

        Following are checked:
            1. Item can't be a package or a take or a re-write of another story
            2. Publish Schedule and Embargo are mutually exclusive
            3. Always a future date except in case of Corrected and Killed.
        :raises: SuperdeskApiError.badRequestError() if the validation fails
        """

        if item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE:
            if EMBARGO in item:
                embargo = item.get(SCHEDULE_SETTINGS, {}).get('utc_{}'.format(EMBARGO))
                if embargo:
                    if item.get(PUBLISH_SCHEDULE) or item[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
                        raise SuperdeskApiError.badRequestError("An item can't have both Publish Schedule and Embargo")

                    if (item[ITEM_STATE] not in {CONTENT_STATE.KILLED, CONTENT_STATE.SCHEDULED}) \
                            and embargo <= utcnow():
                        raise SuperdeskApiError.badRequestError("Embargo cannot be earlier than now")

                    package = TakesPackageService().get_take_package(item)
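                    # a lone take may carry an embargo; only multi-take stories are rejected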
                    if package and package.get(SEQUENCE, 1) > 1:
                        raise SuperdeskApiError.badRequestError("Takes doesn't support Embargo")

                    if item.get('rewrite_of'):
                        raise SuperdeskApiError.badRequestError("Rewrites doesn't support Embargo")

                    if not isinstance(embargo, datetime.date) or not embargo.time():
                        raise SuperdeskApiError.badRequestError("Invalid Embargo")

        elif is_normal_package(item):
            if item.get(EMBARGO):
                raise SuperdeskApiError.badRequestError("A Package doesn't support Embargo")

            self.packageService.check_if_any_item_in_package_has_embargo(item)

    def _validate_updates(self, original, updates, user):
        """Validates updates to the article for the below conditions.

        If any of these conditions are met then an exception is raised:
            1.  Is the article locked by a user other than the one requesting the update?
            2.  Is the state of the article Killed?
            3.  Is the user trying to update the package with Public Service Announcements?
            4.  Is the user authorized to update the unique name of the article?
            5.  Is the user trying to update the genre of a broadcast article?
            6.  Is the article being scheduled while it is in a package?
            7.  Is the article being scheduled with an invalid schedule timestamp?
            8.  Does the article have valid crops if the article type is a picture?
            9.  Is the article a valid package if the article type is a package?
            10. Does the article have a valid Embargo?
            11. Make sure that there are no duplicate anpa_category codes in the article.
            12. Make sure there are no duplicate subjects in the update.

        :raises:
            SuperdeskApiError.forbiddenError()
                - if the state of the article is killed, the user is not authorized to update the unique name,
                  or the article is locked by another user
            SuperdeskApiError.badRequestError()
                - if Public Service Announcements are being added to a package, the genre of a broadcast is
                  being updated, the schedule is invalid, or the updates contain duplicate anpa_category or
                  subject codes
        """
        updated = original.copy()
        updated.update(updates)

        lock_user = original.get('lock_user', None)
        force_unlock = updates.get('force_unlock', False)
        str_user_id = str(user.get(config.ID_FIELD)) if user else None

        if lock_user and str(lock_user) != str_user_id and not force_unlock:
            raise SuperdeskApiError.forbiddenError('The item was locked by another user')

        if original.get(ITEM_STATE) == CONTENT_STATE.KILLED:
            raise SuperdeskApiError.forbiddenError("Item isn't in a valid state to be updated.")

        if updates.get('body_footer') and is_normal_package(original):
            raise SuperdeskApiError.badRequestError("Package doesn't support Public Service Announcements")

        if 'unique_name' in updates and not is_admin(user) \
                and (user['active_privileges'].get('metadata_uniquename', 0) == 0):
            raise SuperdeskApiError.forbiddenError("Unauthorized to modify Unique Name")

        # if broadcast then update to genre is not allowed.
        if original.get('broadcast') and updates.get('genre') and \
                any(genre.get('qcode', '').lower() != BROADCAST_GENRE.lower() for genre in updates.get('genre')):
            raise SuperdeskApiError.badRequestError('Cannot change the genre for broadcast content.')

        if PUBLISH_SCHEDULE in updates or "schedule_settings" in updates:
            if is_item_in_package(original):
                raise SuperdeskApiError.badRequestError(
                    'This item is in a package and it needs to be removed before the item can be scheduled!')

            package = TakesPackageService().get_take_package(original) or {}
            update_schedule_settings(updated, PUBLISH_SCHEDULE, updated.get(PUBLISH_SCHEDULE))

            if updates.get(PUBLISH_SCHEDULE):
                validate_schedule(updated.get(SCHEDULE_SETTINGS, {}).get('utc_{}'.format(PUBLISH_SCHEDULE)),
                                  package.get(SEQUENCE, 1))

            updates[SCHEDULE_SETTINGS] = updated.get(SCHEDULE_SETTINGS, {})

        if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:
            CropService().validate_multiple_crops(updates, original)
        elif original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_update(updates, original)

        # update the embargo date
        update_schedule_settings(updated, EMBARGO, updated.get(EMBARGO))
        # Do the validation after Circular Reference check passes in Package Service
        self.validate_embargo(updated)
        if EMBARGO in updates or "schedule_settings" in updates:
            updates[SCHEDULE_SETTINGS] = updated.get(SCHEDULE_SETTINGS, {})

        # Ensure that there are no duplicate categories in the update
        category_qcodes = [q['qcode'] for q in updates.get('anpa_category', []) or []]
        if category_qcodes and len(category_qcodes) != len(set(category_qcodes)):
            raise SuperdeskApiError.badRequestError("Duplicate category codes are not allowed")

        # Ensure that there are no duplicate subjects in the update
        subject_qcodes = [q['qcode'] for q in updates.get('subject', []) or []]
        if subject_qcodes and len(subject_qcodes) != len(set(subject_qcodes)):
            raise SuperdeskApiError.badRequestError("Duplicate subjects are not allowed")

    def _add_system_updates(self, original, updates, user):
        """Adds system updates to item.

        As the name suggests, this method adds properties which are derived based on updates sent in the request.
            1. Sets item operation, version created, version creator, sign off and word count.
            2. Resets Item Expiry
        """

        convert_task_attributes_to_objectId(updates)

        updates[ITEM_OPERATION] = ITEM_UPDATE
        updates.setdefault('original_creator', original.get('original_creator'))
        updates['versioncreated'] = utcnow()
        updates['version_creator'] = str(user.get(config.ID_FIELD)) if user else None

        update_word_count(updates, original)
        update_version(updates, original)

        set_item_expiry(updates, original)
        set_sign_off(updates, original=original)
        set_dateline(updates, original)

        # Clear publish_schedule field
        if updates.get(PUBLISH_SCHEDULE) \
                and datetime.datetime.fromtimestamp(0).date() == updates.get(PUBLISH_SCHEDULE).date():
            updates[PUBLISH_SCHEDULE] = None
            updates[SCHEDULE_SETTINGS] = {}

        if updates.get('force_unlock', False):
            del updates['force_unlock']

    def get_expired_items(self, expiry_datetime, invalid_only=False):
        """Get the expired items.

        Where content state is not scheduled and the item matches given parameters

        :param datetime expiry_datetime: expiry datetime
        :param bool invalid_only: if True, return only invalid items
        :return: expired non-published items, yielded in batches
        """
        unique_id = 0

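        # page through Mongo using unique_id as the cursor (keyset pagination)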
        while True:
            req = ParsedRequest()
            req.sort = 'unique_id'
            query = {
                '$and': [
                    {'expiry': {'$lte': date_to_str(expiry_datetime)}},
                    {'$or': [
                        {'task.desk': {'$ne': None}},
                        {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None}
                    ]}
                ]
            }

            query['$and'].append({'unique_id': {'$gt': unique_id}})

            if invalid_only:
                query['$and'].append({'expiry_status': 'invalid'})
            else:
                query['$and'].append({'expiry_status': {'$ne': 'invalid'}})

            req.where = json.dumps(query)

            req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
            items = list(self.get_from_mongo(req=req, lookup=None))

            if not items:
                break

            unique_id = items[-1]['unique_id']
            yield items

    def _add_desk_metadata(self, updates, original):
        """Populate updates metadata from item desk in case it's set.

        It will only add data which is not set yet on the item.

        :param updates: updates to item that should be saved
        :param original: original item version before update
        """
        return get_resource_service('desks').apply_desk_metadata(updates, original)
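
The get_expired_items generator above pages through Mongo with a keyset cursor (unique_id greater than the last id seen) rather than skip/limit, so each query stays cheap no matter how far the scan has progressed. A minimal, self-contained sketch of the same pattern; fetch_page and the unique_id field shape are illustrative assumptions, not Superdesk API:

def paginate_by_key(fetch_page, page_size=100):
    """Yield batches of items sorted by a monotonically increasing unique_id.

    fetch_page(last_id, limit) is assumed to return up to `limit` items
    with item['unique_id'] > last_id, sorted ascending.
    """
    last_id = 0
    while True:
        batch = fetch_page(last_id, page_size)
        if not batch:
            break
        last_id = batch[-1]['unique_id']  # resume point for the next query
        yield batch

As with get_expired_items, a consumer iterates batches (for batch in paginate_by_key(fetch): ...) rather than a flat cursor.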
Beispiel #32
0
    def on_update(self, updates, original):
        updates[ITEM_OPERATION] = ITEM_SPIKE
        takes_service = TakesPackageService()
        if not takes_service.can_spike_takes_package_item(original):
            raise SuperdeskApiError.badRequestError(message="Only last take of the package can be spiked.")
Beispiel #33
0
    def on_update(self, updates, original):
        updates[ITEM_OPERATION] = ITEM_UPDATE
        is_update_allowed(original)
        user = get_user()

        if 'publish_schedule' in updates and original['state'] == 'scheduled':
            # this is a deschedule action
            self.deschedule_item(updates, original)
            # check if there is a takes package and deschedule the takes package.
            package = TakesPackageService().get_take_package(original)
            if package and package.get('state') == 'scheduled':
                package_updates = {'publish_schedule': None, 'groups': package.get('groups')}
                self.patch(package.get(config.ID_FIELD), package_updates)
            return

        if updates.get('publish_schedule'):

            if datetime.datetime.fromtimestamp(0).date() == updates.get('publish_schedule').date():
                # publish_schedule field will be cleared
                updates['publish_schedule'] = None
            else:
                # validate the schedule
                if is_item_in_package(original):
                    raise SuperdeskApiError.badRequestError(message='This item is in a package; '
                                                            'it needs to be removed before the item can be scheduled!')
                package = TakesPackageService().get_take_package(original) or {}
                validate_schedule(updates.get('publish_schedule'), package.get(SEQUENCE, 1))

        if 'unique_name' in updates and not is_admin(user) \
                and (user['active_privileges'].get('metadata_uniquename', 0) == 0):
            raise SuperdeskApiError.forbiddenError("Unauthorized to modify Unique Name")

        remove_unwanted(updates)

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        lock_user = original.get('lock_user', None)
        force_unlock = updates.get('force_unlock', False)

        updates.setdefault('original_creator', original.get('original_creator'))

        str_user_id = str(user.get('_id')) if user else None
        if lock_user and str(lock_user) != str_user_id and not force_unlock:
            raise SuperdeskApiError.forbiddenError('The item was locked by another user')

        updates['versioncreated'] = utcnow()
        set_item_expiry(updates, original)
        updates['version_creator'] = str_user_id
        set_sign_off(updates, original=original)
        update_word_count(updates)

        if force_unlock:
            del updates['force_unlock']

        # create crops
        crop_service = ArchiveCropService()
        crop_service.validate_multiple_crops(updates, original)
        crop_service.create_multiple_crops(updates, original)

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_update(updates, original)

        update_version(updates, original)

        # Do the validation after Circular Reference check passes in Package Service
        updated = original.copy()
        updated.update(updates)
        self.validate_embargo(updated)
Beispiel #34
0
    def is_last_take(self, article):
        article[config.ID_FIELD] = article.get('item_id',
                                               article.get(config.ID_FIELD))
        return TakesPackageService().is_last_takes_package_item(article)
Beispiel #35
0
    def _validate_take(self, original):
        takes_service = TakesPackageService()
        if not takes_service.is_last_takes_package_item(original):
            raise SuperdeskApiError.badRequestError(message="Only last take of the package can be spiked.")
Beispiel #36
0
    def on_update(self, updates, original):
        updates[ITEM_OPERATION] = ITEM_UPDATE
        is_update_allowed(original)
        user = get_user()

        if 'publish_schedule' in updates and original['state'] == 'scheduled':
            # this is a deschedule action
            self.deschedule_item(updates, original)
            # check if there is a takes package and deschedule the takes package.
            package = TakesPackageService().get_take_package(original)
            if package and package.get('state') == 'scheduled':
                package_updates = {
                    'publish_schedule': None,
                    'groups': package.get('groups')
                }
                self.patch(package.get(config.ID_FIELD), package_updates)
            return

        if updates.get('publish_schedule'):

            if datetime.datetime.fromtimestamp(0).date() == updates.get(
                    'publish_schedule').date():
                # publish_schedule field will be cleared
                updates['publish_schedule'] = None
            else:
                # validate the schedule
                if is_item_in_package(original):
                    raise SuperdeskApiError.\
                        badRequestError(message='This item is in a package; '
                                                'it needs to be removed before the item can be scheduled!')
                package = TakesPackageService().get_take_package(
                    original) or {}
                validate_schedule(updates.get('publish_schedule'),
                                  package.get(SEQUENCE, 1))

        if 'unique_name' in updates and not is_admin(user) \
                and (user['active_privileges'].get('metadata_uniquename', 0) == 0):
            raise SuperdeskApiError.forbiddenError(
                "Unauthorized to modify Unique Name")

        remove_unwanted(updates)

        if self.__is_req_for_save(updates):
            update_state(original, updates)

        lock_user = original.get('lock_user', None)
        force_unlock = updates.get('force_unlock', False)

        updates.setdefault('original_creator',
                           original.get('original_creator'))

        str_user_id = str(user.get('_id')) if user else None
        if lock_user and str(lock_user) != str_user_id and not force_unlock:
            raise SuperdeskApiError.forbiddenError(
                'The item was locked by another user')

        updates['versioncreated'] = utcnow()
        set_item_expiry(updates, original)
        updates['version_creator'] = str_user_id
        set_sign_off(updates, original=original)
        update_word_count(updates)

        if force_unlock:
            del updates['force_unlock']

        # create crops
        crop_service = ArchiveCropService()
        crop_service.validate_multiple_crops(updates, original)
        crop_service.create_multiple_crops(updates, original)

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_update(updates, original)

        update_version(updates, original)

        # Do the validation after Circular Reference check passes in Package Service
        updated = original.copy()
        updated.update(updates)
        self.validate_embargo(updated)
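
Both on_update variants above enforce the same lock rule: a locked item may only be saved by the lock holder unless the request explicitly forces the unlock. A hedged, standalone rendering of that guard (the function name and exception type are illustrative, not Superdesk code):

def assert_can_save(lock_user, current_user_id, force_unlock=False):
    """Raise when the item is locked by a different user and the unlock is not forced."""
    if lock_user and str(lock_user) != str(current_user_id) and not force_unlock:
        raise PermissionError('The item was locked by another user')

Comparing string forms mirrors the original, where lock_user may be an ObjectId while the session user id is a string.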
Beispiel #37
0
class BasePublishService(BaseService):
    """Base service for different "publish" services."""

    publish_type = 'publish'
    published_state = 'published'

    non_digital = partial(filter, lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE)
    digital = partial(filter, lambda s: (s.get('subscriber_type', '') in {SUBSCRIBER_TYPES.DIGITAL,
                                                                          SUBSCRIBER_TYPES.ALL}))
    takes_package_service = TakesPackageService()
    package_service = PackageService()

    def on_update(self, updates, original):
        self._refresh_associated_items(original)
        self._validate(original, updates)
        self._set_updates(original, updates, updates.get(config.LAST_UPDATED, utcnow()))
        convert_task_attributes_to_objectId(updates)  # task ids arrive as strings; store them as ObjectIds
        self._process_publish_updates(original, updates)
        self._mark_media_item_as_used(updates, original)

    def on_updated(self, updates, original):
        original = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        updates.update(original)

        if updates[ITEM_OPERATION] != ITEM_KILL and \
                original.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            get_resource_service('archive_broadcast').on_broadcast_master_updated(updates[ITEM_OPERATION], original)

        get_resource_service('archive_broadcast').reset_broadcast_status(updates, original)
        push_content_notification([updates])
        self._import_into_legal_archive(updates)
        CropService().update_media_references(updates, original, True)
        superdesk.item_published.send(self, item=original)

    def update(self, id, updates, original):
        """
        Handles workflow of each Publish, Corrected and Killed.
        """
        try:
            user = get_user()
            auto_publish = updates.get('auto_publish', False)

            if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._publish_package_items(original, updates)
                self._update_archive(original, updates, should_insert_into_versions=auto_publish)
            else:
                self._refresh_associated_items(original)
                updated = deepcopy(original)
                updated.update(deepcopy(updates))

                if updates.get(ASSOCIATIONS):
                    self._refresh_associated_items(updated)  # updates got lost with update

                # process takes package for published or corrected items
                # if no_takes is true but takes package exists then process takes package.
                if self.published_state != CONTENT_STATE.KILLED and \
                        (not app.config.get('NO_TAKES', False) or
                         self.takes_package_service.get_take_package_id(updated)):
                    self._process_takes_package(original, updated, updates)

                self._update_archive(original, updates, should_insert_into_versions=auto_publish)
                self.update_published_collection(published_item_id=original[config.ID_FIELD], updated=updated)

            from apps.publish.enqueue import enqueue_published
            enqueue_published.apply_async()

            push_notification('item:publish', item=str(id),
                              unique_name=original['unique_name'],
                              desk=str(original.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')))
        except SuperdeskApiError:
            raise
        except KeyError as e:
            logger.exception(e)
            raise SuperdeskApiError.badRequestError(
                message="Key is missing on article to be published: {}".format(str(e))
            )
        except Exception as e:
            raise SuperdeskApiError.internalError(message="Failed to publish the item: {}".format(str(id)), exception=e)

    def _process_takes_package(self, original, updated, updates):
        if original[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED} \
                and not is_genre(original, BROADCAST_GENRE):
            # check if item is in a digital package
            last_updated = updates.get(config.LAST_UPDATED, utcnow())
            package = self.takes_package_service.get_take_package(original)
            if not package:
                # If the item is text or preformatted it needs to be sent to
                # digital subscribers, so package the item as a take.
                package_id = self.takes_package_service.package_story_as_a_take(updated, {}, None)
                package = get_resource_service(ARCHIVE).find_one(req=None, _id=package_id)
                updates[LINKED_IN_PACKAGES] = updated[LINKED_IN_PACKAGES]

            package_id = package[config.ID_FIELD]

            package_updates = self.process_takes(updates_of_take_to_be_published=updates,
                                                 original_of_take_to_be_published=original,
                                                 package=package)

            # If the original package is corrected then the next take shouldn't change it
            # back to 'published'
            preserve_state = package.get(ITEM_STATE, '') == CONTENT_STATE.CORRECTED and \
                updates.get(ITEM_OPERATION, ITEM_PUBLISH) == ITEM_PUBLISH
            self._set_updates(package, package_updates, last_updated, preserve_state)
            package_updates.setdefault(ITEM_OPERATION, updates.get(ITEM_OPERATION, ITEM_PUBLISH))

            if self.published_state == CONTENT_STATE.KILLED:
                package_copy = deepcopy(package)
                package_copy.update(package_updates)
                self.apply_kill_override(package_copy, package_updates)

            self._update_archive(package, package_updates)
            package.update(package_updates)
            self.update_published_collection(published_item_id=package_id)
            self._import_into_legal_archive(package)
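
        # Note (added): preserve_state keeps a package already in 'corrected'
        # state from being flipped back to 'published' when a later take is
        # merely published; _set_updates skips set_state in that case.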

    def is_targeted(self, article, target=None):
        """Checks if article is targeted.

        Returns True if the given article has been targeted by region or
        subscriber type or specific subscribers.

        :param article: Article to check
        :param target: Optional specific target to check if exists
        :return:
        """
        if target:
            return len(article.get(target, [])) > 0
        else:
            return len(article.get('target_regions', []) +
                       article.get('target_types', []) +
                       article.get('target_subscribers', [])) > 0
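
        # Worked example (added): an article with target_types=[{'name': 'wire'}]
        # is targeted; with no target_* lists the concatenated list is empty and
        # the length check returns False.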

    def _validate(self, original, updates):
        self.raise_if_invalid_state_transition(original)

        updated = original.copy()
        updated.update(updates)

        self.raise_if_not_marked_for_publication(updated)

        takes_package = self.takes_package_service.get_take_package(original)

        if self.publish_type == 'publish':
            # validate if take can be published
            if takes_package and not self.takes_package_service.can_publish_take(
                    takes_package, updates.get(SEQUENCE, original.get(SEQUENCE, 1))):
                raise PublishQueueError.previous_take_not_published_error(
                    Exception("Previous takes are not published."))

            update_schedule_settings(updated, PUBLISH_SCHEDULE, updated.get(PUBLISH_SCHEDULE))
            validate_schedule(updated.get(SCHEDULE_SETTINGS, {}).get('utc_{}'.format(PUBLISH_SCHEDULE)),
                              takes_package.get(SEQUENCE, 1) if takes_package else 1)

        if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and updates.get(EMBARGO):
            update_schedule_settings(updated, EMBARGO, updated.get(EMBARGO))
            get_resource_service(ARCHIVE).validate_embargo(updated)

        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            if updates.get(EMBARGO) and not original.get(EMBARGO):
                raise SuperdeskApiError.badRequestError("Embargo can't be set after publishing")

            if updates.get('dateline'):
                raise SuperdeskApiError.badRequestError("Dateline can't be modified after publishing")

        if self.publish_type == ITEM_PUBLISH and updated.get('rewritten_by'):
            # if the rewrite is already published then this story cannot be published
            rewritten_by = get_resource_service(ARCHIVE).find_one(req=None, _id=updated.get('rewritten_by'))
            if rewritten_by and rewritten_by.get(ITEM_STATE) in PUBLISH_STATES:
                raise SuperdeskApiError.badRequestError("Cannot publish the story after Update is published.!")

        publish_type = 'auto_publish' if updates.get('auto_publish') else self.publish_type
        validate_item = {'act': publish_type, 'type': original['type'], 'validate': updated}
        validation_errors = get_resource_service('validate').post([validate_item])
        if validation_errors[0]:
            raise ValidationError(validation_errors)

        validation_errors = []
        self._validate_associated_items(original, takes_package, validation_errors)

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self._validate_package(original, updates, validation_errors)

        if len(validation_errors) > 0:
            raise ValidationError(validation_errors)

    def _validate_package(self, package, updates, validation_errors):
        # make sure package is not scheduled or spiked
        if package[ITEM_STATE] in (CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
            validation_errors.append('Package cannot be {}'.format(package[ITEM_STATE]))

        if package.get(EMBARGO):
            validation_errors.append('Package cannot have Embargo')

        items = self.package_service.get_residrefs(package)
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if a correction would leave the package empty. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                validation_errors.append("Corrected package cannot be empty!")

    def raise_if_not_marked_for_publication(self, original):
        if original.get('flags', {}).get('marked_for_not_publication', False):
            raise SuperdeskApiError.badRequestError('Cannot publish an item which is marked as Not for Publication')

    def raise_if_invalid_state_transition(self, original):
        if not is_workflow_state_transition_valid(self.publish_type, original[ITEM_STATE]):
            error_message = "Can't {} as item state is {}" if original[ITEM_TYPE] == CONTENT_TYPE.TEXT else \
                "Can't {} as either package state or one of the items state is {}"
            raise InvalidStateTransitionError(error_message.format(self.publish_type, original[ITEM_STATE]))

    def get_digital_id_for_package_item(self, package_item):
        """Finds the digital item id for a given item in a package.

        :param package_item: item in a package
        :return string: Digital item id if there's one otherwise id of package_item
        """
        if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            return package_item[config.ID_FIELD]
        else:
            package_item_takes_package_id = self.takes_package_service.get_take_package_id(package_item)
            if not package_item_takes_package_id:
                return package_item[config.ID_FIELD]
            return package_item_takes_package_id

    def _process_publish_updates(self, original, updates):
        """Common updates for published items."""
        desk = None
        if original.get('task', {}).get('desk'):
            desk = get_resource_service('desks').find_one(req=None, _id=original['task']['desk'])
        if not original.get('ingest_provider'):
            updates['source'] = desk['source'] if desk and desk.get('source', '') \
                else app.settings['DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES']
        updates['pubstatus'] = PUB_STATUS.CANCELED if self.publish_type == 'kill' else PUB_STATUS.USABLE
        self._set_item_expiry(updates, original)

    def _set_item_expiry(self, updates, original):
        """Set the expiry for the item.

        :param dict updates: doc on which publishing action is performed
        :param dict original: original document
        """
        desk_id = original.get('task', {}).get('desk')
        stage_id = original.get('task', {}).get('stage')
        offset = None

        if EMBARGO in updates or PUBLISH_SCHEDULE in updates:
            offset = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(updates, EMBARGO)
        elif EMBARGO in original or PUBLISH_SCHEDULE in original:
            offset = get_utc_schedule(original, PUBLISH_SCHEDULE) or get_utc_schedule(original, EMBARGO)

        if app.settings.get('PUBLISHED_CONTENT_EXPIRY_MINUTES'):
            updates['expiry'] = get_expiry_date(app.settings['PUBLISHED_CONTENT_EXPIRY_MINUTES'], offset=offset)
        else:
            updates['expiry'] = get_expiry(desk_id, stage_id, offset=offset)
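
        # Resolution order (summary, added): a schedule or embargo in the
        # incoming updates wins over one already on the item; the resulting
        # offset then anchors either the global PUBLISHED_CONTENT_EXPIRY_MINUTES
        # setting or the desk/stage expiry.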

    def _is_take_item(self, item):
        """Returns True if the item was a take."""
        return item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and \
            (not (self.is_targeted(item) or is_genre(item, BROADCAST_GENRE)))

    def process_takes(self, updates_of_take_to_be_published, package, original_of_take_to_be_published=None):
        """Process takes for publishing

        Primary rule for publishing a Take in Takes Package is: all previous takes must be published before a take
        can be published.

        Also generates the body_html of the takes package and makes sure the package
        metadata matches the metadata of the take being published.

        :param dict updates_of_take_to_be_published: updates for the take to be published
        :param dict package: Takes package to publish
        :param dict original_of_take_to_be_published: original of the take to be published
        :return: Takes Package Updates
        """

        takes = self.takes_package_service.get_published_takes(package)
        body_html = updates_of_take_to_be_published.get('body_html',
                                                        original_of_take_to_be_published.get('body_html', ''))
        package_updates = {}

        groups = package.get(GROUPS, [])
        if groups:
            take_refs = [ref for group in groups if group['id'] == 'main' for ref in group.get('refs', [])]
            sequence_num_of_take_to_be_published = 0
            take_article_id = updates_of_take_to_be_published.get(
                config.ID_FIELD, original_of_take_to_be_published[config.ID_FIELD])

            for r in take_refs:
                if r[GUID_FIELD] == take_article_id:
                    sequence_num_of_take_to_be_published = r[SEQUENCE]
                    r['is_published'] = True
                    break

            if takes and self.published_state != CONTENT_STATE.KILLED:
                body_html_list = [take.get('body_html', '') for take in takes]
                if self.published_state == CONTENT_STATE.PUBLISHED:
                    body_html_list.append(body_html)
                else:
                    body_html_list[sequence_num_of_take_to_be_published - 1] = body_html

                package_updates['body_html'] = '<br>'.join(body_html_list)
            else:
                package_updates['body_html'] = body_html

            metadata_tobe_copied = self.takes_package_service.fields_for_creating_take.copy()
            metadata_tobe_copied.extend([PUBLISH_SCHEDULE, SCHEDULE_SETTINGS, 'byline', EMBARGO])
            if 'auto_publish' in updates_of_take_to_be_published:
                metadata_tobe_copied.extend(['auto_publish'])
            updated_take = original_of_take_to_be_published.copy()
            updated_take.update(updates_of_take_to_be_published)
            metadata_from = updated_take

            # only copy the abstract from the take when it is non-empty and differs from the package abstract.
            if metadata_from.get('abstract', '') != '' and \
                    metadata_from.get('abstract') != package.get('abstract'):
                metadata_tobe_copied.append('abstract')

            # this rule has changed to use the last published metadata
            # per ticket SD-3885
            # if self.published_state == 'corrected' and len(takes) > 1:
            #     # get the last take metadata only if there are more than one takes
            #     metadata_from = takes[-1]

            for metadata in metadata_tobe_copied:
                if metadata in metadata_from:
                    package_updates[metadata] = metadata_from.get(metadata)

            # rewire the takes package to the takes package of the 'rewrite_of' item
            if sequence_num_of_take_to_be_published == 1 and \
                    original_of_take_to_be_published.get('rewrite_of'):
                rewrite_of = self.find_one(req=None,
                                           _id=original_of_take_to_be_published.get('rewrite_of'))
                if rewrite_of:
                    rewrite_package = self.takes_package_service.get_take_package(rewrite_of)

                    if rewrite_package:
                        package_updates['rewrite_of'] = rewrite_package.get(config.ID_FIELD)

            if self.published_state == CONTENT_STATE.KILLED:
                # when killing, refresh the other take refs in the groups
                # to reflect the correct version, headline and slugline
                package_updates[ASSOCIATIONS] = None
                archive_service = get_resource_service(ARCHIVE)
                for ref in take_refs:
                    if ref.get(RESIDREF) != take_article_id:
                        archive_item = archive_service.find_one(req=None, _id=ref.get(RESIDREF))
                        ref['headline'] = archive_item.get('headline')
                        ref['slugline'] = archive_item.get('slugline')
                        ref[config.VERSION] = archive_item.get(config.VERSION)
            else:
                # update association for takes.
                self.takes_package_service.update_associations(package_updates, package, metadata_from)

            take_ref = next((ref for ref in take_refs if ref.get(RESIDREF) == take_article_id), None)
            if take_ref:
                # for published take update the version, headline and slugline
                take_ref['headline'] = updated_take.get('headline')
                take_ref['slugline'] = updated_take.get('slugline')
                take_ref[config.VERSION] = updated_take.get(config.VERSION)

            package_updates[GROUPS] = groups
            self._set_item_expiry(package_updates, package)

        return package_updates

    def _publish_package_items(self, package, updates):
        """Publishes all items of a package recursively then publishes the package itself.

        :param package: package to publish
        :param updates: payload
        """
        items = self.package_service.get_residrefs(package)

        if len(items) == 0 and self.publish_type == ITEM_PUBLISH:
            raise SuperdeskApiError.badRequestError("Empty package cannot be published!")

        removed_items = []
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if a correction would leave the package empty. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!")
            items.extend(added_items)

        if not updates.get('groups') and package.get('groups'):  # this saves some typing in tests
            updates['groups'] = package.get('groups')

        if items:
            archive_publish = get_resource_service('archive_publish')
            for guid in items:
                package_item = super().find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        "Package item with id: {} does not exist.".format(guid))

                if package_item[ITEM_STATE] not in PUBLISH_STATES:  # if the item is not published then publish it
                    if package_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        # if the item is a package do recursion to publish
                        sub_updates = {i: updates[i] for i in ['state', 'operation'] if i in updates}
                        sub_updates['groups'] = list(package_item['groups'])
                        self._publish_package_items(package_item, sub_updates)
                        self._update_archive(original=package_item, updates=sub_updates,
                                             should_insert_into_versions=False)
                    else:
                        # publish the item
                        package_item[PUBLISHED_IN_PACKAGE] = package[config.ID_FIELD]
                        archive_publish.patch(id=package_item.pop(config.ID_FIELD), updates=package_item)

                    insert_into_versions(id_=guid)

                elif guid in removed_items:
                    # remove the package information from the package item.
                    linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES)
                                          if linked.get(PACKAGE) != package.get(config.ID_FIELD)]
                    super().system_update(guid, {LINKED_IN_PACKAGES: linked_in_packages}, package_item)

                package_item = super().find_one(req=None, _id=guid)

                self.package_service.update_field_in_package(updates, package_item[config.ID_FIELD],
                                                             config.VERSION, package_item[config.VERSION])

                if package_item.get(ASSOCIATIONS):
                    self.package_service.update_field_in_package(
                        updates,
                        package_item[config.ID_FIELD],
                        ASSOCIATIONS,
                        package_item[ASSOCIATIONS]
                    )

        updated = deepcopy(package)
        updated.update(updates)
        self.update_published_collection(published_item_id=package[config.ID_FIELD], updated=updated)

    def update_published_collection(self, published_item_id, updated=None):
        """Updates the published collection with the published item.

        Set the last_published_version to false for previous versions of the published items.

        :param str published_item_id: _id of the document
        :param dict updated: updated values merged into the published copy
        """
        published_item = super().find_one(req=None, _id=published_item_id)
        published_item = copy(published_item)
        if updated:
            published_item.update(updated)
        published_item['is_take_item'] = self.takes_package_service.get_take_package_id(published_item) is not None
        if not published_item.get('digital_item_id'):
            published_item['digital_item_id'] = self.get_digital_id_for_package_item(published_item)
        get_resource_service(PUBLISHED).update_published_items(published_item_id, LAST_PUBLISHED_VERSION, False)
        return get_resource_service(PUBLISHED).post([published_item])

    def set_state(self, original, updates):
        """Set the state of the document based on the action (publish, correction, kill)

        :param dict original: original document
        :param dict updates: updates related to document
        """
        updates[PUBLISH_SCHEDULE] = None
        updates[SCHEDULE_SETTINGS] = {}
        updates[ITEM_STATE] = self.published_state

    def _set_updates(self, original, updates, last_updated, preserve_state=False):
        """Sets config.VERSION, config.LAST_UPDATED, ITEM_STATE in updates document.

        If item is being published and embargo is available then append Editorial Note with 'Embargoed'.

        :param dict original: original document
        :param dict updates: updates related to the original document
        :param datetime last_updated: datetime of the updates.
        """
        if not preserve_state:
            self.set_state(original, updates)
        updates.setdefault(config.LAST_UPDATED, last_updated)

        if original[config.VERSION] == updates.get(config.VERSION, original[config.VERSION]):
            resolve_document_version(document=updates, resource=ARCHIVE, method='PATCH', latest_doc=original)

        user = get_user()
        if user and user.get(config.ID_FIELD):
            updates['version_creator'] = user[config.ID_FIELD]

    def _update_archive(self, original, updates, versioned_doc=None, should_insert_into_versions=True):
        """Updates the articles into archive collection and inserts the latest into archive_versions.

        Also clears autosaved versions if any.

        :param versioned_doc: doc which can be inserted into archive_versions
        :param should_insert_into_versions: if True, inserts the latest document into the versions collection
        """

        self.backend.update(self.datasource, original[config.ID_FIELD], updates, original)
        app.on_archive_item_updated(updates, original, updates[ITEM_OPERATION])

        if should_insert_into_versions:
            if versioned_doc is None:
                insert_into_versions(id_=original[config.ID_FIELD])
            else:
                insert_into_versions(doc=versioned_doc)

        get_component(ItemAutosave).clear(original[config.ID_FIELD])

    def _get_changed_items(self, existing_items, updates):
        """Returns the added and removed items from existing_items.

        :param existing_items: Existing list
        :param updates: Changes
        :return: list of removed items and list of added items
        """
        if 'groups' in updates:
            new_items = self.package_service.get_residrefs(updates)
            removed_items = list(set(existing_items) - set(new_items))
            added_items = list(set(new_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []
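
        # Worked example (added): existing=['a', 'b', 'c'] and updated refs
        # ['b', 'c', 'd'] give removed=['a'], added=['d']; ordering is not
        # preserved because the diff goes through set().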

    def _validate_associated_items(self, original_item, takes_package, validation_errors=None):
        """Validates associated items.

        This function ensures that the unpublished content validates, that none of
        the content is locked by a session other than the publishing session, and
        that no killed or spiked content is allowed.

        :param original_item: item whose associations are validated
        :param takes_package: takes package of the item, if any
        :param validation_errors: list to which validation errors are appended
        """
        if validation_errors is None:  # avoid the shared mutable default argument
            validation_errors = []

        items = [value for value in (original_item.get(ASSOCIATIONS) or {}).values()]
        if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                not takes_package and self.publish_type == ITEM_PUBLISH:
            items.extend(self.package_service.get_residrefs(original_item))

        for item in items:
            if isinstance(item, dict):
                doc = item
            elif item:
                doc = super().find_one(req=None, _id=item)
            else:
                continue

            if not doc:
                continue

            if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                digital = self.takes_package_service.get_take_package(doc) or {}
                self._validate_associated_items(doc, digital, validation_errors)

            # make sure no items are killed or spiked or scheduled
            doc_item_state = doc.get(ITEM_STATE, CONTENT_STATE.PUBLISHED)
            if doc_item_state in (CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                validation_errors.append('Item cannot contain associated {} item'.format(doc[ITEM_STATE]))

            if doc.get(EMBARGO):
                validation_errors.append('Item cannot have associated items with Embargo')

            # don't validate items that have already been published
            if doc_item_state not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
                validate_item = {'act': self.publish_type, 'type': doc[ITEM_TYPE], 'validate': doc}
                if isinstance(item, dict):
                    validate_item['embedded'] = True
                errors = get_resource_service('validate').post([validate_item], headline=True)
                if errors[0]:
                    pre_errors = ['Associated item %s %s' % (doc.get('slugline', ''), error) for error in errors[0]]
                    validation_errors.extend(pre_errors)

            # check the locks on the items
            if doc.get('lock_session') and original_item.get('lock_session') != doc['lock_session']:
                validation_errors.append('{}: packaged item cannot be locked'.format(doc['headline']))

    def _import_into_legal_archive(self, doc):
        """Import into legal archive async

        :param {dict} doc: document to be imported
        """

        if doc.get(ITEM_STATE) != CONTENT_STATE.SCHEDULED:
            kwargs = {
                'item_id': doc.get(config.ID_FIELD)
            }

            # countdown=3 is for elasticsearch to be refreshed with archive and published changes
            import_into_legal_archive.apply_async(countdown=3, kwargs=kwargs)  # @UndefinedVariable

    def _apply_kill_template(self, item):
        # apply the kill template
        updates = render_content_template_by_name(item, 'kill')
        return updates

    def apply_kill_override(self, item, updates):
        """Applies kill override.

        Kill requires content to be generated based on the item getting killed (and not the
        item that is being actioned on).

        :param dict item: item to kill
        :param dict updates: updates that needs to be modified based on the template
        :return:
        """
        try:
            desk_name = get_resource_service('desks').get_desk_name(item.get('task', {}).get('desk'))
            city = get_dateline_city(item.get('dateline'))
            kill_header = json.loads(render_template('article_killed_override.json',
                                                     slugline=item.get('slugline', ''),
                                                     headline=item.get('headline', ''),
                                                     desk_name=desk_name,
                                                     city=city,
                                                     versioncreated=item.get('versioncreated',
                                                                             item.get(config.LAST_UPDATED)),
                                                     body_html=updates.get('body_html', ''),
                                                     update_headline=updates.get('headline', '')), strict=False)
            for key, value in kill_header.items():
                kill_header[key] = html.unescape(value)

            updates.update(kill_header)
        except Exception:
            logger.exception('Failed to apply kill header template to item {}.'.format(item))

    def _refresh_associated_items(self, original):
        """Refresh associated items before publishing

        Any updates made to basic metadata after the item was associated are carried
        over and used when validating those items.
        """
        associations = original.get(ASSOCIATIONS) or {}
        for _, item in associations.items():
            if isinstance(item, dict) and item.get(config.ID_FIELD):
                keys = DEFAULT_SCHEMA.keys()
                if app.settings.get('COPY_METADATA_FROM_PARENT') and item.get(ITEM_TYPE) in MEDIA_TYPES:
                    updates = original
                    keys = FIELDS_TO_COPY_FOR_ASSOCIATED_ITEM
                else:
                    updates = super().find_one(req=None, _id=item[config.ID_FIELD]) or {}

                update_item_data(item, updates, keys)

    def _mark_media_item_as_used(self, updates, original):
        if ASSOCIATIONS not in updates or not updates.get(ASSOCIATIONS):
            return

        for item_name, item_obj in updates.get(ASSOCIATIONS).items():
            if not (item_obj and config.ID_FIELD in item_obj):
                continue

            item_id = item_obj[config.ID_FIELD]
            media_item = {}
            if app.settings.get('COPY_METADATA_FROM_PARENT') and item_obj.get(ITEM_TYPE) in MEDIA_TYPES:
                stored_item = (original.get(ASSOCIATIONS) or {}).get(item_name) or item_obj
            else:
                media_item = stored_item = self.find_one(req=None, _id=item_id)
                if not stored_item:
                    continue

            # If the media item is not marked as 'used', mark it as used
            if original.get(ITEM_TYPE) == CONTENT_TYPE.TEXT and \
                    (item_obj is not stored_item or not stored_item.get('used')):
                archive_service = get_resource_service('archive')
                if media_item is not stored_item:
                    media_item = archive_service.find_one(req=None, _id=item_id)

                if media_item and not media_item.get('used'):
                    archive_service.system_update(media_item['_id'], {'used': True}, media_item)

                stored_item['used'] = True
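
process_takes above rebuilds the digital package body by splicing the take being published into the list of already-published takes and joining them with <br>. A minimal sketch of that splice with illustrative data (the helper name is an assumption, not Superdesk API):

def build_package_body(published_bodies, new_body, sequence, corrected=False):
    """A correction replaces its slot; a publish appends at the end."""
    bodies = list(published_bodies)  # copy so the caller's list is untouched
    if corrected:
        bodies[sequence - 1] = new_body
    else:
        bodies.append(new_body)
    return '<br>'.join(bodies)

# build_package_body(['take one'], 'take two', 2) -> 'take one<br>take two'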
Beispiel #38
0
class ArchiveBroadcastService(BaseService):

    takesService = TakesPackageService()
    packageService = PackageService()

    def create(self, docs):
        service = get_resource_service(SOURCE)
        item_id = request.view_args['item_id']
        item = service.find_one(req=None, _id=item_id)
        doc = docs[0]

        self._valid_broadcast_item(item)

        desk_id = doc.get('desk')
        desk = None

        if desk_id:
            desk = get_resource_service('desks').find_one(req=None, _id=desk_id)

        doc.pop('desk', None)
        doc['task'] = {}
        if desk:
            doc['task']['desk'] = desk.get(config.ID_FIELD)
            doc['task']['stage'] = desk.get('working_stage')

        doc['task']['user'] = get_user().get('_id')
        genre_list = get_resource_service('vocabularies').find_one(req=None, _id='genre') or {}
        broadcast_genre = [{'qcode': genre.get('qcode'), 'name': genre.get('name')}
                           for genre in genre_list.get('items', [])
                           if genre.get('qcode') == BROADCAST_GENRE and genre.get('is_active')]

        if not broadcast_genre:
            raise SuperdeskApiError.badRequestError(message="Cannot find the {} genre.".format(BROADCAST_GENRE))

        doc['broadcast'] = {
            'status': '',
            'master_id': item_id,
            'takes_package_id': self.takesService.get_take_package_id(item),
            'rewrite_id': item.get('rewritten_by')
        }

        doc['genre'] = broadcast_genre
        doc['family_id'] = item.get('family_id')

        for key in FIELDS_TO_COPY:
            doc[key] = item.get(key)

        resolve_document_version(document=doc, resource=SOURCE, method='POST')
        service.post(docs)
        insert_into_versions(id_=doc[config.ID_FIELD])
        build_custom_hateoas(CUSTOM_HATEOAS, doc)
        return [doc[config.ID_FIELD]]

    def _valid_broadcast_item(self, item):
        """Validates item for broadcast.

        Broadcast item can only be created for Text or Pre-formatted item.
        Item state needs to be Published or Corrected

        :param dict item: Item from which the broadcast item will be created
        """
        if not item:
            raise SuperdeskApiError.notFoundError(
                message="Cannot find the requested item id.")

        if item.get(ITEM_TYPE) not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            raise SuperdeskApiError.badRequestError(message="Invalid content type.")

        if item.get(ITEM_STATE) not in [CONTENT_STATE.CORRECTED, CONTENT_STATE.PUBLISHED]:
            raise SuperdeskApiError.badRequestError(message="Invalid content state.")

    def _get_broadcast_items(self, ids, include_archived_repo=False):
        """Returns list of broadcast items.

        Get the broadcast items for the master_id and takes_package_id

        :param list ids: list of item ids
        :param include_archived_repo True if archived repo needs to be included in search, default is False
        :return list: list of broadcast items
        """
        query = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': {'term': {'genre.name': BROADCAST_GENRE}},
                            'should': [
                                {'terms': {'broadcast.master_id': ids}},
                                {'terms': {'broadcast.takes_package_id': ids}}
                            ]
                        }
                    }
                }
            }
        }

        req = ParsedRequest()
        repos = 'archive,published'
        if include_archived_repo:
            repos = 'archive,published,archived'

        req.args = {'source': json.dumps(query), 'repo': repos}
        return get_resource_service('search').get(req=req, lookup=None)
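
        # Note (added): in the filtered bool above, must pins the genre while
        # the two terms clauses under should act as an OR: an item matches
        # when its broadcast.master_id or broadcast.takes_package_id is in ids.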

    def get_broadcast_items_from_master_story(self, item, include_archived_repo=False):
        """Get the broadcast items from the master story.

        :param dict item: master story item
        :param include_archived_repo True if archived repo needs to be included in search, default is False
        :return list: returns list of broadcast items
        """
        if is_genre(item, BROADCAST_GENRE):
            return []

        ids = [str(item.get(config.ID_FIELD))]
        if self.takesService.get_take_package_id(item):
            ids.append(str(self.takesService.get_take_package_id(item)))

        return list(self._get_broadcast_items(ids, include_archived_repo))

    def on_broadcast_master_updated(self, item_event, item,
                                    takes_package_id=None, rewrite_id=None):
        """Runs when master item is updated.

        This event is called when the master story is corrected, published,
        re-written, or a new take is created / the story is re-opened.

        :param str item_event: Item operations
        :param dict item: item on which operation performed.
        :param str takes_package_id: takes_package_id.
        :param str rewrite_id: re-written story id.
        """
        status = ''

        if not item or is_genre(item, BROADCAST_GENRE):
            return

        if item_event == ITEM_CREATE and takes_package_id:
            if RE_OPENS.lower() in str(item.get('anpa_take_key', '')).lower():
                status = 'Story Re-opened'
            else:
                status = 'New Take Created'

        elif item_event == ITEM_CREATE and rewrite_id:
            status = 'Master Story Re-written'
        elif item_event == ITEM_PUBLISH:
            status = 'Master Story Published'
        elif item_event == ITEM_CORRECT:
            status = 'Master Story Corrected'

        broadcast_items = self.get_broadcast_items_from_master_story(item)

        if not broadcast_items:
            return

        processed_ids = set()
        for broadcast_item in broadcast_items:
            try:
                if broadcast_item.get('lock_user'):
                    continue

                updates = {
                    'broadcast': broadcast_item.get('broadcast'),
                }

                if status:
                    updates['broadcast']['status'] = status

                if not updates['broadcast']['takes_package_id'] and takes_package_id:
                    updates['broadcast']['takes_package_id'] = takes_package_id

                if not updates['broadcast']['rewrite_id'] and rewrite_id:
                    updates['broadcast']['rewrite_id'] = rewrite_id

                if broadcast_item.get(config.ID_FIELD) not in processed_ids:
                    self._update_broadcast_status(broadcast_item, updates)
                    # list of ids that are processed.
                    processed_ids.add(broadcast_item.get(config.ID_FIELD))
            except Exception:
                logger.exception('Failed to update status for the broadcast item {}'.
                                 format(broadcast_item.get(config.ID_FIELD)))

    def _update_broadcast_status(self, item, updates):
        """Update the status of the broadcast item.

        :param dict item: broadcast item to be updated
        :param dict updates: broadcast updates
        """
        # update the published collection as well as archive.
        if item.get(ITEM_STATE) in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED]:
            get_resource_service('published').update_published_items(item.get(config.ID_FIELD),
                                                                     'broadcast', updates.get('broadcast'))

        archive_item = get_resource_service(SOURCE).find_one(req=None, _id=item.get(config.ID_FIELD))
        get_resource_service(SOURCE).system_update(archive_item.get(config.ID_FIELD), updates, archive_item)

    def remove_rewrite_refs(self, item):
        """Remove the rewrite references from the broadcast item if the re-write is spiked.

        :param dict item: Re-written article of the original story
        """
        if is_genre(item, BROADCAST_GENRE):
            return

        query = {
            'query': {
                'filtered': {
                    'filter': {
                        'and': [
                            {'term': {'genre.name': BROADCAST_GENRE}},
                            {'term': {'broadcast.rewrite_id': item.get(config.ID_FIELD)}}
                        ]
                    }
                }
            }
        }

        req = ParsedRequest()
        req.args = {'source': json.dumps(query)}
        broadcast_items = list(get_resource_service(SOURCE).get(req=req, lookup=None))

        for broadcast_item in broadcast_items:
            try:
                updates = {
                    'broadcast': broadcast_item.get('broadcast', {})
                }

                updates['broadcast']['rewrite_id'] = None

                if 'Re-written' in updates['broadcast'].get('status', ''):
                    updates['broadcast']['status'] = ''

                self._update_broadcast_status(broadcast_item, updates)
            except Exception:
                logger.exception('Failed to remove rewrite id for the broadcast item {}'.
                                 format(broadcast_item.get(config.ID_FIELD)))

    def reset_broadcast_status(self, updates, original):
        """Reset the broadcast status if the broadcast item is updated.

        :param dict updates: updates to the original document
        :param dict original: original document
        """
        if original.get('broadcast') and original.get('broadcast').get('status', ''):
            broadcast_updates = {
                'broadcast': original.get('broadcast'),
            }

            broadcast_updates['broadcast']['status'] = ''
            self._update_broadcast_status(original, broadcast_updates)
            updates.update(broadcast_updates)

    def spike_item(self, original):
        """If Original item is re-write then it will remove the reference from the broadcast item.

        :param: dict original: original document
        """
        broadcast_items = [item for item in self.get_broadcast_items_from_master_story(original)
                           if item.get(ITEM_STATE) not in PUBLISH_STATES]
        spike_service = get_resource_service('archive_spike')

        for item in broadcast_items:
            id_ = item.get(config.ID_FIELD)
            try:
                self.packageService.remove_spiked_refs_from_package(id_)
                updates = {ITEM_STATE: CONTENT_STATE.SPIKED}
                resolve_document_version(updates, SOURCE, 'PATCH', item)
                spike_service.patch(id_, updates)
                insert_into_versions(id_=id_)
            except Exception:
                logger.exception("Failed to spike the related broadcast item {}.".format(id_))

        if original.get('rewrite_of') and original.get(ITEM_STATE) not in PUBLISH_STATES:
            self.remove_rewrite_refs(original)

    def kill_broadcast(self, updates, original):
        """Kill the broadcast items

        :param dict updates:
        :param dict original:
        :return:
        """
        broadcast_items = [item for item in self.get_broadcast_items_from_master_story(original)
                           if item.get(ITEM_STATE) in PUBLISH_STATES]

        correct_service = get_resource_service('archive_correct')
        kill_service = get_resource_service('archive_kill')

        for item in broadcast_items:
            item_id = item.get(config.ID_FIELD)
            packages = self.packageService.get_packages(item_id)

            processed_packages = set()
            for package in packages:
                if str(package[config.ID_FIELD]) in processed_packages:
                    continue
                try:
                    if package.get(ITEM_STATE) in {CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED}:
                        package_updates = {
                            config.LAST_UPDATED: utcnow(),
                            GROUPS: self.packageService.remove_group_ref(package, item_id)
                        }

                        refs = self.packageService.get_residrefs(package_updates)
                        if refs:
                            correct_service.patch(package.get(config.ID_FIELD), package_updates)
                        else:
                            package_updates['body_html'] = updates.get('body_html', '')
                            kill_service.patch(package.get(config.ID_FIELD), package_updates)

                        processed_packages.add(package.get(config.ID_FIELD))
                    else:
                        package_list = self.packageService.remove_refs_in_package(package,
                                                                                  item_id, processed_packages)

                        processed_packages = processed_packages.union(set(package_list))
                except Exception:
                    logger.exception('Failed to remove the broadcast item {} from package {}'.format(
                        item_id, package.get(config.ID_FIELD)
                    ))

            kill_service.kill_item(updates, item)
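
kill_broadcast above guards against handling the same package twice by recording processed ids in a set before moving on. Stripped of the Superdesk services, the idempotency pattern looks like this (names are illustrative):

def for_each_once(packages, handle):
    """Call handle(package) at most once per package id."""
    processed = set()
    for package in packages:
        package_id = str(package['_id'])
        if package_id in processed:
            continue
        handle(package)
        processed.add(package_id)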